diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json
index 904a0846e..97b3e9bfa 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json
@@ -1,151 +1,151 @@
 [
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o CoralBenchmark.o -D__CUDACC__=1 CoralBenchmark.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CoralBenchmark.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CoralBenchmark.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o CycleTracking.o -D__CUDACC__=1 CycleTracking.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o DecompositionObject.o -D__CUDACC__=1 DecompositionObject.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DecompositionObject.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DecompositionObject.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o DirectionCosine.o -D__CUDACC__=1 DirectionCosine.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o EnergySpectrum.o -D__CUDACC__=1 EnergySpectrum.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o GlobalFccGrid.o -D__CUDACC__=1 GlobalFccGrid.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o GridAssignmentObject.o -D__CUDACC__=1 GridAssignmentObject.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o InputBlock.o -D__CUDACC__=1 InputBlock.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/InputBlock.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/InputBlock.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Base_Particle.o -D__CUDACC__=1 MC_Base_Particle.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Base_Particle.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Base_Particle.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Domain.o -D__CUDACC__=1 MC_Domain.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Fast_Timer.o -D__CUDACC__=1 MC_Fast_Timer.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Particle_Buffer.o -D__CUDACC__=1 MC_Particle_Buffer.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle_Buffer.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle_Buffer.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MeshPartition.o -D__CUDACC__=1 MeshPartition.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MeshPartition.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MeshPartition.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MonteCarlo.o -D__CUDACC__=1 MonteCarlo.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MpiCommObject.o -D__CUDACC__=1 MpiCommObject.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MpiCommObject.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MpiCommObject.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Parameters.o -D__CUDACC__=1 Parameters.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Parameters.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Parameters.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o ParticleVault.o -D__CUDACC__=1 ParticleVault.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o ParticleVaultContainer.o -D__CUDACC__=1 ParticleVaultContainer.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVaultContainer.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVaultContainer.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o PopulationControl.o -D__CUDACC__=1 PopulationControl.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o SharedMemoryCommObject.o -D__CUDACC__=1 SharedMemoryCommObject.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/SharedMemoryCommObject.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/SharedMemoryCommObject.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Tallies.o -D__CUDACC__=1 Tallies.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Tallies.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Tallies.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o cmdLineParser.o -D__CUDACC__=1 cmdLineParser.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cmdLineParser.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cmdLineParser.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o cudaFunctions.o -D__CUDACC__=1 cudaFunctions.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o initMC.o -D__CUDACC__=1 initMC.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o main.o -D__CUDACC__=1 main.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o parseUtils.o -D__CUDACC__=1 parseUtils.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/parseUtils.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/parseUtils.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o utils.o -D__CUDACC__=1 utils.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utils.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utils.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o utilsMpi.o -D__CUDACC__=1 utilsMpi.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.cc"
     },
     {
         "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Random.o -D__CUDACC__=1 Random.cc",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Random.cc"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Random.cc"
     },
     {
         "command": "nvcc -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o qs CoralBenchmark.o CycleTracking.o DecompositionObject.o DirectionCosine.o EnergySpectrum.o GlobalFccGrid.o GridAssignmentObject.o InputBlock.o MC_Base_Particle.o MC_Domain.o MC_Fast_Timer.o MC_Particle_Buffer.o MeshPartition.o MonteCarlo.o MpiCommObject.o Parameters.o ParticleVault.o ParticleVaultContainer.o PopulationControl.o SharedMemoryCommObject.o Tallies.o cmdLineParser.o cudaFunctions.o initMC.o main.o parseUtils.o utils.o utilsMpi.o Random.o -D__CUDACC__=1",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src"
     }
 ]
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml
index b2447aeee..bc99732c8 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          3129
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          3193
     Length:          11
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          3300
     Length:          18
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          3331
     Length:          13
     ReplacementText: DPCT_COMPATIBILITY_TEMP
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          3349
     Length:          13
     ReplacementText: DPCT_COMPATIBILITY_TEMP
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          4096
     Length:          13
     ReplacementText: DPCT_COMPATIBILITY_TEMP
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          4281
     Length:          16
     ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&x, v)'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          4331
     Length:          16
     ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&x, 1)'
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Offset:          4392
     Length:          16
     ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&x, v)'
@@ -83,7 +83,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh'
     Digest:          b95fb138e417bb4c5ab15dc45f6ad43d
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -92,7 +92,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -101,7 +101,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml
index f748328f3..0dbb5b0fc 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          3070
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          3455
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          3818
     Length:          8
     ReplacementText: 'sycl::sin(phi)'
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          3847
     Length:          8
     ReplacementText: 'sycl::cos(phi)'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          3878
     Length:          35
     ReplacementText: 'sycl::sqrt((1.0 - (cosTheta * cosTheta)))'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          4069
     Length:          313
     ReplacementText: "sycl::sqrt((1.0 - ((PhysicalConstants::_neutronRestMassEnergy *\n                                 PhysicalConstants::_neutronRestMassEnergy) /\n                                ((energy + PhysicalConstants::_neutronRestMassEnergy) *\n                                 (energy + PhysicalConstants::_neutronRestMassEnergy)))))"
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          4682
     Length:          17
     ReplacementText: 'sycl::log(randomNumber)'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          4720
     Length:          0
     ReplacementText: "/*\nDPCT1110:31: The total declared local variable size in device function CollisionEvent exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          4874
     Length:          20
     ReplacementText: ''
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          4997
     Length:          112
     ReplacementText: ''
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          5714
     Length:          20
     ReplacementText: ''
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          5807
     Length:          99
     ReplacementText: ''
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          5999
     Length:          20
     ReplacementText: ''
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          6190
     Length:          192
     ReplacementText: ''
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          7366
     Length:          20
     ReplacementText: ''
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          7710
     Length:          345
     ReplacementText: ''
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          8377
     Length:          20
     ReplacementText: ''
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          8459
     Length:          90
     ReplacementText: ''
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          8551
     Length:          20
     ReplacementText: ''
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          8756
     Length:          167
     ReplacementText: ''
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          8993
     Length:          20
     ReplacementText: ''
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          9078
     Length:          91
     ReplacementText: ''
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          9224
     Length:          20
     ReplacementText: ''
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          9309
     Length:          90
     ReplacementText: ''
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          9451
     Length:          20
     ReplacementText: ''
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          9603
     Length:          173
     ReplacementText: ''
@@ -235,7 +235,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          11168
     Length:          20
     ReplacementText: ''
@@ -244,7 +244,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          11291
     Length:          112
     ReplacementText: ''
@@ -253,7 +253,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Offset:          11427
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE bool CollisionEvent_host_ct7(MonteCarlo *monteCarlo, MC_Particle &mc_particle, unsigned int tally_index, int particle_index, int *tallyArray)\n{\n\n   const MC_Cell_State &cell = monteCarlo->domain[mc_particle.domain].cell_state[mc_particle.cell];\n\n\n   int globalMatIndex = cell._material;\n\n   //------------------------------------------------------------------------------------------------------------------\n   //    Pick the isotope and reaction.\n   //------------------------------------------------------------------------------------------------------------------\n   double randomNumber = rngSample(&mc_particle.random_number_seed);\n   double totalCrossSection = mc_particle.totalCrossSection;\n   double currentCrossSection = totalCrossSection * randomNumber;\n   int selectedIso = -1;\n   int selectedUniqueNumber = -1;\n   int selectedReact = -1;\n\n   int numIsos = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size();\n\n\n   for (int isoIndex = 0; isoIndex < numIsos && currentCrossSection >= 0; isoIndex++)\n   {\n\n      int uniqueNumber = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid;\n      int numReacts = monteCarlo->_nuclearData->getNumberReactions(uniqueNumber);\n\n      for (int reactIndex = 0; reactIndex < numReacts; reactIndex++)\n      {\n         currentCrossSection -= macroscopicCrossSection(monteCarlo, reactIndex, mc_particle.domain, mc_particle.cell,\n                                                        isoIndex, mc_particle.energy_group);\n         if (currentCrossSection < 0)\n         {\n            selectedIso = isoIndex;\n            selectedUniqueNumber = uniqueNumber;\n            selectedReact = reactIndex;\n            break;\n         }\n      }\n   }\n   qs_assert(selectedIso != -1);\n   qs_assert(selectedUniqueNumber != -1);\n   qs_assert(selectedReact != -1);\n\n   
//------------------------------------------------------------------------------------------------------------------\n   //    Do the collision.\n   //------------------------------------------------------------------------------------------------------------------\n   double energyOut[MAX_PRODUCTION_SIZE];\n   double angleOut[MAX_PRODUCTION_SIZE];\n   int nOut = 0;\n\n   double mat_mass = monteCarlo->_materialDatabase->_mat[globalMatIndex]._mass;\n   monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact].sampleCollision(\n       mc_particle.kinetic_energy, mat_mass, &energyOut[0], &angleOut[0], nOut, &(mc_particle.random_number_seed), MAX_PRODUCTION_SIZE);\n\n\n//--------------------------------------------------------------------------------------------------------------\n//  Post-Collision Phase 1:\n//    Tally the collision\n//--------------------------------------------------------------------------------------------------------------\n\n// Set the reaction for this particle.\n\n   ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._collision);\n\n\n\n   NuclearDataReaction::Enum reactionType = monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact]._reactionType;\n\n\n   switch (reactionType)\n   {\n   case NuclearDataReaction::Scatter:\n\n      ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._scatter);\n\n      break;\n   case NuclearDataReaction::Absorption:\n\n      ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._absorb);\n\n      break;\n   case NuclearDataReaction::Fission:\n\n      ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._fission);\n      ATOMIC_ADD(monteCarlo->_tallies->_balanceTask[tally_index]._produce, nOut);\n\n      break;\n   case NuclearDataReaction::Undefined:\n#ifdef DEBUG\n      printf(\"reactionType invalid\\n\");\n#endif\n      qs_assert(false);\n   }\n\n   if (nOut == 0)\n   {\n      return false;\n 
  }\n\n   for (int secondaryIndex = 1; secondaryIndex < nOut; secondaryIndex++)\n   {\n      // Newly created particles start as copies of their parent\n      MC_Particle secondaryParticle = mc_particle;\n      secondaryParticle.random_number_seed = rngSpawn_Random_Number_Seed(&mc_particle.random_number_seed);\n      secondaryParticle.identifier = secondaryParticle.random_number_seed;\n      updateTrajectory(energyOut[secondaryIndex], angleOut[secondaryIndex], secondaryParticle);\n\n      // Atomic capture will be called here\n      monteCarlo->_particleVaultContainer->addExtraParticle(secondaryParticle);\n   }\n\n   updateTrajectory(energyOut[0], angleOut[0], mc_particle);\n\n   // If a fission reaction produces secondary particles we also add the original\n   // particle to the \"extras\" that we will handle later.  This avoids the\n   // possibility of a particle doing multiple fission reactions in a single\n   // kernel invocation and overflowing the extra storage with secondary particles.\n   if (nOut > 1)\n   {\n      // Atomic capture will be called here\n      monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle);\n   }\n\n// If we are still tracking this particle the update its energy group\n\n   mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy);\n\n\n   return nOut == 1;\n}"
@@ -263,7 +263,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh'
     Digest:          4a7ff666215bd49f99cacb6e5372493a
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -272,7 +272,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -281,7 +281,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml
index 9606427f6..70dd49f44 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          5270
     Length:          20
     ReplacementText: ''
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          5361
     Length:          101
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          6947
     Length:          20
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          7042
     Length:          100
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          7926
     Length:          20
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          8017
     Length:          96
     ReplacementText: ''
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          8738
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE_CUDA void CycleTrackingFunction_host_ct3(MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault, int *tallyArray)\n{\n    bool keepTrackingThisParticle = true;\n    unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications();\n    unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications();\n    unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications();\n\n    int i1 = 0;\n    // The while loop will exit after a particle reaches census or goes through MaxIters iterations, whichever comes first. If a particle reaches MaxIters it will be added to the ExtraVaults and processed in a later kernel. MaxIt can be defined in the makefile, otherwise it defaults to a large number that should ensure that it is never reached.\n    int MaxIters = MaxIt;\n\n    do\n    {\n        // Determine the outcome of a particle at the end of this segment such as:\n        //\n        //   (0) Undergo a collision within the current cell,\n        //   (1) Cross a facet of the current cell,\n        //   (2) Reach the end of the time step and enter census,\n        //\n        MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome_type::Max_Number;\n        i1 += 1;\n        if (keepTrackingThisParticle)\n        {\n\n#ifdef EXPONENTIAL_TALLY\n            monteCarlo->_tallies->TallyCellValue(exp(rngSample(&mc_particle.random_number_seed)), mc_particle.domain, cell_tally_index, mc_particle.cell);\n#endif\n            segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index);\n\n\n            ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._numSegments);\n\n\n\n        mc_particle.num_segments += 1.;  /* Track the number of segments this particle has\n                                            undergone this 
cycle on all processes. */\n        // segment_outcome = keepTrackingThisParticle ? segment_outcome : MC_Segment_Outcome_type::Max_Number;\n        }\n        switch (segment_outcome)\n        {\n\n        case MC_Segment_Outcome_type::Collision:\n        {\n            // The particle undergoes a collision event producing:\n            //   (0) Other-than-one same-species secondary particle, or\n            //   (1) Exactly one same-species secondary particle.\n            if (CollisionEvent(monteCarlo, mc_particle, tally_index, particle_index, tallyArray) == MC_Collision_Event_Return::Continue_Tracking)\n            {\n                keepTrackingThisParticle = true;\n            }\n            else\n            {\n                keepTrackingThisParticle = false;\n            }\n        }\n        break;\n\n        case MC_Segment_Outcome_type::Facet_Crossing:\n        {\n            // The particle has reached a cell facet.\n            MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault);\n\n            if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit)\n            {\n                keepTrackingThisParticle = true; // Transit Event\n            }\n            else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape)\n            {\n\n                ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._escape);\n\n\n                mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape;\n                mc_particle.species = -1;\n                keepTrackingThisParticle = false;\n            }\n            else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection)\n            {\n                MCT_Reflect_Particle(monteCarlo, mc_particle);\n                keepTrackingThisParticle = true;\n            }\n            else\n            {\n                // Enters an adjacent cell in an off-processor domain.\n             
   keepTrackingThisParticle = false;\n            }\n        }\n        break;\n\n        case MC_Segment_Outcome_type::Census:\n        {\n            // The particle has reached the end of the time step.\n            processedVault->pushParticle(mc_particle);\n\n            ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._census);\n\n\n            keepTrackingThisParticle = false;\n        }\n        break;\n\n        case MC_Segment_Outcome_type::Max_Number:\n        {\n\n            keepTrackingThisParticle = false;\n        }\n        break;\n\n        default:\n            qs_assert(false);\n            keepTrackingThisParticle = false;\n            break; // should this be an error\n        }\n    } while (keepTrackingThisParticle && i1 < MaxIt);\n\n    if (keepTrackingThisParticle == false)\n    {\n        processingVault->invalidateParticle(particle_index);\n    }\n    else\n    {\n        monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle);\n    }\n}"
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Offset:          8740
     Length:          0
     ReplacementText: "/*\nDPCT1110:32: The total declared local variable size in device function CycleTrackingGuts exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -74,7 +74,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh'
     Digest:          988d9764f5170ed6e4181d97f202ea12
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -83,7 +83,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -92,7 +92,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml
index cc4cf750a..7dbab02a3 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          0
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3101
     Length:          9
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3110
     Length:          10
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3146
     Length:          9
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3155
     Length:          10
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3231
     Length:          10
     ReplacementText: ''
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Offset:          3318
     Length:          10
     ReplacementText: ''
@@ -65,7 +65,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh'
     Digest:          b27ca1aa9f3c6327a99223417c8c8855
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -74,7 +74,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -83,7 +83,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml
index 1249091fd..f1924db69 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
     Offset:          3082
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
     Offset:          8229
     Length:          37
     ReplacementText: 'sycl::sqrt((1.0 - (cos_theta * cos_theta)))'
@@ -20,7 +20,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh'
     Digest:          bdb2736f769bc82a098178607a09589d
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -29,7 +29,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -38,7 +38,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml
index 08fd45153..7457d22ec 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          3881
     Length:          0
     ReplacementText: "\nHOST_DEVICE\nMC_Nearest_Facet MCT_Nearest_Facet_host_ct2(\n    MC_Particle *mc_particle,\n    MC_Location &location,\n    MC_Vector &coordinate,\n    const DirectionCosine *direction_cosine,\n    double distance_threshold,\n    double current_best_distance,\n    bool new_segment,\n    MonteCarlo *monteCarlo);"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          4069
     Length:          0
     ReplacementText: "\nHOST_DEVICE\nvoid MCT_Generate_Coordinate_3D_G_host_ct9(\n    uint64_t *random_number_seed,\n    int domain_num,\n    int cell,\n    MC_Vector &coordinate,\n    MonteCarlo *monteCarlo);"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          4380
     Length:          20
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          4465
     Length:          72
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          4674
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE\n\n    Subfacet_Adjacency &\n    MCT_Adjacent_Facet_host_ct0(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo *monteCarlo)\n\n{\n\n   MC_Domain &domain = monteCarlo->domain[location.domain];\n\n   Subfacet_Adjacency &adjacency = domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet;\n\n   return adjacency;\n}"
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          4772
     Length:          0
     ReplacementText: "\nHOST_DEVICE\nvoid MCT_Reflect_Particle_host_ct4(MonteCarlo *mcco, MC_Particle &particle);"
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          8118
     Length:          20
     ReplacementText: ''
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          8203
     Length:          72
     ReplacementText: ''
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          9522
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE\n    MC_Nearest_Facet\n    MCT_Nearest_Facet_host_ct2(MC_Particle *mc_particle,\n                      MC_Location &location,\n                      MC_Vector &coordinate,\n                      const DirectionCosine *direction_cosine,\n                      double distance_threshold,\n                      double current_best_distance,\n                      bool new_segment,\n                      MonteCarlo *monteCarlo)\n{\n   //    #ifndef BCMN_HAVE_OPENMP\n   //    MC_FASTTIMER_START(MC_Fast_Timer::Nearest_Facet);\n   //    #endif\n   //\n\n   if (location.domain < 0 || location.cell < 0)\n   {\n      qs_assert(false);\n      //         std::string output_string;\n      //         mc_particle->Copy_Particle_To_String(output_string);\n      //         MC_Fatal_Jump( \"Bad location value. region: %d domain: %d, cell: %d.\\nParticle record\\n%s\\n\",\n      //                          location.region, location.domain, location.cell, output_string.c_str());\n   }\n\n   MC_Domain &domain = monteCarlo->domain[location.domain];\n\n\n   MC_Nearest_Facet nearest_facet =\n       MCT_Nearest_Facet_3D_G(mc_particle, domain, location, coordinate, direction_cosine);\n\n   if (nearest_facet.distance_to_facet < 0)\n   {\n      nearest_facet.distance_to_facet = 0;\n   }\n\n   if (nearest_facet.distance_to_facet >= PhysicalConstants::_hugeDouble)\n   {\n      qs_assert(false);\n      //        MC_Warning( \"Infinite distance (cell not bound) for location [Reg:%d Local Dom:%d \"\n      //                    \"Global Dom: %d Cell:%d Fac:%d], coordinate (%g %g %g) and direction (%g %g %g).\\n\",\n      //                    location.region, location.domain,\n      //                    mcco->region->Global_Domain_Number(location.region, location.domain),\n      //                    location.cell, location.facet,\n      //                    coordinate.x, coordinate.y, coordinate.z,\n      //                    direction_cosine->alpha, 
direction_cosine->beta, direction_cosine->gamma);\n      //        if ( mc_particle )\n      //        {\n      //           MC_Warning( \"mc_particle.identifier %\" PRIu64 \"\\n\", mc_particle->identifier );\n      //        }\n   }\n\n   //    #ifndef BCMN_HAVE_OPENMP\n   //    MC_FASTTIMER_STOP(MC_Fast_Timer::Nearest_Facet);\n   //    #endif\n\n   return nearest_facet;\n}"
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          9627
     Length:          0
     ReplacementText: "/*\nDPCT1110:29: The total declared local variable size in device function MCT_Generate_Coordinate_3D_G exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          10022
     Length:          20
     ReplacementText: ''
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          10108
     Length:          73
     ReplacementText: ''
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          12500
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE_CUDA void MCT_Generate_Coordinate_3D_G_host_ct9(uint64_t *random_number_seed,\n                                                          int domain_num,\n                                                          int cell,\n                                                          MC_Vector &coordinate,\n                                                          MonteCarlo *monteCarlo)\n{\n\n   const MC_Domain &domain = monteCarlo->domain[domain_num];\n\n\n   // Determine the cell-center nodal point coordinates.\n   MC_Vector center = MCT_Cell_Position_3D_G(domain, cell);\n\n   int num_facets = domain.mesh._cellConnectivity[cell].num_facets;\n   if (num_facets == 0)\n   {\n      coordinate.x = coordinate.y = coordinate.z = 0;\n      return;\n   }\n\n   double random_number = rngSample(random_number_seed);\n   double which_volume = random_number * 6.0 * domain.cell_state[cell]._volume;\n\n   // Find the tet to sample from.\n   double current_volume = 0.0;\n   int facet_index = -1;\n   const MC_Vector *point0 = NULL;\n   const MC_Vector *point1 = NULL;\n   const MC_Vector *point2 = NULL;\n   while (current_volume < which_volume)\n   {\n      facet_index++;\n\n      if (facet_index == num_facets)\n      {\n         break;\n      }\n\n      int facet_points[3];\n      MCT_Facet_Points_3D_G(domain, cell, facet_index, 3, facet_points);\n      point0 = &domain.mesh._node[facet_points[0]];\n      point1 = &domain.mesh._node[facet_points[1]];\n      point2 = &domain.mesh._node[facet_points[2]];\n\n      double subvolume = MCT_Cell_Volume_3D_G_vector_tetDet(*point0, *point1, *point2, center);\n      current_volume += subvolume;\n   }\n\n   // Sample from the tet.\n   double r1 = rngSample(random_number_seed);\n   double r2 = rngSample(random_number_seed);\n   double r3 = rngSample(random_number_seed);\n\n   // Cut and fold cube into prism.\n   if (r1 + r2 > 1.0)\n   {\n      r1 = 1.0 - r1;\n      r2 = 1.0 - r2;\n   }\n   // Cut and 
fold prism into tetrahedron.\n   if (r2 + r3 > 1.0)\n   {\n      double tmp = r3;\n      r3 = 1.0 - r1 - r2;\n      r2 = 1.0 - tmp;\n   }\n   else if (r1 + r2 + r3 > 1.0)\n   {\n      double tmp = r3;\n      r3 = r1 + r2 + r3 - 1.0;\n      r1 = 1.0 - r2 - tmp;\n   }\n\n   // numbers 1-4 are the barycentric coordinates of the random point.\n   double r4 = 1.0 - r1 - r2 - r3;\n\n   // error check\n   if ((point0 == NULL) || (point1 == NULL) || (point2 == NULL))\n   {\n      MC_Fatal_Jump(\"Programmer Error: points must not be NULL: point0=%p point1=%p point2=%p\",\n                    point0, point1, point2);\n      return;\n   }\n\n   coordinate.x = (r4 * center.x + r1 * point0->x + r2 * point1->x + r3 * point2->x);\n   coordinate.y = (r4 * center.y + r1 * point0->y + r2 * point1->y + r3 * point2->y);\n   coordinate.z = (r4 * center.z + r1 * point0->z + r2 * point1->z + r3 * point2->z);\n}"
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          21135
     Length:          20
     ReplacementText: ''
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          21222
     Length:          74
     ReplacementText: ''
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          22265
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE void MCT_Reflect_Particle_host_ct4(MonteCarlo *monteCarlo, MC_Particle &particle)\n{\n   DirectionCosine *direction_cosine = particle.Get_Direction_Cosine();\n   MC_Location location = particle.Get_Location();\n\n\n   const MC_Domain &domain = location.get_domain(monteCarlo);\n\n   const MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[location.facet];\n\n   MC_Vector facet_normal(plane.A, plane.B, plane.C);\n\n   double dot = 2.0 * (direction_cosine->alpha * facet_normal.x +\n                       direction_cosine->beta * facet_normal.y +\n                       direction_cosine->gamma * facet_normal.z);\n\n   if (dot > 0) // do not reflect a particle that is ALREADY pointing inward\n   {\n      // reflect the particle\n      direction_cosine->alpha -= dot * facet_normal.x;\n      direction_cosine->beta -= dot * facet_normal.y;\n      direction_cosine->gamma -= dot * facet_normal.z;\n   }\n\n   // Calculate the reflected, velocity components.\n   double particle_speed = particle.velocity.Length();\n   particle.velocity.x = particle_speed * particle.direction_cosine.alpha;\n   particle.velocity.y = particle_speed * particle.direction_cosine.beta;\n   particle.velocity.z = particle_speed * particle.direction_cosine.gamma;\n}"
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Offset:          26507
     Length:          0
     ReplacementText: "   /*\n   DPCT1110:28: The total declared local variable size in device function MCT_Nearest_Facet_3D_G exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n   */\n"
@@ -155,7 +155,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh'
     Digest:          c5cb5924e7e11dcced00f7acc3b224d6
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -164,7 +164,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -173,7 +173,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o
index f1a664afd..db03f6ec0 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml
index 8a1843694..623b0e7f3 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          3068
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7089
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7199
     Length:          21
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7221
     Length:          0
     ReplacementText: '.wait())'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7291
     Length:          0
     ReplacementText: "            /*\n            DPCT1064:37: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n            */\n"
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7312
     Length:          78
     ReplacementText: 'DPCT_CHECK_ERROR(cell_state_h[j]._total = sycl::malloc_device<double>(numEnergyGroups, dpct::get_in_order_queue()))'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7414
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7509
     Length:          23
     ReplacementText: ''
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7533
     Length:          0
     ReplacementText: '.wait())'
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7547
     Length:          0
     ReplacementText: "        /*\n        DPCT1064:38: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n        */\n"
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7564
     Length:          95
     ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].cell_state = sycl::malloc_device<MC_Cell_State>(domain[i].cell_state.size(), dpct::get_in_order_queue()))'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7835
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7931
     Length:          23
     ReplacementText: ''
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          7955
     Length:          0
     ReplacementText: '.wait())'
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8118
     Length:          0
     ReplacementText: "        /*\n        DPCT1064:39: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n        */\n"
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8135
     Length:          91
     ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._nbrRank = sycl::malloc_device<int>(domain[i].mesh._nbrRank.size(), dpct::get_in_order_queue()))'
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8246
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8365
     Length:          23
     ReplacementText: ''
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8389
     Length:          0
     ReplacementText: '.wait())'
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8459
     Length:          0
     ReplacementText: "        /*\n        DPCT1064:40: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n        */\n"
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8476
     Length:          92
     ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._node = sycl::malloc_device<MC_Vector>(domain_h[i].mesh._nodeSize, dpct::get_in_order_queue()))'
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8588
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8703
     Length:          23
     ReplacementText: ''
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          8727
     Length:          0
     ReplacementText: '.wait())'
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9264
     Length:          0
     ReplacementText: "           /*\n           DPCT1064:41: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n           */\n"
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9284
     Length:          91
     ReplacementText: 'DPCT_CHECK_ERROR(cellConnectivity[j]._point = sycl::malloc_device<int>(cellConnectivity[j].num_points, dpct::get_in_order_queue()))'
@@ -235,7 +235,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9398
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -244,7 +244,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9521
     Length:          23
     ReplacementText: ''
@@ -253,7 +253,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9545
     Length:          0
     ReplacementText: '.wait())'
@@ -262,7 +262,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9548
     Length:          0
     ReplacementText: "           /*\n           DPCT1064:42: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n           */\n"
@@ -271,7 +271,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9568
     Length:          106
     ReplacementText: 'DPCT_CHECK_ERROR(cellConnectivity[j]._facet = sycl::malloc_device<MC_Facet_Adjacency>(cellConnectivity[j].num_facets, dpct::get_in_order_queue()))'
@@ -280,7 +280,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9697
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -289,7 +289,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9835
     Length:          23
     ReplacementText: ''
@@ -298,7 +298,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9859
     Length:          0
     ReplacementText: '.wait())'
@@ -307,7 +307,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9872
     Length:          0
     ReplacementText: "        /*\n        DPCT1064:43: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n        */\n"
@@ -316,7 +316,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          9889
     Length:          111
     ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._cellConnectivity = sycl::malloc_device<MC_Facet_Adjacency_Cell>(_cellConnectivitySize, dpct::get_in_order_queue()))'
@@ -325,7 +325,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10020
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -334,7 +334,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10136
     Length:          23
     ReplacementText: ''
@@ -343,7 +343,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10160
     Length:          0
     ReplacementText: '.wait())'
@@ -352,7 +352,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10590
     Length:          0
     ReplacementText: "            /*\n            DPCT1064:44: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n            */\n"
@@ -361,7 +361,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10611
     Length:          92
     ReplacementText: 'DPCT_CHECK_ERROR(cellGeometry[j]._facet = sycl::malloc_device<MC_General_Plane>(cellGeometry[j]._size, dpct::get_in_order_queue()))'
@@ -370,7 +370,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10727
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -379,7 +379,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10846
     Length:          23
     ReplacementText: ''
@@ -388,7 +388,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10870
     Length:          0
     ReplacementText: '.wait())'
@@ -397,7 +397,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10883
     Length:          0
     ReplacementText: "        /*\n        DPCT1064:45: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n        */\n"
@@ -406,7 +406,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          10900
     Length:          102
     ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._cellGeometry = sycl::malloc_device<MC_Facet_Geometry_Cell>(_cellGeometrySize, dpct::get_in_order_queue()))'
@@ -415,7 +415,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11022
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -424,7 +424,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11125
     Length:          23
     ReplacementText: ''
@@ -433,7 +433,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11149
     Length:          0
     ReplacementText: '.wait())'
@@ -442,7 +442,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11201
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -451,7 +451,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11260
     Length:          23
     ReplacementText: ''
@@ -460,7 +460,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Offset:          11284
     Length:          0
     ReplacementText: '.wait())'
@@ -470,7 +470,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh'
     Digest:          74e3ff7df1b40e6fb49829ea224af620
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -479,7 +479,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -488,7 +488,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml
index 3441b3655..685687728 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
     Offset:          5710
     Length:          20
     ReplacementText: ''
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
     Offset:          5859
     Length:          172
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
     Offset:          6308
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE\n\n    MC_Tally_Event::Enum\n    MC_Facet_Crossing_Event_host_ct8(MC_Particle &mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault)\n{\n    MC_Location location = mc_particle.Get_Location();\n\n    Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo);\n\n    if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor)\n    {\n        // The particle will enter into an adjacent cell.\n        mc_particle.domain = facet_adjacency.adjacent.domain;\n        mc_particle.cell = facet_adjacency.adjacent.cell;\n        mc_particle.facet = facet_adjacency.adjacent.facet;\n        mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit;\n    }\n    else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape)\n    {\n        // The particle will escape across the system boundary.\n        mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape;\n    }\n    else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection)\n    {\n        // The particle will reflect off of the system boundary.\n        mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection;\n    }\n    else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor)\n    {\n        // The particle will enter into an adjacent cell on a spatial neighbor.\n        // The neighboring domain is on another processor. 
Set domain local domain on neighbor proc\n\n        mc_particle.domain = facet_adjacency.adjacent.domain;\n        mc_particle.cell = facet_adjacency.adjacent.cell;\n        mc_particle.facet = facet_adjacency.adjacent.facet;\n        mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication;\n\n\n        // Select particle buffer\n        int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index];\n\n\n        processingVault->putParticle(mc_particle, particle_index);\n\n        // Push neighbor rank and mc_particle onto the send queue\n        monteCarlo->_particleVaultContainer->getSendQueue()->push(neighbor_rank, particle_index);\n    }\n\n    return mc_particle.last_event;\n}"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
     Offset:          6486
     Length:          0
     ReplacementText: "\nHOST_DEVICE\nMC_Tally_Event::Enum MC_Facet_Crossing_Event_host_ct8(MC_Particle &mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault);"
@@ -38,7 +38,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh'
     Digest:          0542db4059d5ecfa00807413ab577331
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -47,7 +47,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -56,7 +56,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml
index 33d6cce10..369ec4dc1 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh'
     Offset:          116
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -11,7 +11,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh'
     Digest:          25e923f58af9f1fe977f65ecc36dd782
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -20,7 +20,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -29,7 +29,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml
index 86447a50a..c6d6e397e 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7178
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:0: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7269
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:1: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7354
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:2: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7475
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:3: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7532
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:4: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7581
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:5: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7638
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:6: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7698
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:7: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7744
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:8: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7806
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:9: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7863
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:10: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7925
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:11: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          7987
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:12: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8041
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:13: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8094
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:14: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8151
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:15: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8206
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:16: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8253
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:17: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8303
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:18: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8351
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:19: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8406
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:20: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8455
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:21: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8502
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:22: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8550
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:23: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Offset:          8603
     Length:          0
     ReplacementText: "    /*\n    DPCT1040:24: Use sycl::stream instead of printf if your code is used on the device.\n    */\n"
@@ -227,7 +227,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh'
     Digest:          527af0727d791ca55e1a8b82f6151284
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -236,7 +236,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -245,7 +245,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml
index 1d3125223..f32d01536 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
     Offset:          1546
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
     Offset:          1908
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
     Offset:          3030
     Length:          0
     ReplacementText: "/*\nDPCT1110:30: The total declared local variable size in device function MC_Segment_Outcome exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
     Offset:          5052
     Length:          18
     ReplacementText: 'sycl::log(random_number)'
@@ -38,7 +38,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh'
     Digest:          8ca386619c55f71fc592b27f15599c56
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -47,7 +47,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -56,7 +56,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml
index b3ffd5b27..db673e190 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
     Offset:          2069
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
     Offset:          2680
     Length:          13
     ReplacementText: DPCT_COMPATIBILITY_TEMP
@@ -20,7 +20,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh'
     Digest:          36feb2beaac4882ff223e7d81f8a9db8
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -29,7 +29,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -38,7 +38,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml
index 1bcbf54ba..c4d49d7a0 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
     Offset:          3068
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
     Offset:          4788
     Length:          27
     ReplacementText: 'sycl::sqrt(x * x + y * y + z * z)'
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
     Offset:          4961
     Length:          81
     ReplacementText: 'sycl::sqrt((x - vv.x) * (x - vv.x) + (y - vv.y) * (y - vv.y) + (z - vv.z) * (z - vv.z))'
@@ -29,7 +29,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh'
     Digest:          0ca62b380fd6ba895e7109a8d0c566c0
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -38,7 +38,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -47,7 +47,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml
index 3099bb689..0e9032a5f 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          3466
     Length:          0
     ReplacementText: "\nHOST_DEVICE\ndouble macroscopicCrossSection_host_ct5(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex,\n                               int isoIndex, int energyGroup);"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          3662
     Length:          0
     ReplacementText: "\nHOST_DEVICE\ndouble weightedMacroscopicCrossSection_host_ct6(MonteCarlo *monteCarlo, int taskIndex, int domainIndex,\n                                       int cellIndex, int energyGroup);"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          4337
     Length:          20
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          4544
     Length:          208
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          5047
     Length:          20
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          5255
     Length:          209
     ReplacementText: ''
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          5552
     Length:          20
     ReplacementText: ''
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          5952
     Length:          387
     ReplacementText: ''
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          6412
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE double macroscopicCrossSection_host_ct5(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex,\n                                                  int isoIndex, int energyGroup)\n{\n// Initialize various data items.\n\n   int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material;\n   double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction;\n\n\n   double microscopicCrossSection = 0.0;\n   // The cell number density is the fraction of the atoms in cell\n   // volume of this isotope.  We set this (elsewhere) to 1/nIsotopes.\n   // This is a statement that we treat materials as if all of their\n   // isotopes are present in equal amounts\n\n\n   double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity;\n   int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid;\n\n   if (atomFraction == 0.0 || cellNumberDensity == 0.0)\n   {\n      return 1e-20;\n   }\n\n\n   if (reactionIndex < 0)\n   {\n      // Return total cross section\n      microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup);\n   }\n   else\n   {\n      // Return the reaction cross section\n      microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, isotopeGid, energyGroup);\n   }\n\n\n   return atomFraction * cellNumberDensity * microscopicCrossSection;\n}"
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          7198
     Length:          20
     ReplacementText: ''
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          7341
     Length:          132
     ReplacementText: ''
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          7600
     Length:          20
     ReplacementText: ''
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          7786
     Length:          190
     ReplacementText: ''
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Offset:          8272
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE double weightedMacroscopicCrossSection_host_ct6(MonteCarlo *monteCarlo, int taskIndex, int domainIndex,\n                                                          int cellIndex, int energyGroup)\n{\n\n   double *precomputedCrossSection =\n       &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup];\n\n   qs_assert(precomputedCrossSection != NULL);\n   if (*precomputedCrossSection > 0.0)\n      return *precomputedCrossSection;\n\n\n   int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material;\n   int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size();\n\n   double sum = 0.0;\n   for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++)\n   {\n      sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex,\n                                     isoIndex, energyGroup);\n   }\n\n   ATOMIC_WRITE(*precomputedCrossSection, sum);\n\n   return sum;\n}"
@@ -128,7 +128,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh'
     Digest:          41eb50e42544ae2d846972b7c47008f8
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -137,7 +137,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -146,7 +146,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml
index b8a6b8d3c..2ff4c1bf3 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml
@@ -1,7 +1,7 @@
 ---
 MainSourceFile:  MainSrcFiles_placehold
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc'
     Offset:          88
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
     Offset:          2148
     Length:          0
     ReplacementText: _host_ct1
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
     Offset:          2604
     Length:          0
     ReplacementText: _host_ct1
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
     Offset:          0
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
     Offset:          3649
     Length:          26
     ReplacementText: 'std::min(std::max(0, tt.x()), _nx-1)'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
     Offset:          3689
     Length:          26
     ReplacementText: 'std::min(std::max(0, tt.y()), _ny-1)'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
     Offset:          3729
     Length:          26
     ReplacementText: 'std::min(std::max(0, tt.z()), _nz-1)'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          0
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          117
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3064
     Length:          23
     ReplacementText: 'std::min(minCenter, iCenter)'
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3634
     Length:          10
     ReplacementText: 'std::max(0, ix)'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3654
     Length:          10
     ReplacementText: 'std::max(0, iy)'
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3674
     Length:          10
     ReplacementText: 'std::max(0, iz)'
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3694
     Length:          14
     ReplacementText: 'std::min(_nx-1, ix)'
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3718
     Length:          14
     ReplacementText: 'std::min(_ny-1, iy)'
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Offset:          3742
     Length:          14
     ReplacementText: 'std::min(_nz-1, iz)'
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc'
     Offset:          501
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc'
     Offset:          1691
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          3015
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          4027
     Length:          19
     ReplacementText: '0'
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          4104
     Length:          19
     ReplacementText: '0'
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          4176
     Length:          19
     ReplacementText: '0'
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          4259
     Length:          19
     ReplacementText: '0'
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          7079
     Length:          19
     ReplacementText: '0'
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          7161
     Length:          19
     ReplacementText: '0'
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8250
     Length:          24
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_nuclearData, dpct::get_in_order_queue())'
@@ -235,7 +235,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8274
     Length:          0
     ReplacementText: ')'
@@ -244,7 +244,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8337
     Length:          28
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_materialDatabase, dpct::get_in_order_queue())'
@@ -253,7 +253,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8365
     Length:          0
     ReplacementText: ')'
@@ -262,7 +262,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8542
     Length:          19
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(domain_d, dpct::get_in_order_queue())'
@@ -271,7 +271,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8561
     Length:          0
     ReplacementText: ')'
@@ -280,7 +280,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8581
     Length:          21
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_material_d, dpct::get_in_order_queue())'
@@ -289,7 +289,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8602
     Length:          0
     ReplacementText: ')'
@@ -298,7 +298,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8622
     Length:          24
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_nuclearData_d, dpct::get_in_order_queue())'
@@ -307,7 +307,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Offset:          8646
     Length:          0
     ReplacementText: ')'
@@ -316,7 +316,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc'
     Offset:          267
     Length:          0
     ReplacementText: "\n#include <cmath>\n"
@@ -325,7 +325,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3015
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -334,7 +334,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3154
     Length:          11
     ReplacementText: ''
@@ -343,7 +343,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3183
     Length:          0
     ReplacementText: 'const sycl::nd_item<3> &item_ct1'
@@ -352,7 +352,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3236
     Length:          0
     ReplacementText: item_ct1
@@ -361,7 +361,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3394
     Length:          24
     ReplacementText: "dpct::get_in_order_queue().parallel_for(\n          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n          [=](sycl::nd_item<3> item_ct1) {\n            testname::WarmUpKernel(item_ct1);\n          });"
@@ -370,7 +370,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3418
     Length:          1
     ReplacementText: ''
@@ -379,7 +379,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3477
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -388,7 +388,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3558
     Length:          4
     ReplacementText: 'sycl::range<3>'
@@ -397,7 +397,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3570
     Length:          4
     ReplacementText: 'sycl::range<3>'
@@ -406,7 +406,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3772
     Length:          7
     ReplacementText: 'block[2]'
@@ -415,7 +415,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3805
     Length:          7
     ReplacementText: 'block[1]'
@@ -424,7 +424,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          3822
     Length:          7
     ReplacementText: 'block[0]'
@@ -433,7 +433,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4069
     Length:          6
     ReplacementText: 'grid[2]'
@@ -442,7 +442,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4098
     Length:          6
     ReplacementText: 'grid[1]'
@@ -451,7 +451,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4118
     Length:          6
     ReplacementText: 'grid[0]'
@@ -460,7 +460,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4210
     Length:          6
     ReplacementText: 'grid[2]'
@@ -469,7 +469,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4243
     Length:          6
     ReplacementText: 'grid[1]'
@@ -478,7 +478,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4295
     Length:          6
     ReplacementText: 'grid[0]'
@@ -487,7 +487,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4404
     Length:          6
     ReplacementText: 'grid[2]'
@@ -496,7 +496,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4437
     Length:          6
     ReplacementText: 'grid[1]'
@@ -505,7 +505,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Offset:          4470
     Length:          6
     ReplacementText: 'grid[0]'
@@ -514,7 +514,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          3015
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -523,7 +523,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          4878
     Length:          19
     ReplacementText: '0'
@@ -532,7 +532,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          5362
     Length:          94
     ReplacementText: 'DPCT_CHECK_ERROR(ptr_dm = (void *)sycl::malloc_device(monteCarlo->_materialDatabase->_mat.size()*sizeof(Material_d), dpct::get_in_order_queue()))'
@@ -541,7 +541,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          5576
     Length:          54
     ReplacementText: 'DPCT_CHECK_ERROR(ptr_dn = (void *)sycl::malloc_device(sizeof(NuclearData_d), dpct::get_in_order_queue()))'
@@ -550,7 +550,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          5705
     Length:          81
     ReplacementText: 'DPCT_CHECK_ERROR(ptr_dmesh = (void *)sycl::malloc_device(monteCarlo->domain.size()*sizeof(MC_Domain_d), dpct::get_in_order_queue()))'
@@ -559,7 +559,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          6115
     Length:          26
     ReplacementText: 'DPCT_CHECK_ERROR(Ngpus = dpct::dev_mgr::instance().device_count())'
@@ -568,7 +568,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          6594
     Length:          0
     ReplacementText: "            /*\n            DPCT1093:55: The \"GPUID\" device may be not the one intended for use. Adjust the selected device if needed.\n            */\n"
@@ -577,7 +577,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          6615
     Length:          13
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device'
@@ -586,7 +586,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Offset:          6635
     Length:          0
     ReplacementText: ')'
@@ -595,7 +595,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          3015
     Length:          31
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -604,7 +604,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          4038
     Length:          26
     ReplacementText: 'DPCT_CHECK_ERROR(Ngpus = dpct::dev_mgr::instance().device_count())'
@@ -613,7 +613,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          4138
     Length:          0
     ReplacementText: "    /*\n    DPCT1093:52: The \"GPUID\" device may be not the one intended for use. Adjust the selected device if needed.\n    */\n"
@@ -622,7 +622,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          4151
     Length:          13
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device'
@@ -631,7 +631,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          4171
     Length:          0
     ReplacementText: ')'
@@ -640,7 +640,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          5206
     Length:          81
     ReplacementText: 'DPCT_CHECK_ERROR(tallies = (uint64_cu *)sycl::malloc_host(sizeof(uint64_cu) * NUM_TALLIES * replications, dpct::get_in_order_queue()))'
@@ -649,7 +649,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          5330
     Length:          79
     ReplacementText: 'DPCT_CHECK_ERROR(tallies_d = (uint64_cu *)sycl::malloc_device(sizeof(uint64_cu) * NUM_TALLIES * replications, dpct::get_in_order_queue()))'
@@ -658,7 +658,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          5601
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -667,7 +667,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          5678
     Length:          24
     ReplacementText: ''
@@ -676,7 +676,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          5703
     Length:          0
     ReplacementText: '.wait())'
@@ -685,7 +685,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          6228
     Length:          21
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(tallies, dpct::get_in_order_queue())'
@@ -694,7 +694,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          6249
     Length:          0
     ReplacementText: ')'
@@ -703,7 +703,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          6265
     Length:          19
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(tallies_d, dpct::get_in_order_queue())'
@@ -712,7 +712,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          6284
     Length:          0
     ReplacementText: ')'
@@ -721,7 +721,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7552
     Length:          23
     ReplacementText: ''
@@ -730,7 +730,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7575
     Length:          11
     ReplacementText: ''
@@ -739,7 +739,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7735
     Length:          0
     ReplacementText: ",\n                                                const sycl::nd_item<3> &item_ct1,\n                                                uint8_t *dpct_local"
@@ -748,7 +748,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7780
     Length:          0
     ReplacementText: item_ct1
@@ -757,7 +757,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7822
     Length:          0
     ReplacementText: item_ct1
@@ -766,7 +766,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          7904
     Length:          33
     ReplacementText: 'auto values_l = (int *)dpct_local;'
@@ -775,7 +775,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          8039
     Length:          15
     ReplacementText: "/*\n    DPCT1065:53: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
@@ -784,7 +784,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          8212
     Length:          15
     ReplacementText: "/*\n    DPCT1065:54: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
@@ -793,7 +793,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          10887
     Length:          0
     ReplacementText: "                        /*\n                        DPCT1049:33: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n                        */\n"
@@ -802,7 +802,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          10911
     Length:          156
     ReplacementText: "{\n                          dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64});\n                          dpct::get_in_order_queue().submit(\n                            [&](sycl::handler &cgh) {\n                              /*\n                              DPCT1083:56: The size of local memory in the migrated code may be different from the original code. Check that the allocated memory size in the migrated code is correct.\n                              */\n                              sycl::local_accessor<uint8_t, 1> dpct_local_acc_ct1(sycl::range<1>(NUM_TALLIES * replications * sizeof(int)), cgh);\n\n                              cgh.parallel_for(\n                                sycl::nd_range<3>(sycl::range<3>(1, 1, num_wgs) * sycl::range<3>(1, 1, wg_size), sycl::range<3>(1, 1, wg_size)), \n                                [=](sycl::nd_item<3> item_ct1) {\n                                  CycleTrackingKernel(monteCarlo, numParticles, processingVault, processedVault, tallies_d, item_ct1, dpct_local_acc_ct1.get_pointer());\n                                });\n                            });\n                        }"
@@ -811,7 +811,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          11067
     Length:          1
     ReplacementText: ''
@@ -820,7 +820,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          11179
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -829,7 +829,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          11238
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -838,7 +838,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          11315
     Length:          24
     ReplacementText: ''
@@ -847,7 +847,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          11340
     Length:          0
     ReplacementText: '.wait())'
@@ -856,7 +856,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          13614
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -865,7 +865,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          13691
     Length:          24
     ReplacementText: ''
@@ -874,7 +874,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Offset:          13716
     Length:          0
     ReplacementText: '.wait())'
@@ -884,27 +884,27 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc'
     Digest:          f62fe054a60de7c39322557f627f2be9
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc'
     Digest:          9e9b5ddc0d8c6d2b88a226b622c2e957
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc'
     Digest:          c444dc769c997afe7e29ca950cdfdadc
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc'
     Digest:          11ffc8d96e80cdb070efba9a49ce4a3c
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc'
     Digest:          0d6ea0bd49a66cb8fe99f55f55498dc8
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc'
     Digest:          349b5af82883464b77234521083d1aec
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc'
     Digest:          b320237ce90b5c6fe1c7a592b30434b3
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc'
     Digest:          bc5aa455481a9f7636f401db17783dda
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc'
     Digest:          332692e08b5b3008eb41ac16407ade5c
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc'
     Digest:          e53e6b957b7d1551e9d3981c5a878e3f
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc'
     Digest:          a0beca8ec967f380d8b8819024a8f2f2
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -1001,7 +1001,7 @@ CompileTargets:
       Compiler:        nvcc
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -1010,7 +1010,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml
index 7a6016dfb..b49a4595a 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          3072
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5258
     Length:          0
     ReplacementText: "      /*\n      DPCT1064:51: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n      */\n"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5273
     Length:          57
     ReplacementText: 'DPCT_CHECK_ERROR(local_I_d = sycl::malloc_device<Isotope>(isosize, dpct::get_in_order_queue()))'
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5348
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5446
     Length:          23
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5470
     Length:          0
     ReplacementText: '.wait())'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5637
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5709
     Length:          23
     ReplacementText: ''
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Offset:          5733
     Length:          0
     ReplacementText: '.wait())'
@@ -83,7 +83,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh'
     Digest:          2b1834f6239e53594a29b9e21f1ad0e7
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -92,7 +92,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -101,7 +101,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml
index 21b0243d5..f71400010 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
     Offset:          3068
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
     Offset:          3743
     Length:          19
     ReplacementText: '0'
@@ -20,7 +20,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh'
     Digest:          d666f14739e9728ac8081ec8b3f7fee4
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -29,7 +29,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -38,7 +38,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o
index 3e470fc88..603734f51 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml
index 944ffc51e..dd3f68670 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          3064
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          4555
     Length:          34
     ReplacementText: 'dpct::pow(10, polynomial(log10(energy)))'
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          13749
     Length:          34
     ReplacementText: 'dpct::pow(10, polynomial(log10(energy)))'
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          19935
     Length:          0
     ReplacementText: "   /*\n   DPCT1064:46: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n   */\n"
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          19947
     Length:          85
     ReplacementText: 'DPCT_CHECK_ERROR(nuclearIsotope_I_d = sycl::malloc_device<NuclearDataIsotope_d>(isotopesSize, dpct::get_in_order_queue()))'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20240
     Length:          0
     ReplacementText: "   /*\n   DPCT1064:47: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n   */\n"
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20252
     Length:          70
     ReplacementText: 'DPCT_CHECK_ERROR(nuclearEnergy_I_d = sycl::malloc_device<double>(energiesSize, dpct::get_in_order_queue()))'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20561
     Length:          0
     ReplacementText: "      /*\n      DPCT1064:48: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n      */\n"
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20576
     Length:          83
     ReplacementText: 'DPCT_CHECK_ERROR(nuclearSpecies_I_d = sycl::malloc_device<NuclearDataSpecies_d>(speciesSize, dpct::get_in_order_queue()))'
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20963
     Length:          0
     ReplacementText: "         /*\n         DPCT1064:49: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n         */\n"
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          20981
     Length:          79
     ReplacementText: 'DPCT_CHECK_ERROR(nuclear_I_d = sycl::malloc_device<NuclearDataReaction_d>(reactionsSize, dpct::get_in_order_queue()))'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          21400
     Length:          0
     ReplacementText: "            /*\n            DPCT1064:50: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n            */\n"
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          21421
     Length:          77
     ReplacementText: 'DPCT_CHECK_ERROR(crossSections_I_d = sycl::malloc_device<double>(NumcrossSectionSize, dpct::get_in_order_queue()))'
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          21523
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          21671
     Length:          24
     ReplacementText: ''
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          21696
     Length:          0
     ReplacementText: '.wait())'
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22077
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22154
     Length:          23
     ReplacementText: ''
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22178
     Length:          0
     ReplacementText: '.wait())'
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22343
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22431
     Length:          23
     ReplacementText: ''
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22455
     Length:          0
     ReplacementText: '.wait())'
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22613
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22702
     Length:          23
     ReplacementText: ''
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22726
     Length:          0
     ReplacementText: '.wait())'
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22863
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -235,7 +235,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22958
     Length:          23
     ReplacementText: ''
@@ -244,7 +244,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          22982
     Length:          0
     ReplacementText: '.wait())'
@@ -253,7 +253,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          23290
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -262,7 +262,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          23352
     Length:          23
     ReplacementText: ''
@@ -271,7 +271,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Offset:          23376
     Length:          0
     ReplacementText: '.wait())'
@@ -281,7 +281,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh'
     Digest:          d5c0a5d1457a5f95c50e1ebf275d33d7
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -290,7 +290,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -299,7 +299,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml
index c90c157d9..8d9debc9d 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          5587
     Length:          0
     ReplacementText: "/*\nDPCT1110:25: The total declared local variable size in device function pushParticle exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          7384
     Length:          0
     ReplacementText: "/*\nDPCT1110:26: The total declared local variable size in device function getParticle exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          7772
     Length:          0
     ReplacementText: "/*\nDPCT1110:27: The total declared local variable size in device function putParticle exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          9177
     Length:          20
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          9301
     Length:          113
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Offset:          9489
     Length:          0
     ReplacementText: "\ninline HOST_DEVICE void MC_Load_Particle_host_ct1(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index)\n{\n    // particleVault.popParticle(mc_particle);\n    particleVault->getParticle(mc_particle, particle_index);\n\n    // Time to Census\n    if (mc_particle.time_to_census <= 0.0)\n    {\n        mc_particle.time_to_census += monteCarlo->time_info->time_step;\n    }\n\n    // Age\n    if (mc_particle.age < 0.0)\n    {\n        mc_particle.age = 0.0;\n    }\n\n//    Energy Group\n\n    mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy);\n\n    //                    printf(\"file=%s line=%d\\n\",__FILE__,__LINE__);\n}"
@@ -56,7 +56,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh'
     Digest:          5c484422d6e6caea178fd73adef39cac
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -65,7 +65,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -74,7 +74,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o
index 9f93322fc..0ee720123 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o
index 9604f11c0..e28e0e1d9 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o
index bda057986..ad3f1dae1 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml
index 125e8bc4f..2c9af8565 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3064
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3183
     Length:          4
     ReplacementText: 'sycl::range<3>'
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3195
     Length:          4
     ReplacementText: 'sycl::range<3>'
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3302
     Length:          0
     ReplacementText: 'const sycl::nd_item<3> &item_ct1'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3324
     Length:          10
     ReplacementText: 'item_ct1.get_group(2)'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3355
     Length:          10
     ReplacementText: 'item_ct1.get_group(1)'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3368
     Length:          9
     ReplacementText: 'item_ct1.get_group_range(2)'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3398
     Length:          10
     ReplacementText: 'item_ct1.get_group(0)'
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3411
     Length:          9
     ReplacementText: 'item_ct1.get_group_range(2)'
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3423
     Length:          9
     ReplacementText: 'item_ct1.get_group_range(1)'
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3465
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3478
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(1)'
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3491
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(0)'
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3524
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(0)'
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3539
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3552
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(1)'
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3585
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(1)'
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3599
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3631
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(2)'
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3703
     Length:          0
     ReplacementText: 'const sycl::nd_item<3> &item_ct1'
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3727
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(0)'
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3742
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3755
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(1)'
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3788
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(1)'
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3802
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Offset:          3834
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(2)'
@@ -236,7 +236,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh'
     Digest:          421bbc8c14b09bcf01ba6c1741b6eee6
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -245,7 +245,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -254,7 +254,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml
index 845b6a3af..ef554d405 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3056
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3130
     Length:          18
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3148
     Length:          26
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3174
     Length:          30
     ReplacementText: ''
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3512
     Length:          0
     ReplacementText: "        /*\n        DPCT1010:34: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n        */\n"
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3520
     Length:          11
     ReplacementText: 'dpct::err0'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3538
     Length:          18
     ReplacementText: '0'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          3566
     Length:          204
     ReplacementText: ''
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          4639
     Length:          19
     ReplacementText: '0'
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          4941
     Length:          0
     ReplacementText: "                /*\n                DPCT1064:36: Migrated cudaMallocManaged call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n                */\n"
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          4966
     Length:          27
     ReplacementText: 'DPCT_CHECK_ERROR(*ptr = (T *)sycl::malloc_shared(size, dpct::get_in_order_queue()))'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          5358
     Length:          13
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(ptr, dpct::get_in_order_queue())'
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Offset:          5371
     Length:          0
     ReplacementText: ')'
@@ -119,7 +119,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh'
     Digest:          54a377620c3255007518227343afb537
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -128,7 +128,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -137,7 +137,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o
index 943c997ec..6667ca4cb 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o
index 46c306d23..24638c6fc 100644
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml
index 6e2695f50..4f7dfbd5a 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
     Offset:          1159
     Length:          0
     ReplacementText: ' dpct_type_241340'
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
     Offset:          1248
     Length:          0
     ReplacementText: ' dpct_type_811906'
@@ -20,7 +20,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh'
     Digest:          a1fef4b40b5db866c7e27de918dbe7ea
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -29,7 +29,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -38,7 +38,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs
index 2233466ff..7d6a5b266 100755
Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs differ
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml
index a5e4994c7..487be6d99 100644
--- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml
+++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh'
     Offset:          2789
     Length:          0
     ReplacementText: ' dpct_type_169390'
@@ -11,7 +11,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh'
     Digest:          b0026dd3148b2f5b3e6166f0ace78f9d
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -20,7 +20,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -29,7 +29,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt
index e37082de2..440d57370 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt
@@ -1,5 +1,5 @@
 # This is the CMakeCache file.
-# For build in directory: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+# For build in directory: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 # It was generated by CMake: /usr/bin/cmake
 # You can edit this file to change values found and used by cmake.
 # If you do not want to change any of the values, simply exit the editor.
@@ -329,13 +329,13 @@ CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.a
 USE_SM:BOOL=OFF
 
 //Value Computed by CMake
-bitcracker_BINARY_DIR:STATIC=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+bitcracker_BINARY_DIR:STATIC=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 
 //Value Computed by CMake
 bitcracker_IS_TOP_LEVEL:STATIC=ON
 
 //Value Computed by CMake
-bitcracker_SOURCE_DIR:STATIC=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+bitcracker_SOURCE_DIR:STATIC=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 
 
 ########################
@@ -347,7 +347,7 @@ CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
 //ADVANCED property for variable: CMAKE_AR
 CMAKE_AR-ADVANCED:INTERNAL=1
 //This is the directory where this CMakeCache.txt was created
-CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+CMAKE_CACHEFILE_DIR:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 //Major version of cmake used to create the current loaded cache
 CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
 //Minor version of cmake used to create the current loaded cache
@@ -410,7 +410,7 @@ CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1
 CMAKE_HAVE_PTHREAD_H:INTERNAL=1
 //Source directory with the top level CMakeLists.txt file for this
 // project
-CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+CMAKE_HOME_DIRECTORY:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 //Install .so files without execute permission.
 CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1
 //ADVANCED property for variable: CMAKE_LINKER
@@ -477,7 +477,7 @@ CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
 CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1
 //List of intermediate files that are part of the cuda dependency
 // scanning.
-CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend
+CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend
 //ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
 CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1
 //ADVANCED property for variable: CUDA_BUILD_CUBIN
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake
index 6ccc334a3..a99054488 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake
@@ -77,7 +77,7 @@ endif()
 
 
 
-set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/home/chenshe1/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
+set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/home/local_user/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
 set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
 set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/opt/intel/oneapi/mpi/2021.10.0/libfabric/lib;/opt/intel/oneapi/mpi/2021.10.0/lib;/opt/intel/oneapi/compiler/2023.2.0/linux/lib;/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/opt/intel/oneapi/tbb/2021.10.0/lib/intel64/gcc4.8;/opt/intel/oneapi/mpi/2021.10.0/lib/release;/opt/intel/oneapi/compiler/2023.2.0/linux/compiler/lib/intel64_lin")
 set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake
index f2a5d63ad..d2bdced59 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake
@@ -2,8 +2,8 @@
 # Generated by "Unix Makefiles" Generator, CMake Version 3.22
 
 # Relative path conversion top directories.
-set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA")
-set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build")
+set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA")
+set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build")
 
 # Force unix paths in dependencies.
 set(CMAKE_FORCE_UNIX_PATHS 1)
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log
index f8741541e..0da9e8382 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log
@@ -10,13 +10,13 @@ The output was:
 
 Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out"
 
-The CXX compiler identification is GNU, found in "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out"
+The CXX compiler identification is GNU, found in "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out"
 
 Detecting CXX compiler ABI info compiled with the following output:
-Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
+Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
 
 Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_848f8/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_848f8.dir/build.make CMakeFiles/cmTC_848f8.dir/build
-gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 Building CXX object CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o
 /usr/bin/c++   -v -o CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp
 Using built-in specs.
@@ -40,7 +40,7 @@ ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"
 ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"
 #include "..." search starts here:
 #include <...> search starts here:
- /home/chenshe1/sandbox/dpct_install/include
+ /home/local_user/sandbox/dpct_install/include
  /opt/intel/oneapi/tbb/2021.10.0/env/../include
  /opt/intel/oneapi/mpi/2021.10.0//include
  /opt/intel/oneapi/dev-utilities/2021.10.0/include
@@ -82,14 +82,14 @@ LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib/../lib/:/opt/intel/o
 COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_848f8' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_848f8.'
  /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cc539bMd.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_848f8 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib/../lib -L/opt/intel/oneapi/mpi/2021.10.0//lib/../lib -L/opt/intel/oneapi/compiler/2023.2.0/linux/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/opt/intel/oneapi/tbb/2021.10.0/env/../lib/intel64/gcc4.8 -L/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib -L/opt/intel/oneapi/mpi/2021.10.0//lib/release -L/opt/intel/oneapi/mpi/2021.10.0//lib -L/opt/intel/oneapi/compiler/2023.2.0/linux/compiler/lib/intel64_lin -L/opt/intel/oneapi/compiler/2023.2.0/linux/lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
 COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_848f8' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_848f8.'
-gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 
 
 
 Parsed CXX implicit include dir info from above output: rv=done
   found start of include info
   found start of implicit include info
-    add: [/home/chenshe1/sandbox/dpct_install/include]
+    add: [/home/local_user/sandbox/dpct_install/include]
     add: [/opt/intel/oneapi/tbb/2021.10.0/env/../include]
     add: [/opt/intel/oneapi/mpi/2021.10.0//include]
     add: [/opt/intel/oneapi/dev-utilities/2021.10.0/include]
@@ -102,7 +102,7 @@ Parsed CXX implicit include dir info from above output: rv=done
     add: [/usr/include/x86_64-linux-gnu]
     add: [/usr/include]
   end of search list found
-  collapse include dir [/home/chenshe1/sandbox/dpct_install/include] ==> [/home/chenshe1/sandbox/dpct_install/include]
+  collapse include dir [/home/local_user/sandbox/dpct_install/include] ==> [/home/local_user/sandbox/dpct_install/include]
   collapse include dir [/opt/intel/oneapi/tbb/2021.10.0/env/../include] ==> [/opt/intel/oneapi/tbb/2021.10.0/include]
   collapse include dir [/opt/intel/oneapi/mpi/2021.10.0//include] ==> [/opt/intel/oneapi/mpi/2021.10.0/include]
   collapse include dir [/opt/intel/oneapi/dev-utilities/2021.10.0/include] ==> [/opt/intel/oneapi/dev-utilities/2021.10.0/include]
@@ -114,15 +114,15 @@ Parsed CXX implicit include dir info from above output: rv=done
   collapse include dir [/usr/local/include] ==> [/usr/local/include]
   collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu]
   collapse include dir [/usr/include] ==> [/usr/include]
-  implicit include dirs: [/home/chenshe1/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include]
+  implicit include dirs: [/home/local_user/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include]
 
 
 Parsed CXX implicit link information from above output:
   link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
-  ignore line: [Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp]
+  ignore line: [Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp]
   ignore line: []
   ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_848f8/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_848f8.dir/build.make CMakeFiles/cmTC_848f8.dir/build]
-  ignore line: [gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp']
+  ignore line: [gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp']
   ignore line: [Building CXX object CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o]
   ignore line: [/usr/bin/c++   -v -o CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp]
   ignore line: [Using built-in specs.]
@@ -146,7 +146,7 @@ Parsed CXX implicit link information from above output:
   ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"]
   ignore line: [#include "..." search starts here:]
   ignore line: [#include <...> search starts here:]
-  ignore line: [ /home/chenshe1/sandbox/dpct_install/include]
+  ignore line: [ /home/local_user/sandbox/dpct_install/include]
   ignore line: [ /opt/intel/oneapi/tbb/2021.10.0/env/../include]
   ignore line: [ /opt/intel/oneapi/mpi/2021.10.0//include]
   ignore line: [ /opt/intel/oneapi/dev-utilities/2021.10.0/include]
@@ -267,30 +267,30 @@ Parsed CXX implicit link information from above output:
 
 
 Determining if the include file pthread.h exists passed with the following output:
-Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
+Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
 
 Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_36779/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_36779.dir/build.make CMakeFiles/cmTC_36779.dir/build
-gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 Building CXX object CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o
-/usr/bin/c++   -O3 -ffast-math     -std=c++17 -o CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -c /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.cxx
+/usr/bin/c++   -O3 -ffast-math     -std=c++17 -o CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -c /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.cxx
 Linking CXX executable cmTC_36779
 /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_36779.dir/link.txt --verbose=1
 /usr/bin/c++  -O3 -ffast-math     CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -o cmTC_36779 
-gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 
 
 
 Performing C++ SOURCE FILE Test CMAKE_HAVE_LIBC_PTHREAD succeeded with the following output:
-Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
+Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp
 
 Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_98f23/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_98f23.dir/build.make CMakeFiles/cmTC_98f23.dir/build
-gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 Building CXX object CMakeFiles/cmTC_98f23.dir/src.cxx.o
-/usr/bin/c++ -DCMAKE_HAVE_LIBC_PTHREAD  -O3 -ffast-math     -std=c++17 -o CMakeFiles/cmTC_98f23.dir/src.cxx.o -c /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/src.cxx
+/usr/bin/c++ -DCMAKE_HAVE_LIBC_PTHREAD  -O3 -ffast-math     -std=c++17 -o CMakeFiles/cmTC_98f23.dir/src.cxx.o -c /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/src.cxx
 Linking CXX executable cmTC_98f23
 /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_98f23.dir/link.txt --verbose=1
 /usr/bin/c++  -O3 -ffast-math     CMakeFiles/cmTC_98f23.dir/src.cxx.o -o cmTC_98f23 
-gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
+gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'
 
 
 Source file was:
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2 b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2
index 9227d8d78..593560775 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2
@@ -54,10 +54,10 @@ RM = /usr/bin/cmake -E rm -f
 EQUALS = =
 
 # The top-level source directory on which CMake was run.
-CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 
 # The top-level build directory on which CMake was run.
-CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 
 #=============================================================================
 # Directory level rules for the build root directory
@@ -81,14 +81,14 @@ clean: CMakeFiles/bitcracker.dir/clean
 CMakeFiles/bitcracker.dir/all:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/bitcracker.dir/build.make CMakeFiles/bitcracker.dir/depend
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/bitcracker.dir/build.make CMakeFiles/bitcracker.dir/build
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5 "Built target bitcracker"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5 "Built target bitcracker"
 .PHONY : CMakeFiles/bitcracker.dir/all
 
 # Build rule for subdir invocation for target.
 CMakeFiles/bitcracker.dir/rule: cmake_check_build_system
-	$(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 5
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 5
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/bitcracker.dir/all
-	$(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0
 .PHONY : CMakeFiles/bitcracker.dir/rule
 
 # Convenience name for target.
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt
index e6f34f5d6..d23be5742 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt
@@ -1,3 +1,3 @@
-/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir
-/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/edit_cache.dir
-/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/rebuild_cache.dir
+/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir
+/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/edit_cache.dir
+/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/rebuild_cache.dir
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make
index d09a42494..bfa52afbd 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make
@@ -53,10 +53,10 @@ RM = /usr/bin/cmake -E rm -f
 EQUALS = =
 
 # The top-level source directory on which CMake was run.
-CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 
 # The top-level build directory on which CMake was run.
-CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 
 # Include any dependencies generated for this target.
 include CMakeFiles/bitcracker.dir/depend.make
@@ -72,40 +72,40 @@ include CMakeFiles/bitcracker.dir/flags.make
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: ../src/main.cu
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o"
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o"
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
 
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: ../src/utils.cu
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o"
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o"
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
 
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: ../src/w_blocks.cu
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o"
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o"
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
 
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
 CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: ../src/attack.cu
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o"
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o"
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
 
 # Object files for target bitcracker
 bitcracker_OBJECTS =
 
 # External object files for target bitcracker
 bitcracker_EXTERNAL_OBJECTS = \
-"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" \
-"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" \
-"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" \
-"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o"
+"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" \
+"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" \
+"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" \
+"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o"
 
 bitcracker: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o
 bitcracker: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o
@@ -117,7 +117,7 @@ bitcracker: /usr/lib/x86_64-linux-gnu/librt.a
 bitcracker: /usr/local/cuda/lib64/libcudart_static.a
 bitcracker: /usr/lib/x86_64-linux-gnu/librt.a
 bitcracker: CMakeFiles/bitcracker.dir/link.txt
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Linking CXX executable bitcracker"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Linking CXX executable bitcracker"
 	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/bitcracker.dir/link.txt --verbose=$(VERBOSE)
 
 # Rule to build all files generated by this target.
@@ -132,6 +132,6 @@ CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_gener
 CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o
 CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o
 CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o
-	cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/DependInfo.cmake --color=$(COLOR)
+	cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/DependInfo.cmake --color=$(COLOR)
 .PHONY : CMakeFiles/bitcracker.dir/depend
 
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
index b1aea4c6c..7adebec63 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
@@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
 set(CUDA_NVCC_FLAGS_RELEASE  ; )
 set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
 set(nvcc_flags -m64) # list
-set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
 string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
 set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
 set(format_flag "-c") # string
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen
index bf20c2959..011ee063a 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
index e1752ec21..e8fd29c5b 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
@@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
 set(CUDA_NVCC_FLAGS_RELEASE  ; )
 set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
 set(nvcc_flags -m64) # list
-set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
 string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
 set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
 set(format_flag "-c") # string
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen
index 29074bd92..b3887194b 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
index e7df3120e..ba2d994d4 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
@@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
 set(CUDA_NVCC_FLAGS_RELEASE  ; )
 set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
 set(nvcc_flags -m64) # list
-set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
 string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
 set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
 set(format_flag "-c") # string
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen
index f2f4dc692..ebb346937 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
index e2f6c56ab..5c9797cc7 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
@@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
 set(CUDA_NVCC_FLAGS_RELEASE  ; )
 set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
 set(nvcc_flags -m64) # list
-set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes)
 string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
 set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
 set(format_flag "-c") # string
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen
index dfc5bffb4..543d2bea1 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen
@@ -58,18 +58,18 @@ endif()
 
 # Set these up as variables to make reading the generated file easier
 set(CMAKE_COMMAND "/usr/bin/cmake") # path
-set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path
-set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path
-set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path
+set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path
 set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
 set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
 set(build_cubin OFF) # bool
 set(CUDA_HOST_COMPILER "") # path
 # We won't actually use these variables for now, but we need to set this, in
 # order to force this file to be run again if it changes.
-set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
-set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path
-set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path
+set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path
+set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path
 
 set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
 set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile
index 022d6c02e..e4ce5face 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile
@@ -57,10 +57,10 @@ RM = /usr/bin/cmake -E rm -f
 EQUALS = =
 
 # The top-level source directory on which CMake was run.
-CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 
 # The top-level build directory on which CMake was run.
-CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
+CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build
 
 #=============================================================================
 # Targets provided globally by CMake.
@@ -87,9 +87,9 @@ rebuild_cache/fast: rebuild_cache
 
 # The main all target
 all: cmake_check_build_system
-	$(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build//CMakeFiles/progress.marks
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build//CMakeFiles/progress.marks
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all
-	$(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0
 .PHONY : all
 
 # The main clean target
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake
index 832a11142..f570cfe8c 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake
@@ -1,4 +1,4 @@
-# Install script for directory: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
+# Install script for directory: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA
 
 # Set the install prefix
 if(NOT DEFINED CMAKE_INSTALL_PREFIX)
@@ -50,5 +50,5 @@ endif()
 
 string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
        "${CMAKE_INSTALL_MANIFEST_FILES}")
-file(WRITE "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/${CMAKE_INSTALL_MANIFEST}"
+file(WRITE "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/${CMAKE_INSTALL_MANIFEST}"
      "${CMAKE_INSTALL_MANIFEST_CONTENT}")
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json
index b6250e061..8aa597aa9 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json
@@ -1,26 +1,26 @@
 [
     {
-        "command": "nvcc -c  -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu"
+        "command": "nvcc -c  -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu",
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu"
     },
     {
-        "command": "nvcc -c  -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu"
+        "command": "nvcc -c  -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu",
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu"
     },
     {
-        "command": "nvcc -c  -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu"
+        "command": "nvcc -c  -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu",
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu"
     },
     {
-        "command": "nvcc -c  -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
-        "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu"
+        "command": "nvcc -c  -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu",
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src",
+        "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu"
     },
     {
         "command": "ld -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cc8r5h9i.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o bitcracker /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/lib/gcc/x86_64-linux-gnu/11/crtfastmath.o /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o",
-        "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build"
+        "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build"
     }
 ]
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml
index 1b98a55f8..61e823792 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml
@@ -1,7 +1,7 @@
 ---
 MainSourceFile:  MainSrcFiles_placehold
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          1633
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          2138
     Length:          10
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          2413
     Length:          0
     ReplacementText: ",\n\tunsigned int const *TS0,\n\tunsigned int const *TS1,\n\tunsigned int const *TS2,\n\tunsigned int const *TS3"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          2819
     Length:          26
     ReplacementText: 'dpct::byte_level_permute(m0, 0, 0x0123)'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          2880
     Length:          26
     ReplacementText: 'dpct::byte_level_permute(m1, 0, 0x0123)'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          2941
     Length:          26
     ReplacementText: 'dpct::byte_level_permute(m2, 0, 0x0123)'
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          3002
     Length:          26
     ReplacementText: 'dpct::byte_level_permute(m3, 0, 0x0123)'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          17971
     Length:          37
     ReplacementText: 'dpct::byte_level_permute(enc_schedule0, 0, 0x0123)'
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          18027
     Length:          37
     ReplacementText: 'dpct::byte_level_permute(enc_schedule1, 0, 0x0123)'
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          18083
     Length:          37
     ReplacementText: 'dpct::byte_level_permute(enc_schedule2, 0, 0x0123)'
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          18139
     Length:          37
     ReplacementText: 'dpct::byte_level_permute(enc_schedule3, 0, 0x0123)'
@@ -100,7 +100,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          18181
     Length:          10
     ReplacementText: "/*\nDPCT1110:3: The total declared local variable size in device function decrypt_vmk_with_mac exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
@@ -109,7 +109,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          18589
     Length:          0
     ReplacementText: ",\n    const sycl::nd_item<3> &item_ct1,\n    unsigned int const *TS0,\n    unsigned int const *TS1,\n    unsigned int const *TS2,\n    unsigned int const *TS3"
@@ -118,7 +118,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          19258
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(2)'
@@ -127,7 +127,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          19272
     Length:          10
     ReplacementText: 'item_ct1.get_group(2)'
@@ -136,7 +136,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          19285
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -145,7 +145,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          32334
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -154,7 +154,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          33736
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -163,7 +163,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          35146
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -172,7 +172,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          36386
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -181,7 +181,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          36758
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -190,7 +190,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          37153
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -199,7 +199,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          37548
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -208,7 +208,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          37930
     Length:          0
     ReplacementText: ', TS0, TS1, TS2, TS3'
@@ -217,7 +217,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          40314
     Length:          0
     ReplacementText: ' try '
@@ -226,7 +226,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42561
     Length:          0
     ReplacementText: "    /*\n    DPCT1048:0: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code.\n    */\n"
@@ -235,7 +235,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42577
     Length:          120
     ReplacementText: 'DPCT_CHECK_ERROR(h_found = sycl::malloc_host<int>(1, dpct::get_in_order_queue()))'
@@ -244,7 +244,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42701
     Length:          0
     ReplacementText: "    /*\n    DPCT1048:1: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code.\n    */\n"
@@ -253,7 +253,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42717
     Length:          120
     ReplacementText: 'DPCT_CHECK_ERROR(h_pswd_char = sycl::malloc_host<char>(max_num_pswd_per_read * PSWD_NUM_CHAR, dpct::get_in_order_queue()))'
@@ -262,7 +262,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42841
     Length:          0
     ReplacementText: "    /*\n    DPCT1048:2: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code.\n    */\n"
@@ -271,7 +271,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          42857
     Length:          120
     ReplacementText: 'DPCT_CHECK_ERROR(h_pswd_uint32 = sycl::malloc_host<uint32_t>(max_num_pswd_per_read * PSWD_NUM_UINT32, dpct::get_in_order_queue()))'
@@ -280,7 +280,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43359
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -289,7 +289,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43421
     Length:          24
     ReplacementText: ''
@@ -298,7 +298,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43446
     Length:          0
     ReplacementText: '.wait())'
@@ -307,7 +307,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43622
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_vmk = sycl::malloc_device<uint8_t>(VMK_FULL_SIZE, dpct::get_in_order_queue()))'
@@ -316,7 +316,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43717
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_vmkIV = sycl::malloc_device<uint8_t>(IV_SIZE, dpct::get_in_order_queue()))'
@@ -325,7 +325,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43812
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_mac = sycl::malloc_device<uint8_t>(MAC_SIZE, dpct::get_in_order_queue()))'
@@ -334,7 +334,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          43907
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_macIV = sycl::malloc_device<uint8_t>(IV_SIZE, dpct::get_in_order_queue()))'
@@ -343,7 +343,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44002
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_computedMacIV = sycl::malloc_device<uint8_t>(IV_SIZE, dpct::get_in_order_queue()))'
@@ -352,7 +352,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44097
     Length:          75
     ReplacementText: 'DPCT_CHECK_ERROR(d_found = sycl::malloc_device<int>(1, dpct::get_in_order_queue()))'
@@ -361,7 +361,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44193
     Length:          100
     ReplacementText: 'DPCT_CHECK_ERROR(d_pswd_uint32 = sycl::malloc_device<uint32_t>(max_num_pswd_per_read * PSWD_NUM_UINT32, dpct::get_in_order_queue()))'
@@ -370,7 +370,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44405
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -379,7 +379,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44487
     Length:          25
     ReplacementText: ''
@@ -388,7 +388,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44513
     Length:          0
     ReplacementText: '.wait())'
@@ -397,7 +397,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44534
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -406,7 +406,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44616
     Length:          25
     ReplacementText: ''
@@ -415,7 +415,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44642
     Length:          0
     ReplacementText: '.wait())'
@@ -424,7 +424,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44666
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -433,7 +433,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44748
     Length:          25
     ReplacementText: ''
@@ -442,7 +442,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44774
     Length:          0
     ReplacementText: '.wait())'
@@ -451,7 +451,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44798
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -460,7 +460,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44880
     Length:          25
     ReplacementText: ''
@@ -469,7 +469,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44906
     Length:          0
     ReplacementText: '.wait())'
@@ -478,7 +478,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          44930
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -487,7 +487,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45012
     Length:          25
     ReplacementText: ''
@@ -496,7 +496,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45038
     Length:          0
     ReplacementText: '.wait())'
@@ -505,7 +505,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45058
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -514,7 +514,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45141
     Length:          24
     ReplacementText: ''
@@ -523,7 +523,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45166
     Length:          0
     ReplacementText: '.wait())'
@@ -532,7 +532,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          45368
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -541,7 +541,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          47337
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -550,7 +550,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          47428
     Length:          24
     ReplacementText: ''
@@ -559,7 +559,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          47453
     Length:          0
     ReplacementText: '.wait())'
@@ -568,7 +568,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          47477
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -577,7 +577,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48003
     Length:          0
     ReplacementText: "        /*\n        DPCT1049:4: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n        */\n"
@@ -586,7 +586,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48011
     Length:          391
     ReplacementText: "dpct::get_in_order_queue().submit(\n          [&](sycl::handler &cgh) {\n            TS0.init();\n            TS1.init();\n            TS2.init();\n            TS3.init();\n\n            auto TS0_ptr_ct1 = TS0.get_ptr();\n            auto TS1_ptr_ct1 = TS1.get_ptr();\n            auto TS2_ptr_ct1 = TS2.get_ptr();\n            auto TS3_ptr_ct1 = TS3.get_ptr();\n\n            cgh.parallel_for(\n              sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, block_size), sycl::range<3>(1, 1, block_size)), \n              [=](sycl::nd_item<3> item_ct1) {\n                decrypt_vmk_with_mac(num_read_pswd, d_found, d_vmk, d_vmkIV, d_mac, d_macIV, d_computedMacIV, v0, v1, v2, v3, s0, s1, s2, s3, d_pswd_uint32, d_w_words_uint32, item_ct1, TS0_ptr_ct1, TS1_ptr_ct1, TS2_ptr_ct1, TS3_ptr_ct1);\n              });\n          });"
@@ -595,7 +595,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48402
     Length:          1
     ReplacementText: ''
@@ -604,7 +604,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48445
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -613,7 +613,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48881
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -622,7 +622,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48930
     Length:          24
     ReplacementText: ''
@@ -631,7 +631,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48955
     Length:          0
     ReplacementText: '.wait())'
@@ -640,7 +640,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          48973
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -649,7 +649,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50751
     Length:          21
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_found, dpct::get_in_order_queue())'
@@ -658,7 +658,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50772
     Length:          0
     ReplacementText: ')'
@@ -667,7 +667,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50792
     Length:          25
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_pswd_char, dpct::get_in_order_queue())'
@@ -676,7 +676,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50817
     Length:          0
     ReplacementText: ')'
@@ -685,7 +685,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50837
     Length:          27
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_pswd_uint32, dpct::get_in_order_queue())'
@@ -694,7 +694,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50864
     Length:          0
     ReplacementText: ')'
@@ -703,7 +703,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50923
     Length:          15
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_vmk, dpct::get_in_order_queue())'
@@ -712,7 +712,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50938
     Length:          0
     ReplacementText: ')'
@@ -721,7 +721,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50958
     Length:          17
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_vmkIV, dpct::get_in_order_queue())'
@@ -730,7 +730,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50975
     Length:          0
     ReplacementText: ')'
@@ -739,7 +739,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          50995
     Length:          15
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_mac, dpct::get_in_order_queue())'
@@ -748,7 +748,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51010
     Length:          0
     ReplacementText: ')'
@@ -757,7 +757,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51030
     Length:          17
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_macIV, dpct::get_in_order_queue())'
@@ -766,7 +766,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51047
     Length:          0
     ReplacementText: ')'
@@ -775,7 +775,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51067
     Length:          25
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_computedMacIV, dpct::get_in_order_queue())'
@@ -784,7 +784,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51092
     Length:          0
     ReplacementText: ')'
@@ -793,7 +793,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51112
     Length:          17
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_found, dpct::get_in_order_queue())'
@@ -802,7 +802,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51129
     Length:          0
     ReplacementText: ')'
@@ -811,7 +811,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51149
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_pswd_uint32, dpct::get_in_order_queue())'
@@ -820,7 +820,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51172
     Length:          0
     ReplacementText: ')'
@@ -829,7 +829,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Offset:          51247
     Length:          0
     ReplacementText: "\ncatch (sycl::exception const &exc) {\n  std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n  std::exit(1);\n}"
@@ -838,7 +838,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          1633
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -847,7 +847,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          2654
     Length:          0
     ReplacementText: ' try '
@@ -856,7 +856,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          5570
     Length:          0
     ReplacementText: "\t/*\n\tDPCT1093:8: The \"0\" device may be not the one intended for use. Adjust the selected device if needed.\n\t*/\n"
@@ -865,7 +865,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          5583
     Length:          13
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device'
@@ -874,7 +874,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          5599
     Length:          0
     ReplacementText: ')'
@@ -883,7 +883,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          5942
     Length:          98
     ReplacementText: 'DPCT_CHECK_ERROR(d_w_words_uint32 = sycl::malloc_device<uint32_t>(NUM_HASH_BLOCKS * HASH_BLOCK_NUM_UINT32, dpct::get_in_order_queue()))'
@@ -892,7 +892,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          6856
     Length:          26
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_w_words_uint32, dpct::get_in_order_queue())'
@@ -901,7 +901,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          6882
     Length:          0
     ReplacementText: ')'
@@ -910,7 +910,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Offset:          7075
     Length:          0
     ReplacementText: "\ncatch (sycl::exception const &exc) {\n  std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n  std::exit(1);\n}"
@@ -919,7 +919,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          1633
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -928,7 +928,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          2333
     Length:          10
     ReplacementText: ''
@@ -937,7 +937,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          2462
     Length:          0
     ReplacementText: ",\n    const sycl::nd_item<3> &item_ct1"
@@ -946,7 +946,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          2485
     Length:          10
     ReplacementText: 'item_ct1.get_group(2)'
@@ -955,7 +955,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          2498
     Length:          10
     ReplacementText: 'item_ct1.get_local_range(2)'
@@ -964,7 +964,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          2511
     Length:          11
     ReplacementText: 'item_ct1.get_local_id(2)'
@@ -973,7 +973,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          6853
     Length:          69
     ReplacementText: 'DPCT_CHECK_ERROR(salt_d = sycl::malloc_device<unsigned char>(SALT_SIZE, dpct::get_in_order_queue()))'
@@ -982,7 +982,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          6939
     Length:          69
     ReplacementText: 'DPCT_CHECK_ERROR(padding_d = sycl::malloc_device<unsigned char>(PADDING_SIZE, dpct::get_in_order_queue()))'
@@ -991,7 +991,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7055
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -1000,7 +1000,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7122
     Length:          24
     ReplacementText: ''
@@ -1009,7 +1009,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7147
     Length:          0
     ReplacementText: '.wait())'
@@ -1018,7 +1018,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7164
     Length:          10
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
@@ -1027,7 +1027,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7231
     Length:          24
     ReplacementText: ''
@@ -1036,7 +1036,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7256
     Length:          0
     ReplacementText: '.wait())'
@@ -1045,7 +1045,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7617
     Length:          65
     ReplacementText: "dpct::get_in_order_queue().parallel_for(\n\t  sycl::nd_range<3>(sycl::range<3>(1, 1, 1024) * sycl::range<3>(1, 1, 16), sycl::range<3>(1, 1, 16)), \n\t  [=](sycl::nd_item<3> item_ct1) {\n\t    kernel_w_block(salt_d, padding_d, d_w_words_uint32, item_ct1);\n\t  });"
@@ -1054,7 +1054,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7682
     Length:          1
     ReplacementText: ''
@@ -1063,7 +1063,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          7729
     Length:          23
     ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
@@ -1072,7 +1072,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          8096
     Length:          16
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(salt_d, dpct::get_in_order_queue())'
@@ -1081,7 +1081,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          8112
     Length:          0
     ReplacementText: ')'
@@ -1090,7 +1090,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          8132
     Length:          19
     ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(padding_d, dpct::get_in_order_queue())'
@@ -1099,7 +1099,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Offset:          8151
     Length:          0
     ReplacementText: ')'
@@ -1109,11 +1109,11 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu'
     Digest:          1e837048a442f2258c61090d201699d9
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu'
     Digest:          60e076c4d80f751b34af1cc20d4cd774
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu'
     Digest:          6de252b294499305c70f60e14eb8c6a9
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -1135,7 +1135,7 @@ CompileTargets:
       Compiler:        nvcc
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -1144,7 +1144,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker
index f60370f5b..9003bc97d 100755
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker differ
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml
index bd5d375d9..f637c8109 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Offset:          0
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Offset:          1648
     Length:          3666
     ReplacementText: "static dpct::constant_memory<unsigned int, 1> TS0(sycl::range<1>(256), {\n        0xC66363A5U, 0xF87C7C84U, 0xEE777799U, 0xF67B7B8DU, 0xFFF2F20DU, 0xD66B6BBDU, 0xDE6F6FB1U, 0x91C5C554U, \n        0x60303050U, 0x02010103U, 0xCE6767A9U, 0x562B2B7DU, 0xE7FEFE19U, 0xB5D7D762U, 0x4DABABE6U, 0xEC76769AU, \n        0x8FCACA45U, 0x1F82829DU, 0x89C9C940U, 0xFA7D7D87U, 0xEFFAFA15U, 0xB25959EBU, 0x8E4747C9U, 0xFBF0F00BU, \n        0x41ADADECU, 0xB3D4D467U, 0x5FA2A2FDU, 0x45AFAFEAU, 0x239C9CBFU, 0x53A4A4F7U, 0xE4727296U, 0x9BC0C05BU, \n        0x75B7B7C2U, 0xE1FDFD1CU, 0x3D9393AEU, 0x4C26266AU, 0x6C36365AU, 0x7E3F3F41U, 0xF5F7F702U, 0x83CCCC4FU, \n        0x6834345CU, 0x51A5A5F4U, 0xD1E5E534U, 0xF9F1F108U, 0xE2717193U, 0xABD8D873U, 0x62313153U, 0x2A15153FU, \n        0x0804040CU, 0x95C7C752U, 0x46232365U, 0x9DC3C35EU, 0x30181828U, 0x379696A1U, 0x0A05050FU, 0x2F9A9AB5U, \n        0x0E070709U, 0x24121236U, 0x1B80809BU, 0xDFE2E23DU, 0xCDEBEB26U, 0x4E272769U, 0x7FB2B2CDU, 0xEA75759FU, \n        0x1209091BU, 0x1D83839EU, 0x582C2C74U, 0x341A1A2EU, 0x361B1B2DU, 0xDC6E6EB2U, 0xB45A5AEEU, 0x5BA0A0FBU, \n        0xA45252F6U, 0x763B3B4DU, 0xB7D6D661U, 0x7DB3B3CEU, 0x5229297BU, 0xDDE3E33EU, 0x5E2F2F71U, 0x13848497U, \n        0xA65353F5U, 0xB9D1D168U, 0x00000000U, 0xC1EDED2CU, 0x40202060U, 0xE3FCFC1FU, 0x79B1B1C8U, 0xB65B5BEDU, \n        0xD46A6ABEU, 0x8DCBCB46U, 0x67BEBED9U, 0x7239394BU, 0x944A4ADEU, 0x984C4CD4U, 0xB05858E8U, 0x85CFCF4AU, \n        0xBBD0D06BU, 0xC5EFEF2AU, 0x4FAAAAE5U, 0xEDFBFB16U, 0x864343C5U, 0x9A4D4DD7U, 0x66333355U, 0x11858594U, \n        0x8A4545CFU, 0xE9F9F910U, 0x04020206U, 0xFE7F7F81U, 0xA05050F0U, 0x783C3C44U, 0x259F9FBAU, 0x4BA8A8E3U, \n        0xA25151F3U, 0x5DA3A3FEU, 0x804040C0U, 0x058F8F8AU, 0x3F9292ADU, 0x219D9DBCU, 0x70383848U, 0xF1F5F504U, \n        0x63BCBCDFU, 0x77B6B6C1U, 0xAFDADA75U, 0x42212163U, 0x20101030U, 0xE5FFFF1AU, 0xFDF3F30EU, 0xBFD2D26DU, \n        0x81CDCD4CU, 0x180C0C14U, 0x26131335U, 0xC3ECEC2FU, 0xBE5F5FE1U, 
0x359797A2U, 0x884444CCU, 0x2E171739U, \n        0x93C4C457U, 0x55A7A7F2U, 0xFC7E7E82U, 0x7A3D3D47U, 0xC86464ACU, 0xBA5D5DE7U, 0x3219192BU, 0xE6737395U, \n        0xC06060A0U, 0x19818198U, 0x9E4F4FD1U, 0xA3DCDC7FU, 0x44222266U, 0x542A2A7EU, 0x3B9090ABU, 0x0B888883U, \n        0x8C4646CAU, 0xC7EEEE29U, 0x6BB8B8D3U, 0x2814143CU, 0xA7DEDE79U, 0xBC5E5EE2U, 0x160B0B1DU, 0xADDBDB76U, \n        0xDBE0E03BU, 0x64323256U, 0x743A3A4EU, 0x140A0A1EU, 0x924949DBU, 0x0C06060AU, 0x4824246CU, 0xB85C5CE4U, \n        0x9FC2C25DU, 0xBDD3D36EU, 0x43ACACEFU, 0xC46262A6U, 0x399191A8U, 0x319595A4U, 0xD3E4E437U, 0xF279798BU, \n        0xD5E7E732U, 0x8BC8C843U, 0x6E373759U, 0xDA6D6DB7U, 0x018D8D8CU, 0xB1D5D564U, 0x9C4E4ED2U, 0x49A9A9E0U, \n        0xD86C6CB4U, 0xAC5656FAU, 0xF3F4F407U, 0xCFEAEA25U, 0xCA6565AFU, 0xF47A7A8EU, 0x47AEAEE9U, 0x10080818U, \n        0x6FBABAD5U, 0xF0787888U, 0x4A25256FU, 0x5C2E2E72U, 0x381C1C24U, 0x57A6A6F1U, 0x73B4B4C7U, 0x97C6C651U, \n        0xCBE8E823U, 0xA1DDDD7CU, 0xE874749CU, 0x3E1F1F21U, 0x964B4BDDU, 0x61BDBDDCU, 0x0D8B8B86U, 0x0F8A8A85U, \n        0xE0707090U, 0x7C3E3E42U, 0x71B5B5C4U, 0xCC6666AAU, 0x904848D8U, 0x06030305U, 0xF7F6F601U, 0x1C0E0E12U, \n        0xC26161A3U, 0x6A35355FU, 0xAE5757F9U, 0x69B9B9D0U, 0x17868691U, 0x99C1C158U, 0x3A1D1D27U, 0x279E9EB9U, \n        0xD9E1E138U, 0xEBF8F813U, 0x2B9898B3U, 0x22111133U, 0xD26969BBU, 0xA9D9D970U, 0x078E8E89U, 0x339494A7U, \n        0x2D9B9BB6U, 0x3C1E1E22U, 0x15878792U, 0xC9E9E920U, 0x87CECE49U, 0xAA5555FFU, 0x50282878U, 0xA5DFDF7AU, \n        0x038C8C8FU, 0x59A1A1F8U, 0x09898980U, 0x1A0D0D17U, 0x65BFBFDAU, 0xD7E6E631U, 0x844242C6U, 0xD06868B8U, \n        0x824141C3U, 0x299999B0U, 0x5A2D2D77U, 0x1E0F0F11U, 0x7BB0B0CBU, 0xA85454FCU, 0x6DBBBBD6U, 0x2C16163AU\n});"
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         "{\n        0xC66363A5U, 0xF87C7C84U, 0xEE777799U, 0xF67B7B8DU, 0xFFF2F20DU, 0xD66B6BBDU, 0xDE6F6FB1U, 0x91C5C554U, \n        0x60303050U, 0x02010103U, 0xCE6767A9U, 0x562B2B7DU, 0xE7FEFE19U, 0xB5D7D762U, 0x4DABABE6U, 0xEC76769AU, \n        0x8FCACA45U, 0x1F82829DU, 0x89C9C940U, 0xFA7D7D87U, 0xEFFAFA15U, 0xB25959EBU, 0x8E4747C9U, 0xFBF0F00BU, \n        0x41ADADECU, 0xB3D4D467U, 0x5FA2A2FDU, 0x45AFAFEAU, 0x239C9CBFU, 0x53A4A4F7U, 0xE4727296U, 0x9BC0C05BU, \n        0x75B7B7C2U, 0xE1FDFD1CU, 0x3D9393AEU, 0x4C26266AU, 0x6C36365AU, 0x7E3F3F41U, 0xF5F7F702U, 0x83CCCC4FU, \n        0x6834345CU, 0x51A5A5F4U, 0xD1E5E534U, 0xF9F1F108U, 0xE2717193U, 0xABD8D873U, 0x62313153U, 0x2A15153FU, \n        0x0804040CU, 0x95C7C752U, 0x46232365U, 0x9DC3C35EU, 0x30181828U, 0x379696A1U, 0x0A05050FU, 0x2F9A9AB5U, \n        0x0E070709U, 0x24121236U, 0x1B80809BU, 0xDFE2E23DU, 0xCDEBEB26U, 0x4E272769U, 0x7FB2B2CDU, 0xEA75759FU, \n        0x1209091BU, 0x1D83839EU, 0x582C2C74U, 0x341A1A2EU, 0x361B1B2DU, 0xDC6E6EB2U, 0xB45A5AEEU, 0x5BA0A0FBU, \n        0xA45252F6U, 0x763B3B4DU, 0xB7D6D661U, 0x7DB3B3CEU, 0x5229297BU, 0xDDE3E33EU, 0x5E2F2F71U, 0x13848497U, \n        0xA65353F5U, 0xB9D1D168U, 0x00000000U, 0xC1EDED2CU, 0x40202060U, 0xE3FCFC1FU, 0x79B1B1C8U, 0xB65B5BEDU, \n        0xD46A6ABEU, 0x8DCBCB46U, 0x67BEBED9U, 0x7239394BU, 0x944A4ADEU, 0x984C4CD4U, 0xB05858E8U, 0x85CFCF4AU, \n        0xBBD0D06BU, 0xC5EFEF2AU, 0x4FAAAAE5U, 0xEDFBFB16U, 0x864343C5U, 0x9A4D4DD7U, 0x66333355U, 0x11858594U, \n        0x8A4545CFU, 0xE9F9F910U, 0x04020206U, 0xFE7F7F81U, 0xA05050F0U, 0x783C3C44U, 0x259F9FBAU, 0x4BA8A8E3U, \n        0xA25151F3U, 0x5DA3A3FEU, 0x804040C0U, 0x058F8F8AU, 0x3F9292ADU, 0x219D9DBCU, 0x70383848U, 0xF1F5F504U, \n        0x63BCBCDFU, 0x77B6B6C1U, 0xAFDADA75U, 0x42212163U, 0x20101030U, 0xE5FFFF1AU, 0xFDF3F30EU, 0xBFD2D26DU, \n        0x81CDCD4CU, 0x180C0C14U, 0x26131335U, 0xC3ECEC2FU, 0xBE5F5FE1U, 0x359797A2U, 0x884444CCU, 0x2E171739U, \n        0x93C4C457U, 0x55A7A7F2U, 
0xFC7E7E82U, 0x7A3D3D47U, 0xC86464ACU, 0xBA5D5DE7U, 0x3219192BU, 0xE6737395U, \n        0xC06060A0U, 0x19818198U, 0x9E4F4FD1U, 0xA3DCDC7FU, 0x44222266U, 0x542A2A7EU, 0x3B9090ABU, 0x0B888883U, \n        0x8C4646CAU, 0xC7EEEE29U, 0x6BB8B8D3U, 0x2814143CU, 0xA7DEDE79U, 0xBC5E5EE2U, 0x160B0B1DU, 0xADDBDB76U, \n        0xDBE0E03BU, 0x64323256U, 0x743A3A4EU, 0x140A0A1EU, 0x924949DBU, 0x0C06060AU, 0x4824246CU, 0xB85C5CE4U, \n        0x9FC2C25DU, 0xBDD3D36EU, 0x43ACACEFU, 0xC46262A6U, 0x399191A8U, 0x319595A4U, 0xD3E4E437U, 0xF279798BU, \n        0xD5E7E732U, 0x8BC8C843U, 0x6E373759U, 0xDA6D6DB7U, 0x018D8D8CU, 0xB1D5D564U, 0x9C4E4ED2U, 0x49A9A9E0U, \n        0xD86C6CB4U, 0xAC5656FAU, 0xF3F4F407U, 0xCFEAEA25U, 0xCA6565AFU, 0xF47A7A8EU, 0x47AEAEE9U, 0x10080818U, \n        0x6FBABAD5U, 0xF0787888U, 0x4A25256FU, 0x5C2E2E72U, 0x381C1C24U, 0x57A6A6F1U, 0x73B4B4C7U, 0x97C6C651U, \n        0xCBE8E823U, 0xA1DDDD7CU, 0xE874749CU, 0x3E1F1F21U, 0x964B4BDDU, 0x61BDBDDCU, 0x0D8B8B86U, 0x0F8A8A85U, \n        0xE0707090U, 0x7C3E3E42U, 0x71B5B5C4U, 0xCC6666AAU, 0x904848D8U, 0x06030305U, 0xF7F6F601U, 0x1C0E0E12U, \n        0xC26161A3U, 0x6A35355FU, 0xAE5757F9U, 0x69B9B9D0U, 0x17868691U, 0x99C1C158U, 0x3A1D1D27U, 0x279E9EB9U, \n        0xD9E1E138U, 0xEBF8F813U, 0x2B9898B3U, 0x22111133U, 0xD26969BBU, 0xA9D9D970U, 0x078E8E89U, 0x339494A7U, \n        0x2D9B9BB6U, 0x3C1E1E22U, 0x15878792U, 0xC9E9E920U, 0x87CECE49U, 0xAA5555FFU, 0x50282878U, 0xA5DFDF7AU, \n        0x038C8C8FU, 0x59A1A1F8U, 0x09898980U, 0x1A0D0D17U, 0x65BFBFDAU, 0xD7E6E631U, 0x844242C6U, 0xD06868B8U, \n        0x824141C3U, 0x299999B0U, 0x5A2D2D77U, 0x1E0F0F11U, 0x7BB0B0CBU, 0xA85454FCU, 0x6DBBBBD6U, 0x2C16163AU\n}"
     NewHostVarName:  TS0_host_ct1
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Offset:          5316
     Length:          3666
     ReplacementText: "static dpct::constant_memory<unsigned int, 1> TS1(sycl::range<1>(256), {\n        0xA5C66363U, 0x84F87C7CU, 0x99EE7777U, 0x8DF67B7BU, 0x0DFFF2F2U, 0xBDD66B6BU, 0xB1DE6F6FU, 0x5491C5C5U, \n        0x50603030U, 0x03020101U, 0xA9CE6767U, 0x7D562B2BU, 0x19E7FEFEU, 0x62B5D7D7U, 0xE64DABABU, 0x9AEC7676U, \n        0x458FCACAU, 0x9D1F8282U, 0x4089C9C9U, 0x87FA7D7DU, 0x15EFFAFAU, 0xEBB25959U, 0xC98E4747U, 0x0BFBF0F0U, \n        0xEC41ADADU, 0x67B3D4D4U, 0xFD5FA2A2U, 0xEA45AFAFU, 0xBF239C9CU, 0xF753A4A4U, 0x96E47272U, 0x5B9BC0C0U, \n        0xC275B7B7U, 0x1CE1FDFDU, 0xAE3D9393U, 0x6A4C2626U, 0x5A6C3636U, 0x417E3F3FU, 0x02F5F7F7U, 0x4F83CCCCU, \n        0x5C683434U, 0xF451A5A5U, 0x34D1E5E5U, 0x08F9F1F1U, 0x93E27171U, 0x73ABD8D8U, 0x53623131U, 0x3F2A1515U, \n        0x0C080404U, 0x5295C7C7U, 0x65462323U, 0x5E9DC3C3U, 0x28301818U, 0xA1379696U, 0x0F0A0505U, 0xB52F9A9AU, \n        0x090E0707U, 0x36241212U, 0x9B1B8080U, 0x3DDFE2E2U, 0x26CDEBEBU, 0x694E2727U, 0xCD7FB2B2U, 0x9FEA7575U, \n        0x1B120909U, 0x9E1D8383U, 0x74582C2CU, 0x2E341A1AU, 0x2D361B1BU, 0xB2DC6E6EU, 0xEEB45A5AU, 0xFB5BA0A0U, \n        0xF6A45252U, 0x4D763B3BU, 0x61B7D6D6U, 0xCE7DB3B3U, 0x7B522929U, 0x3EDDE3E3U, 0x715E2F2FU, 0x97138484U, \n        0xF5A65353U, 0x68B9D1D1U, 0x00000000U, 0x2CC1EDEDU, 0x60402020U, 0x1FE3FCFCU, 0xC879B1B1U, 0xEDB65B5BU, \n        0xBED46A6AU, 0x468DCBCBU, 0xD967BEBEU, 0x4B723939U, 0xDE944A4AU, 0xD4984C4CU, 0xE8B05858U, 0x4A85CFCFU, \n        0x6BBBD0D0U, 0x2AC5EFEFU, 0xE54FAAAAU, 0x16EDFBFBU, 0xC5864343U, 0xD79A4D4DU, 0x55663333U, 0x94118585U, \n        0xCF8A4545U, 0x10E9F9F9U, 0x06040202U, 0x81FE7F7FU, 0xF0A05050U, 0x44783C3CU, 0xBA259F9FU, 0xE34BA8A8U, \n        0xF3A25151U, 0xFE5DA3A3U, 0xC0804040U, 0x8A058F8FU, 0xAD3F9292U, 0xBC219D9DU, 0x48703838U, 0x04F1F5F5U, \n        0xDF63BCBCU, 0xC177B6B6U, 0x75AFDADAU, 0x63422121U, 0x30201010U, 0x1AE5FFFFU, 0x0EFDF3F3U, 0x6DBFD2D2U, \n        0x4C81CDCDU, 0x14180C0CU, 0x35261313U, 0x2FC3ECECU, 0xE1BE5F5FU, 
0xA2359797U, 0xCC884444U, 0x392E1717U, \n        0x5793C4C4U, 0xF255A7A7U, 0x82FC7E7EU, 0x477A3D3DU, 0xACC86464U, 0xE7BA5D5DU, 0x2B321919U, 0x95E67373U, \n        0xA0C06060U, 0x98198181U, 0xD19E4F4FU, 0x7FA3DCDCU, 0x66442222U, 0x7E542A2AU, 0xAB3B9090U, 0x830B8888U, \n        0xCA8C4646U, 0x29C7EEEEU, 0xD36BB8B8U, 0x3C281414U, 0x79A7DEDEU, 0xE2BC5E5EU, 0x1D160B0BU, 0x76ADDBDBU, \n        0x3BDBE0E0U, 0x56643232U, 0x4E743A3AU, 0x1E140A0AU, 0xDB924949U, 0x0A0C0606U, 0x6C482424U, 0xE4B85C5CU, \n        0x5D9FC2C2U, 0x6EBDD3D3U, 0xEF43ACACU, 0xA6C46262U, 0xA8399191U, 0xA4319595U, 0x37D3E4E4U, 0x8BF27979U, \n        0x32D5E7E7U, 0x438BC8C8U, 0x596E3737U, 0xB7DA6D6DU, 0x8C018D8DU, 0x64B1D5D5U, 0xD29C4E4EU, 0xE049A9A9U, \n        0xB4D86C6CU, 0xFAAC5656U, 0x07F3F4F4U, 0x25CFEAEAU, 0xAFCA6565U, 0x8EF47A7AU, 0xE947AEAEU, 0x18100808U, \n        0xD56FBABAU, 0x88F07878U, 0x6F4A2525U, 0x725C2E2EU, 0x24381C1CU, 0xF157A6A6U, 0xC773B4B4U, 0x5197C6C6U, \n        0x23CBE8E8U, 0x7CA1DDDDU, 0x9CE87474U, 0x213E1F1FU, 0xDD964B4BU, 0xDC61BDBDU, 0x860D8B8BU, 0x850F8A8AU, \n        0x90E07070U, 0x427C3E3EU, 0xC471B5B5U, 0xAACC6666U, 0xD8904848U, 0x05060303U, 0x01F7F6F6U, 0x121C0E0EU, \n        0xA3C26161U, 0x5F6A3535U, 0xF9AE5757U, 0xD069B9B9U, 0x91178686U, 0x5899C1C1U, 0x273A1D1DU, 0xB9279E9EU, \n        0x38D9E1E1U, 0x13EBF8F8U, 0xB32B9898U, 0x33221111U, 0xBBD26969U, 0x70A9D9D9U, 0x89078E8EU, 0xA7339494U, \n        0xB62D9B9BU, 0x223C1E1EU, 0x92158787U, 0x20C9E9E9U, 0x4987CECEU, 0xFFAA5555U, 0x78502828U, 0x7AA5DFDFU, \n        0x8F038C8CU, 0xF859A1A1U, 0x80098989U, 0x171A0D0DU, 0xDA65BFBFU, 0x31D7E6E6U, 0xC6844242U, 0xB8D06868U, \n        0xC3824141U, 0xB0299999U, 0x775A2D2DU, 0x111E0F0FU, 0xCB7BB0B0U, 0xFCA85454U, 0xD66DBBBBU, 0x3A2C1616U\n});"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         "{\n        0xA5C66363U, 0x84F87C7CU, 0x99EE7777U, 0x8DF67B7BU, 0x0DFFF2F2U, 0xBDD66B6BU, 0xB1DE6F6FU, 0x5491C5C5U, \n        0x50603030U, 0x03020101U, 0xA9CE6767U, 0x7D562B2BU, 0x19E7FEFEU, 0x62B5D7D7U, 0xE64DABABU, 0x9AEC7676U, \n        0x458FCACAU, 0x9D1F8282U, 0x4089C9C9U, 0x87FA7D7DU, 0x15EFFAFAU, 0xEBB25959U, 0xC98E4747U, 0x0BFBF0F0U, \n        0xEC41ADADU, 0x67B3D4D4U, 0xFD5FA2A2U, 0xEA45AFAFU, 0xBF239C9CU, 0xF753A4A4U, 0x96E47272U, 0x5B9BC0C0U, \n        0xC275B7B7U, 0x1CE1FDFDU, 0xAE3D9393U, 0x6A4C2626U, 0x5A6C3636U, 0x417E3F3FU, 0x02F5F7F7U, 0x4F83CCCCU, \n        0x5C683434U, 0xF451A5A5U, 0x34D1E5E5U, 0x08F9F1F1U, 0x93E27171U, 0x73ABD8D8U, 0x53623131U, 0x3F2A1515U, \n        0x0C080404U, 0x5295C7C7U, 0x65462323U, 0x5E9DC3C3U, 0x28301818U, 0xA1379696U, 0x0F0A0505U, 0xB52F9A9AU, \n        0x090E0707U, 0x36241212U, 0x9B1B8080U, 0x3DDFE2E2U, 0x26CDEBEBU, 0x694E2727U, 0xCD7FB2B2U, 0x9FEA7575U, \n        0x1B120909U, 0x9E1D8383U, 0x74582C2CU, 0x2E341A1AU, 0x2D361B1BU, 0xB2DC6E6EU, 0xEEB45A5AU, 0xFB5BA0A0U, \n        0xF6A45252U, 0x4D763B3BU, 0x61B7D6D6U, 0xCE7DB3B3U, 0x7B522929U, 0x3EDDE3E3U, 0x715E2F2FU, 0x97138484U, \n        0xF5A65353U, 0x68B9D1D1U, 0x00000000U, 0x2CC1EDEDU, 0x60402020U, 0x1FE3FCFCU, 0xC879B1B1U, 0xEDB65B5BU, \n        0xBED46A6AU, 0x468DCBCBU, 0xD967BEBEU, 0x4B723939U, 0xDE944A4AU, 0xD4984C4CU, 0xE8B05858U, 0x4A85CFCFU, \n        0x6BBBD0D0U, 0x2AC5EFEFU, 0xE54FAAAAU, 0x16EDFBFBU, 0xC5864343U, 0xD79A4D4DU, 0x55663333U, 0x94118585U, \n        0xCF8A4545U, 0x10E9F9F9U, 0x06040202U, 0x81FE7F7FU, 0xF0A05050U, 0x44783C3CU, 0xBA259F9FU, 0xE34BA8A8U, \n        0xF3A25151U, 0xFE5DA3A3U, 0xC0804040U, 0x8A058F8FU, 0xAD3F9292U, 0xBC219D9DU, 0x48703838U, 0x04F1F5F5U, \n        0xDF63BCBCU, 0xC177B6B6U, 0x75AFDADAU, 0x63422121U, 0x30201010U, 0x1AE5FFFFU, 0x0EFDF3F3U, 0x6DBFD2D2U, \n        0x4C81CDCDU, 0x14180C0CU, 0x35261313U, 0x2FC3ECECU, 0xE1BE5F5FU, 0xA2359797U, 0xCC884444U, 0x392E1717U, \n        0x5793C4C4U, 0xF255A7A7U, 
0x82FC7E7EU, 0x477A3D3DU, 0xACC86464U, 0xE7BA5D5DU, 0x2B321919U, 0x95E67373U, \n        0xA0C06060U, 0x98198181U, 0xD19E4F4FU, 0x7FA3DCDCU, 0x66442222U, 0x7E542A2AU, 0xAB3B9090U, 0x830B8888U, \n        0xCA8C4646U, 0x29C7EEEEU, 0xD36BB8B8U, 0x3C281414U, 0x79A7DEDEU, 0xE2BC5E5EU, 0x1D160B0BU, 0x76ADDBDBU, \n        0x3BDBE0E0U, 0x56643232U, 0x4E743A3AU, 0x1E140A0AU, 0xDB924949U, 0x0A0C0606U, 0x6C482424U, 0xE4B85C5CU, \n        0x5D9FC2C2U, 0x6EBDD3D3U, 0xEF43ACACU, 0xA6C46262U, 0xA8399191U, 0xA4319595U, 0x37D3E4E4U, 0x8BF27979U, \n        0x32D5E7E7U, 0x438BC8C8U, 0x596E3737U, 0xB7DA6D6DU, 0x8C018D8DU, 0x64B1D5D5U, 0xD29C4E4EU, 0xE049A9A9U, \n        0xB4D86C6CU, 0xFAAC5656U, 0x07F3F4F4U, 0x25CFEAEAU, 0xAFCA6565U, 0x8EF47A7AU, 0xE947AEAEU, 0x18100808U, \n        0xD56FBABAU, 0x88F07878U, 0x6F4A2525U, 0x725C2E2EU, 0x24381C1CU, 0xF157A6A6U, 0xC773B4B4U, 0x5197C6C6U, \n        0x23CBE8E8U, 0x7CA1DDDDU, 0x9CE87474U, 0x213E1F1FU, 0xDD964B4BU, 0xDC61BDBDU, 0x860D8B8BU, 0x850F8A8AU, \n        0x90E07070U, 0x427C3E3EU, 0xC471B5B5U, 0xAACC6666U, 0xD8904848U, 0x05060303U, 0x01F7F6F6U, 0x121C0E0EU, \n        0xA3C26161U, 0x5F6A3535U, 0xF9AE5757U, 0xD069B9B9U, 0x91178686U, 0x5899C1C1U, 0x273A1D1DU, 0xB9279E9EU, \n        0x38D9E1E1U, 0x13EBF8F8U, 0xB32B9898U, 0x33221111U, 0xBBD26969U, 0x70A9D9D9U, 0x89078E8EU, 0xA7339494U, \n        0xB62D9B9BU, 0x223C1E1EU, 0x92158787U, 0x20C9E9E9U, 0x4987CECEU, 0xFFAA5555U, 0x78502828U, 0x7AA5DFDFU, \n        0x8F038C8CU, 0xF859A1A1U, 0x80098989U, 0x171A0D0DU, 0xDA65BFBFU, 0x31D7E6E6U, 0xC6844242U, 0xB8D06868U, \n        0xC3824141U, 0xB0299999U, 0x775A2D2DU, 0x111E0F0FU, 0xCB7BB0B0U, 0xFCA85454U, 0xD66DBBBBU, 0x3A2C1616U\n}"
     NewHostVarName:  TS1_host_ct1
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Offset:          8984
     Length:          3666
     ReplacementText: "static dpct::constant_memory<unsigned int, 1> TS2(sycl::range<1>(256), {\n        0x63A5C663U, 0x7C84F87CU, 0x7799EE77U, 0x7B8DF67BU, 0xF20DFFF2U, 0x6BBDD66BU, 0x6FB1DE6FU, 0xC55491C5U, \n        0x30506030U, 0x01030201U, 0x67A9CE67U, 0x2B7D562BU, 0xFE19E7FEU, 0xD762B5D7U, 0xABE64DABU, 0x769AEC76U, \n        0xCA458FCAU, 0x829D1F82U, 0xC94089C9U, 0x7D87FA7DU, 0xFA15EFFAU, 0x59EBB259U, 0x47C98E47U, 0xF00BFBF0U, \n        0xADEC41ADU, 0xD467B3D4U, 0xA2FD5FA2U, 0xAFEA45AFU, 0x9CBF239CU, 0xA4F753A4U, 0x7296E472U, 0xC05B9BC0U, \n        0xB7C275B7U, 0xFD1CE1FDU, 0x93AE3D93U, 0x266A4C26U, 0x365A6C36U, 0x3F417E3FU, 0xF702F5F7U, 0xCC4F83CCU, \n        0x345C6834U, 0xA5F451A5U, 0xE534D1E5U, 0xF108F9F1U, 0x7193E271U, 0xD873ABD8U, 0x31536231U, 0x153F2A15U, \n        0x040C0804U, 0xC75295C7U, 0x23654623U, 0xC35E9DC3U, 0x18283018U, 0x96A13796U, 0x050F0A05U, 0x9AB52F9AU, \n        0x07090E07U, 0x12362412U, 0x809B1B80U, 0xE23DDFE2U, 0xEB26CDEBU, 0x27694E27U, 0xB2CD7FB2U, 0x759FEA75U, \n        0x091B1209U, 0x839E1D83U, 0x2C74582CU, 0x1A2E341AU, 0x1B2D361BU, 0x6EB2DC6EU, 0x5AEEB45AU, 0xA0FB5BA0U, \n        0x52F6A452U, 0x3B4D763BU, 0xD661B7D6U, 0xB3CE7DB3U, 0x297B5229U, 0xE33EDDE3U, 0x2F715E2FU, 0x84971384U, \n        0x53F5A653U, 0xD168B9D1U, 0x00000000U, 0xED2CC1EDU, 0x20604020U, 0xFC1FE3FCU, 0xB1C879B1U, 0x5BEDB65BU, \n        0x6ABED46AU, 0xCB468DCBU, 0xBED967BEU, 0x394B7239U, 0x4ADE944AU, 0x4CD4984CU, 0x58E8B058U, 0xCF4A85CFU, \n        0xD06BBBD0U, 0xEF2AC5EFU, 0xAAE54FAAU, 0xFB16EDFBU, 0x43C58643U, 0x4DD79A4DU, 0x33556633U, 0x85941185U, \n        0x45CF8A45U, 0xF910E9F9U, 0x02060402U, 0x7F81FE7FU, 0x50F0A050U, 0x3C44783CU, 0x9FBA259FU, 0xA8E34BA8U, \n        0x51F3A251U, 0xA3FE5DA3U, 0x40C08040U, 0x8F8A058FU, 0x92AD3F92U, 0x9DBC219DU, 0x38487038U, 0xF504F1F5U, \n        0xBCDF63BCU, 0xB6C177B6U, 0xDA75AFDAU, 0x21634221U, 0x10302010U, 0xFF1AE5FFU, 0xF30EFDF3U, 0xD26DBFD2U, \n        0xCD4C81CDU, 0x0C14180CU, 0x13352613U, 0xEC2FC3ECU, 0x5FE1BE5FU, 
0x97A23597U, 0x44CC8844U, 0x17392E17U, \n        0xC45793C4U, 0xA7F255A7U, 0x7E82FC7EU, 0x3D477A3DU, 0x64ACC864U, 0x5DE7BA5DU, 0x192B3219U, 0x7395E673U, \n        0x60A0C060U, 0x81981981U, 0x4FD19E4FU, 0xDC7FA3DCU, 0x22664422U, 0x2A7E542AU, 0x90AB3B90U, 0x88830B88U, \n        0x46CA8C46U, 0xEE29C7EEU, 0xB8D36BB8U, 0x143C2814U, 0xDE79A7DEU, 0x5EE2BC5EU, 0x0B1D160BU, 0xDB76ADDBU, \n        0xE03BDBE0U, 0x32566432U, 0x3A4E743AU, 0x0A1E140AU, 0x49DB9249U, 0x060A0C06U, 0x246C4824U, 0x5CE4B85CU, \n        0xC25D9FC2U, 0xD36EBDD3U, 0xACEF43ACU, 0x62A6C462U, 0x91A83991U, 0x95A43195U, 0xE437D3E4U, 0x798BF279U, \n        0xE732D5E7U, 0xC8438BC8U, 0x37596E37U, 0x6DB7DA6DU, 0x8D8C018DU, 0xD564B1D5U, 0x4ED29C4EU, 0xA9E049A9U, \n        0x6CB4D86CU, 0x56FAAC56U, 0xF407F3F4U, 0xEA25CFEAU, 0x65AFCA65U, 0x7A8EF47AU, 0xAEE947AEU, 0x08181008U, \n        0xBAD56FBAU, 0x7888F078U, 0x256F4A25U, 0x2E725C2EU, 0x1C24381CU, 0xA6F157A6U, 0xB4C773B4U, 0xC65197C6U, \n        0xE823CBE8U, 0xDD7CA1DDU, 0x749CE874U, 0x1F213E1FU, 0x4BDD964BU, 0xBDDC61BDU, 0x8B860D8BU, 0x8A850F8AU, \n        0x7090E070U, 0x3E427C3EU, 0xB5C471B5U, 0x66AACC66U, 0x48D89048U, 0x03050603U, 0xF601F7F6U, 0x0E121C0EU, \n        0x61A3C261U, 0x355F6A35U, 0x57F9AE57U, 0xB9D069B9U, 0x86911786U, 0xC15899C1U, 0x1D273A1DU, 0x9EB9279EU, \n        0xE138D9E1U, 0xF813EBF8U, 0x98B32B98U, 0x11332211U, 0x69BBD269U, 0xD970A9D9U, 0x8E89078EU, 0x94A73394U, \n        0x9BB62D9BU, 0x1E223C1EU, 0x87921587U, 0xE920C9E9U, 0xCE4987CEU, 0x55FFAA55U, 0x28785028U, 0xDF7AA5DFU, \n        0x8C8F038CU, 0xA1F859A1U, 0x89800989U, 0x0D171A0DU, 0xBFDA65BFU, 0xE631D7E6U, 0x42C68442U, 0x68B8D068U, \n        0x41C38241U, 0x99B02999U, 0x2D775A2DU, 0x0F111E0FU, 0xB0CB7BB0U, 0x54FCA854U, 0xBBD66DBBU, 0x163A2C16U\n});"
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         "{\n        0x63A5C663U, 0x7C84F87CU, 0x7799EE77U, 0x7B8DF67BU, 0xF20DFFF2U, 0x6BBDD66BU, 0x6FB1DE6FU, 0xC55491C5U, \n        0x30506030U, 0x01030201U, 0x67A9CE67U, 0x2B7D562BU, 0xFE19E7FEU, 0xD762B5D7U, 0xABE64DABU, 0x769AEC76U, \n        0xCA458FCAU, 0x829D1F82U, 0xC94089C9U, 0x7D87FA7DU, 0xFA15EFFAU, 0x59EBB259U, 0x47C98E47U, 0xF00BFBF0U, \n        0xADEC41ADU, 0xD467B3D4U, 0xA2FD5FA2U, 0xAFEA45AFU, 0x9CBF239CU, 0xA4F753A4U, 0x7296E472U, 0xC05B9BC0U, \n        0xB7C275B7U, 0xFD1CE1FDU, 0x93AE3D93U, 0x266A4C26U, 0x365A6C36U, 0x3F417E3FU, 0xF702F5F7U, 0xCC4F83CCU, \n        0x345C6834U, 0xA5F451A5U, 0xE534D1E5U, 0xF108F9F1U, 0x7193E271U, 0xD873ABD8U, 0x31536231U, 0x153F2A15U, \n        0x040C0804U, 0xC75295C7U, 0x23654623U, 0xC35E9DC3U, 0x18283018U, 0x96A13796U, 0x050F0A05U, 0x9AB52F9AU, \n        0x07090E07U, 0x12362412U, 0x809B1B80U, 0xE23DDFE2U, 0xEB26CDEBU, 0x27694E27U, 0xB2CD7FB2U, 0x759FEA75U, \n        0x091B1209U, 0x839E1D83U, 0x2C74582CU, 0x1A2E341AU, 0x1B2D361BU, 0x6EB2DC6EU, 0x5AEEB45AU, 0xA0FB5BA0U, \n        0x52F6A452U, 0x3B4D763BU, 0xD661B7D6U, 0xB3CE7DB3U, 0x297B5229U, 0xE33EDDE3U, 0x2F715E2FU, 0x84971384U, \n        0x53F5A653U, 0xD168B9D1U, 0x00000000U, 0xED2CC1EDU, 0x20604020U, 0xFC1FE3FCU, 0xB1C879B1U, 0x5BEDB65BU, \n        0x6ABED46AU, 0xCB468DCBU, 0xBED967BEU, 0x394B7239U, 0x4ADE944AU, 0x4CD4984CU, 0x58E8B058U, 0xCF4A85CFU, \n        0xD06BBBD0U, 0xEF2AC5EFU, 0xAAE54FAAU, 0xFB16EDFBU, 0x43C58643U, 0x4DD79A4DU, 0x33556633U, 0x85941185U, \n        0x45CF8A45U, 0xF910E9F9U, 0x02060402U, 0x7F81FE7FU, 0x50F0A050U, 0x3C44783CU, 0x9FBA259FU, 0xA8E34BA8U, \n        0x51F3A251U, 0xA3FE5DA3U, 0x40C08040U, 0x8F8A058FU, 0x92AD3F92U, 0x9DBC219DU, 0x38487038U, 0xF504F1F5U, \n        0xBCDF63BCU, 0xB6C177B6U, 0xDA75AFDAU, 0x21634221U, 0x10302010U, 0xFF1AE5FFU, 0xF30EFDF3U, 0xD26DBFD2U, \n        0xCD4C81CDU, 0x0C14180CU, 0x13352613U, 0xEC2FC3ECU, 0x5FE1BE5FU, 0x97A23597U, 0x44CC8844U, 0x17392E17U, \n        0xC45793C4U, 0xA7F255A7U, 
0x7E82FC7EU, 0x3D477A3DU, 0x64ACC864U, 0x5DE7BA5DU, 0x192B3219U, 0x7395E673U, \n        0x60A0C060U, 0x81981981U, 0x4FD19E4FU, 0xDC7FA3DCU, 0x22664422U, 0x2A7E542AU, 0x90AB3B90U, 0x88830B88U, \n        0x46CA8C46U, 0xEE29C7EEU, 0xB8D36BB8U, 0x143C2814U, 0xDE79A7DEU, 0x5EE2BC5EU, 0x0B1D160BU, 0xDB76ADDBU, \n        0xE03BDBE0U, 0x32566432U, 0x3A4E743AU, 0x0A1E140AU, 0x49DB9249U, 0x060A0C06U, 0x246C4824U, 0x5CE4B85CU, \n        0xC25D9FC2U, 0xD36EBDD3U, 0xACEF43ACU, 0x62A6C462U, 0x91A83991U, 0x95A43195U, 0xE437D3E4U, 0x798BF279U, \n        0xE732D5E7U, 0xC8438BC8U, 0x37596E37U, 0x6DB7DA6DU, 0x8D8C018DU, 0xD564B1D5U, 0x4ED29C4EU, 0xA9E049A9U, \n        0x6CB4D86CU, 0x56FAAC56U, 0xF407F3F4U, 0xEA25CFEAU, 0x65AFCA65U, 0x7A8EF47AU, 0xAEE947AEU, 0x08181008U, \n        0xBAD56FBAU, 0x7888F078U, 0x256F4A25U, 0x2E725C2EU, 0x1C24381CU, 0xA6F157A6U, 0xB4C773B4U, 0xC65197C6U, \n        0xE823CBE8U, 0xDD7CA1DDU, 0x749CE874U, 0x1F213E1FU, 0x4BDD964BU, 0xBDDC61BDU, 0x8B860D8BU, 0x8A850F8AU, \n        0x7090E070U, 0x3E427C3EU, 0xB5C471B5U, 0x66AACC66U, 0x48D89048U, 0x03050603U, 0xF601F7F6U, 0x0E121C0EU, \n        0x61A3C261U, 0x355F6A35U, 0x57F9AE57U, 0xB9D069B9U, 0x86911786U, 0xC15899C1U, 0x1D273A1DU, 0x9EB9279EU, \n        0xE138D9E1U, 0xF813EBF8U, 0x98B32B98U, 0x11332211U, 0x69BBD269U, 0xD970A9D9U, 0x8E89078EU, 0x94A73394U, \n        0x9BB62D9BU, 0x1E223C1EU, 0x87921587U, 0xE920C9E9U, 0xCE4987CEU, 0x55FFAA55U, 0x28785028U, 0xDF7AA5DFU, \n        0x8C8F038CU, 0xA1F859A1U, 0x89800989U, 0x0D171A0DU, 0xBFDA65BFU, 0xE631D7E6U, 0x42C68442U, 0x68B8D068U, \n        0x41C38241U, 0x99B02999U, 0x2D775A2DU, 0x0F111E0FU, 0xB0CB7BB0U, 0x54FCA854U, 0xBBD66DBBU, 0x163A2C16U\n}"
     NewHostVarName:  TS2_host_ct1
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Offset:          12651
     Length:          3666
     ReplacementText: "static dpct::constant_memory<unsigned int, 1> TS3(sycl::range<1>(256), {\n        0x6363A5C6U, 0x7C7C84F8U, 0x777799EEU, 0x7B7B8DF6U, 0xF2F20DFFU, 0x6B6BBDD6U, 0x6F6FB1DEU, 0xC5C55491U, \n        0x30305060U, 0x01010302U, 0x6767A9CEU, 0x2B2B7D56U, 0xFEFE19E7U, 0xD7D762B5U, 0xABABE64DU, 0x76769AECU, \n        0xCACA458FU, 0x82829D1FU, 0xC9C94089U, 0x7D7D87FAU, 0xFAFA15EFU, 0x5959EBB2U, 0x4747C98EU, 0xF0F00BFBU, \n        0xADADEC41U, 0xD4D467B3U, 0xA2A2FD5FU, 0xAFAFEA45U, 0x9C9CBF23U, 0xA4A4F753U, 0x727296E4U, 0xC0C05B9BU, \n        0xB7B7C275U, 0xFDFD1CE1U, 0x9393AE3DU, 0x26266A4CU, 0x36365A6CU, 0x3F3F417EU, 0xF7F702F5U, 0xCCCC4F83U, \n        0x34345C68U, 0xA5A5F451U, 0xE5E534D1U, 0xF1F108F9U, 0x717193E2U, 0xD8D873ABU, 0x31315362U, 0x15153F2AU, \n        0x04040C08U, 0xC7C75295U, 0x23236546U, 0xC3C35E9DU, 0x18182830U, 0x9696A137U, 0x05050F0AU, 0x9A9AB52FU, \n        0x0707090EU, 0x12123624U, 0x80809B1BU, 0xE2E23DDFU, 0xEBEB26CDU, 0x2727694EU, 0xB2B2CD7FU, 0x75759FEAU, \n        0x09091B12U, 0x83839E1DU, 0x2C2C7458U, 0x1A1A2E34U, 0x1B1B2D36U, 0x6E6EB2DCU, 0x5A5AEEB4U, 0xA0A0FB5BU, \n        0x5252F6A4U, 0x3B3B4D76U, 0xD6D661B7U, 0xB3B3CE7DU, 0x29297B52U, 0xE3E33EDDU, 0x2F2F715EU, 0x84849713U, \n        0x5353F5A6U, 0xD1D168B9U, 0x00000000U, 0xEDED2CC1U, 0x20206040U, 0xFCFC1FE3U, 0xB1B1C879U, 0x5B5BEDB6U, \n        0x6A6ABED4U, 0xCBCB468DU, 0xBEBED967U, 0x39394B72U, 0x4A4ADE94U, 0x4C4CD498U, 0x5858E8B0U, 0xCFCF4A85U, \n        0xD0D06BBBU, 0xEFEF2AC5U, 0xAAAAE54FU, 0xFBFB16EDU, 0x4343C586U, 0x4D4DD79AU, 0x33335566U, 0x85859411U, \n        0x4545CF8AU, 0xF9F910E9U, 0x02020604U, 0x7F7F81FEU, 0x5050F0A0U, 0x3C3C4478U, 0x9F9FBA25U, 0xA8A8E34BU, \n        0x5151F3A2U, 0xA3A3FE5DU, 0x4040C080U, 0x8F8F8A05U, 0x9292AD3FU, 0x9D9DBC21U, 0x38384870U, 0xF5F504F1U, \n        0xBCBCDF63U, 0xB6B6C177U, 0xDADA75AFU, 0x21216342U, 0x10103020U, 0xFFFF1AE5U, 0xF3F30EFDU, 0xD2D26DBFU, \n        0xCDCD4C81U, 0x0C0C1418U, 0x13133526U, 0xECEC2FC3U, 0x5F5FE1BEU, 
0x9797A235U, 0x4444CC88U, 0x1717392EU, \n        0xC4C45793U, 0xA7A7F255U, 0x7E7E82FCU, 0x3D3D477AU, 0x6464ACC8U, 0x5D5DE7BAU, 0x19192B32U, 0x737395E6U, \n        0x6060A0C0U, 0x81819819U, 0x4F4FD19EU, 0xDCDC7FA3U, 0x22226644U, 0x2A2A7E54U, 0x9090AB3BU, 0x8888830BU, \n        0x4646CA8CU, 0xEEEE29C7U, 0xB8B8D36BU, 0x14143C28U, 0xDEDE79A7U, 0x5E5EE2BCU, 0x0B0B1D16U, 0xDBDB76ADU, \n        0xE0E03BDBU, 0x32325664U, 0x3A3A4E74U, 0x0A0A1E14U, 0x4949DB92U, 0x06060A0CU, 0x24246C48U, 0x5C5CE4B8U, \n        0xC2C25D9FU, 0xD3D36EBDU, 0xACACEF43U, 0x6262A6C4U, 0x9191A839U, 0x9595A431U, 0xE4E437D3U, 0x79798BF2U, \n        0xE7E732D5U, 0xC8C8438BU, 0x3737596EU, 0x6D6DB7DAU, 0x8D8D8C01U, 0xD5D564B1U, 0x4E4ED29CU, 0xA9A9E049U, \n        0x6C6CB4D8U, 0x5656FAACU, 0xF4F407F3U, 0xEAEA25CFU, 0x6565AFCAU, 0x7A7A8EF4U, 0xAEAEE947U, 0x08081810U, \n        0xBABAD56FU, 0x787888F0U, 0x25256F4AU, 0x2E2E725CU, 0x1C1C2438U, 0xA6A6F157U, 0xB4B4C773U, 0xC6C65197U, \n        0xE8E823CBU, 0xDDDD7CA1U, 0x74749CE8U, 0x1F1F213EU, 0x4B4BDD96U, 0xBDBDDC61U, 0x8B8B860DU, 0x8A8A850FU, \n        0x707090E0U, 0x3E3E427CU, 0xB5B5C471U, 0x6666AACCU, 0x4848D890U, 0x03030506U, 0xF6F601F7U, 0x0E0E121CU, \n        0x6161A3C2U, 0x35355F6AU, 0x5757F9AEU, 0xB9B9D069U, 0x86869117U, 0xC1C15899U, 0x1D1D273AU, 0x9E9EB927U, \n        0xE1E138D9U, 0xF8F813EBU, 0x9898B32BU, 0x11113322U, 0x6969BBD2U, 0xD9D970A9U, 0x8E8E8907U, 0x9494A733U, \n        0x9B9BB62DU, 0x1E1E223CU, 0x87879215U, 0xE9E920C9U, 0xCECE4987U, 0x5555FFAAU, 0x28287850U, 0xDFDF7AA5U, \n        0x8C8C8F03U, 0xA1A1F859U, 0x89898009U, 0x0D0D171AU, 0xBFBFDA65U, 0xE6E631D7U, 0x4242C684U, 0x6868B8D0U, \n        0x4141C382U, 0x9999B029U, 0x2D2D775AU, 0x0F0F111EU, 0xB0B0CB7BU, 0x5454FCA8U, 0xBBBBD66DU, 0x16163A2CU\n});"
@@ -47,7 +47,7 @@ Replacements:
     NewHostVarName:  TS3_host_ct1
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h'
     Digest:          9ca1105f7e59fd3869626e1b3d07dab5
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -56,7 +56,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -65,7 +65,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o
index ee49b1f59..3bdbb5cf2 100644
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml
index aefd7087f..7f4ffe5de 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          1633
     Length:          18
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          1651
     Length:          26
     ReplacementText: ''
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3084
     Length:          0
     ReplacementText: "/*\nDPCT1009:5: SYCL uses exceptions to report errors and does not use the error codes. The original code was commented out and a warning string was inserted. You need to rewrite this code.\n*/\n"
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3162
     Length:          9
     ReplacementText: 'dpct::err0'
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3236
     Length:          297
     ReplacementText: ''
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3537
     Length:          0
     ReplacementText: "/*\nDPCT1010:6: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n*/\n/*\nDPCT1009:7: SYCL uses exceptions to report errors and does not use the error codes. The original code was commented out and a warning string was inserted. You need to rewrite this code.\n*/\n"
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3625
     Length:          11
     ReplacementText: '0'
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3640
     Length:          18
     ReplacementText: '0'
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Offset:          3809
     Length:          40
     ReplacementText: '"cudaGetErrorString is not supported"/*cudaGetErrorString(0)*/'
@@ -83,7 +83,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h'
     Digest:          50c4dfdc6a83f7ccc7dad8bf75a4c77b
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -92,7 +92,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -101,7 +101,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o
index f0a4633b9..d799d328b 100644
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml
index a221fa7b7..5d0b5448a 100644
--- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml
+++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml
@@ -1,7 +1,7 @@
 ---
-MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h'
+MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h'
 Replacements:
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          0
     Length:          0
     ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
@@ -10,7 +10,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          1819
     Length:          13
     ReplacementText: DPCT_COMPATIBILITY_TEMP
@@ -19,7 +19,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          1850
     Length:          11
     ReplacementText: ''
@@ -28,7 +28,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          1861
     Length:          15
     ReplacementText: __dpct_inline__
@@ -37,7 +37,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          1954
     Length:          72
     ReplacementText: 'd = a ^ b ^ c'
@@ -46,7 +46,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2055
     Length:          11
     ReplacementText: ''
@@ -55,7 +55,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2066
     Length:          15
     ReplacementText: __dpct_inline__
@@ -64,7 +64,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2162
     Length:          72
     ReplacementText: 'd = (a ^ (b & (c ^ a)))'
@@ -73,7 +73,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2263
     Length:          11
     ReplacementText: ''
@@ -82,7 +82,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2274
     Length:          15
     ReplacementText: __dpct_inline__
@@ -91,7 +91,7 @@ Replacements:
     InitStr:         ''
     NewHostVarName:  ''
     BlockLevelFormatFlag: false
-  - FilePath:        '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - FilePath:        '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Offset:          2369
     Length:          72
     ReplacementText: 'd = ((a & (b | c)) | (b & c))'
@@ -101,7 +101,7 @@ Replacements:
     NewHostVarName:  ''
     BlockLevelFormatFlag: true
 MainSourceFilesDigest:
-  - MainSourceFile:  '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
+  - MainSourceFile:  '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h'
     Digest:          d6bf5df3b793bedc87d8c4197f8ce069
 DpctVersion:     18.0.0
 MainHelperFileName: ''
@@ -110,7 +110,7 @@ FeatureMap:      {}
 CompileTargets:  {}
 OptionMap:
   AnalysisScopePath:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA'
     Specified:       false
   AsyncHandler:
     Value:           'false'
@@ -119,7 +119,7 @@ OptionMap:
     Value:           'false'
     Specified:       false
   CompilationsDir:
-    Value:           '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
+    Value:           '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build'
     Specified:       true
   CtadEnabled:
     Value:           'false'
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o
index b23c22c10..0868d2172 100644
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o
index 4121fb83e..d68baa7ed 100644
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp b/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp
index 498392fad..b40b04f14 100644
Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt
new file mode 100644
index 000000000..935f7b79b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt
@@ -0,0 +1,101 @@
+#  Modifications Copyright (C) 2023 Intel Corporation
+
+#  Permission is hereby granted, free of charge, to any person obtaining a copy
+#  of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom
+#  the Software is furnished to do so, subject to the following conditions:
+
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+#  OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+#  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+#  OR OTHER DEALINGS IN THE SOFTWARE.
+
+#  SPDX-License-Identifier: MIT
+
+cmake_minimum_required(VERSION 3.10)
+project(cudaSift C CXX)  # CUDA is not listed as a language; the toolkit is located with find_package(CUDA) further down
+
+set(CMAKE_CXX_STANDARD 17)  # request C++17 for the CXX language
+set(CMAKE_CXX_STANDARD_REQUIRED ON)  # treat the requested standard as a hard requirement
+
+option(DEVICE_TIMER            "Build using Device Timer" OFF)
+set(USE_SM "" CACHE STRING     "Specifies which streaming multiprocessor architecture to use")  # holds a number such as 80, so STRING rather than a BOOL option()
+
+set(DEF_WL_CXX_FLAGS           " -msse2 ")   # workload-specific default flags
+set(DEF_GENERAL_CXX_FLAGS      " -O3 ")      # general default flags
+set(DEF_COMBINED_CXX_FLAGS     "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
+
+find_package(OpenCV REQUIRED)
+find_package(CUDA)
+if(NOT CUDA_FOUND)  # abort now: the cuda_add_executable() target declared later cannot be built without the toolkit
+  message(FATAL_ERROR "CUDA not found. Project will not be built.")
+endif()
+
+# Files passed to cuda_add_executable() as the CUDA part of the target.
+# The related headers are listed together with the .cu files.
+set(cuda_sources
+  cudaImage.cu   cudaImage.h
+  cudaSiftH.cu   cudaSiftH.h
+  matching.cu
+  cudaSiftD.h
+  cudaSift.h
+  cudautils.h
+)
+
+set(sources  # Utility.cpp is shared code that lives in ../common relative to THIS directory
+  ${CMAKE_CURRENT_SOURCE_DIR}/../common/Utility.cpp
+  geomFuncs.cpp
+  mainSift.cpp
+)
+
+include_directories(  # CMAKE_CURRENT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps these paths valid under add_subdirectory()
+  ${CMAKE_CURRENT_SOURCE_DIR}/../common/
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+if(DEVICE_TIMER)
+    message(STATUS "Enabling Device Timer")
+    add_definitions(-DDEVICE_TIMER)  # FindCUDA forwards add_definitions() -D flags to nvcc; add_compile_options() is not forwarded to the .cu compilation
+endif()
+
+# Flag selection:
+#   -DCMAKE_CXX_FLAGS="..."            replaces ALL defaults (general and workload-specific)
+#   -DOVERRIDE_GENERAL_CXX_FLAGS="..." replaces only the general flags; workload flags are still appended
+# Supplying both at once is rejected so the intent is never ambiguous.
+
+if("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+    message(STATUS "Using DEFAULT compilation flags")
+    set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
+elseif("${CMAKE_CXX_FLAGS}" STREQUAL "")
+    message(STATUS "OVERRIDING GENERAL compilation flags")
+    set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}${DEF_WL_CXX_FLAGS}")
+elseif("${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+    message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
+else()
+    message(FATAL_ERROR "Both  CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
+endif()
+
+if(NOT "${USE_SM}" MATCHES "^[0-9]+[a-z]?$")  # an unset or non-numeric USE_SM would hand nvcc an invalid -arch=sm_<value>
+    message(FATAL_ERROR "USE_SM must be an SM architecture number, e.g. -DUSE_SM=80 (got '${USE_SM}')")
+endif()
+set(CUDA_SEPARABLE_COMPILATION ON)
+message(STATUS "CXX  Compilation flags to: ${CMAKE_CXX_FLAGS}")
+cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_${USE_SM})
+target_link_libraries(cudasift ${CUDA_cudadevrt_LIBRARY} ${OpenCV_LIBS})
+# Copy the sources and this build script into the root of the install prefix.
+# cudaSiftD.cu is named explicitly because it is not a member of ${cuda_sources}.
+install(FILES
+  ${cuda_sources}
+  ${sources}
+  cudaSiftD.cu
+  CMakeLists.txt
+  DESTINATION .
+)
+install(FILES data/left.pgm data/righ.pgm DESTINATION data)  # NOTE(review): "righ.pgm" presumably matches the file name on disk - confirm
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt
new file mode 100644
index 000000000..69924b468
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt
@@ -0,0 +1,628 @@
+# This is the CMakeCache file.
+# For build in directory: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+# It was generated by CMake: /usr/bin/cmake
+# You can edit this file to change values found and used by cmake.
+# If you do not want to change any of the values, simply exit the editor.
+# If you do want to change a value, simply edit, save, and exit the editor.
+# The syntax for the file is as follows:
+# KEY:TYPE=VALUE
+# KEY is the name of a variable in the cache.
+# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
+# VALUE is the current value for the KEY.
+
+########################
+# EXTERNAL cache entries
+########################
+
+//Path to a program.
+CMAKE_ADDR2LINE:FILEPATH=/usr/bin/addr2line
+
+//Path to a program.
+CMAKE_AR:FILEPATH=/usr/bin/ar
+
+//Choose the type of build, options are: None Debug Release RelWithDebInfo
+// MinSizeRel ...
+CMAKE_BUILD_TYPE:STRING=
+
+//Enable/Disable color output during build.
+CMAKE_COLOR_MAKEFILE:BOOL=ON
+
+//CXX compiler
+CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++
+
+//A wrapper around 'ar' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_CXX_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
+
+//A wrapper around 'ranlib' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_CXX_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
+
+//Flags used by the CXX compiler during all build types.
+CMAKE_CXX_FLAGS:STRING=
+
+//Flags used by the CXX compiler during DEBUG builds.
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the CXX compiler during MINSIZEREL builds.
+CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the CXX compiler during RELEASE builds.
+CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the CXX compiler during RELWITHDEBINFO builds.
+CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+
+//C compiler
+CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc
+
+//A wrapper around 'ar' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_C_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11
+
+//A wrapper around 'ranlib' adding the appropriate '--plugin' option
+// for the GCC compiler
+CMAKE_C_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11
+
+//Flags used by the C compiler during all build types.
+CMAKE_C_FLAGS:STRING=
+
+//Flags used by the C compiler during DEBUG builds.
+CMAKE_C_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the C compiler during MINSIZEREL builds.
+CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the C compiler during RELEASE builds.
+CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the C compiler during RELWITHDEBINFO builds.
+CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
+
+//Path to a program.
+CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
+
+//Flags used by the linker during all build types.
+CMAKE_EXE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during DEBUG builds.
+CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during MINSIZEREL builds.
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during RELEASE builds.
+CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during RELWITHDEBINFO builds.
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Enable/Disable output of compile commands during generation.
+CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=
+
+//Install path prefix, prepended onto install directories.
+CMAKE_INSTALL_PREFIX:PATH=/usr/local
+
+//Path to a program.
+CMAKE_LINKER:FILEPATH=/usr/bin/ld
+
+//Path to a program.
+CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake
+
+//Flags used by the linker during the creation of modules during
+// all build types.
+CMAKE_MODULE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during the creation of modules during
+// DEBUG builds.
+CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during the creation of modules during
+// MINSIZEREL builds.
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during the creation of modules during
+// RELEASE builds.
+CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during the creation of modules during
+// RELWITHDEBINFO builds.
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Path to a program.
+CMAKE_NM:FILEPATH=/usr/bin/nm
+
+//Path to a program.
+CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy
+
+//Path to a program.
+CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump
+
+//Value Computed by CMake
+CMAKE_PROJECT_DESCRIPTION:STATIC=
+
+//Value Computed by CMake
+CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
+
+//Value Computed by CMake
+CMAKE_PROJECT_NAME:STATIC=cudaSift
+
+//Path to a program.
+CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib
+
+//Path to a program.
+CMAKE_READELF:FILEPATH=/usr/bin/readelf
+
+//Flags used by the linker during the creation of shared libraries
+// during all build types.
+CMAKE_SHARED_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during the creation of shared libraries
+// during DEBUG builds.
+CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during the creation of shared libraries
+// during MINSIZEREL builds.
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during the creation of shared libraries
+// during RELEASE builds.
+CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during the creation of shared libraries
+// during RELWITHDEBINFO builds.
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//If set, runtime paths are not added when installing shared libraries,
+// but are added when building.
+CMAKE_SKIP_INSTALL_RPATH:BOOL=NO
+
+//If set, runtime paths are not added when using shared libraries.
+CMAKE_SKIP_RPATH:BOOL=NO
+
+//Flags used by the linker during the creation of static libraries
+// during all build types.
+CMAKE_STATIC_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during the creation of static libraries
+// during DEBUG builds.
+CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during the creation of static libraries
+// during MINSIZEREL builds.
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during the creation of static libraries
+// during RELEASE builds.
+CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during the creation of static libraries
+// during RELWITHDEBINFO builds.
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Path to a program.
+CMAKE_STRIP:FILEPATH=/usr/bin/strip
+
+//If this value is on, makefiles will be generated without the
+// .SILENT directive, and all commands will be echoed to the console
+// during the make.  This is useful for debugging only. With Visual
+// Studio IDE projects all commands are done without /nologo.
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
+
+//Compile device code in 64 bit mode
+CUDA_64_BIT_DEVICE_CODE:BOOL=ON
+
+//Attach the build rule to the CUDA source file.  Enable only when
+// the CUDA source file is added to at most one target.
+CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE:BOOL=ON
+
+//Generate and parse .cubin files in Device mode.
+CUDA_BUILD_CUBIN:BOOL=OFF
+
+//Build in Emulation mode
+CUDA_BUILD_EMULATION:BOOL=OFF
+
+//"cudart" library
+CUDA_CUDART_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudart.so
+
+//"cuda" library (older versions only).
+CUDA_CUDA_LIBRARY:FILEPATH=CUDA_CUDA_LIBRARY-NOTFOUND
+
+//Directory to put all the output files.  If blank it will default
+// to the CMAKE_CURRENT_BINARY_DIR
+CUDA_GENERATED_OUTPUT_DIR:PATH=
+
+//Generated file extension
+CUDA_HOST_COMPILATION_CPP:BOOL=ON
+
+//Host side compiler used by NVCC
+CUDA_HOST_COMPILER:FILEPATH=/usr/bin/cc
+
+//Path to a program.
+CUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda/bin/nvcc
+
+//Semi-colon delimit multiple arguments. during all build types.
+CUDA_NVCC_FLAGS:STRING=
+
+//Semi-colon delimit multiple arguments. during DEBUG builds.
+CUDA_NVCC_FLAGS_DEBUG:STRING=
+
+//Semi-colon delimit multiple arguments. during MINSIZEREL builds.
+CUDA_NVCC_FLAGS_MINSIZEREL:STRING=
+
+//Semi-colon delimit multiple arguments. during RELEASE builds.
+CUDA_NVCC_FLAGS_RELEASE:STRING=
+
+//Semi-colon delimit multiple arguments. during RELWITHDEBINFO
+// builds.
+CUDA_NVCC_FLAGS_RELWITHDEBINFO:STRING=
+
+//"OpenCL" library
+CUDA_OpenCL_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libOpenCL.so
+
+//Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile
+CUDA_PROPAGATE_HOST_FLAGS:BOOL=ON
+
+//Path to a file.
+CUDA_SDK_ROOT_DIR:PATH=CUDA_SDK_ROOT_DIR-NOTFOUND
+
+//Compile CUDA objects with separable compilation enabled.  Requires
+// CUDA 5.0+
+CUDA_SEPARABLE_COMPILATION:BOOL=OFF
+
+//Path to a file.
+CUDA_TOOLKIT_INCLUDE:PATH=/usr/local/cuda/include
+
+//Toolkit location.
+CUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda
+
+//Use the static version of the CUDA runtime library if available
+CUDA_USE_STATIC_CUDA_RUNTIME:BOOL=ON
+
+//Print out the commands run while compiling the CUDA source file.
+//  With the Makefile generator this defaults to VERBOSE variable
+// specified on the command line, but can be forced on with this
+// option.
+CUDA_VERBOSE_BUILD:BOOL=OFF
+
+//Version of CUDA as computed from nvcc.
+CUDA_VERSION:STRING=12.2
+
+//"cublas" library
+CUDA_cublas_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcublas.so
+
+//"cudadevrt" library
+CUDA_cudadevrt_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudadevrt.a
+
+//static CUDA runtime library
+CUDA_cudart_static_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudart_static.a
+
+//"cufft" library
+CUDA_cufft_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcufft.so
+
+//"cupti" library
+CUDA_cupti_LIBRARY:FILEPATH=/usr/local/cuda/extras/CUPTI/lib64/libcupti.so
+
+//"curand" library
+CUDA_curand_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcurand.so
+
+//"cusolver" library
+CUDA_cusolver_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcusolver.so
+
+//"cusparse" library
+CUDA_cusparse_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcusparse.so
+
+//"nppc" library
+CUDA_nppc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppc.so
+
+//"nppial" library
+CUDA_nppial_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppial.so
+
+//"nppicc" library
+CUDA_nppicc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppicc.so
+
+//"nppidei" library
+CUDA_nppidei_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppidei.so
+
+//"nppif" library
+CUDA_nppif_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppif.so
+
+//"nppig" library
+CUDA_nppig_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppig.so
+
+//"nppim" library
+CUDA_nppim_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppim.so
+
+//"nppist" library
+CUDA_nppist_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppist.so
+
+//"nppisu" library
+CUDA_nppisu_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppisu.so
+
+//"nppitc" library
+CUDA_nppitc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppitc.so
+
+//"npps" library
+CUDA_npps_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnpps.so
+
+//"nvToolsExt" library
+CUDA_nvToolsExt_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnvToolsExt.so
+
+//Path to a library.
+CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.a
+
+//Build using Device Timer
+DEVICE_TIMER:BOOL=OFF
+
+//The directory containing a CMake configuration file for OpenCV.
+OpenCV_DIR:PATH=/usr/lib/x86_64-linux-gnu/cmake/opencv4
+
+//Specifies which streaming multiprocessor architecture to use
+USE_SM:BOOL=80
+
+//Value Computed by CMake
+cudaSift_BINARY_DIR:STATIC=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+
+//Value Computed by CMake
+cudaSift_IS_TOP_LEVEL:STATIC=ON
+
+//Value Computed by CMake
+cudaSift_SOURCE_DIR:STATIC=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+
+
+########################
+# INTERNAL cache entries
+########################
+
+//ADVANCED property for variable: CMAKE_ADDR2LINE
+CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_AR
+CMAKE_AR-ADVANCED:INTERNAL=1
+//This is the directory where this CMakeCache.txt was created
+CMAKE_CACHEFILE_DIR:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+//Major version of cmake used to create the current loaded cache
+CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
+//Minor version of cmake used to create the current loaded cache
+CMAKE_CACHE_MINOR_VERSION:INTERNAL=22
+//Patch version of cmake used to create the current loaded cache
+CMAKE_CACHE_PATCH_VERSION:INTERNAL=1
+//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE
+CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
+//Path to CMake executable.
+CMAKE_COMMAND:INTERNAL=/usr/bin/cmake
+//Path to cpack program executable.
+CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack
+//Path to ctest program executable.
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest
+//ADVANCED property for variable: CMAKE_CXX_COMPILER
+CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
+CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
+CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS
+CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG
+CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL
+CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE
+CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
+CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER
+CMAKE_C_COMPILER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER_AR
+CMAKE_C_COMPILER_AR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_COMPILER_RANLIB
+CMAKE_C_COMPILER_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS
+CMAKE_C_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG
+CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL
+CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE
+CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO
+CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_DLLTOOL
+CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
+//Executable file format
+CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS
+CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
+CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
+CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS
+CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1
+//Name of external makefile project generator.
+CMAKE_EXTRA_GENERATOR:INTERNAL=
+//Name of generator.
+CMAKE_GENERATOR:INTERNAL=Unix Makefiles
+//Generator instance identifier.
+CMAKE_GENERATOR_INSTANCE:INTERNAL=
+//Name of generator platform.
+CMAKE_GENERATOR_PLATFORM:INTERNAL=
+//Name of generator toolset.
+CMAKE_GENERATOR_TOOLSET:INTERNAL=
+//Test CMAKE_HAVE_LIBC_PTHREAD
+CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1
+//Have include pthread.h
+CMAKE_HAVE_PTHREAD_H:INTERNAL=1
+//Source directory with the top level CMakeLists.txt file for this
+// project
+CMAKE_HOME_DIRECTORY:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+//Install .so files without execute permission.
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1
+//ADVANCED property for variable: CMAKE_LINKER
+CMAKE_LINKER-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
+CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS
+CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
+CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
+CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_NM
+CMAKE_NM-ADVANCED:INTERNAL=1
+//number of local generators
+CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1
+//ADVANCED property for variable: CMAKE_OBJCOPY
+CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_OBJDUMP
+CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
+//Platform information initialized
+CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_RANLIB
+CMAKE_RANLIB-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_READELF
+CMAKE_READELF-ADVANCED:INTERNAL=1
+//Path to CMake installation.
+CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.22
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
+CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
+CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH
+CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_SKIP_RPATH
+CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS
+CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG
+CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL
+CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE
+CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CMAKE_STRIP
+CMAKE_STRIP-ADVANCED:INTERNAL=1
+//uname command
+CMAKE_UNAME:INTERNAL=/usr/bin/uname
+//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE
+CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_64_BIT_DEVICE_CODE
+CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1
+//List of intermediate files that are part of the cuda dependency
+// scanning.
+CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend
+//ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
+CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_BUILD_CUBIN
+CUDA_BUILD_CUBIN-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_BUILD_EMULATION
+CUDA_BUILD_EMULATION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_CUDART_LIBRARY
+CUDA_CUDART_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_CUDA_LIBRARY
+CUDA_CUDA_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_GENERATED_OUTPUT_DIR
+CUDA_GENERATED_OUTPUT_DIR-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_HOST_COMPILATION_CPP
+CUDA_HOST_COMPILATION_CPP-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_EXECUTABLE
+CUDA_NVCC_EXECUTABLE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS
+CUDA_NVCC_FLAGS-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_DEBUG
+CUDA_NVCC_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_MINSIZEREL
+CUDA_NVCC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELEASE
+CUDA_NVCC_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELWITHDEBINFO
+CUDA_NVCC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_OpenCL_LIBRARY
+CUDA_OpenCL_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_PROPAGATE_HOST_FLAGS
+CUDA_PROPAGATE_HOST_FLAGS-ADVANCED:INTERNAL=1
+//This is the value of the last time CUDA_SDK_ROOT_DIR was set
+// successfully.
+CUDA_SDK_ROOT_DIR_INTERNAL:INTERNAL=CUDA_SDK_ROOT_DIR-NOTFOUND
+//ADVANCED property for variable: CUDA_SEPARABLE_COMPILATION
+CUDA_SEPARABLE_COMPILATION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_TOOLKIT_INCLUDE
+CUDA_TOOLKIT_INCLUDE-ADVANCED:INTERNAL=1
+//This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was
+// set successfully.
+CUDA_TOOLKIT_ROOT_DIR_INTERNAL:INTERNAL=/usr/local/cuda
+//This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was
+// set successfully.
+CUDA_TOOLKIT_TARGET_DIR_INTERNAL:INTERNAL=/usr/local/cuda
+//ADVANCED property for variable: CUDA_VERBOSE_BUILD
+CUDA_VERBOSE_BUILD-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_VERSION
+CUDA_VERSION-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cublas_LIBRARY
+CUDA_cublas_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cudadevrt_LIBRARY
+CUDA_cudadevrt_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cudart_static_LIBRARY
+CUDA_cudart_static_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cufft_LIBRARY
+CUDA_cufft_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cupti_LIBRARY
+CUDA_cupti_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_curand_LIBRARY
+CUDA_curand_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cusolver_LIBRARY
+CUDA_cusolver_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_cusparse_LIBRARY
+CUDA_cusparse_LIBRARY-ADVANCED:INTERNAL=1
+//Location of make2cmake.cmake
+CUDA_make2cmake:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake
+//ADVANCED property for variable: CUDA_nppc_LIBRARY
+CUDA_nppc_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppial_LIBRARY
+CUDA_nppial_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppicc_LIBRARY
+CUDA_nppicc_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppidei_LIBRARY
+CUDA_nppidei_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppif_LIBRARY
+CUDA_nppif_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppig_LIBRARY
+CUDA_nppig_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppim_LIBRARY
+CUDA_nppim_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppist_LIBRARY
+CUDA_nppist_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppisu_LIBRARY
+CUDA_nppisu_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nppitc_LIBRARY
+CUDA_nppitc_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_npps_LIBRARY
+CUDA_npps_LIBRARY-ADVANCED:INTERNAL=1
+//ADVANCED property for variable: CUDA_nvToolsExt_LIBRARY
+CUDA_nvToolsExt_LIBRARY-ADVANCED:INTERNAL=1
+//Location of parse_cubin.cmake
+CUDA_parse_cubin:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake
+//Location of run_nvcc.cmake
+CUDA_run_nvcc:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/run_nvcc.cmake
+//Details about finding CUDA
+FIND_PACKAGE_MESSAGE_DETAILS_CUDA:INTERNAL=[/usr/local/cuda][/usr/local/cuda/bin/nvcc][/usr/local/cuda/include][/usr/local/cuda/lib64/libcudart_static.a][v12.2()]
+//Details about finding OpenCV
+FIND_PACKAGE_MESSAGE_DETAILS_OpenCV:INTERNAL=[/usr][v4.5.4()]
+//Details about finding Threads
+FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()]
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake
new file mode 100644
index 000000000..488ad3751
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake
@@ -0,0 +1,72 @@
+set(CMAKE_C_COMPILER "/usr/bin/cc")
+set(CMAKE_C_COMPILER_ARG1 "")
+set(CMAKE_C_COMPILER_ID "GNU")
+set(CMAKE_C_COMPILER_VERSION "11.4.0")
+set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
+set(CMAKE_C_COMPILER_WRAPPER "")
+set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
+set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
+set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
+set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
+set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
+set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
+set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
+set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
+
+set(CMAKE_C_PLATFORM_ID "Linux")
+set(CMAKE_C_SIMULATE_ID "")
+set(CMAKE_C_COMPILER_FRONTEND_VARIANT "")
+set(CMAKE_C_SIMULATE_VERSION "")
+
+
+
+
+set(CMAKE_AR "/usr/bin/ar")
+set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_MT "")
+set(CMAKE_COMPILER_IS_GNUCC 1)
+set(CMAKE_C_COMPILER_LOADED 1)
+set(CMAKE_C_COMPILER_WORKS TRUE)
+set(CMAKE_C_ABI_COMPILED TRUE)
+
+set(CMAKE_C_COMPILER_ENV_VAR "CC")
+
+set(CMAKE_C_COMPILER_ID_RUN 1)
+set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
+set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
+set(CMAKE_C_LINKER_PREFERENCE 10)
+
+# Save compiler ABI information.
+set(CMAKE_C_SIZEOF_DATA_PTR "8")
+set(CMAKE_C_COMPILER_ABI "ELF")
+set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN")
+set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+
+if(CMAKE_C_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
+endif()
+
+if(CMAKE_C_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
+endif()
+
+if(CMAKE_C_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+endif()
+
+set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
+endif()
+
+
+
+
+
+set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
+set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s")
+set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib")
+set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake
new file mode 100644
index 000000000..345e9307d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake
@@ -0,0 +1,83 @@
+set(CMAKE_CXX_COMPILER "/usr/bin/c++")
+set(CMAKE_CXX_COMPILER_ARG1 "")
+set(CMAKE_CXX_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILER_VERSION "11.4.0")
+set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
+set(CMAKE_CXX_COMPILER_WRAPPER "")
+set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
+set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
+set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23")
+set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
+set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
+set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
+set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
+set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20")
+set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23")
+
+set(CMAKE_CXX_PLATFORM_ID "Linux")
+set(CMAKE_CXX_SIMULATE_ID "")
+set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "")
+set(CMAKE_CXX_SIMULATE_VERSION "")
+
+
+
+
+set(CMAKE_AR "/usr/bin/ar")
+set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11")
+set(CMAKE_RANLIB "/usr/bin/ranlib")
+set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
+set(CMAKE_LINKER "/usr/bin/ld")
+set(CMAKE_MT "")
+set(CMAKE_COMPILER_IS_GNUCXX 1)
+set(CMAKE_CXX_COMPILER_LOADED 1)
+set(CMAKE_CXX_COMPILER_WORKS TRUE)
+set(CMAKE_CXX_ABI_COMPILED TRUE)
+
+set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
+
+set(CMAKE_CXX_COMPILER_ID_RUN 1)
+set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm)
+set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
+
+foreach (lang C OBJC OBJCXX)
+  if (CMAKE_${lang}_COMPILER_ID_RUN)
+    foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
+      list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
+    endforeach()
+  endif()
+endforeach()
+
+set(CMAKE_CXX_LINKER_PREFERENCE 30)
+set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
+
+# Save compiler ABI information.
+set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
+set(CMAKE_CXX_COMPILER_ABI "ELF")
+set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN")
+set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+
+if(CMAKE_CXX_SIZEOF_DATA_PTR)
+  set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
+endif()
+
+if(CMAKE_CXX_COMPILER_ABI)
+  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
+endif()
+
+if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
+  set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
+endif()
+
+set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
+if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
+  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
+endif()
+
+
+
+
+
+set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
+set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
+set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib")
+set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin
new file mode 100755
index 000000000..a4691337f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin
new file mode 100755
index 000000000..15e6e3f25
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake
new file mode 100644
index 000000000..42ff9747e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake
@@ -0,0 +1,15 @@
+set(CMAKE_HOST_SYSTEM "Linux-5.15.90.1-microsoft-standard-WSL2")
+set(CMAKE_HOST_SYSTEM_NAME "Linux")
+set(CMAKE_HOST_SYSTEM_VERSION "5.15.90.1-microsoft-standard-WSL2")
+set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
+
+
+
+set(CMAKE_SYSTEM "Linux-5.15.90.1-microsoft-standard-WSL2")
+set(CMAKE_SYSTEM_NAME "Linux")
+set(CMAKE_SYSTEM_VERSION "5.15.90.1-microsoft-standard-WSL2")
+set(CMAKE_SYSTEM_PROCESSOR "x86_64")
+
+set(CMAKE_CROSSCOMPILING "FALSE")
+
+set(CMAKE_SYSTEM_LOADED 1)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c
new file mode 100644
index 000000000..41b99d778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c
@@ -0,0 +1,803 @@
+#ifdef __cplusplus
+# error "A C++ compiler has been selected for C."
+#endif
+
+#if defined(__18CXX)
+# define ID_VOID_MAIN
+#endif
+#if defined(__CLASSIC_C__)
+/* cv-qualifiers did not exist in K&R C */
+# define const
+# define volatile
+#endif
+
+#if !defined(__has_include)
+/* If the compiler does not have __has_include, pretend the answer is
+   always no.  */
+#  define __has_include(x) 0
+#endif
+
+
+/* Version number components: V=Version, R=Revision, P=Patch
+   Version date components:   YYYY=Year, MM=Month,   DD=Day  */
+
+#if defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_ID "GNU"
+# endif
+  /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
+     except that a few beta releases use the old format with V=2021.  */
+# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+#  if defined(__INTEL_COMPILER_UPDATE)
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+#  else
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+#  endif
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
+   /* The third version component from --version is an update index,
+      but no macro is provided for it.  */
+#  define COMPILER_VERSION_PATCH DEC(0)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+   /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+# elif defined(__GNUG__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
+# define COMPILER_ID "IntelLLVM"
+#if defined(_MSC_VER)
+# define SIMULATE_ID "MSVC"
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_ID "GNU"
+#endif
+/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
+ * later.  Look for 6 digit vs. 8 digit version number to decide encoding.
+ * VVVV is no smaller than the current year when a version is released.
+ */
+#if __INTEL_LLVM_COMPILER < 1000000L
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER    % 10)
+#else
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER     % 100)
+#endif
+#if defined(_MSC_VER)
+  /* _MSC_VER = VVRR */
+# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#elif defined(__GNUG__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+#endif
+#if defined(__GNUC_MINOR__)
+# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#endif
+#if defined(__GNUC_PATCHLEVEL__)
+# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#endif
+
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__SUNPRO_C)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_C >= 0x5100
+   /* __SUNPRO_C = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# else
+   /* __SUNPRO_CC = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_C    & 0xF)
+# endif
+
+#elif defined(__HP_cc)
+# define COMPILER_ID "HP"
+  /* __HP_cc = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_cc     % 100)
+
+#elif defined(__DECC)
+# define COMPILER_ID "Compaq"
+  /* __DECC_VER = VVRRTPPPP */
+# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECC_VER         % 10000)
+
+#elif defined(__IBMC__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__ibmxl__) && defined(__clang__)
+# define COMPILER_ID "XLClang"
+# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
+# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
+# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
+
+
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMC__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMC__    % 10)
+
+#elif defined(__NVCOMPILER)
+# define COMPILER_ID "NVHPC"
+# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
+# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
+# if defined(__NVCOMPILER_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
+# endif
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+
+#elif defined(__CLANG_FUJITSU)
+# define COMPILER_ID "FujitsuClang"
+# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# define COMPILER_VERSION_INTERNAL_STR __clang_version__
+
+
+#elif defined(__FUJITSU)
+# define COMPILER_ID "Fujitsu"
+# if defined(__FCC_version__)
+#   define COMPILER_VERSION __FCC_version__
+# elif defined(__FCC_major__)
+#   define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+#   define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+#   define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# endif
+# if defined(__fcc_version)
+#   define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
+# elif defined(__FCC_VERSION)
+#   define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
+# endif
+
+
+#elif defined(__ghs__)
+# define COMPILER_ID "GHS"
+/* __GHS_VERSION_NUMBER = VVVVRP */
+# ifdef __GHS_VERSION_NUMBER
+# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
+# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER      % 10)
+# endif
+
+#elif defined(__TINYC__)
+# define COMPILER_ID "TinyCC"
+
+#elif defined(__BCC__)
+# define COMPILER_ID "Bruce"
+
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO"
+
+#elif defined(__ARMCC_VERSION) && !defined(__clang__)
+# define COMPILER_ID "ARMCC"
+#if __ARMCC_VERSION >= 1000000
+  /* __ARMCC_VERSION = VRRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION     % 10000)
+#else
+  /* __ARMCC_VERSION = VRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION    % 10000)
+#endif
+
+
+#elif defined(__clang__) && defined(__apple_build_version__)
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+
+#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
+# define COMPILER_ID "ARMClang"
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION     % 10000)
+# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
+
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+
+#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+# define COMPILER_ID "ADSP"
+#if defined(__VISUALDSPVERSION__)
+  /* __VISUALDSPVERSION__ = 0xVVRRPP00 */
+# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24)
+# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF)
+# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8  & 0xFF)
+#endif
+
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+# if defined(__VER__) && defined(__ICCARM__)
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
+#  define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
+#  define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
+#  define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
+#  define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# endif
+
+#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC)
+# define COMPILER_ID "SDCC"
+# if defined(__SDCC_VERSION_MAJOR)
+#  define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR)
+#  define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR)
+#  define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH)
+# else
+  /* SDCC = VRP */
+#  define COMPILER_VERSION_MAJOR DEC(SDCC/100)
+#  define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10)
+#  define COMPILER_VERSION_PATCH DEC(SDCC    % 10)
+# endif
+
+
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+#ifdef SIMULATE_ID
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif
+
+#ifdef __QNXNTO__
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif
+
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif
+
+#define STRINGIFY_HELPER(X) #X
+#define STRINGIFY(X) STRINGIFY_HELPER(X)
+
+/* Identify known platforms by name.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__MSYS__)
+# define PLATFORM_ID "MSYS"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#elif defined(__WATCOMC__)
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+
+# elif defined(__VXWORKS__)
+#  define PLATFORM_ID "VxWorks"
+
+# else /* unknown platform */
+#  define PLATFORM_ID
+# endif
+
+#elif defined(__INTEGRITY)
+# if defined(INT_178B)
+#  define PLATFORM_ID "Integrity178"
+
+# else /* regular Integrity */
+#  define PLATFORM_ID "Integrity"
+# endif
+
+#else /* unknown platform */
+# define PLATFORM_ID
+
+#endif
+
+/* For windows compilers MSVC and Intel we can determine
+   the architecture of the compiler being used.  This is because
+   the compilers do not have flags that can change the architecture,
+   but rather depend on which compiler is being used
+*/
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+
+# elif defined(_M_ARM64EC)
+#  define ARCHITECTURE_ID "ARM64EC"
+
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# elif defined(_M_ARM64)
+#  define ARCHITECTURE_ID "ARM64"
+
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
+#  endif
+
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# if defined(__ICCARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__ICCRX__)
+#  define ARCHITECTURE_ID "RX"
+
+# elif defined(__ICCRH850__)
+#  define ARCHITECTURE_ID "RH850"
+
+# elif defined(__ICCRL78__)
+#  define ARCHITECTURE_ID "RL78"
+
+# elif defined(__ICCRISCV__)
+#  define ARCHITECTURE_ID "RISCV"
+
+# elif defined(__ICCAVR__)
+#  define ARCHITECTURE_ID "AVR"
+
+# elif defined(__ICC430__)
+#  define ARCHITECTURE_ID "MSP430"
+
+# elif defined(__ICCV850__)
+#  define ARCHITECTURE_ID "V850"
+
+# elif defined(__ICC8051__)
+#  define ARCHITECTURE_ID "8051"
+
+# elif defined(__ICCSTM8__)
+#  define ARCHITECTURE_ID "STM8"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__ghs__)
+# if defined(__PPC64__)
+#  define ARCHITECTURE_ID "PPC64"
+
+# elif defined(__ppc__)
+#  define ARCHITECTURE_ID "PPC"
+
+# elif defined(__ARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__x86_64__)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(__i386__)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__TI_COMPILER_VERSION__)
+# if defined(__TI_ARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__MSP430__)
+#  define ARCHITECTURE_ID "MSP430"
+
+# elif defined(__TMS320C28XX__)
+#  define ARCHITECTURE_ID "TMS320C28x"
+
+# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
+#  define ARCHITECTURE_ID "TMS320C6x"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#else
+#  define ARCHITECTURE_ID
+#endif
+
+/* Convert integer to decimal digit literals.  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+
+/* Convert integer to hex digit literals.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+
+/* Construct a string literal encoding the version number. */
+#ifdef COMPILER_VERSION
+char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
+
+/* Construct a string literal encoding the version number components. */
+#elif defined(COMPILER_VERSION_MAJOR)
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Construct a string literal encoding the internal version number. */
+#ifdef COMPILER_VERSION_INTERNAL
+char const info_version_internal[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
+  'i','n','t','e','r','n','a','l','[',
+  COMPILER_VERSION_INTERNAL,']','\0'};
+#elif defined(COMPILER_VERSION_INTERNAL_STR)
+char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
+#endif
+
+/* Construct a string literal encoding the version number components. */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif
+#  endif
+# endif
+  ']','\0'};
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
+
+
+
+#if !defined(__STDC__) && !defined(__clang__)
+# if defined(_MSC_VER) || defined(__ibmxl__) || defined(__IBMC__)
+#  define C_VERSION "90"
+# else
+#  define C_VERSION
+# endif
+#elif __STDC_VERSION__ > 201710L
+# define C_VERSION "23"
+#elif __STDC_VERSION__ >= 201710L
+# define C_VERSION "17"
+#elif __STDC_VERSION__ >= 201000L
+# define C_VERSION "11"
+#elif __STDC_VERSION__ >= 199901L
+# define C_VERSION "99"
+#else
+# define C_VERSION "90"
+#endif
+const char* info_language_standard_default =
+  "INFO" ":" "standard_default[" C_VERSION "]";
+
+const char* info_language_extensions_default = "INFO" ":" "extensions_default["
+/* !defined(_MSC_VER) to exclude Clang's MSVC compatibility mode. */
+#if (defined(__clang__) || defined(__GNUC__) ||                               \
+     defined(__TI_COMPILER_VERSION__)) &&                                     \
+  !defined(__STRICT_ANSI__) && !defined(_MSC_VER)
+  "ON"
+#else
+  "OFF"
+#endif
+"]";
+
+/*--------------------------------------------------------------------------*/
+
+#ifdef ID_VOID_MAIN
+void main() {}
+#else
+# if defined(__CLASSIC_C__)
+int main(argc, argv) int argc; char *argv[];
+# else
+int main(int argc, char* argv[])
+# endif
+{
+  int require = 0;
+  require += info_compiler[argc];
+  require += info_platform[argc];
+  require += info_arch[argc];
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+#ifdef COMPILER_VERSION_INTERNAL
+  require += info_version_internal[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+  require += info_cray[argc];
+#endif
+  require += info_language_standard_default[argc];
+  require += info_language_extensions_default[argc];
+  (void)argv;
+  return require;
+}
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out
new file mode 100755
index 000000000..c786756ab
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp
new file mode 100644
index 000000000..25c62a8c3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp
@@ -0,0 +1,791 @@
+/* This source file must have a .cpp extension so that all C++ compilers
+   recognize the extension without flags.  Borland does not know .cxx for
+   example.  */
+#ifndef __cplusplus /* refuse to build when this file is compiled as C */
+# error "A C compiler has been selected for C++."
+#endif /* __cplusplus */
+
+#if !defined(__has_include)
+/* If the compiler does not have __has_include, pretend the answer is
+   always no.  */
+#  define __has_include(x) 0
+#endif /* !defined(__has_include) */
+
+
+/* Version number components: V=Version, R=Revision, P=Patch
+   Version date components:   YYYY=Year, MM=Month,   DD=Day  */
+
+#if defined(__COMO__)
+# define COMPILER_ID "Comeau"
+  /* __COMO_VERSION__ = VRR */
+# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100)
+# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100)
+
+#elif defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_ID "GNU"
+# endif
+  /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
+     except that a few beta releases use the old format with V=2021.  */
+# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
+#  if defined(__INTEL_COMPILER_UPDATE)
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
+#  else
+#   define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER   % 10)
+#  endif
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
+#  define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
+   /* The third version component from --version is an update index,
+      but no macro is provided for it.  */
+#  define COMPILER_VERSION_PATCH DEC(0)
+# endif
+# if defined(__INTEL_COMPILER_BUILD_DATE)
+   /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
+#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
+# endif
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# if defined(__GNUC__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+# elif defined(__GNUG__)
+#  define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
+# define COMPILER_ID "IntelLLVM"
+#if defined(_MSC_VER)
+# define SIMULATE_ID "MSVC"
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_ID "GNU"
+#endif
+/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
+ * later.  Look for 6 digit vs. 8 digit version number to decide encoding.
+ * VVVV is no smaller than the current year when a version is released.
+ */
+#if __INTEL_LLVM_COMPILER < 1000000L
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER    % 10)
+#else
+# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
+# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER     % 100)
+#endif
+#if defined(_MSC_VER)
+  /* _MSC_VER = VVRR */
+# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+#endif
+#if defined(__GNUC__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
+#elif defined(__GNUG__)
+# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
+#endif
+#if defined(__GNUC_MINOR__)
+# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
+#endif
+#if defined(__GNUC_PATCHLEVEL__)
+# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+#endif
+
+#elif defined(__PATHCC__)
+# define COMPILER_ID "PathScale"
+# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
+# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
+# if defined(__PATHCC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
+# endif
+
+#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
+# define COMPILER_ID "Embarcadero"
+# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
+# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__     & 0xFFFF)
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+  /* __BORLANDC__ = 0xVRR */
+# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
+# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
+
+#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
+# define COMPILER_ID "Watcom"
+   /* __WATCOMC__ = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "OpenWatcom"
+   /* __WATCOMC__ = VVRP + 1100 */
+# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
+# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
+# if (__WATCOMC__ % 10) > 0
+#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
+# endif
+
+#elif defined(__SUNPRO_CC)
+# define COMPILER_ID "SunPro"
+# if __SUNPRO_CC >= 0x5100
+   /* __SUNPRO_CC = 0xVRRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# else
+   /* __SUNPRO_CC = 0xVRP */
+#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
+#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
+#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC    & 0xF)
+# endif
+
+#elif defined(__HP_aCC)
+# define COMPILER_ID "HP"
+  /* __HP_aCC = VVRRPP */
+# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
+# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
+# define COMPILER_VERSION_PATCH DEC(__HP_aCC     % 100)
+
+#elif defined(__DECCXX)
+# define COMPILER_ID "Compaq"
+  /* __DECCXX_VER = VVRRTPPPP */
+# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
+# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000  % 100)
+# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER         % 10000)
+
+#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
+# define COMPILER_ID "zOS"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__ibmxl__) && defined(__clang__)
+# define COMPILER_ID "XLClang"
+# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
+# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
+# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
+# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
+
+
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
+# define COMPILER_ID "XL"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
+# define COMPILER_ID "VisualAge"
+  /* __IBMCPP__ = VRP */
+# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
+# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__IBMCPP__    % 10)
+
+#elif defined(__NVCOMPILER)
+# define COMPILER_ID "NVHPC"
+# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
+# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
+# if defined(__NVCOMPILER_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
+# endif
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
+# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
+# if defined(__PGIC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
+# endif
+
+#elif defined(_CRAYC)
+# define COMPILER_ID "Cray"
+# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
+# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
+
+#elif defined(__TI_COMPILER_VERSION__)
+# define COMPILER_ID "TI"
+  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
+# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
+# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000   % 1000)
+# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__        % 1000)
+
+#elif defined(__CLANG_FUJITSU)
+# define COMPILER_ID "FujitsuClang"
+# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# define COMPILER_VERSION_INTERNAL_STR __clang_version__
+
+
+#elif defined(__FUJITSU)
+# define COMPILER_ID "Fujitsu"
+# if defined(__FCC_version__)
+#   define COMPILER_VERSION __FCC_version__
+# elif defined(__FCC_major__)
+#   define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
+#   define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
+#   define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
+# endif
+# if defined(__fcc_version)
+#   define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
+# elif defined(__FCC_VERSION)
+#   define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
+# endif
+
+
+#elif defined(__ghs__)
+# define COMPILER_ID "GHS"
+/* __GHS_VERSION_NUMBER = VVVVRP */
+# ifdef __GHS_VERSION_NUMBER
+# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
+# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
+# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER      % 10)
+# endif
+
+#elif defined(__SCO_VERSION__)
+# define COMPILER_ID "SCO"
+
+#elif defined(__ARMCC_VERSION) && !defined(__clang__)
+# define COMPILER_ID "ARMCC"
+#if __ARMCC_VERSION >= 1000000
+  /* __ARMCC_VERSION = VRRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION     % 10000)
+#else
+  /* __ARMCC_VERSION = VRPPPP */
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION    % 10000)
+#endif
+
+
+#elif defined(__clang__) && defined(__apple_build_version__)
+# define COMPILER_ID "AppleClang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
+
+#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
+# define COMPILER_ID "ARMClang"
+  # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
+  # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
+  # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION     % 10000)
+# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
+
+#elif defined(__clang__)
+# define COMPILER_ID "Clang"
+# if defined(_MSC_VER)
+#  define SIMULATE_ID "MSVC"
+# endif
+# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
+# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
+# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
+# if defined(_MSC_VER)
+   /* _MSC_VER = VVRR */
+#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
+#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
+# endif
+
+#elif defined(__GNUC__) || defined(__GNUG__)
+# define COMPILER_ID "GNU"
+# if defined(__GNUC__)
+#  define COMPILER_VERSION_MAJOR DEC(__GNUC__)
+# else
+#  define COMPILER_VERSION_MAJOR DEC(__GNUG__)
+# endif
+# if defined(__GNUC_MINOR__)
+#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
+# endif
+# if defined(__GNUC_PATCHLEVEL__)
+#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
+# endif
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+  /* _MSC_VER = VVRR */
+# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
+# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
+# if defined(_MSC_FULL_VER)
+#  if _MSC_VER >= 1400
+    /* _MSC_FULL_VER = VVRRPPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
+#  else
+    /* _MSC_FULL_VER = VVRRPPPP */
+#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
+#  endif
+# endif
+# if defined(_MSC_BUILD)
+#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
+# endif
+
+#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+# define COMPILER_ID "ADSP"
+#if defined(__VISUALDSPVERSION__)
+  /* __VISUALDSPVERSION__ = 0xVVRRPP00 */
+# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24)
+# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF)
+# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8  & 0xFF)
+#endif
+
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# define COMPILER_ID "IAR"
+# if defined(__VER__) && defined(__ICCARM__)
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
+#  define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
+#  define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
+#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
+#  define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
+#  define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
+#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
+# endif
+
+
+/* These compilers are either not known or too old to define an
+  identification macro.  Try to identify the platform and guess that
+  it is the native compiler.  */
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; /* COMPILER_ID is "" for an unknown compiler */
+#ifdef SIMULATE_ID
+char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
+#endif /* SIMULATE_ID */
+
+#ifdef __QNXNTO__
+char const* qnxnto = "INFO" ":" "qnxnto[]";
+#endif /* __QNXNTO__ */
+
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
+#endif /* __CRAYXT_COMPUTE_LINUX_TARGET */
+
+#define STRINGIFY_HELPER(X) #X /* stringize the argument exactly as given */
+#define STRINGIFY(X) STRINGIFY_HELPER(X) /* macro-expand X first, then stringize the result */
+
+/* Identify known platforms by name.  */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__MSYS__)
+# define PLATFORM_ID "MSYS"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU__)
+# define PLATFORM_ID "Haiku"
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#elif defined(__WATCOMC__)
+# if defined(__LINUX__)
+#  define PLATFORM_ID "Linux"
+
+# elif defined(__DOS__)
+#  define PLATFORM_ID "DOS"
+
+# elif defined(__OS2__)
+#  define PLATFORM_ID "OS2"
+
+# elif defined(__WINDOWS__)
+#  define PLATFORM_ID "Windows3x"
+
+# elif defined(__VXWORKS__)
+#  define PLATFORM_ID "VxWorks"
+
+# else /* unknown platform */
+#  define PLATFORM_ID
+# endif
+
+#elif defined(__INTEGRITY)
+# if defined(INT_178B)
+#  define PLATFORM_ID "Integrity178"
+
+# else /* regular Integrity */
+#  define PLATFORM_ID "Integrity"
+# endif
+
+#else /* unknown platform */
+# define PLATFORM_ID
+
+#endif
+
+/* For the Windows compilers MSVC and Intel we can determine
+   the architecture the compiler targets.  This is because
+   these compilers do not have flags that can change the architecture;
+   it depends instead on which compiler is being used.
+*/
+#if defined(_WIN32) && defined(_MSC_VER)
+# if defined(_M_IA64)
+#  define ARCHITECTURE_ID "IA64"
+
+# elif defined(_M_ARM64EC)
+#  define ARCHITECTURE_ID "ARM64EC"
+
+# elif defined(_M_X64) || defined(_M_AMD64)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# elif defined(_M_ARM64)
+#  define ARCHITECTURE_ID "ARM64"
+
+# elif defined(_M_ARM)
+#  if _M_ARM == 4
+#   define ARCHITECTURE_ID "ARMV4I"
+#  elif _M_ARM == 5
+#   define ARCHITECTURE_ID "ARMV5I"
+#  else
+#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
+#  endif
+
+# elif defined(_M_MIPS)
+#  define ARCHITECTURE_ID "MIPS"
+
+# elif defined(_M_SH)
+#  define ARCHITECTURE_ID "SHx"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__WATCOMC__)
+# if defined(_M_I86)
+#  define ARCHITECTURE_ID "I86"
+
+# elif defined(_M_IX86)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
+# if defined(__ICCARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__ICCRX__)
+#  define ARCHITECTURE_ID "RX"
+
+# elif defined(__ICCRH850__)
+#  define ARCHITECTURE_ID "RH850"
+
+# elif defined(__ICCRL78__)
+#  define ARCHITECTURE_ID "RL78"
+
+# elif defined(__ICCRISCV__)
+#  define ARCHITECTURE_ID "RISCV"
+
+# elif defined(__ICCAVR__)
+#  define ARCHITECTURE_ID "AVR"
+
+# elif defined(__ICC430__)
+#  define ARCHITECTURE_ID "MSP430"
+
+# elif defined(__ICCV850__)
+#  define ARCHITECTURE_ID "V850"
+
+# elif defined(__ICC8051__)
+#  define ARCHITECTURE_ID "8051"
+
+# elif defined(__ICCSTM8__)
+#  define ARCHITECTURE_ID "STM8"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__ghs__)
+# if defined(__PPC64__)
+#  define ARCHITECTURE_ID "PPC64"
+
+# elif defined(__ppc__)
+#  define ARCHITECTURE_ID "PPC"
+
+# elif defined(__ARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__x86_64__)
+#  define ARCHITECTURE_ID "x64"
+
+# elif defined(__i386__)
+#  define ARCHITECTURE_ID "X86"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#elif defined(__TI_COMPILER_VERSION__)
+# if defined(__TI_ARM__)
+#  define ARCHITECTURE_ID "ARM"
+
+# elif defined(__MSP430__)
+#  define ARCHITECTURE_ID "MSP430"
+
+# elif defined(__TMS320C28XX__)
+#  define ARCHITECTURE_ID "TMS320C28x"
+
+# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
+#  define ARCHITECTURE_ID "TMS320C6x"
+
+# else /* unknown architecture */
+#  define ARCHITECTURE_ID ""
+# endif
+
+#else
+#  define ARCHITECTURE_ID
+#endif
+
+/* Convert integer n to eight comma-separated character expressions: its decimal digits, most significant first and zero-padded (only the low eight decimal digits are kept).  */
+#define DEC(n)                   \
+  ('0' + (((n) / 10000000)%10)), \
+  ('0' + (((n) / 1000000)%10)),  \
+  ('0' + (((n) / 100000)%10)),   \
+  ('0' + (((n) / 10000)%10)),    \
+  ('0' + (((n) / 1000)%10)),     \
+  ('0' + (((n) / 100)%10)),      \
+  ('0' + (((n) / 10)%10)),       \
+  ('0' +  ((n) % 10))
+
+/* Convert integer n to eight comma-separated character expressions, one per 4-bit nibble from bits 28-31 down to bits 0-3.  Each is '0' + nibble, so a nibble above 9 yields the characters that follow '9' (':' onward in ASCII), not 'A'-'F'.  */
+#define HEX(n)             \
+  ('0' + ((n)>>28 & 0xF)), \
+  ('0' + ((n)>>24 & 0xF)), \
+  ('0' + ((n)>>20 & 0xF)), \
+  ('0' + ((n)>>16 & 0xF)), \
+  ('0' + ((n)>>12 & 0xF)), \
+  ('0' + ((n)>>8  & 0xF)), \
+  ('0' + ((n)>>4  & 0xF)), \
+  ('0' + ((n)     & 0xF))
+
+/* Construct a string literal encoding the version number; used when COMPILER_VERSION is already a complete string literal. */
+#ifdef COMPILER_VERSION
+char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
+
+/* Otherwise construct a char array from the version number components; MINOR, PATCH and TWEAK are appended only when every preceding component is defined. */
+#elif defined(COMPILER_VERSION_MAJOR)
+char const info_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
+  COMPILER_VERSION_MAJOR,
+# ifdef COMPILER_VERSION_MINOR
+  '.', COMPILER_VERSION_MINOR,
+#  ifdef COMPILER_VERSION_PATCH
+   '.', COMPILER_VERSION_PATCH,
+#   ifdef COMPILER_VERSION_TWEAK
+    '.', COMPILER_VERSION_TWEAK,
+#   endif /* COMPILER_VERSION_TWEAK */
+#  endif /* COMPILER_VERSION_PATCH */
+# endif /* COMPILER_VERSION_MINOR */
+  ']','\0'};
+#endif /* COMPILER_VERSION / COMPILER_VERSION_MAJOR */
+
+/* Construct a string encoding the internal version number: a char array when it is given as character expressions (COMPILER_VERSION_INTERNAL), a string literal when it is given as a string (COMPILER_VERSION_INTERNAL_STR). */
+#ifdef COMPILER_VERSION_INTERNAL
+char const info_version_internal[] = {
+  'I', 'N', 'F', 'O', ':',
+  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
+  'i','n','t','e','r','n','a','l','[',
+  COMPILER_VERSION_INTERNAL,']','\0'};
+#elif defined(COMPILER_VERSION_INTERNAL_STR)
+char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
+#endif /* COMPILER_VERSION_INTERNAL / COMPILER_VERSION_INTERNAL_STR */
+
+/* Construct a char array encoding the version number components of the simulated compiler (see SIMULATE_ID); MINOR, PATCH and TWEAK are appended only when every preceding component is defined. */
+#ifdef SIMULATE_VERSION_MAJOR
+char const info_simulate_version[] = {
+  'I', 'N', 'F', 'O', ':',
+  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
+  SIMULATE_VERSION_MAJOR,
+# ifdef SIMULATE_VERSION_MINOR
+  '.', SIMULATE_VERSION_MINOR,
+#  ifdef SIMULATE_VERSION_PATCH
+   '.', SIMULATE_VERSION_PATCH,
+#   ifdef SIMULATE_VERSION_TWEAK
+    '.', SIMULATE_VERSION_TWEAK,
+#   endif /* SIMULATE_VERSION_TWEAK */
+#  endif /* SIMULATE_VERSION_PATCH */
+# endif /* SIMULATE_VERSION_MINOR */
+  ']','\0'};
+#endif /* SIMULATE_VERSION_MAJOR */
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; /* PLATFORM_ID expands to nothing for an unknown platform, giving "platform[]" */
+char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; /* ARCHITECTURE_ID may likewise be empty, giving "arch[]" */
+
+
+
+#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L
+#  if defined(__INTEL_CXX11_MODE__)
+#    if defined(__cpp_aggregate_nsdmi)
+#      define CXX_STD 201402L /* reported as "14" below */
+#    else
+#      define CXX_STD 201103L /* reported as "11" below */
+#    endif /* __cpp_aggregate_nsdmi */
+#  else
+#    define CXX_STD 199711L /* reported as "98" below */
+#  endif /* __INTEL_CXX11_MODE__ */
+#elif defined(_MSC_VER) && defined(_MSVC_LANG)
+#  define CXX_STD _MSVC_LANG /* with _MSC_VER, take the level from _MSVC_LANG instead of __cplusplus */
+#else
+#  define CXX_STD __cplusplus /* every other compiler: use __cplusplus directly */
+#endif /* CXX_STD selection */
+
+const char* info_language_standard_default = "INFO" ":" "standard_default["
+#if CXX_STD > 202002L /* anything newer than the C++20 value */
+  "23"
+#elif CXX_STD > 201703L /* newer than the C++17 value, up to and including 202002L */
+  "20"
+#elif CXX_STD >= 201703L /* only 201703L itself can reach this branch */
+  "17"
+#elif CXX_STD >= 201402L
+  "14"
+#elif CXX_STD >= 201103L
+  "11"
+#else /* anything older than 201103L */
+  "98"
+#endif /* CXX_STD ladder */
+"]";
+
+const char* info_language_extensions_default = "INFO" ":" "extensions_default["
+/* !defined(_MSC_VER) to exclude Clang's MSVC compatibility mode. */
+#if (defined(__clang__) || defined(__GNUC__) ||                               \
+     defined(__TI_COMPILER_VERSION__)) &&                                     \
+  !defined(__STRICT_ANSI__) && !defined(_MSC_VER)
+  "ON"
+#else /* __STRICT_ANSI__ or _MSC_VER is defined, or none of the three compiler macros is */
+  "OFF"
+#endif /* extensions default */
+"]";
+
+/*--------------------------------------------------------------------------*/
+/* Entry point: reads one character of every info string defined above, indexed by argc, and returns their sum as the exit status. */
+int main(int argc, char* argv[])
+{
+  int require = info_compiler[argc]; /* argc is a run-time value, so the reads cannot be folded away at compile time */
+  require += info_platform[argc];
+  require += info_arch[argc]; /* referenced like every other info string, matching the C variant of this probe */
+#if defined(COMPILER_VERSION) || defined(COMPILER_VERSION_MAJOR) /* info_version exists in both cases */
+  require += info_version[argc];
+#endif
+#if defined(COMPILER_VERSION_INTERNAL) || defined(COMPILER_VERSION_INTERNAL_STR) /* likewise for the internal version */
+  require += info_version_internal[argc];
+#endif
+#ifdef SIMULATE_ID
+  require += info_simulate[argc];
+#endif
+#ifdef SIMULATE_VERSION_MAJOR
+  require += info_simulate_version[argc];
+#endif
+#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
+  require += info_cray[argc];
+#endif
+  require += info_language_standard_default[argc];
+  require += info_language_extensions_default[argc];
+  (void)argv; /* argv is deliberately unused */
+  return require;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out
new file mode 100755
index 000000000..9944be481
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake
new file mode 100644
index 000000000..6e0330d60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake
@@ -0,0 +1,16 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# Relative path conversion top directories.
+set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA")
+set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build")
+
+# Force unix paths in dependencies.
+set(CMAKE_FORCE_UNIX_PATHS 1)
+
+
+# The C and CXX include file regular expressions for this directory.
+set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
+set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
+set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
+set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log
new file mode 100644
index 000000000..44029d7c0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log
@@ -0,0 +1,497 @@
+The system is: Linux - 5.15.90.1-microsoft-standard-WSL2 - x86_64
+Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded.
+Compiler: /usr/bin/cc 
+Build flags: 
+Id flags:  
+
+The output was:
+0
+
+
+Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out"
+
+The C compiler identification is GNU, found in "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out"
+
+Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded.
+Compiler: /usr/bin/c++ 
+Build flags: 
+Id flags:  
+
+The output was:
+0
+
+
+Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out"
+
+The CXX compiler identification is GNU, found in "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out"
+
+Detecting C compiler ABI info compiled with the following output:
+Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp
+
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6496/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_c6496.dir/build.make CMakeFiles/cmTC_c6496.dir/build
+gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o
+/usr/bin/cc   -v -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c
+Using built-in specs.
+COLLECT_GCC=/usr/bin/cc
+OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
+OFFLOAD_TARGET_DEFAULT=1
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2
+Thread model: posix
+Supported LTO compression algorithms: zlib zstd
+gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1 -quiet -v -imultiarch x86_64-linux-gnu /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c -quiet -dumpdir CMakeFiles/cmTC_c6496.dir/ -dumpbase CMakeCCompilerABI.c.c -dumpbase-ext .c -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccP9g5du.s
+GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
+	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
+
+GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
+ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
+ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"
+ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"
+#include "..." search starts here:
+#include <...> search starts here:
+ /usr/lib/gcc/x86_64-linux-gnu/11/include
+ /usr/local/include
+ /usr/include/x86_64-linux-gnu
+ /usr/include
+End of search list.
+GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
+	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
+
+GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
+Compiler executable checksum: 50eaa2331df977b8016186198deb2d18
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/'
+ as -v --64 -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o /tmp/ccP9g5du.s
+GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.'
+Linking C executable cmTC_c6496
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6496.dir/link.txt --verbose=1
+/usr/bin/cc  -v CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -o cmTC_c6496 
+Using built-in specs.
+COLLECT_GCC=/usr/bin/cc
+COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
+OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
+OFFLOAD_TARGET_DEFAULT=1
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2
+Thread model: posix
+Supported LTO compression algorithms: zlib zstd
+gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.'
+ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccAtPUmK.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6496 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.'
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+
+
+
+Parsed C implicit include dir info from above output: rv=done
+  found start of include info
+  found start of implicit include info
+    add: [/usr/lib/gcc/x86_64-linux-gnu/11/include]
+    add: [/usr/local/include]
+    add: [/usr/include/x86_64-linux-gnu]
+    add: [/usr/include]
+  end of search list found
+  collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/11/include] ==> [/usr/lib/gcc/x86_64-linux-gnu/11/include]
+  collapse include dir [/usr/local/include] ==> [/usr/local/include]
+  collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu]
+  collapse include dir [/usr/include] ==> [/usr/include]
+  implicit include dirs: [/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include]
+
+
+Parsed C implicit link information from above output:
+  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
+  ignore line: [Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp]
+  ignore line: []
+  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6496/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_c6496.dir/build.make CMakeFiles/cmTC_c6496.dir/build]
+  ignore line: [gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp']
+  ignore line: [Building C object CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o]
+  ignore line: [/usr/bin/cc   -v -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/cc]
+  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
+  ignore line: [OFFLOAD_TARGET_DEFAULT=1]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2]
+  ignore line: [Thread model: posix]
+  ignore line: [Supported LTO compression algorithms: zlib zstd]
+  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1 -quiet -v -imultiarch x86_64-linux-gnu /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c -quiet -dumpdir CMakeFiles/cmTC_c6496.dir/ -dumpbase CMakeCCompilerABI.c.c -dumpbase-ext .c -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccP9g5du.s]
+  ignore line: [GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
+  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
+  ignore line: []
+  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
+  ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"]
+  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"]
+  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"]
+  ignore line: [#include "..." search starts here:]
+  ignore line: [#include <...> search starts here:]
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/include]
+  ignore line: [ /usr/local/include]
+  ignore line: [ /usr/include/x86_64-linux-gnu]
+  ignore line: [ /usr/include]
+  ignore line: [End of search list.]
+  ignore line: [GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
+  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
+  ignore line: []
+  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
+  ignore line: [Compiler executable checksum: 50eaa2331df977b8016186198deb2d18]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/']
+  ignore line: [ as -v --64 -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o /tmp/ccP9g5du.s]
+  ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.']
+  ignore line: [Linking C executable cmTC_c6496]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6496.dir/link.txt --verbose=1]
+  ignore line: [/usr/bin/cc  -v CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -o cmTC_c6496 ]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/cc]
+  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper]
+  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
+  ignore line: [OFFLOAD_TARGET_DEFAULT=1]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2]
+  ignore line: [Thread model: posix]
+  ignore line: [Supported LTO compression algorithms: zlib zstd]
+  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccAtPUmK.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6496 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore
+    arg [-plugin] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore
+    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/ccAtPUmK.res] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [--build-id] ==> ignore
+    arg [--eh-frame-hdr] ==> ignore
+    arg [-m] ==> ignore
+    arg [elf_x86_64] ==> ignore
+    arg [--hash-style=gnu] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-dynamic-linker] ==> ignore
+    arg [/lib64/ld-linux-x86-64.so.2] ==> ignore
+    arg [-pie] ==> ignore
+    arg [-znow] ==> ignore
+    arg [-zrelro] ==> ignore
+    arg [-o] ==> ignore
+    arg [cmTC_c6496] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib]
+    arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu]
+    arg [-L/lib/../lib] ==> dir [/lib/../lib]
+    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
+    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..]
+    arg [CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o] ==> ignore
+    arg [-lgcc] ==> lib [gcc]
+    arg [--push-state] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [--pop-state] ==> ignore
+    arg [-lc] ==> lib [c]
+    arg [-lgcc] ==> lib [gcc]
+    arg [--push-state] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [--pop-state] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> [/usr/lib/x86_64-linux-gnu/Scrt1.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> [/usr/lib/x86_64-linux-gnu/crti.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> [/usr/lib/x86_64-linux-gnu/crtn.o]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11] ==> [/usr/lib/gcc/x86_64-linux-gnu/11]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> [/usr/lib]
+  collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu]
+  collapse library dir [/lib/../lib] ==> [/lib]
+  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/../lib] ==> [/usr/lib]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> [/usr/lib]
+  implicit libs: [gcc;gcc_s;c;gcc;gcc_s]
+  implicit objs: [/usr/lib/x86_64-linux-gnu/Scrt1.o;/usr/lib/x86_64-linux-gnu/crti.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o;/usr/lib/x86_64-linux-gnu/crtn.o]
+  implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib]
+  implicit fwks: []
+
+
+Detecting CXX compiler ABI info compiled with the following output:
+Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp
+
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_871bc/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_871bc.dir/build.make CMakeFiles/cmTC_871bc.dir/build
+gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+Building CXX object CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o
+/usr/bin/c++   -v -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp
+Using built-in specs.
+COLLECT_GCC=/usr/bin/c++
+OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
+OFFLOAD_TARGET_DEFAULT=1
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2
+Thread model: posix
+Supported LTO compression algorithms: zlib zstd
+gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_871bc.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cczm4rAC.s
+GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
+	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
+
+GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
+ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"
+ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
+ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"
+ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"
+#include "..." search starts here:
+#include <...> search starts here:
+ /usr/include/c++/11
+ /usr/include/x86_64-linux-gnu/c++/11
+ /usr/include/c++/11/backward
+ /usr/lib/gcc/x86_64-linux-gnu/11/include
+ /usr/local/include
+ /usr/include/x86_64-linux-gnu
+ /usr/include
+End of search list.
+GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
+	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
+
+GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
+Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/'
+ as -v --64 -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o /tmp/cczm4rAC.s
+GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.'
+Linking CXX executable cmTC_871bc
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_871bc.dir/link.txt --verbose=1
+/usr/bin/c++  -v CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_871bc 
+Using built-in specs.
+COLLECT_GCC=/usr/bin/c++
+COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
+OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
+OFFLOAD_TARGET_DEFAULT=1
+Target: x86_64-linux-gnu
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2
+Thread model: posix
+Supported LTO compression algorithms: zlib zstd
+gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.'
+ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccvdFTTw.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_871bc /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.'
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+
+
+
+Parsed CXX implicit include dir info from above output: rv=done
+  found start of include info
+  found start of implicit include info
+    add: [/usr/include/c++/11]
+    add: [/usr/include/x86_64-linux-gnu/c++/11]
+    add: [/usr/include/c++/11/backward]
+    add: [/usr/lib/gcc/x86_64-linux-gnu/11/include]
+    add: [/usr/local/include]
+    add: [/usr/include/x86_64-linux-gnu]
+    add: [/usr/include]
+  end of search list found
+  collapse include dir [/usr/include/c++/11] ==> [/usr/include/c++/11]
+  collapse include dir [/usr/include/x86_64-linux-gnu/c++/11] ==> [/usr/include/x86_64-linux-gnu/c++/11]
+  collapse include dir [/usr/include/c++/11/backward] ==> [/usr/include/c++/11/backward]
+  collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/11/include] ==> [/usr/lib/gcc/x86_64-linux-gnu/11/include]
+  collapse include dir [/usr/local/include] ==> [/usr/local/include]
+  collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu]
+  collapse include dir [/usr/include] ==> [/usr/include]
+  implicit include dirs: [/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include]
+
+
+Parsed CXX implicit link information from above output:
+  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
+  ignore line: [Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp]
+  ignore line: []
+  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_871bc/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_871bc.dir/build.make CMakeFiles/cmTC_871bc.dir/build]
+  ignore line: [gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp']
+  ignore line: [Building CXX object CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o]
+  ignore line: [/usr/bin/c++   -v -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/c++]
+  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
+  ignore line: [OFFLOAD_TARGET_DEFAULT=1]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2]
+  ignore line: [Thread model: posix]
+  ignore line: [Supported LTO compression algorithms: zlib zstd]
+  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_871bc.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cczm4rAC.s]
+  ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
+  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
+  ignore line: []
+  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
+  ignore line: [ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"]
+  ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"]
+  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"]
+  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"]
+  ignore line: [#include "..." search starts here:]
+  ignore line: [#include <...> search starts here:]
+  ignore line: [ /usr/include/c++/11]
+  ignore line: [ /usr/include/x86_64-linux-gnu/c++/11]
+  ignore line: [ /usr/include/c++/11/backward]
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/include]
+  ignore line: [ /usr/local/include]
+  ignore line: [ /usr/include/x86_64-linux-gnu]
+  ignore line: [ /usr/include]
+  ignore line: [End of search list.]
+  ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
+  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
+  ignore line: []
+  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
+  ignore line: [Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/']
+  ignore line: [ as -v --64 -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o /tmp/cczm4rAC.s]
+  ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.']
+  ignore line: [Linking CXX executable cmTC_871bc]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_871bc.dir/link.txt --verbose=1]
+  ignore line: [/usr/bin/c++  -v CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_871bc ]
+  ignore line: [Using built-in specs.]
+  ignore line: [COLLECT_GCC=/usr/bin/c++]
+  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper]
+  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
+  ignore line: [OFFLOAD_TARGET_DEFAULT=1]
+  ignore line: [Target: x86_64-linux-gnu]
+  ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2]
+  ignore line: [Thread model: posix]
+  ignore line: [Supported LTO compression algorithms: zlib zstd]
+  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
+  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
+  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccvdFTTw.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_871bc /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore
+    arg [-plugin] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore
+    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/ccvdFTTw.res] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lc] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
+    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
+    arg [--build-id] ==> ignore
+    arg [--eh-frame-hdr] ==> ignore
+    arg [-m] ==> ignore
+    arg [elf_x86_64] ==> ignore
+    arg [--hash-style=gnu] ==> ignore
+    arg [--as-needed] ==> ignore
+    arg [-dynamic-linker] ==> ignore
+    arg [/lib64/ld-linux-x86-64.so.2] ==> ignore
+    arg [-pie] ==> ignore
+    arg [-znow] ==> ignore
+    arg [-zrelro] ==> ignore
+    arg [-o] ==> ignore
+    arg [cmTC_871bc] ==> ignore
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib]
+    arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu]
+    arg [-L/lib/../lib] ==> dir [/lib/../lib]
+    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
+    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
+    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..]
+    arg [CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore
+    arg [-lstdc++] ==> lib [stdc++]
+    arg [-lm] ==> lib [m]
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [-lgcc] ==> lib [gcc]
+    arg [-lc] ==> lib [c]
+    arg [-lgcc_s] ==> lib [gcc_s]
+    arg [-lgcc] ==> lib [gcc]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o]
+    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> [/usr/lib/x86_64-linux-gnu/Scrt1.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> [/usr/lib/x86_64-linux-gnu/crti.o]
+  collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> [/usr/lib/x86_64-linux-gnu/crtn.o]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11] ==> [/usr/lib/gcc/x86_64-linux-gnu/11]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> [/usr/lib]
+  collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu]
+  collapse library dir [/lib/../lib] ==> [/lib]
+  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  collapse library dir [/usr/lib/../lib] ==> [/usr/lib]
+  collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> [/usr/lib]
+  implicit libs: [stdc++;m;gcc_s;gcc;c;gcc_s;gcc]
+  implicit objs: [/usr/lib/x86_64-linux-gnu/Scrt1.o;/usr/lib/x86_64-linux-gnu/crti.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o;/usr/lib/x86_64-linux-gnu/crtn.o]
+  implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib]
+  implicit fwks: []
+
+
+Determining if the include file pthread.h exists passed with the following output:
+Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp
+
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_924ae/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_924ae.dir/build.make CMakeFiles/cmTC_924ae.dir/build
+gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o
+/usr/bin/cc   -fPIC  -o CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.c
+Linking C executable cmTC_924ae
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_924ae.dir/link.txt --verbose=1
+/usr/bin/cc -fPIC  CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o -o cmTC_924ae 
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+
+
+
+Performing C SOURCE FILE Test CMAKE_HAVE_LIBC_PTHREAD succeeded with the following output:
+Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp
+
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_31011/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_31011.dir/build.make CMakeFiles/cmTC_31011.dir/build
+gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+Building C object CMakeFiles/cmTC_31011.dir/src.c.o
+/usr/bin/cc -DCMAKE_HAVE_LIBC_PTHREAD  -fPIC  -o CMakeFiles/cmTC_31011.dir/src.c.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp/src.c
+Linking C executable cmTC_31011
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_31011.dir/link.txt --verbose=1
+/usr/bin/cc -fPIC  CMakeFiles/cmTC_31011.dir/src.c.o -o cmTC_31011 
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'
+
+
+Source file was:
+#include <pthread.h>
+
+static void* test_func(void* data)
+{
+  return data;
+}
+
+int main(void)
+{
+  pthread_t thread;
+  pthread_create(&thread, NULL, test_func, NULL);
+  pthread_detach(thread);
+  pthread_cancel(thread);
+  pthread_join(thread, NULL);
+  pthread_atfork(NULL, NULL, NULL);
+  pthread_exit(NULL);
+
+  return 0;
+}
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt
new file mode 100644
index 000000000..17b99f247
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt
@@ -0,0 +1,5 @@
+# Hashes of file build rules.
+26f0f991a0077627a6b7828f81498ad5 CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o
+55db74aac8a85fdaacfdffb72e447185 CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o
+3f710a41fdff272f63aa0a841d96d42f CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o
+f81070b22b3ab370c5639d1f7d659f2d CMakeFiles/cudasift.dir/cudasift_intermediate_link.o
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake
new file mode 100644
index 000000000..9c4baa2ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake
@@ -0,0 +1,149 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# The generator used is:
+set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles")
+
+# The top level Makefile was generated from the following files:
+set(CMAKE_MAKEFILE_DEPENDS
+  "CMakeCache.txt"
+  "../CMakeLists.txt"
+  "CMakeFiles/3.22.1/CMakeCCompiler.cmake"
+  "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake"
+  "CMakeFiles/3.22.1/CMakeSystem.cmake"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend"
+  "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend"
+  "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVConfig-version.cmake"
+  "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVConfig.cmake"
+  "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVModules-release.cmake"
+  "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVModules.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeCCompiler.cmake.in"
+  "/usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c"
+  "/usr/share/cmake-3.22/Modules/CMakeCInformation.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeCXXCompiler.cmake.in"
+  "/usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp"
+  "/usr/share/cmake-3.22/Modules/CMakeCXXInformation.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeCommonLanguageInclude.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeCompilerIdDetection.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCXXCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCompileFeatures.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCompilerABI.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineCompilerId.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeDetermineSystem.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeFindBinUtils.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeGenericSystem.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeInitializeConfigs.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeLanguageInformation.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeParseImplicitIncludeInfo.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeParseImplicitLinkInfo.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeParseLibraryArchitecture.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeSystem.cmake.in"
+  "/usr/share/cmake-3.22/Modules/CMakeSystemSpecificInformation.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeSystemSpecificInitialize.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeTestCCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeTestCXXCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeTestCompilerCommon.cmake"
+  "/usr/share/cmake-3.22/Modules/CMakeUnixFindMake.cmake"
+  "/usr/share/cmake-3.22/Modules/CheckCSourceCompiles.cmake"
+  "/usr/share/cmake-3.22/Modules/CheckIncludeFile.c.in"
+  "/usr/share/cmake-3.22/Modules/CheckIncludeFile.cmake"
+  "/usr/share/cmake-3.22/Modules/CheckLibraryExists.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/ADSP-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/ARMCC-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/ARMClang-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/AppleClang-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Borland-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Bruce-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/CMakeCommonCompilerMacros.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Clang-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Clang-DetermineCompilerInternal.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Compaq-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Cray-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Embarcadero-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Fujitsu-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/FujitsuClang-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GHS-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU-C.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU-CXX.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU-FindBinUtils.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/GNU.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/HP-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/HP-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/IAR-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Intel-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/IntelLLVM-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/MSVC-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/NVHPC-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/NVIDIA-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/PGI-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/PathScale-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/SCO-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/SDCC-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/SunPro-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/TI-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/Watcom-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/XL-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/XL-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/XLClang-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/zOS-C-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake"
+  "/usr/share/cmake-3.22/Modules/FindCUDA.cmake"
+  "/usr/share/cmake-3.22/Modules/FindCUDA/run_nvcc.cmake"
+  "/usr/share/cmake-3.22/Modules/FindCUDA/select_compute_arch.cmake"
+  "/usr/share/cmake-3.22/Modules/FindPackageHandleStandardArgs.cmake"
+  "/usr/share/cmake-3.22/Modules/FindPackageMessage.cmake"
+  "/usr/share/cmake-3.22/Modules/FindThreads.cmake"
+  "/usr/share/cmake-3.22/Modules/Internal/CheckSourceCompiles.cmake"
+  "/usr/share/cmake-3.22/Modules/Internal/FeatureTesting.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/Linux-Determine-CXX.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU-C.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU-CXX.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/Linux.cmake"
+  "/usr/share/cmake-3.22/Modules/Platform/UnixPaths.cmake"
+  )
+
+# The corresponding makefile is:
+set(CMAKE_MAKEFILE_OUTPUTS
+  "Makefile"
+  "CMakeFiles/cmake.check_cache"
+  )
+
+# Byproducts of CMake generate step:
+set(CMAKE_MAKEFILE_PRODUCTS
+  "CMakeFiles/3.22.1/CMakeSystem.cmake"
+  "CMakeFiles/3.22.1/CMakeCCompiler.cmake"
+  "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake"
+  "CMakeFiles/3.22.1/CMakeCCompiler.cmake"
+  "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake"
+  "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake"
+  "CMakeFiles/CMakeDirectoryInformation.cmake"
+  )
+
+# Dependency information for all targets:
+set(CMAKE_DEPEND_INFO_FILES
+  "CMakeFiles/cudasift.dir/DependInfo.cmake"
+  )
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2 b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2
new file mode 100644
index 000000000..5b83d3e00
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2
@@ -0,0 +1,112 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# Default target executed when no arguments are given to make.
+default_target: all
+.PHONY : default_target
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+# Disable VCS-based implicit rules.
+% : %,v
+
+# Disable VCS-based implicit rules.
+% : RCS/%
+
+# Disable VCS-based implicit rules.
+% : RCS/%,v
+
+# Disable VCS-based implicit rules.
+% : SCCS/s.%
+
+# Disable VCS-based implicit rules.
+% : s.%
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Command-line flag to silence nested $(MAKE).
+$(VERBOSE)MAKESILENT = -s
+
+#Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E rm -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+
+#=============================================================================
+# Directory level rules for the build root directory
+
+# The main recursive "all" target.
+all: CMakeFiles/cudasift.dir/all
+.PHONY : all
+
+# The main recursive "preinstall" target.
+preinstall:
+.PHONY : preinstall
+
+# The main recursive "clean" target.
+clean: CMakeFiles/cudasift.dir/clean
+.PHONY : clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/cudasift.dir
+
+# All Build rule for target.
+CMakeFiles/cudasift.dir/all:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/depend
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5,6,7,8 "Built target cudasift"
+.PHONY : CMakeFiles/cudasift.dir/all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/cudasift.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 8
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/cudasift.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 0
+.PHONY : CMakeFiles/cudasift.dir/rule
+
+# Convenience name for target.
+cudasift: CMakeFiles/cudasift.dir/rule
+.PHONY : cudasift
+
+# clean rule for target.
+CMakeFiles/cudasift.dir/clean:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/clean
+.PHONY : CMakeFiles/cudasift.dir/clean
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+	$(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt
new file mode 100644
index 000000000..88efc46e1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt
@@ -0,0 +1,7 @@
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/edit_cache.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/rebuild_cache.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/list_install_components.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install/local.dir
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install/strip.dir
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache
new file mode 100644
index 000000000..3dccd7317
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache
@@ -0,0 +1 @@
+# This file is generated by cmake for dependency checking of the CMakeCache.txt file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake
new file mode 100644
index 000000000..059d1e524
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake
@@ -0,0 +1,21 @@
+
+# Consider dependencies only in project.
+set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
+
+# The set of languages for which implicit dependencies are needed:
+set(CMAKE_DEPENDS_LANGUAGES
+  )
+
+# The set of dependency files which are needed:
+set(CMAKE_DEPENDS_DEPENDENCY_FILES
+  "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp" "CMakeFiles/cudasift.dir/geomFuncs.cpp.o" "gcc" "CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d"
+  "/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp" "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" "gcc" "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d"
+  "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp" "CMakeFiles/cudasift.dir/mainSift.cpp.o" "gcc" "CMakeFiles/cudasift.dir/mainSift.cpp.o.d"
+  )
+
+# Targets to which this target links.
+set(CMAKE_TARGET_LINKED_INFO_FILES
+  )
+
+# Fortran module output directory.
+set(CMAKE_Fortran_TARGET_MODULE_DIR "")
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make
new file mode 100644
index 000000000..8ae41e034
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make
@@ -0,0 +1,237 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# Delete rule output on recipe failure.
+.DELETE_ON_ERROR:
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+# Disable VCS-based implicit rules.
+% : %,v
+
+# Disable VCS-based implicit rules.
+% : RCS/%
+
+# Disable VCS-based implicit rules.
+% : RCS/%,v
+
+# Disable VCS-based implicit rules.
+% : SCCS/s.%
+
+# Disable VCS-based implicit rules.
+% : s.%
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Command-line flag to silence nested $(MAKE).
+$(VERBOSE)MAKESILENT = -s
+
+#Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E rm -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+
+# Include any dependencies generated for this target.
+include CMakeFiles/cudasift.dir/depend.make
+# Include any dependencies generated by the compiler for this target.
+include CMakeFiles/cudasift.dir/compiler_depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/cudasift.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/cudasift.dir/flags.make
+
+CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend
+CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake
+CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: ../cudaImage.cu
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o"
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.cmake
+
+CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend
+CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake
+CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: ../cudaSiftH.cu
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o"
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.cmake
+
+CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend
+CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake
+CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: ../matching.cu
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o"
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.cmake
+
+CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o
+CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o
+CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC intermediate link file CMakeFiles/cudasift.dir/cudasift_intermediate_link.o"
+	/usr/local/cuda/bin/nvcc -arch=sm_80 -m64 -ccbin /usr/bin/cc -dlink /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/./cudasift_intermediate_link.o
+
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: CMakeFiles/cudasift.dir/flags.make
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Building CXX object CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o -MF CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp
+
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp > CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i
+
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s
+
+CMakeFiles/cudasift.dir/geomFuncs.cpp.o: CMakeFiles/cudasift.dir/flags.make
+CMakeFiles/cudasift.dir/geomFuncs.cpp.o: ../geomFuncs.cpp
+CMakeFiles/cudasift.dir/geomFuncs.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_6) "Building CXX object CMakeFiles/cudasift.dir/geomFuncs.cpp.o"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/cudasift.dir/geomFuncs.cpp.o -MF CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d -o CMakeFiles/cudasift.dir/geomFuncs.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp
+
+CMakeFiles/cudasift.dir/geomFuncs.cpp.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/geomFuncs.cpp.i"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp > CMakeFiles/cudasift.dir/geomFuncs.cpp.i
+
+CMakeFiles/cudasift.dir/geomFuncs.cpp.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/geomFuncs.cpp.s"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp -o CMakeFiles/cudasift.dir/geomFuncs.cpp.s
+
+CMakeFiles/cudasift.dir/mainSift.cpp.o: CMakeFiles/cudasift.dir/flags.make
+CMakeFiles/cudasift.dir/mainSift.cpp.o: ../mainSift.cpp
+CMakeFiles/cudasift.dir/mainSift.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_7) "Building CXX object CMakeFiles/cudasift.dir/mainSift.cpp.o"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/cudasift.dir/mainSift.cpp.o -MF CMakeFiles/cudasift.dir/mainSift.cpp.o.d -o CMakeFiles/cudasift.dir/mainSift.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp
+
+CMakeFiles/cudasift.dir/mainSift.cpp.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/mainSift.cpp.i"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp > CMakeFiles/cudasift.dir/mainSift.cpp.i
+
+CMakeFiles/cudasift.dir/mainSift.cpp.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/mainSift.cpp.s"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp -o CMakeFiles/cudasift.dir/mainSift.cpp.s
+
+# Object files for target cudasift
+cudasift_OBJECTS = \
+"CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" \
+"CMakeFiles/cudasift.dir/geomFuncs.cpp.o" \
+"CMakeFiles/cudasift.dir/mainSift.cpp.o"
+
+# External object files for target cudasift
+cudasift_EXTERNAL_OBJECTS = \
+"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o" \
+"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o" \
+"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o" \
+"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o"
+
+cudasift: CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o
+cudasift: CMakeFiles/cudasift.dir/geomFuncs.cpp.o
+cudasift: CMakeFiles/cudasift.dir/mainSift.cpp.o
+cudasift: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o
+cudasift: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o
+cudasift: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o
+cudasift: CMakeFiles/cudasift.dir/cudasift_intermediate_link.o
+cudasift: CMakeFiles/cudasift.dir/build.make
+cudasift: /usr/local/cuda/lib64/libcudart_static.a
+cudasift: /usr/lib/x86_64-linux-gnu/librt.a
+cudasift: /usr/local/cuda/lib64/libcudadevrt.a
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d
+cudasift: /usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d
+cudasift: CMakeFiles/cudasift.dir/link.txt
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_8) "Linking CXX executable cudasift"
+	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/cudasift.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/cudasift.dir/build: cudasift
+.PHONY : CMakeFiles/cudasift.dir/build
+
+CMakeFiles/cudasift.dir/clean:
+	$(CMAKE_COMMAND) -P CMakeFiles/cudasift.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/cudasift.dir/clean
+
+CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o
+CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o
+CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o
+CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_intermediate_link.o
+	cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/cudasift.dir/depend
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake
new file mode 100644
index 000000000..29fb8f06c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake
@@ -0,0 +1,19 @@
+file(REMOVE_RECURSE
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o"
+  "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o"
+  "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o"
+  "CMakeFiles/cudasift.dir/cudasift_intermediate_link.o"
+  "CMakeFiles/cudasift.dir/geomFuncs.cpp.o"
+  "CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d"
+  "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o"
+  "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d"
+  "CMakeFiles/cudasift.dir/mainSift.cpp.o"
+  "CMakeFiles/cudasift.dir/mainSift.cpp.o.d"
+  "cudasift"
+  "cudasift.pdb"
+)
+
+# Per-language clean rules from dependency scanning.
+foreach(lang CXX)
+  include(CMakeFiles/cudasift.dir/cmake_clean_${lang}.cmake OPTIONAL)
+endforeach()
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make
new file mode 100644
index 000000000..33c8a9a40
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make
@@ -0,0 +1,2 @@
+# Empty compiler generated dependencies file for cudasift.
+# This may be replaced when dependencies are built.
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts
new file mode 100644
index 000000000..587836585
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts
@@ -0,0 +1,2 @@
+# CMAKE generated file: DO NOT EDIT!
+# Timestamp file for compiler generated dependencies management for cudasift.
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o
new file mode 100644
index 000000000..34b81727b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake
new file mode 100644
index 000000000..36ab0c6b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH)
+cmake_policy(SET CMP0007 NEW)
+if(NOT generated_file)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Set these up as variables to make reading the generated file easier
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path
+# We won't actually use these variables for now, but we need to set them, in
+# order to force this file to be run again if it changes.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list
+# Build specific configuration flags
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes)
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
+set(format_flag "-dc") # string
+set(cuda_language_flag ) # list
+
+# Clean up list of include directories and add -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Turn each include directory into its own -I<dir> argument for nvcc.
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file)
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  Either the C or the CXX flags
+# should already have been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration)
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags)
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN)
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# cuda_execute_process - Executes a command with optional command echo and status message.
+#
+#   status  - Status message to print if verbose is true
+#   command - COMMAND argument from the usual execute_process argument structure
+#   ARGN    - Remaining arguments are the command with arguments
+#
+#   CUDA_result - return value from running the command
+#
+# Make this a macro instead of a function, so that things like RESULT_VARIABLE
+# and other return variables are present after executing the process.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose)
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command line.  Only quotes and spaces are
+    # accounted for; anything else is left up to the user to fix if they want
+    # to copy and paste a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Escape any double quotes with a backslash so they come through in the echoed text.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Wrap arguments containing spaces in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the reconstructed command line
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the command; its exit status is stored in CUDA_result.
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete the target file
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2)
+if(CUDA_VERSION VERSION_LESS "3.0")
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Quote each whole
+# -D argument rather than just the input_file and output_file filenames;
+# otherwise CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the file if it is different
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}")
+else()
+  if(verbose)
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands.
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Execute the parser script.
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen
new file mode 100644
index 000000000..67c13bf13
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH)
+cmake_policy(SET CMP0007 NEW)
+if(NOT generated_file)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Set these up as variables to make reading the generated file easier
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path
+# We won't actually use these variables for now, but we need to set them, in
+# order to force this file to be run again if it changes.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list
+# Build specific configuration flags
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$<TARGET_PROPERTY:cudasift,INCLUDE_DIRECTORIES>]==]) # list (needs to be in lua quotes to address backslashes)
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$<TARGET_PROPERTY:cudasift,COMPILE_DEFINITIONS>]==]) # list (needs to be in lua quotes see #16510 ).
+set(format_flag "-dc") # string
+set(cuda_language_flag ) # list
+
+# Clean up list of include directories and add -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Turn each include directory into its own -I<dir> argument for nvcc.
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file)
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  Either the C or the CXX flags
+# should already have been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration)
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags)
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN)
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# cuda_execute_process - Executes a command with optional command echo and status message.
+#
+#   status  - Status message to print if verbose is true
+#   command - COMMAND argument from the usual execute_process argument structure
+#   ARGN    - Remaining arguments are the command with arguments
+#
+#   CUDA_result - return value from running the command
+#
+# Make this a macro instead of a function, so that things like RESULT_VARIABLE
+# and other return variables are present after executing the process.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose)
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command line.  Only quotes and spaces are
+    # accounted for; anything else is left up to the user to fix if they want
+    # to copy and paste a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Escape any double quotes with a backslash so they come through in the echoed text.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Wrap arguments containing spaces in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the reconstructed command line
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the command; its exit status is stored in CUDA_result.
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete the target file
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2)
+if(CUDA_VERSION VERSION_LESS "3.0")
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Quote each whole
+# -D argument rather than just the input_file and output_file filenames;
+# otherwise CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the file if it is different
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}")
+else()
+  if(verbose)
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands.
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Execute the parser script.
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend
new file mode 100644
index 000000000..3cb252554
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend
@@ -0,0 +1,4 @@
+# Generated by: make2cmake.cmake
+SET(CUDA_NVCC_DEPEND
+ )
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o
new file mode 100644
index 000000000..34b81727b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake
new file mode 100644
index 000000000..a1dfc99a0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH)
+cmake_policy(SET CMP0007 NEW)
+if(NOT generated_file)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Set these up as variables to make reading the generated file easier
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path
+# We won't actually use these variables for now, but we need to set them, in
+# order to force this file to be run again if it changes.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list
+# Build specific configuration flags
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes)
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ).
+set(format_flag "-dc") # string
+set(cuda_language_flag ) # list
+
+# Clean up list of include directories and add -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Turn each include directory into its own -I flag (one list element per directory).
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file)
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  Either C or CXX should already have
+# been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration)
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags)
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN)
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# cuda_execute_process - Runs a command, optionally echoing a status message and the command line.
+#
+#   status  - Status message; echoed (via cmake -E echo) only when `verbose` is true
+#   command - Must be the literal keyword COMMAND; any other value is a FATAL_ERROR
+#   ARGN    - Remaining arguments: the executable followed by its arguments
+#
+#   CUDA_result - receives the RESULT_VARIABLE of the executed command
+#
+# This is a macro rather than a function so that RESULT_VARIABLE and any other
+# output variables are still set in the calling script after the process has run.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose)
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command line.  Only quotes and spaces are
+    # handled; anything else is left to the user to fix up if they want to
+    # copy and paste the echoed text as a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Escape embedded double quotes so they appear in the echoed text.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Wrap arguments that contain a space in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the reconstructed command line (display only; it is not what gets executed).
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the real command from the original ARGN; its result is stored in CUDA_result.
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete the target file
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2)
+if(CUDA_VERSION VERSION_LESS "3.0")
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Don't put the
+# quotes just around the filenames for the input_file and output_file variables.
+# CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the file if it is different
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}")
+else()
+  if(verbose)
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands.
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Execute the parser script.
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen
new file mode 100644
index 000000000..a7f671a8f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH)
+cmake_policy(SET CMP0007 NEW)
+if(NOT generated_file)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Set these up as variables to make reading the generated file easier
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path
+# We won't actually use these variables for now, but we need to set them, in
+# order to force this file to be run again if it changes.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list
+# Build specific configuration flags
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$<TARGET_PROPERTY:cudasift,INCLUDE_DIRECTORIES>]==]) # list (needs to be in lua quotes to address backslashes)
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$<TARGET_PROPERTY:cudasift,COMPILE_DEFINITIONS>]==]) # list (needs to be in lua quotes see #16510 ).
+set(format_flag "-dc") # string
+set(cuda_language_flag ) # list
+
+# Clean up list of include directories and add -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Turn each include directory into its own -I flag (one list element per directory).
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file)
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  Either C or CXX should already have
+# been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration)
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags)
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN)
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# cuda_execute_process - Runs a command, optionally echoing a status message and the command line.
+#
+#   status  - Status message; echoed (via cmake -E echo) only when `verbose` is true
+#   command - Must be the literal keyword COMMAND; any other value is a FATAL_ERROR
+#   ARGN    - Remaining arguments: the executable followed by its arguments
+#
+#   CUDA_result - receives the RESULT_VARIABLE of the executed command
+#
+# This is a macro rather than a function so that RESULT_VARIABLE and any other
+# output variables are still set in the calling script after the process has run.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose)
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command line.  Only quotes and spaces are
+    # handled; anything else is left to the user to fix up if they want to
+    # copy and paste the echoed text as a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Escape embedded double quotes so they appear in the echoed text.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Wrap arguments that contain a space in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the reconstructed command line (display only; it is not what gets executed).
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the real command from the original ARGN; its result is stored in CUDA_result.
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete the target file
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2)
+if(CUDA_VERSION VERSION_LESS "3.0")
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Don't put the
+# quotes just around the filenames for the input_file and output_file variables.
+# CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the file if it is different
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}")
+else()
+  if(verbose)
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands.
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Execute the parser script.
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend
new file mode 100644
index 000000000..3cb252554
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend
@@ -0,0 +1,4 @@
+# Generated by: make2cmake.cmake
+SET(CUDA_NVCC_DEPEND
+ )
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o
new file mode 100644
index 000000000..34b81727b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake
new file mode 100644
index 000000000..c763b5c39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH) # matched by cmake_policy(POP) on the last line of this script
+cmake_policy(SET CMP0007 NEW) # CMP0007 NEW: list() commands do not ignore empty elements
+if(NOT generated_file) # generated_file is an input variable (see "Input variables" above)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Values written into this script when it was generated; all paths below are absolute.
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool; gates the "Cubin resource report" section at the end of this script
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path; handed to nvcc via -ccbin further down
+# The three variables below are not referenced anywhere else in this script.
+# Per the original note they are set only to force this file to be run again if they change.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list; NOTE(review): the bare ;; presumably comes from empty configure-time values - confirm
+# Per-configuration nvcc flags; the one matching build_configuration is appended to CUDA_NVCC_FLAGS later
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list; -D<definition> entries are appended to it below
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes)
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") # turn every backslash into a forward slash
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list, empty here (needs to be in lua quotes see #16510 ).
+set(format_flag "-dc") # string; placed right before -o in the "Generate the code" step
+set(cuda_language_flag ) # list; empty here
+
+# Clean up list of include directories and build the matching list of -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) # the configured list above repeats /usr/include/opencv4 many times
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Each directory is appended as its own -I<dir> list element (no extra quotes are added here).
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file) # generated_cubin_file is only required when build_cubin is true
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  The C or CXX flag set should
+# already have been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the host compiler flags and package them up to be sent to the host compiler via nvcc's -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration) # upper-cased to match the *_FLAGS_<CONFIG> variable suffixes
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Each flag is appended as ,"<flag>" so that nvcc can parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags) # skipped when no host flags were collected
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) # -Xcompiler followed by the comma-joined string
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) # index of the flag, or -1 when absent
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) # same lookup for the long spelling
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) # NOTE(review): looks unreachable - the outer if() already needs a non-false CUDA_HOST_COMPILER
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") # CCBIN becomes the two-element list passed to every nvcc call below
+  endif()
+endif()
+
+# cuda_execute_process - Runs a command; if verbose is true it first echoes the status message and the command line.
+#
+#   status  - Status message, echoed only if verbose is true
+#   command - Must be the literal keyword COMMAND (as in execute_process); anything else is a FATAL_ERROR
+#   ARGN    - Remaining arguments: the program to run followed by its arguments
+#
+#   CUDA_result - set from RESULT_VARIABLE of the final execute_process call
+#
+# This is a macro rather than a function, so that CUDA_result and any other
+# variable set in the body are still present in the caller after it runs.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose) # verbose is an input variable of this script
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command.  Only quotes and spaces are
+    # handled; anything else is left to the user to fix if they want to
+    # copy and paste a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Backslash-escape any double quotes so they survive the echo.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Args containing a space are wrapped in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the command (display only; the real run below uses the unmodified ARGN)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the command; its result is stored in CUDA_result for the caller to test
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete any existing target file first (CUDA_result of this removal is not checked)
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2) # fixed value in this generated script
+if(CUDA_VERSION VERSION_LESS "3.0") # false for 12.2, so the removal below is skipped here
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file (nvcc -M) into NVCC_generated_dependency_file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result) # set by cuda_execute_process; a true value aborts the script
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Don't put the
+# quotes just around the filenames for the input_file and output_file variables.
+# CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result) # same error text as the other steps; the message does not say which step failed
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the .tmp file over cmake_dependency_file only if the two differ
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary .tmp file and the raw nvcc dependency file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code: run nvcc on source_file with format_flag, writing generated_file
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # nvcc can leave a half-written output behind, so delete it before failing (this call overwrites CUDA_result).
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}") # always reached on this branch
+else()
+  if(verbose) # the success message is printed only in verbose mode
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands (build_cubin is set to OFF earlier in this script, so this is skipped).
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.  NOTE(review): CUDA_result is not checked afterwards.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Run the CUDA_parse_cubin script with the cubin file as input_file (result also unchecked).
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP) # pairs with cmake_policy(PUSH) at the top of this script
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen
new file mode 100644
index 000000000..13ac4f235
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen
@@ -0,0 +1,314 @@
+#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+#
+#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
+#
+#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
+#  for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+##########################################################################
+# This file runs the nvcc commands to produce the desired output file along with
+# the dependency file needed by CMake to compute dependencies.  In addition the
+# file checks the output of each command and if the command fails it deletes the
+# output files.
+
+# Input variables
+#
+# verbose:BOOL=<>          OFF: Be as quiet as possible (default)
+#                          ON : Describe each step
+#
+# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
+#                               RelWithDebInfo, but it should match one of the
+#                               entries in CUDA_HOST_FLAGS. This is the build
+#                               configuration used when compiling the code.  If
+#                               blank or unspecified Debug is assumed as this is
+#                               what CMake does.
+#
+# generated_file:STRING=<> File to generate.  This argument must be passed in.
+#
+# generated_cubin_file:STRING=<> File to generate.  This argument must be passed
+#                                                   in if build_cubin is true.
+
+cmake_policy(PUSH) # matched by cmake_policy(POP) on the last line of this script
+cmake_policy(SET CMP0007 NEW) # CMP0007 NEW: list() commands do not ignore empty elements
+if(NOT generated_file) # generated_file is an input variable (see "Input variables" above)
+  message(FATAL_ERROR "You must specify generated_file on the command line")
+endif()
+
+# Values written into this script when it was generated; all paths below are absolute.
+set(CMAKE_COMMAND "/usr/bin/cmake") # path
+set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu") # path
+set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.NVCC-depend") # path
+set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend") # path
+set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path
+set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path
+set(build_cubin OFF) # bool; gates the "Cubin resource report" section at the end of this script
+set(CUDA_HOST_COMPILER "/usr/bin/cc") # path; handed to nvcc via -ccbin further down
+# The three variables below are not referenced anywhere else in this script.
+# Per the original note they are set only to force this file to be run again if they change.
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path
+set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o") # path
+set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt") # path
+
+set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path
+set(CUDA_NVCC_FLAGS  ;; -arch=sm_80) # list; NOTE(review): the bare ;; presumably comes from empty configure-time values - confirm
+# Per-configuration nvcc flags; the one matching build_configuration is appended to CUDA_NVCC_FLAGS later
+set(CUDA_NVCC_FLAGS_DEBUG  ; )
+set(CUDA_NVCC_FLAGS_MINSIZEREL  ; )
+set(CUDA_NVCC_FLAGS_RELEASE  ; )
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO  ; )
+set(nvcc_flags -m64) # list; -D<definition> entries are appended to it below
+set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$<TARGET_PROPERTY:cudasift,INCLUDE_DIRECTORIES>]==]) # list (needs to be in lua quotes to address backslashes); the $<...> generator expression is still unexpanded in this copy
+string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") # turn every backslash into a forward slash
+set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$<TARGET_PROPERTY:cudasift,COMPILE_DEFINITIONS>]==]) # list (needs to be in lua quotes see #16510 ); also still an unexpanded $<...> here
+set(format_flag "-dc") # string; placed right before -o in the "Generate the code" step
+set(cuda_language_flag ) # list; empty here
+
+# Clean up list of include directories and build the matching list of -I flags
+list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
+set(CUDA_NVCC_INCLUDE_ARGS)
+foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
+  # Each directory is appended as its own -I<dir> list element (no extra quotes are added here).
+  list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
+endforeach()
+
+# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
+list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
+foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
+  list(APPEND nvcc_flags "-D${def}")
+endforeach()
+
+if(build_cubin AND NOT generated_cubin_file) # generated_cubin_file is only required when build_cubin is true
+  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
+endif()
+
+# This is the list of host compilation flags.  The C or CXX flag set should
+# already have been chosen by FindCUDA.cmake.
+set(CMAKE_HOST_FLAGS  -O3   -msse2  )
+set(CMAKE_HOST_FLAGS_DEBUG -g)
+set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG)
+set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG)
+
+# Take the host compiler flags and package them up to be sent to the host compiler via nvcc's -Xcompiler
+set(nvcc_host_compiler_flags "")
+# If we weren't given a build_configuration, use Debug.
+if(NOT build_configuration)
+  set(build_configuration Debug)
+endif()
+string(TOUPPER "${build_configuration}" build_configuration) # upper-cased to match the *_FLAGS_<CONFIG> variable suffixes
+#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
+foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
+  # Each flag is appended as ,"<flag>" so that nvcc can parse out flags with spaces.
+  string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
+endforeach()
+if (nvcc_host_compiler_flags) # skipped when no host flags were collected
+  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) # -Xcompiler followed by the comma-joined string
+endif()
+#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
+# Add the build specific configuration flags
+list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
+
+# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
+list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) # index of the flag, or -1 when absent
+list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) # same lookup for the long spelling
+if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+  if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) # NOTE(review): looks unreachable - the outer if() already needs a non-false CUDA_HOST_COMPILER
+    set(CCBIN -ccbin "${CCBIN}")
+  else()
+    set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") # CCBIN becomes the two-element list passed to every nvcc call below
+  endif()
+endif()
+
+# cuda_execute_process - Runs a command; if verbose is true it first echoes the status message and the command line.
+#
+#   status  - Status message, echoed only if verbose is true
+#   command - Must be the literal keyword COMMAND (as in execute_process); anything else is a FATAL_ERROR
+#   ARGN    - Remaining arguments: the program to run followed by its arguments
+#
+#   CUDA_result - set from RESULT_VARIABLE of the final execute_process call
+#
+# This is a macro rather than a function, so that CUDA_result and any other
+# variable set in the body are still present in the caller after it runs.
+macro(cuda_execute_process status command)
+  set(_command ${command})
+  if(NOT "x${_command}" STREQUAL "xCOMMAND")
+    message(FATAL_ERROR "Malformed call to cuda_execute_process.  Missing COMMAND as second argument. (command = ${command})")
+  endif()
+  if(verbose) # verbose is an input variable of this script
+    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
+    # Build a printable copy of the command.  Only quotes and spaces are
+    # handled; anything else is left to the user to fix if they want to
+    # copy and paste a runnable command line.
+    set(cuda_execute_process_string)
+    foreach(arg ${ARGN})
+      # Backslash-escape any double quotes so they survive the echo.
+      string(REPLACE "\"" "\\\"" arg ${arg})
+      # Args containing a space are wrapped in quotes so they read as a single argument.
+      if(arg MATCHES " ")
+        list(APPEND cuda_execute_process_string "\"${arg}\"")
+      else()
+        list(APPEND cuda_execute_process_string ${arg})
+      endif()
+    endforeach()
+    # Echo the command (display only; the real run below uses the unmodified ARGN)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
+  endif()
+  # Run the command; its result is stored in CUDA_result for the caller to test
+  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
+endmacro()
+
+# Delete any existing target file first (CUDA_result of this removal is not checked)
+cuda_execute_process(
+  "Removing ${generated_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+  )
+
+# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
+# for dependency generation and hope for the best.
+set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
+set(CUDA_VERSION 12.2) # fixed value in this generated script
+if(CUDA_VERSION VERSION_LESS "3.0") # false for 12.2, so the removal below is skipped here
+  # Note that this will remove all occurrences of -G.
+  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
+endif()
+
+# nvcc doesn't define __CUDACC__ for some reason when generating dependency files.  This
+# can cause incorrect dependencies when #including files based on this macro which is
+# defined in the generating passes of nvcc invocation.  We will go ahead and manually
+# define this for now until a future version fixes this bug.
+set(CUDACC_DEFINE -D__CUDACC__)
+
+# Generate the dependency file (nvcc -M) into NVCC_generated_dependency_file
+cuda_execute_process(
+  "Generating dependency file: ${NVCC_generated_dependency_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  -M
+  ${CUDACC_DEFINE}
+  "${source_file}"
+  -o "${NVCC_generated_dependency_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${depends_CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result) # set by cuda_execute_process; a true value aborts the script
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the cmake readable dependency file to a temp file.  Don't put the
+# quotes just around the filenames for the input_file and output_file variables.
+# CMake will pass the quotes through and not be able to find the file.
+cuda_execute_process(
+  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
+  COMMAND "${CMAKE_COMMAND}"
+  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
+  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
+  -D "verbose=${verbose}"
+  -P "${CUDA_make2cmake}"
+  )
+
+if(CUDA_result) # same error text as the other steps; the message does not say which step failed
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Copy the .tmp file over cmake_dependency_file only if the two differ
+cuda_execute_process(
+  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Delete the temporary .tmp file and the raw nvcc dependency file
+cuda_execute_process(
+  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
+  COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
+  )
+
+if(CUDA_result)
+  message(FATAL_ERROR "Error generating ${generated_file}")
+endif()
+
+# Generate the code: run nvcc on source_file with format_flag, writing generated_file
+cuda_execute_process(
+  "Generating ${generated_file}"
+  COMMAND "${CUDA_NVCC_EXECUTABLE}"
+  "${source_file}"
+  ${cuda_language_flag}
+  ${format_flag} -o "${generated_file}"
+  ${CCBIN}
+  ${nvcc_flags}
+  ${nvcc_host_compiler_flags}
+  ${CUDA_NVCC_FLAGS}
+  -DNVCC
+  ${CUDA_NVCC_INCLUDE_ARGS}
+  )
+
+if(CUDA_result)
+  # nvcc can leave a half-written output behind, so delete it before failing (this call overwrites CUDA_result).
+  cuda_execute_process(
+    "Removing ${generated_file}"
+    COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}"
+    )
+  message(FATAL_ERROR "Error generating file ${generated_file}") # always reached on this branch
+else()
+  if(verbose) # the success message is printed only in verbose mode
+    message("Generated ${generated_file} successfully.")
+  endif()
+endif()
+
+# Cubin resource report commands (build_cubin is set to OFF earlier in this script, so this is skipped).
+if( build_cubin )
+  # Run with -cubin to produce resource usage report.  NOTE(review): CUDA_result is not checked afterwards.
+  cuda_execute_process(
+    "Generating ${generated_cubin_file}"
+    COMMAND "${CUDA_NVCC_EXECUTABLE}"
+    "${source_file}"
+    ${CUDA_NVCC_FLAGS}
+    ${nvcc_flags}
+    ${CCBIN}
+    ${nvcc_host_compiler_flags}
+    -DNVCC
+    -cubin
+    -o "${generated_cubin_file}"
+    ${CUDA_NVCC_INCLUDE_ARGS}
+    )
+
+  # Run the CUDA_parse_cubin script with the cubin file as input_file (result also unchecked).
+  cuda_execute_process(
+    "Executing the parser script"
+    COMMAND  "${CMAKE_COMMAND}"
+    -D "input_file:STRING=${generated_cubin_file}"
+    -P "${CUDA_parse_cubin}"
+    )
+
+endif()
+
+cmake_policy(POP) # pairs with cmake_policy(PUSH) at the top of this script
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend
new file mode 100644
index 000000000..3cb252554
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend
@@ -0,0 +1,4 @@
+# Generated by: make2cmake.cmake
+SET(CUDA_NVCC_DEPEND
+ )
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o
new file mode 100644
index 000000000..34b81727b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make
new file mode 100644
index 000000000..175fcce75
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make
@@ -0,0 +1,2 @@
+# Empty dependencies file for cudasift.
+# This may be replaced when dependencies are built.
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make
new file mode 100644
index 000000000..966c980d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make
@@ -0,0 +1,10 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# compile CXX with /usr/bin/c++
+CXX_DEFINES = 
+
+CXX_INCLUDES = -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4
+
+CXX_FLAGS =  -O3   -msse2  -std=gnu++17
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o
new file mode 100644
index 000000000..8381a70ff
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d
new file mode 100644
index 000000000..0f547060b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d
@@ -0,0 +1,267 @@
+CMakeFiles/cudasift.dir/geomFuncs.cpp.o: \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp \
+ /usr/include/stdc-predef.h /usr/include/c++/11/iostream \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \
+ /usr/include/features.h /usr/include/features-time64.h \
+ /usr/include/x86_64-linux-gnu/bits/wordsize.h \
+ /usr/include/x86_64-linux-gnu/bits/timesize.h \
+ /usr/include/x86_64-linux-gnu/sys/cdefs.h \
+ /usr/include/x86_64-linux-gnu/bits/long-double.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \
+ /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \
+ /usr/include/c++/11/ios /usr/include/c++/11/iosfwd \
+ /usr/include/c++/11/bits/stringfwd.h \
+ /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \
+ /usr/include/c++/11/cwchar /usr/include/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn-common.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar2.h \
+ /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \
+ /usr/include/c++/11/bits/exception_ptr.h \
+ /usr/include/c++/11/bits/exception_defines.h \
+ /usr/include/c++/11/bits/cxxabi_init_exception.h \
+ /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \
+ /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \
+ /usr/include/c++/11/type_traits \
+ /usr/include/c++/11/bits/nested_exception.h \
+ /usr/include/c++/11/bits/char_traits.h \
+ /usr/include/c++/11/bits/stl_algobase.h \
+ /usr/include/c++/11/bits/functexcept.h \
+ /usr/include/c++/11/bits/cpp_type_traits.h \
+ /usr/include/c++/11/ext/type_traits.h \
+ /usr/include/c++/11/ext/numeric_traits.h \
+ /usr/include/c++/11/bits/stl_pair.h \
+ /usr/include/c++/11/bits/stl_iterator_base_types.h \
+ /usr/include/c++/11/bits/stl_iterator_base_funcs.h \
+ /usr/include/c++/11/bits/concept_check.h \
+ /usr/include/c++/11/debug/assertions.h \
+ /usr/include/c++/11/bits/stl_iterator.h \
+ /usr/include/c++/11/bits/ptr_traits.h /usr/include/c++/11/debug/debug.h \
+ /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \
+ /usr/include/x86_64-linux-gnu/bits/types.h \
+ /usr/include/x86_64-linux-gnu/bits/typesizes.h \
+ /usr/include/x86_64-linux-gnu/bits/time64.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \
+ /usr/include/c++/11/bits/localefwd.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \
+ /usr/include/c++/11/clocale /usr/include/locale.h \
+ /usr/include/x86_64-linux-gnu/bits/locale.h /usr/include/c++/11/cctype \
+ /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/endianness.h \
+ /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \
+ /usr/include/pthread.h /usr/include/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/time_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \
+ /usr/include/x86_64-linux-gnu/bits/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \
+ /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \
+ /usr/include/x86_64-linux-gnu/bits/time.h \
+ /usr/include/x86_64-linux-gnu/bits/timex.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \
+ /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \
+ /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \
+ /usr/include/x86_64-linux-gnu/bits/setjmp.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \
+ /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \
+ /usr/include/x86_64-linux-gnu/sys/single_threaded.h \
+ /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \
+ /usr/include/c++/11/bits/allocator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \
+ /usr/include/c++/11/ext/new_allocator.h \
+ /usr/include/c++/11/bits/ostream_insert.h \
+ /usr/include/c++/11/bits/cxxabi_forced.h \
+ /usr/include/c++/11/bits/stl_function.h \
+ /usr/include/c++/11/backward/binders.h \
+ /usr/include/c++/11/bits/range_access.h \
+ /usr/include/c++/11/initializer_list \
+ /usr/include/c++/11/bits/basic_string.h \
+ /usr/include/c++/11/ext/alloc_traits.h \
+ /usr/include/c++/11/bits/alloc_traits.h \
+ /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \
+ /usr/include/c++/11/bits/functional_hash.h \
+ /usr/include/c++/11/bits/string_view.tcc \
+ /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \
+ /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \
+ /usr/include/x86_64-linux-gnu/bits/waitstatus.h \
+ /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/byteswap.h \
+ /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \
+ /usr/include/x86_64-linux-gnu/sys/select.h \
+ /usr/include/x86_64-linux-gnu/bits/select.h \
+ /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib.h \
+ /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \
+ /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \
+ /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \
+ /usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \
+ /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \
+ /usr/include/x86_64-linux-gnu/bits/types/error_t.h \
+ /usr/include/c++/11/bits/charconv.h \
+ /usr/include/c++/11/bits/basic_string.tcc \
+ /usr/include/c++/11/bits/locale_classes.tcc \
+ /usr/include/c++/11/system_error \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \
+ /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \
+ /usr/include/c++/11/bits/streambuf.tcc \
+ /usr/include/c++/11/bits/basic_ios.h \
+ /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \
+ /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \
+ /usr/include/c++/11/bits/streambuf_iterator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \
+ /usr/include/c++/11/bits/locale_facets.tcc \
+ /usr/include/c++/11/bits/basic_ios.tcc \
+ /usr/include/c++/11/bits/ostream.tcc /usr/include/c++/11/istream \
+ /usr/include/c++/11/bits/istream.tcc /usr/include/c++/11/cmath \
+ /usr/include/math.h /usr/include/x86_64-linux-gnu/bits/math-vector.h \
+ /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h \
+ /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h \
+ /usr/include/x86_64-linux-gnu/bits/fp-logb.h \
+ /usr/include/x86_64-linux-gnu/bits/fp-fast.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h \
+ /usr/include/x86_64-linux-gnu/bits/iscanonical.h \
+ /usr/include/c++/11/bits/specfun.h /usr/include/c++/11/limits \
+ /usr/include/c++/11/tr1/gamma.tcc \
+ /usr/include/c++/11/tr1/special_function_util.h \
+ /usr/include/c++/11/tr1/bessel_function.tcc \
+ /usr/include/c++/11/tr1/beta_function.tcc \
+ /usr/include/c++/11/tr1/ell_integral.tcc \
+ /usr/include/c++/11/tr1/exp_integral.tcc \
+ /usr/include/c++/11/tr1/hypergeometric.tcc \
+ /usr/include/c++/11/tr1/legendre_function.tcc \
+ /usr/include/c++/11/tr1/modified_bessel_func.tcc \
+ /usr/include/c++/11/tr1/poly_hermite.tcc \
+ /usr/include/c++/11/tr1/poly_laguerre.tcc \
+ /usr/include/c++/11/tr1/riemann_zeta.tcc \
+ /usr/include/opencv4/opencv2/core/core.hpp \
+ /usr/include/opencv4/opencv2/core.hpp \
+ /usr/include/opencv4/opencv2/core/cvdef.h \
+ /usr/include/opencv4/opencv2/core/version.hpp \
+ /usr/include/opencv4/opencv2/core/hal/interface.h \
+ /usr/include/c++/11/cstddef \
+ /usr/include/opencv4/opencv2/core/cv_cpu_dispatch.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/emmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/xmmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/mmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/mm_malloc.h \
+ /usr/include/c++/11/stdlib.h /usr/include/c++/11/array \
+ /usr/include/c++/11/utility /usr/include/c++/11/bits/stl_relops.h \
+ /usr/include/opencv4/opencv2/core/base.hpp \
+ /usr/include/opencv4/opencv2/opencv_modules.hpp \
+ /usr/include/c++/11/climits \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/limits.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/syslimits.h \
+ /usr/include/limits.h /usr/include/x86_64-linux-gnu/bits/posix1_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/local_lim.h \
+ /usr/include/linux/limits.h \
+ /usr/include/x86_64-linux-gnu/bits/posix2_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/xopen_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/uio_lim.h \
+ /usr/include/c++/11/algorithm /usr/include/c++/11/bits/stl_algo.h \
+ /usr/include/c++/11/bits/algorithmfwd.h \
+ /usr/include/c++/11/bits/stl_heap.h \
+ /usr/include/c++/11/bits/stl_tempbuf.h \
+ /usr/include/c++/11/bits/uniform_int_dist.h \
+ /usr/include/c++/11/pstl/glue_algorithm_defs.h \
+ /usr/include/c++/11/functional /usr/include/c++/11/tuple \
+ /usr/include/c++/11/bits/uses_allocator.h \
+ /usr/include/c++/11/bits/invoke.h /usr/include/c++/11/bits/refwrap.h \
+ /usr/include/c++/11/bits/std_function.h \
+ /usr/include/c++/11/unordered_map \
+ /usr/include/c++/11/ext/aligned_buffer.h \
+ /usr/include/c++/11/bits/hashtable.h \
+ /usr/include/c++/11/bits/hashtable_policy.h \
+ /usr/include/c++/11/bits/enable_special_members.h \
+ /usr/include/c++/11/bits/node_handle.h \
+ /usr/include/c++/11/bits/unordered_map.h \
+ /usr/include/c++/11/bits/erase_if.h /usr/include/c++/11/vector \
+ /usr/include/c++/11/bits/stl_uninitialized.h \
+ /usr/include/c++/11/bits/stl_vector.h \
+ /usr/include/c++/11/bits/stl_bvector.h \
+ /usr/include/c++/11/bits/vector.tcc \
+ /usr/include/c++/11/pstl/execution_defs.h \
+ /usr/include/opencv4/opencv2/core/cvstd.hpp /usr/include/c++/11/cstring \
+ /usr/include/string.h /usr/include/strings.h \
+ /usr/include/x86_64-linux-gnu/bits/strings_fortified.h \
+ /usr/include/x86_64-linux-gnu/bits/string_fortified.h \
+ /usr/include/opencv4/opencv2/core/cvstd_wrapper.hpp \
+ /usr/include/c++/11/memory \
+ /usr/include/c++/11/bits/stl_raw_storage_iter.h \
+ /usr/include/c++/11/bits/align.h /usr/include/c++/11/bit \
+ /usr/include/c++/11/bits/unique_ptr.h \
+ /usr/include/c++/11/bits/shared_ptr.h \
+ /usr/include/c++/11/bits/shared_ptr_base.h \
+ /usr/include/c++/11/bits/allocated_ptr.h \
+ /usr/include/c++/11/ext/concurrence.h \
+ /usr/include/c++/11/bits/shared_ptr_atomic.h \
+ /usr/include/c++/11/bits/atomic_base.h \
+ /usr/include/c++/11/bits/atomic_lockfree_defines.h \
+ /usr/include/c++/11/backward/auto_ptr.h \
+ /usr/include/c++/11/pstl/glue_memory_defs.h \
+ /usr/include/opencv4/opencv2/core/neon_utils.hpp \
+ /usr/include/opencv4/opencv2/core/vsx_utils.hpp /usr/include/assert.h \
+ /usr/include/opencv4/opencv2/core/check.hpp \
+ /usr/include/opencv4/opencv2/core/traits.hpp \
+ /usr/include/opencv4/opencv2/core/matx.hpp \
+ /usr/include/opencv4/opencv2/core/saturate.hpp \
+ /usr/include/opencv4/opencv2/core/fast_math.hpp \
+ /usr/include/opencv4/opencv2/core/types.hpp /usr/include/c++/11/cfloat \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/float.h \
+ /usr/include/opencv4/opencv2/core/mat.hpp \
+ /usr/include/opencv4/opencv2/core/bufferpool.hpp \
+ /usr/include/opencv4/opencv2/core/mat.inl.hpp \
+ /usr/include/opencv4/opencv2/core/persistence.hpp \
+ /usr/include/opencv4/opencv2/core/operations.hpp \
+ /usr/include/opencv4/opencv2/core/cvstd.inl.hpp \
+ /usr/include/c++/11/complex /usr/include/c++/11/sstream \
+ /usr/include/c++/11/bits/sstream.tcc \
+ /usr/include/opencv4/opencv2/core/utility.hpp /usr/include/c++/11/mutex \
+ /usr/include/c++/11/chrono /usr/include/c++/11/ratio \
+ /usr/include/c++/11/ctime /usr/include/c++/11/bits/parse_numbers.h \
+ /usr/include/c++/11/bits/std_mutex.h \
+ /usr/include/c++/11/bits/unique_lock.h \
+ /usr/include/opencv4/opencv2/core/optim.hpp \
+ /usr/include/opencv4/opencv2/core/ovx.hpp \
+ /usr/include/opencv4/opencv2/core/cvdef.h \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o
new file mode 100644
index 000000000..220855fcd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d
new file mode 100644
index 000000000..e610eadb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d
@@ -0,0 +1,154 @@
+CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp \
+ /usr/include/stdc-predef.h /usr/include/c++/11/iostream \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \
+ /usr/include/features.h /usr/include/features-time64.h \
+ /usr/include/x86_64-linux-gnu/bits/wordsize.h \
+ /usr/include/x86_64-linux-gnu/bits/timesize.h \
+ /usr/include/x86_64-linux-gnu/sys/cdefs.h \
+ /usr/include/x86_64-linux-gnu/bits/long-double.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \
+ /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \
+ /usr/include/c++/11/ios /usr/include/c++/11/iosfwd \
+ /usr/include/c++/11/bits/stringfwd.h \
+ /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \
+ /usr/include/c++/11/cwchar /usr/include/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn-common.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar2.h \
+ /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \
+ /usr/include/c++/11/bits/exception_ptr.h \
+ /usr/include/c++/11/bits/exception_defines.h \
+ /usr/include/c++/11/bits/cxxabi_init_exception.h \
+ /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \
+ /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \
+ /usr/include/c++/11/type_traits \
+ /usr/include/c++/11/bits/nested_exception.h \
+ /usr/include/c++/11/bits/char_traits.h \
+ /usr/include/c++/11/bits/stl_algobase.h \
+ /usr/include/c++/11/bits/functexcept.h \
+ /usr/include/c++/11/bits/cpp_type_traits.h \
+ /usr/include/c++/11/ext/type_traits.h \
+ /usr/include/c++/11/ext/numeric_traits.h \
+ /usr/include/c++/11/bits/stl_pair.h \
+ /usr/include/c++/11/bits/stl_iterator_base_types.h \
+ /usr/include/c++/11/bits/stl_iterator_base_funcs.h \
+ /usr/include/c++/11/bits/concept_check.h \
+ /usr/include/c++/11/debug/assertions.h \
+ /usr/include/c++/11/bits/stl_iterator.h \
+ /usr/include/c++/11/bits/ptr_traits.h /usr/include/c++/11/debug/debug.h \
+ /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \
+ /usr/include/x86_64-linux-gnu/bits/types.h \
+ /usr/include/x86_64-linux-gnu/bits/typesizes.h \
+ /usr/include/x86_64-linux-gnu/bits/time64.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \
+ /usr/include/c++/11/bits/localefwd.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \
+ /usr/include/c++/11/clocale /usr/include/locale.h \
+ /usr/include/x86_64-linux-gnu/bits/locale.h /usr/include/c++/11/cctype \
+ /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/endianness.h \
+ /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \
+ /usr/include/pthread.h /usr/include/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/time_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \
+ /usr/include/x86_64-linux-gnu/bits/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \
+ /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \
+ /usr/include/x86_64-linux-gnu/bits/time.h \
+ /usr/include/x86_64-linux-gnu/bits/timex.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \
+ /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \
+ /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \
+ /usr/include/x86_64-linux-gnu/bits/setjmp.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \
+ /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \
+ /usr/include/x86_64-linux-gnu/sys/single_threaded.h \
+ /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \
+ /usr/include/c++/11/bits/allocator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \
+ /usr/include/c++/11/ext/new_allocator.h \
+ /usr/include/c++/11/bits/ostream_insert.h \
+ /usr/include/c++/11/bits/cxxabi_forced.h \
+ /usr/include/c++/11/bits/stl_function.h \
+ /usr/include/c++/11/backward/binders.h \
+ /usr/include/c++/11/bits/range_access.h \
+ /usr/include/c++/11/initializer_list \
+ /usr/include/c++/11/bits/basic_string.h \
+ /usr/include/c++/11/ext/alloc_traits.h \
+ /usr/include/c++/11/bits/alloc_traits.h \
+ /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \
+ /usr/include/c++/11/bits/functional_hash.h \
+ /usr/include/c++/11/bits/string_view.tcc \
+ /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \
+ /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \
+ /usr/include/x86_64-linux-gnu/bits/waitstatus.h \
+ /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/byteswap.h \
+ /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \
+ /usr/include/x86_64-linux-gnu/sys/select.h \
+ /usr/include/x86_64-linux-gnu/bits/select.h \
+ /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib.h \
+ /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \
+ /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \
+ /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \
+ /usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \
+ /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \
+ /usr/include/x86_64-linux-gnu/bits/types/error_t.h \
+ /usr/include/c++/11/bits/charconv.h \
+ /usr/include/c++/11/bits/basic_string.tcc \
+ /usr/include/c++/11/bits/locale_classes.tcc \
+ /usr/include/c++/11/system_error \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \
+ /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \
+ /usr/include/c++/11/bits/streambuf.tcc \
+ /usr/include/c++/11/bits/basic_ios.h \
+ /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \
+ /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \
+ /usr/include/c++/11/bits/streambuf_iterator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \
+ /usr/include/c++/11/bits/locale_facets.tcc \
+ /usr/include/c++/11/bits/basic_ios.tcc \
+ /usr/include/c++/11/bits/ostream.tcc /usr/include/c++/11/istream \
+ /usr/include/c++/11/bits/istream.tcc \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.h
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt
new file mode 100644
index 000000000..5d4781220
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/c++  -O3   -msse2  CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o CMakeFiles/cudasift.dir/geomFuncs.cpp.o CMakeFiles/cudasift.dir/mainSift.cpp.o CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o CMakeFiles/cudasift.dir/cudasift_intermediate_link.o -o cudasift  /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudadevrt.a /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o
new file mode 100644
index 000000000..7a888fa16
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d
new file mode 100644
index 000000000..30d1de48f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d
@@ -0,0 +1,298 @@
+CMakeFiles/cudasift.dir/mainSift.cpp.o: \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp \
+ /usr/include/stdc-predef.h /usr/include/c++/11/iostream \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \
+ /usr/include/features.h /usr/include/features-time64.h \
+ /usr/include/x86_64-linux-gnu/bits/wordsize.h \
+ /usr/include/x86_64-linux-gnu/bits/timesize.h \
+ /usr/include/x86_64-linux-gnu/sys/cdefs.h \
+ /usr/include/x86_64-linux-gnu/bits/long-double.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs.h \
+ /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \
+ /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \
+ /usr/include/c++/11/ios /usr/include/c++/11/iosfwd \
+ /usr/include/c++/11/bits/stringfwd.h \
+ /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \
+ /usr/include/c++/11/cwchar /usr/include/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn.h \
+ /usr/include/x86_64-linux-gnu/bits/floatn-common.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar.h \
+ /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \
+ /usr/include/x86_64-linux-gnu/bits/wchar2.h \
+ /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \
+ /usr/include/c++/11/bits/exception_ptr.h \
+ /usr/include/c++/11/bits/exception_defines.h \
+ /usr/include/c++/11/bits/cxxabi_init_exception.h \
+ /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \
+ /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \
+ /usr/include/c++/11/type_traits \
+ /usr/include/c++/11/bits/nested_exception.h \
+ /usr/include/c++/11/bits/char_traits.h \
+ /usr/include/c++/11/bits/stl_algobase.h \
+ /usr/include/c++/11/bits/functexcept.h \
+ /usr/include/c++/11/bits/cpp_type_traits.h \
+ /usr/include/c++/11/ext/type_traits.h \
+ /usr/include/c++/11/ext/numeric_traits.h \
+ /usr/include/c++/11/bits/stl_pair.h \
+ /usr/include/c++/11/bits/stl_iterator_base_types.h \
+ /usr/include/c++/11/bits/stl_iterator_base_funcs.h \
+ /usr/include/c++/11/bits/concept_check.h \
+ /usr/include/c++/11/debug/assertions.h \
+ /usr/include/c++/11/bits/stl_iterator.h \
+ /usr/include/c++/11/bits/ptr_traits.h /usr/include/c++/11/debug/debug.h \
+ /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \
+ /usr/include/x86_64-linux-gnu/bits/types.h \
+ /usr/include/x86_64-linux-gnu/bits/typesizes.h \
+ /usr/include/x86_64-linux-gnu/bits/time64.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \
+ /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \
+ /usr/include/c++/11/bits/localefwd.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \
+ /usr/include/c++/11/clocale /usr/include/locale.h \
+ /usr/include/x86_64-linux-gnu/bits/locale.h /usr/include/c++/11/cctype \
+ /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/endianness.h \
+ /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \
+ /usr/include/pthread.h /usr/include/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/time_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \
+ /usr/include/x86_64-linux-gnu/bits/sched.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \
+ /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \
+ /usr/include/x86_64-linux-gnu/bits/time.h \
+ /usr/include/x86_64-linux-gnu/bits/timex.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \
+ /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \
+ /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \
+ /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \
+ /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \
+ /usr/include/x86_64-linux-gnu/bits/setjmp.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \
+ /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \
+ /usr/include/x86_64-linux-gnu/sys/single_threaded.h \
+ /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \
+ /usr/include/c++/11/bits/allocator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \
+ /usr/include/c++/11/ext/new_allocator.h \
+ /usr/include/c++/11/bits/ostream_insert.h \
+ /usr/include/c++/11/bits/cxxabi_forced.h \
+ /usr/include/c++/11/bits/stl_function.h \
+ /usr/include/c++/11/backward/binders.h \
+ /usr/include/c++/11/bits/range_access.h \
+ /usr/include/c++/11/initializer_list \
+ /usr/include/c++/11/bits/basic_string.h \
+ /usr/include/c++/11/ext/alloc_traits.h \
+ /usr/include/c++/11/bits/alloc_traits.h \
+ /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \
+ /usr/include/c++/11/bits/functional_hash.h \
+ /usr/include/c++/11/bits/string_view.tcc \
+ /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \
+ /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \
+ /usr/include/x86_64-linux-gnu/bits/waitstatus.h \
+ /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \
+ /usr/include/x86_64-linux-gnu/bits/byteswap.h \
+ /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \
+ /usr/include/x86_64-linux-gnu/sys/select.h \
+ /usr/include/x86_64-linux-gnu/bits/select.h \
+ /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \
+ /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \
+ /usr/include/x86_64-linux-gnu/bits/stdlib.h \
+ /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \
+ /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \
+ /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \
+ /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio.h \
+ /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \
+ /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \
+ /usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \
+ /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \
+ /usr/include/x86_64-linux-gnu/bits/types/error_t.h \
+ /usr/include/c++/11/bits/charconv.h \
+ /usr/include/c++/11/bits/basic_string.tcc \
+ /usr/include/c++/11/bits/locale_classes.tcc \
+ /usr/include/c++/11/system_error \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \
+ /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \
+ /usr/include/c++/11/bits/streambuf.tcc \
+ /usr/include/c++/11/bits/basic_ios.h \
+ /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \
+ /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \
+ /usr/include/c++/11/bits/streambuf_iterator.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \
+ /usr/include/c++/11/bits/locale_facets.tcc \
+ /usr/include/c++/11/bits/basic_ios.tcc \
+ /usr/include/c++/11/bits/ostream.tcc /usr/include/c++/11/istream \
+ /usr/include/c++/11/bits/istream.tcc /usr/include/c++/11/cmath \
+ /usr/include/math.h /usr/include/x86_64-linux-gnu/bits/math-vector.h \
+ /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h \
+ /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h \
+ /usr/include/x86_64-linux-gnu/bits/fp-logb.h \
+ /usr/include/x86_64-linux-gnu/bits/fp-fast.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls.h \
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h \
+ /usr/include/x86_64-linux-gnu/bits/iscanonical.h \
+ /usr/include/c++/11/bits/specfun.h /usr/include/c++/11/limits \
+ /usr/include/c++/11/tr1/gamma.tcc \
+ /usr/include/c++/11/tr1/special_function_util.h \
+ /usr/include/c++/11/tr1/bessel_function.tcc \
+ /usr/include/c++/11/tr1/beta_function.tcc \
+ /usr/include/c++/11/tr1/ell_integral.tcc \
+ /usr/include/c++/11/tr1/exp_integral.tcc \
+ /usr/include/c++/11/tr1/hypergeometric.tcc \
+ /usr/include/c++/11/tr1/legendre_function.tcc \
+ /usr/include/c++/11/tr1/modified_bessel_func.tcc \
+ /usr/include/c++/11/tr1/poly_hermite.tcc \
+ /usr/include/c++/11/tr1/poly_laguerre.tcc \
+ /usr/include/c++/11/tr1/riemann_zeta.tcc /usr/include/c++/11/iomanip \
+ /usr/include/c++/11/locale \
+ /usr/include/c++/11/bits/locale_facets_nonio.h /usr/include/c++/11/ctime \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/time_members.h \
+ /usr/include/x86_64-linux-gnu/c++/11/bits/messages_members.h \
+ /usr/include/libintl.h /usr/include/c++/11/bits/codecvt.h \
+ /usr/include/c++/11/bits/locale_facets_nonio.tcc \
+ /usr/include/c++/11/bits/locale_conv.h \
+ /usr/include/c++/11/bits/unique_ptr.h /usr/include/c++/11/utility \
+ /usr/include/c++/11/bits/stl_relops.h /usr/include/c++/11/tuple \
+ /usr/include/c++/11/array /usr/include/c++/11/bits/uses_allocator.h \
+ /usr/include/c++/11/bits/invoke.h \
+ /usr/include/c++/11/bits/quoted_string.h /usr/include/c++/11/sstream \
+ /usr/include/c++/11/bits/sstream.tcc /usr/local/cuda/include/cuda.h \
+ /usr/include/c++/11/stdlib.h /usr/local/cuda/include/cuda_runtime.h \
+ /usr/local/cuda/include/crt/host_config.h \
+ /usr/local/cuda/include/builtin_types.h \
+ /usr/local/cuda/include/device_types.h \
+ /usr/local/cuda/include/crt/host_defines.h \
+ /usr/local/cuda/include/driver_types.h \
+ /usr/local/cuda/include/vector_types.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/limits.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/syslimits.h \
+ /usr/include/limits.h /usr/include/x86_64-linux-gnu/bits/posix1_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/local_lim.h \
+ /usr/include/linux/limits.h \
+ /usr/include/x86_64-linux-gnu/bits/posix2_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/xopen_lim.h \
+ /usr/include/x86_64-linux-gnu/bits/uio_lim.h \
+ /usr/local/cuda/include/surface_types.h \
+ /usr/local/cuda/include/texture_types.h \
+ /usr/local/cuda/include/library_types.h \
+ /usr/local/cuda/include/channel_descriptor.h \
+ /usr/local/cuda/include/cuda_runtime_api.h \
+ /usr/local/cuda/include/cuda_device_runtime_api.h \
+ /usr/local/cuda/include/driver_functions.h \
+ /usr/local/cuda/include/vector_functions.h \
+ /usr/local/cuda/include/vector_functions.hpp \
+ /usr/include/opencv4/opencv2/core/core.hpp \
+ /usr/include/opencv4/opencv2/core.hpp \
+ /usr/include/opencv4/opencv2/core/cvdef.h \
+ /usr/include/opencv4/opencv2/core/version.hpp \
+ /usr/include/opencv4/opencv2/core/hal/interface.h \
+ /usr/include/c++/11/cstddef \
+ /usr/include/opencv4/opencv2/core/cv_cpu_dispatch.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/emmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/xmmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/mmintrin.h \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/mm_malloc.h \
+ /usr/include/opencv4/opencv2/core/base.hpp \
+ /usr/include/opencv4/opencv2/opencv_modules.hpp \
+ /usr/include/c++/11/climits /usr/include/c++/11/algorithm \
+ /usr/include/c++/11/bits/stl_algo.h \
+ /usr/include/c++/11/bits/algorithmfwd.h \
+ /usr/include/c++/11/bits/stl_heap.h \
+ /usr/include/c++/11/bits/stl_tempbuf.h \
+ /usr/include/c++/11/bits/uniform_int_dist.h \
+ /usr/include/c++/11/pstl/glue_algorithm_defs.h \
+ /usr/include/c++/11/functional /usr/include/c++/11/bits/refwrap.h \
+ /usr/include/c++/11/bits/std_function.h \
+ /usr/include/c++/11/unordered_map \
+ /usr/include/c++/11/ext/aligned_buffer.h \
+ /usr/include/c++/11/bits/hashtable.h \
+ /usr/include/c++/11/bits/hashtable_policy.h \
+ /usr/include/c++/11/bits/enable_special_members.h \
+ /usr/include/c++/11/bits/node_handle.h \
+ /usr/include/c++/11/bits/unordered_map.h \
+ /usr/include/c++/11/bits/erase_if.h /usr/include/c++/11/vector \
+ /usr/include/c++/11/bits/stl_uninitialized.h \
+ /usr/include/c++/11/bits/stl_vector.h \
+ /usr/include/c++/11/bits/stl_bvector.h \
+ /usr/include/c++/11/bits/vector.tcc \
+ /usr/include/c++/11/pstl/execution_defs.h \
+ /usr/include/opencv4/opencv2/core/cvstd.hpp /usr/include/c++/11/cstring \
+ /usr/include/string.h /usr/include/strings.h \
+ /usr/include/x86_64-linux-gnu/bits/strings_fortified.h \
+ /usr/include/x86_64-linux-gnu/bits/string_fortified.h \
+ /usr/include/opencv4/opencv2/core/cvstd_wrapper.hpp \
+ /usr/include/c++/11/memory \
+ /usr/include/c++/11/bits/stl_raw_storage_iter.h \
+ /usr/include/c++/11/bits/align.h /usr/include/c++/11/bit \
+ /usr/include/c++/11/bits/shared_ptr.h \
+ /usr/include/c++/11/bits/shared_ptr_base.h \
+ /usr/include/c++/11/bits/allocated_ptr.h \
+ /usr/include/c++/11/ext/concurrence.h \
+ /usr/include/c++/11/bits/shared_ptr_atomic.h \
+ /usr/include/c++/11/bits/atomic_base.h \
+ /usr/include/c++/11/bits/atomic_lockfree_defines.h \
+ /usr/include/c++/11/backward/auto_ptr.h \
+ /usr/include/c++/11/pstl/glue_memory_defs.h \
+ /usr/include/opencv4/opencv2/core/neon_utils.hpp \
+ /usr/include/opencv4/opencv2/core/vsx_utils.hpp /usr/include/assert.h \
+ /usr/include/opencv4/opencv2/core/check.hpp \
+ /usr/include/opencv4/opencv2/core/traits.hpp \
+ /usr/include/opencv4/opencv2/core/matx.hpp \
+ /usr/include/opencv4/opencv2/core/saturate.hpp \
+ /usr/include/opencv4/opencv2/core/fast_math.hpp \
+ /usr/include/opencv4/opencv2/core/types.hpp /usr/include/c++/11/cfloat \
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/float.h \
+ /usr/include/opencv4/opencv2/core/mat.hpp \
+ /usr/include/opencv4/opencv2/core/bufferpool.hpp \
+ /usr/include/opencv4/opencv2/core/mat.inl.hpp \
+ /usr/include/opencv4/opencv2/core/persistence.hpp \
+ /usr/include/opencv4/opencv2/core/operations.hpp \
+ /usr/include/opencv4/opencv2/core/cvstd.inl.hpp \
+ /usr/include/c++/11/complex \
+ /usr/include/opencv4/opencv2/core/utility.hpp /usr/include/c++/11/mutex \
+ /usr/include/c++/11/chrono /usr/include/c++/11/ratio \
+ /usr/include/c++/11/bits/parse_numbers.h \
+ /usr/include/c++/11/bits/std_mutex.h \
+ /usr/include/c++/11/bits/unique_lock.h \
+ /usr/include/opencv4/opencv2/core/optim.hpp \
+ /usr/include/opencv4/opencv2/core/ovx.hpp \
+ /usr/include/opencv4/opencv2/core/cvdef.h \
+ /usr/include/opencv4/opencv2/highgui/highgui.hpp \
+ /usr/include/opencv4/opencv2/highgui.hpp \
+ /usr/include/opencv4/opencv2/imgcodecs.hpp \
+ /usr/include/opencv4/opencv2/videoio.hpp \
+ /usr/include/opencv4/opencv2/imgproc/imgproc.hpp \
+ /usr/include/opencv4/opencv2/imgproc.hpp \
+ /usr/include/opencv4/opencv2/imgproc/segmentation.hpp \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common/Utility.h \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h \
+ /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make
new file mode 100644
index 000000000..5b293683d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make
@@ -0,0 +1,9 @@
+CMAKE_PROGRESS_1 = 1
+CMAKE_PROGRESS_2 = 2
+CMAKE_PROGRESS_3 = 3
+CMAKE_PROGRESS_4 = 4
+CMAKE_PROGRESS_5 = 5
+CMAKE_PROGRESS_6 = 6
+CMAKE_PROGRESS_7 = 7
+CMAKE_PROGRESS_8 = 8
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks
new file mode 100644
index 000000000..45a4fb75d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks
@@ -0,0 +1 @@
+8
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile
new file mode 100644
index 000000000..ef43b3964
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile
@@ -0,0 +1,284 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22
+
+# Default target executed when no arguments are given to make.
+default_target: all
+.PHONY : default_target
+
+# Allow only one "make -f Makefile2" at a time, but pass parallelism.
+.NOTPARALLEL:
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+# Disable VCS-based implicit rules.
+% : %,v
+
+# Disable VCS-based implicit rules.
+% : RCS/%
+
+# Disable VCS-based implicit rules.
+% : RCS/%,v
+
+# Disable VCS-based implicit rules.
+% : SCCS/s.%
+
+# Disable VCS-based implicit rules.
+% : s.%
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Command-line flag to silence nested $(MAKE).
+$(VERBOSE)MAKESILENT = -s
+
+#Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E rm -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build
+
+#=============================================================================
+# Targets provided globally by CMake.
+
+# Special rule for the target edit_cache
+edit_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
+	/usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
+.PHONY : edit_cache
+
+# Special rule for the target edit_cache
+edit_cache/fast: edit_cache
+.PHONY : edit_cache/fast
+
+# Special rule for the target rebuild_cache
+rebuild_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
+	/usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : rebuild_cache
+
+# Special rule for the target rebuild_cache
+rebuild_cache/fast: rebuild_cache
+.PHONY : rebuild_cache/fast
+
+# Special rule for the target list_install_components
+list_install_components:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\""
+.PHONY : list_install_components
+
+# Special rule for the target list_install_components
+list_install_components/fast: list_install_components
+.PHONY : list_install_components/fast
+
+# Special rule for the target install
+install: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+	/usr/bin/cmake -P cmake_install.cmake
+.PHONY : install
+
+# Special rule for the target install
+install/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+	/usr/bin/cmake -P cmake_install.cmake
+.PHONY : install/fast
+
+# Special rule for the target install/local
+install/local: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+	/usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local
+
+# Special rule for the target install/local
+install/local/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+	/usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local/fast
+
+# Special rule for the target install/strip
+install/strip: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+	/usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip
+
+# Special rule for the target install/strip
+install/strip/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+	/usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip/fast
+
+# The main all target
+all: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build//CMakeFiles/progress.marks
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 0
+.PHONY : all
+
+# The main clean target
+clean:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 clean
+.PHONY : clean
+
+# The main clean target
+clean/fast: clean
+.PHONY : clean/fast
+
+# Prepare targets for installation.
+preinstall: all
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall
+
+# Prepare targets for installation.
+preinstall/fast:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall/fast
+
+# clear depends
+depend:
+	$(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
+.PHONY : depend
+
+#=============================================================================
+# Target rules for targets named cudasift
+
+# Build rule for target.
+cudasift: cmake_check_build_system
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 cudasift
+.PHONY : cudasift
+
+# fast build rule for target.
+cudasift/fast:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/build
+.PHONY : cudasift/fast
+
+geomFuncs.o: geomFuncs.cpp.o
+.PHONY : geomFuncs.o
+
+# target to build an object file
+geomFuncs.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.o
+.PHONY : geomFuncs.cpp.o
+
+geomFuncs.i: geomFuncs.cpp.i
+.PHONY : geomFuncs.i
+
+# target to preprocess a source file
+geomFuncs.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.i
+.PHONY : geomFuncs.cpp.i
+
+geomFuncs.s: geomFuncs.cpp.s
+.PHONY : geomFuncs.s
+
+# target to generate assembly for a file
+geomFuncs.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.s
+.PHONY : geomFuncs.cpp.s
+
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o
+
+# target to build an object file
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o
+
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i
+
+# target to preprocess a source file
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i
+
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s
+
+# target to generate assembly for a file
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s
+.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s
+
+mainSift.o: mainSift.cpp.o
+.PHONY : mainSift.o
+
+# target to build an object file
+mainSift.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.o
+.PHONY : mainSift.cpp.o
+
+mainSift.i: mainSift.cpp.i
+.PHONY : mainSift.i
+
+# target to preprocess a source file
+mainSift.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.i
+.PHONY : mainSift.cpp.i
+
+mainSift.s: mainSift.cpp.s
+.PHONY : mainSift.s
+
+# target to generate assembly for a file
+mainSift.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.s
+.PHONY : mainSift.cpp.s
+
+# Help Target
+help:
+	@echo "The following are some of the valid targets for this Makefile:"
+	@echo "... all (the default if no target is provided)"
+	@echo "... clean"
+	@echo "... depend"
+	@echo "... edit_cache"
+	@echo "... install"
+	@echo "... install/local"
+	@echo "... install/strip"
+	@echo "... list_install_components"
+	@echo "... rebuild_cache"
+	@echo "... cudasift"
+	@echo "... geomFuncs.o"
+	@echo "... geomFuncs.i"
+	@echo "... geomFuncs.s"
+	@echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o"
+	@echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i"
+	@echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s"
+	@echo "... mainSift.o"
+	@echo "... mainSift.i"
+	@echo "... mainSift.s"
+.PHONY : help
+
+
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+	$(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake
new file mode 100644
index 000000000..b83980cb5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake
@@ -0,0 +1,79 @@
+# Install script for directory: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA
+
+# Set the install prefix
+if(NOT DEFINED CMAKE_INSTALL_PREFIX)
+  set(CMAKE_INSTALL_PREFIX "/usr/local")
+endif()
+string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+  if(BUILD_TYPE)
+    string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+           CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+  else()
+    set(CMAKE_INSTALL_CONFIG_NAME "")
+  endif()
+  message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+endif()
+
+# Set the component getting installed.
+if(NOT CMAKE_INSTALL_COMPONENT)
+  if(COMPONENT)
+    message(STATUS "Install component: \"${COMPONENT}\"")
+    set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+  else()
+    set(CMAKE_INSTALL_COMPONENT)
+  endif()
+endif()
+
+# Install shared libraries without execute permission?
+if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+  set(CMAKE_INSTALL_SO_NO_EXE "1")
+endif()
+
+# Is this installation the result of a crosscompile?
+if(NOT DEFINED CMAKE_CROSSCOMPILING)
+  set(CMAKE_CROSSCOMPILING "FALSE")
+endif()
+
+# Set default install directory permissions.
+if(NOT DEFINED CMAKE_OBJDUMP)
+  set(CMAKE_OBJDUMP "/usr/bin/objdump")
+endif()
+
+if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT)
+  file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/." TYPE FILE FILES
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common/Utility.cpp"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt"
+    )
+endif()
+
+if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT)
+  file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/data" TYPE FILE FILES
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/data/left.pgm"
+    "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/data/righ.pgm"
+    )
+endif()
+
+if(CMAKE_INSTALL_COMPONENT)
+  set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
+else()
+  set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
+endif()
+
+string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
+       "${CMAKE_INSTALL_MANIFEST_FILES}")
+file(WRITE "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/${CMAKE_INSTALL_MANIFEST}"
+     "${CMAKE_INSTALL_MANIFEST_CONTENT}")
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json
new file mode 100644
index 000000000..4a2149538
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json
@@ -0,0 +1,40 @@
+[
+    {
+        "command": "nvcc  --cuda-gpu-arch=sm_80 -m64 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/./cudasift_intermediate_link.o -D__CUDACC__=1",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build"
+    },
+    {
+        "command": "nvcc -c  -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu"
+    },
+    {
+        "command": "nvcc -c  -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu"
+    },
+    {
+        "command": "nvcc -c  -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu"
+    },
+    {
+        "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp"
+    },
+    {
+        "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/geomFuncs.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp"
+    },
+    {
+        "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/mainSift.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build",
+        "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp"
+    },
+    {
+        "command": "ld -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccK090v3.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o cudasift /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o CMakeFiles/cudasift.dir/geomFuncs.cpp.o CMakeFiles/cudasift.dir/mainSift.cpp.o CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o CMakeFiles/cudasift.dir/cudasift_intermediate_link.o /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudadevrt.a /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build"
+    }
+]
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift
new file mode 100644
index 000000000..34b81727b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu
new file mode 100644
index 000000000..e12f15093
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu
@@ -0,0 +1,107 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <cstdio>
+#include <chrono>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+
+int iDivUp(int a, int b) { const int q = a / b; return (a % b == 0) ? q : q + 1; } // a / b, plus one whenever the division leaves a remainder
+int iDivDown(int a, int b) { const int quotient = a / b; return quotient; } // plain truncating integer division
+int iAlignUp(int a, int b) { const int rem = a % b; return (rem == 0) ? a : (a - rem + b); } // a unchanged if already a multiple of b, else stepped to the next one
+int iAlignDown(int a, int b) { const int rem = a % b; return a - rem; } // strips the remainder of a / b from a
+
+void CudaImage::Allocate(int w, int h, int p, bool host, float &totTime, float *devmem, float *hostmem) // Binds caller-supplied buffers, or allocates device/host storage for a w x h float image.
+{
+  width = w;        // floats per row
+  height = h;       // number of rows
+  pitch = p;        // row stride in floats; replaced below when the device buffer is allocated here
+  d_data = devmem;  // caller-owned device buffer, or NULL to have one allocated
+  h_data = hostmem; // caller-owned host buffer, or NULL
+  t_data = NULL;
+  if (devmem == NULL)
+  {
+    size_t pitchBytes = 0; // cudaMallocPitch stores a full size_t; the old (size_t *)&pitch cast made it write past the 4-byte int member
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMallocPitch((void **)&d_data, &pitchBytes, (size_t)(sizeof(float) * width), (size_t)height));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    std::cout << "Allocate Time is " << std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count() << " us" << std::endl;
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count(); // totTime accumulates microseconds, only when DEVICE_TIMER is defined
+#endif
+    pitch = (int)(pitchBytes / sizeof(float)); // the runtime reports the pitch in bytes; the class keeps it in floats
+    if (d_data == NULL) printf("Failed to allocate device data\n");
+    d_internalAlloc = true; // tells the destructor to cudaFree d_data
+  }
+  if (host && hostmem == NULL)
+  {
+    h_data = (float *)malloc(sizeof(float) * pitch * height); // host buffer is sized by pitch, not width
+    h_internalAlloc = true; // tells the destructor to free h_data
+  }
+}
+
+CudaImage::CudaImage() // Starts as an empty image: zero dimensions, no buffers, nothing owned.
+    : width(0), height(0), pitch(0), d_data(nullptr), h_data(nullptr), t_data(nullptr),
+      d_internalAlloc(false), h_internalAlloc(false) {}
+
+CudaImage::~CudaImage() // Frees only the buffers this object allocated itself, then clears every pointer.
+{
+  const bool ownsDevice = d_internalAlloc && d_data != NULL;
+  const bool ownsHost = h_internalAlloc && h_data != NULL;
+  if (ownsDevice)
+    safeCall(cudaFree(d_data));
+  if (ownsHost)
+    free(h_data);
+  if (t_data != NULL) // a non-NULL t_data is always released, as a cudaArray
+    safeCall(cudaFreeArray((cudaArray *)t_data));
+  d_data = h_data = t_data = NULL;
+}
+
+double CudaImage::Download(float &totTime) // Copies the host image into the pitched device buffer; returns the copy time in microseconds (0 without DEVICE_TIMER).
+{
+  if (d_data == NULL || h_data == NULL) // nothing to transfer unless both buffers exist
+    return 0.0;
+  double elapsedUs = 0.0;
+  const int devicePitchBytes = sizeof(float) * pitch;
+  const size_t rowBytes = sizeof(float) * width;
+#ifdef DEVICE_TIMER
+  auto copyBegin = std::chrono::steady_clock::now();
+#endif
+  // Host rows are read rowBytes apart; device rows are written devicePitchBytes apart.
+  safeCall(cudaMemcpy2D(d_data, devicePitchBytes, h_data, rowBytes, rowBytes, height, cudaMemcpyHostToDevice));
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto copyEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(copyEnd - copyBegin).count();
+  elapsedUs = std::chrono::duration<float, std::micro>(copyEnd - copyBegin).count();
+  std::cout << "Download Time is " << elapsedUs << " us" << std::endl;
+#endif
+  return elapsedUs;
+}
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h
new file mode 100644
index 000000000..737446686
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h
@@ -0,0 +1,38 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+#ifndef CUDAIMAGE_H
+#define CUDAIMAGE_H
+
+class CudaImage // Float image that can hold a host buffer, a pitched device buffer, or both.
+{
+public:
+  int width, height; // floats per row and number of rows
+  int pitch;         // row stride in floats (Allocate divides the device byte pitch by sizeof(float))
+  float *h_data;     // host buffer, or NULL
+  float *d_data;     // device buffer, or NULL
+  float *t_data;     // released with cudaFreeArray in the destructor when non-NULL
+  bool d_internalAlloc; // true once Allocate has allocated d_data itself
+  bool h_internalAlloc; // true once Allocate has allocated h_data itself
+
+public:
+  CudaImage();
+  CudaImage(const CudaImage&) = delete; // copying is disabled; the destructor frees owned buffers
+  CudaImage& operator=(const CudaImage&) = delete;
+  ~CudaImage();
+  void Allocate(int width, int height, int pitch, bool withHost, float &totTime, float *devMem = NULL, float *hostMem = NULL); // NULL devMem/hostMem means "allocate for me"
+  double Download(float &totTime); // copies h_data to d_data (host to device)
+  double Readback();      // NOTE(review): no definition in the cudaImage.cu added by this patch - confirm before calling
+  double InitTexture();   // NOTE(review): no definition in the cudaImage.cu added by this patch - confirm before calling
+  double CopyToTexture(CudaImage &dst, bool host); // NOTE(review): no definition in the cudaImage.cu added by this patch - confirm before calling
+};
+
+int iDivUp(int a, int b);     // a / b, plus one when a % b is non-zero
+int iDivDown(int a, int b);   // a / b
+int iAlignUp(int a, int b);   // a if a % b == 0, otherwise a - a % b + b
+int iAlignDown(int a, int b); // a - a % b
+void StartTimer(unsigned int *hTimer);  // NOTE(review): not defined in the cudaImage.cu added by this patch - confirm
+double StopTimer(unsigned int hTimer);  // NOTE(review): not defined in the cudaImage.cu added by this patch - confirm
+
+#endif // CUDAIMAGE_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h
new file mode 100644
index 000000000..b49f6c503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h
@@ -0,0 +1,48 @@
+#ifndef CUDASIFT_H
+#define CUDASIFT_H
+
+#include "cudaImage.h"
+
+typedef struct // One SIFT feature: 16 four-byte header fields followed by a 128-float descriptor.
+{
+  float xpos;  // multiplied by the subsampling factor at the end of ExtractSiftDescriptors
+  float ypos;  // multiplied by the subsampling factor at the end of ExtractSiftDescriptors
+  float scale; // multiplied by the subsampling factor at the end of ExtractSiftDescriptors
+  float sharpness;
+  float edgeness;
+  float orientation; // in degrees: the descriptor kernel converts it with 2*pi/360
+  float score;
+  float ambiguity;
+  int match;
+  float match_xpos;
+  float match_ypos;
+  float match_error;
+  float subsampling;
+  float empty[3];  // presumably padding that rounds the header up to 16 fields - TODO confirm
+  float data[128]; // descriptor values, one written per thread by the descriptor kernel
+} SiftPoint;
+
+typedef struct // Container for an array of SiftPoint plus its used and allocated counts.
+{
+  int numPts; // Number of available Sift points
+  int maxPts; // Number of allocated Sift points
+#ifdef MANAGEDMEM // compile-time choice: one managed pointer, or separate host and device pointers
+  SiftPoint *m_data; // Managed data
+#else
+  SiftPoint *h_data; // Host (CPU) data
+  SiftPoint *d_data; // Device (GPU) data
+#endif
+} SiftData;
+
+void InitCuda(int devNum = 0);
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false);
+void FreeSiftTempMemory(float *memoryTmp);
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                 float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0);
+void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true);
+void FreeSiftData(SiftData &data);
+void PrintSiftData(SiftData &data);
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime);
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu
new file mode 100644
index 000000000..db018dd14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu
@@ -0,0 +1,2263 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include "cudautils.h"
+#include "cudaSiftD.h"
+#include "cudaSift.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Kernel configuration
+///////////////////////////////////////////////////////////////////////////////
+
+__constant__ int d_MaxNumPoints; // upper bound applied to point counts read from d_PointCounter
+__device__ unsigned int d_PointCounter[8 * 2 + 1]; // 17 counters; ExtractSiftDescriptorsCONSTNew indexes them by 2 * octave +/- 1
+__constant__ float d_ScaleDownKernel[5]; // the ScaleDown* kernels read entries 0..2 and apply them as the symmetric taps k0,k1,k2,k1,k0
+__constant__ float d_LowPassKernel[2 * LOWPASS_R + 1]; // 2 * LOWPASS_R + 1 filter taps; LOWPASS_R is defined outside this file
+__constant__ float d_LaplaceKernel[8 * 12 * 16]; // NOTE(review): layout not visible in this part of the file - confirm against its users
+
+///////////////////////////////////////////////////////////////////////////////
+// Lowpass filter and subsample image
+///////////////////////////////////////////////////////////////////////////////
+__global__ void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) // 5-tap filter in x (via ShiftDown) and y (via shared memory), then 2x subsample into d_Result.
+{
+#define BW (SCALEDOWN_W + 4) // tile width including 4 extra columns for the 5-tap support; these macros are not #undef'd in this function
+#define BH (SCALEDOWN_H + 4) // tile height including 4 extra rows
+#define W2 (SCALEDOWN_W / 2) // output columns produced per block
+#define H2 (SCALEDOWN_H / 2) // output rows produced per block
+  __shared__ float brows[BH * BW]; // horizontally filtered tile
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * SCALEDOWN_W + tx;
+  const int yp = blockIdx.y * SCALEDOWN_H + ty;
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  const int xl = min(width - 1, max(0, xp - 2)); // source column, shifted left by 2 and clamped to the image
+  const int yl = min(height - 1, max(0, yp - 2)); // source row, shifted up by 2 and clamped to the image
+  if (xp < (width + 4) && yp < (height + 4))
+  {
+    float v = d_Data[yl * pitch + xl];
+    brows[BW * ty + tx] = k0 * (v + ShiftDown(v, 4)) + k1 * (ShiftDown(v, 1) + ShiftDown(v, 3)) + k2 * ShiftDown(v, 2); // ShiftDown is defined elsewhere; presumably a warp shuffle that fetches v from the lane 1..4 positions higher - confirm
+  }
+  __syncthreads(); // the whole tile must be filtered in x before the vertical pass reads it
+  const int xs = blockIdx.x * W2 + tx;
+  const int ys = blockIdx.y * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; // every second row and column of the filtered tile
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW]; // vertical 5-tap over rows 0..4 below ptr
+  }
+}
+
+__global__ void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) // Separable 5-tap filter with 2x subsampling, staged through two shared-memory tiles.
+{
+#define BW (SCALEDOWN_W + 4) // tile width including 4 extra columns for the 5-tap support
+#define BH (SCALEDOWN_H + 4) // tile height including 4 extra rows
+#define W2 (SCALEDOWN_W / 2) // output columns produced per block
+#define H2 (SCALEDOWN_H / 2) // output rows produced per block
+  __shared__ float irows[BH * BW]; // raw input tile
+  __shared__ float brows[BH * W2]; // tile after the horizontal filter and column subsampling
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * SCALEDOWN_W + tx;
+  const int yp = blockIdx.y * SCALEDOWN_H + ty;
+  const int xl = min(width - 1, max(0, xp - 2)); // source column, shifted left by 2 and clamped to the image
+  const int yl = min(height - 1, max(0, yp - 2)); // source row, shifted up by 2 and clamped to the image
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  if (xp < (width + 4) && yp < (height + 4))
+    irows[BW * ty + tx] = d_Data[yl * pitch + xl];
+  __syncthreads(); // input tile complete before the horizontal pass
+  if (yp < (height + 4) && tx < W2)
+  {
+    float *ptr = &irows[BW * ty + 2 * tx]; // start of the 5 input columns feeding output column tx
+    brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2];
+  }
+  __syncthreads(); // horizontal results complete before the vertical pass
+  const int xs = blockIdx.x * W2 + tx;
+  const int ys = blockIdx.y * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[W2 * (ty * 2) + tx]; // start of the 5 filtered rows feeding output row ty
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2];
+  }
+}
+
+__global__ void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) // 5-tap filter with 2x subsampling, streaming one input row at a time through a rotating 5-row buffer.
+{
+  __shared__ float inrow[SCALEDOWN_W + 4]; // current input row, indexed by threadIdx.x
+  __shared__ float brow[5 * (SCALEDOWN_W / 2)]; // five horizontally filtered rows, reused cyclically
+  __shared__ int yRead[SCALEDOWN_H + 4]; // per-row read offsets into d_Data
+  __shared__ int yWrite[SCALEDOWN_H + 4]; // per-row write offsets into d_Result
+#define dx2 (SCALEDOWN_W / 2)
+  const int tx = threadIdx.x;
+  const int tx0 = tx + 0 * dx2; // tx0..tx4: this thread's column in each of the five brow slots
+  const int tx1 = tx + 1 * dx2;
+  const int tx2 = tx + 2 * dx2;
+  const int tx3 = tx + 3 * dx2;
+  const int tx4 = tx + 4 * dx2;
+  const int xStart = blockIdx.x * SCALEDOWN_W;
+  const int yStart = blockIdx.y * SCALEDOWN_H;
+  const int xWrite = xStart / 2 + tx;
+  float k0 = d_ScaleDownKernel[0];
+  float k1 = d_ScaleDownKernel[1];
+  float k2 = d_ScaleDownKernel[2];
+  if (tx < SCALEDOWN_H + 4)
+  {
+    int y = yStart + tx - 2;
+    y = (y < 0 ? 0 : y); // clamp the source row to the image
+    y = (y >= height ? height - 1 : y);
+    yRead[tx] = y * pitch;
+    yWrite[tx] = (yStart + tx - 4) / 2 * newpitch;
+  }
+  __syncthreads(); // row offset tables ready
+  int xRead = xStart + tx - 2;
+  xRead = (xRead < 0 ? 0 : xRead); // clamp the source column to the image
+  xRead = (xRead >= width ? width - 1 : xRead);
+
+  int maxtx = min(dx2, width / 2 - xStart / 2); // number of output columns this block writes
+  for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) // each iteration handles up to five input rows, one per brow slot
+  {
+    { // row dy + 0: fills slot 4
+      inrow[tx] = d_Data[yRead[dy + 0] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 4 && !(dy & 1)) // the parity test limits output to every second input row
+          d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 3)) // row dy + 1: fills slot 0
+    {
+      inrow[tx] = d_Data[yRead[dy + 1] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 3 && (dy & 1))
+          d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 2)) // row dy + 2: fills slot 1
+    {
+      inrow[tx] = d_Data[yRead[dy + 2] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 2 && !(dy & 1))
+          d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 1)) // row dy + 3: fills slot 2
+    {
+      inrow[tx] = d_Data[yRead[dy + 3] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 1 && (dy & 1))
+          d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]);
+      }
+      __syncthreads();
+    }
+    if (dy < SCALEDOWN_H) // row dy + 4: fills slot 3
+    {
+      inrow[tx] = d_Data[yRead[dy + 4] + xRead];
+      __syncthreads();
+      if (tx < dx2 && xWrite < width / 2) // same bound as tx < maxtx, written out explicitly
+      {
+        brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (!(dy & 1))
+          d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]);
+      }
+      __syncthreads();
+    }
+  }
+}
+
+__global__ void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) // Doubles the image: each thread turns one source pixel into a 2x2 output patch.
+{
+  const int outX = blockIdx.x * SCALEUP_W + 2 * threadIdx.x; // top-left output coordinate of this thread's patch
+  const int outY = blockIdx.y * SCALEUP_H + 2 * threadIdx.y;
+  if (outX >= 2 * width || outY >= 2 * height)
+    return; // patch lies outside the doubled image
+  // Source pixel plus its right and lower neighbours, clamped at the image border.
+  const int srcX = blockIdx.x * (SCALEUP_W / 2) + threadIdx.x;
+  const int srcY = blockIdx.y * (SCALEUP_H / 2) + threadIdx.y;
+  const int srcXNext = min(srcX + 1, width - 1);
+  const int srcYNext = min(srcY + 1, height - 1);
+  const float *rowTop = d_Data + srcY * pitch;
+  const float *rowBot = d_Data + srcYNext * pitch;
+  const float tl = rowTop[srcX], tr = rowTop[srcXNext];
+  const float bl = rowBot[srcX], br = rowBot[srcXNext];
+  float *outTop = d_Result + (outY + 0) * newpitch + outX;
+  float *outBot = d_Result + (outY + 1) * newpitch + outX;
+  outTop[0] = tl;                                  // exact copy of the source pixel
+  outTop[1] = 0.50f * (tl + tr);                   // midpoint to the right neighbour
+  outBot[0] = 0.50f * (tl + bl);                   // midpoint to the lower neighbour
+  outBot[1] = 0.25f * (tl + tr + bl + br);         // centre of the four neighbours
+}
+
+__global__ void ExtractSiftDescriptors(cudaTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling) // One block per feature: builds the 128-value descriptor of d_sift[blockIdx.x + fstPts] from texture gradients.
+{
+  __shared__ float gauss[16];   // 1-D Gaussian weights over the 16 sample positions
+  __shared__ float buffer[128]; // histogram accumulator, one slot per descriptor element
+  __shared__ float sums[4];     // one partial sum of squares per group of 32 threads
+
+  const int tx = threadIdx.x; // 0 -> 16
+  const int ty = threadIdx.y; // 0 -> 8
+  const int idx = ty * 16 + tx; // flat thread index, also the descriptor element this thread normalises
+  const int bx = blockIdx.x + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads(); // gauss and the zeroed buffer must be visible before accumulation
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; // degrees to radians
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  for (int y = ty; y < 16; y += 8) // each thread handles two rows of the 16x16 sample grid
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; // sample position: grid offset rotated by theta and scaled
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina); // central difference along the rotated x axis
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa); // central difference along the rotated y axis
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); // Gaussian-weighted gradient magnitude
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; // gradient angle mapped onto the 8 orientation bins
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf; // NOTE(review): 3.1415f < pi, so angf can slightly exceed 8 when atan2f returns +pi, giving angi == 8 - confirm the resulting index is acceptable
+    int angp = (angi < 7 ? angi + 1 : 0); // next orientation bin, wrapping to 0
+    angf -= angi;
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2); // +32 = one cell row down (4 cells x 8 bins)
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2); // +8 = one cell to the right (8 bins)
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2); // +40 = one cell down and one to the right
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads(); // all contributions accumulated before normalisation
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i); // ShiftDown is defined elsewhere; presumably a warp shuffle-down used as a tree reduction - confirm
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum; // first thread of each group of 32 publishes its group's total
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); // normalised value, capped at 0.2
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * rsqrtf(tsum2); // second normalisation after the cap
+  if (idx == 0)
+  { // a single thread rescales the feature's position and scale by the subsampling factor
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+__device__ float FastAtan2(float y, float x) // Polynomial approximation of atan2(y, x): evaluates on min/max of |x|,|y|, then unfolds by octant, half-plane and sign.
+{
+  float absx = abs(x);
+  float absy = abs(y);
+  float a = __fdiv_rn(min(absx, absy), max(absx, absy)); // ratio in [0, 1]; NOTE(review): x == 0 and y == 0 gives 0/0 - confirm callers never pass that
+  float s = a * a;
+  float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a; // odd polynomial in a
+  r = (absy > absx ? 1.57079637f - r : r); // |y| > |x|: reflect about pi/2
+  r = (x < 0 ? 3.14159274f - r : r);       // negative x: reflect about pi
+  r = (y < 0 ? -r : r);                    // negative y: negate
+  return r;
+}
+
+// __global__ void ExtractSiftDescriptorsCONSTNew(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave)
+// Builds the 128-value descriptor of every SIFT point in one octave, sampling
+// the image straight from a pitched float buffer rather than from a texture.
+//
+// Each block processes the points bx = blockIdx.x + fstPts, advancing by
+// gridDim.x, where [fstPts, totPts) is read from d_PointCounter for `octave`
+// and capped at d_MaxNumPoints. idx = ty * 16 + tx addresses the 128 shared
+// histogram slots, so a 16x8 thread block is presumably expected
+// (TODO confirm against the launch site).
+//
+//   texObj       image samples; pixel (x, y) is read at texObj[y * pitch + x]
+//   pitch        row stride of texObj, counted in floats
+//   d_sift       point array: orientation, scale, xpos and ypos are read,
+//                data[] receives the descriptor, and xpos/ypos/scale are
+//                multiplied by subsampling afterwards
+//   subsampling  factor applied to xpos, ypos and scale once the descriptor
+//                has been written
+//   octave       selects the d_PointCounter entries bounding the point range
+//
+// NOTE(review): sample coordinates are truncated to int and used unclamped;
+// the image size is not available here, so points are assumed to lie far
+// enough from the border - confirm with the caller.
+__global__ void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift, float subsampling, int octave)
+{
+  __shared__ float gauss[16];   // 1-D window weights for the 16 patch positions
+  __shared__ float buffer[128]; // 4x4 spatial cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int tx = threadIdx.x; // patch column, 0..15
+  const int ty = threadIdx.y; // patch row modulo 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = __expf(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    buffer[idx] = 0.0f;
+    __syncthreads(); // also publishes gauss[] before its first use
+
+    // Compute angles and gradients (orientation is stored in degrees)
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = __sinf(theta); // cosa -sina
+    float cosa = __cosf(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    // Each thread handles rows ty and ty + 8 of the rotated 16x16 patch
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+
+      // Central differences along the rotated axes; coordinates are
+      // truncated to the containing pixel (no interpolation).
+      int xi1 = xpos + cosa;
+      int yi1 = ypos + sina;
+
+      int xi2 = xpos - cosa;
+      int yi2 = ypos - sina;
+
+      float dx = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      xi1 = xpos - sina;
+      yi1 = ypos + cosa;
+
+      xi2 = xpos + sina;
+      yi2 = ypos - cosa;
+
+      float dy = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      float grad = gauss[y] * gauss[tx] * __fsqrt_rn(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf;
+      angf -= angi; // fractional part used as the interpolation weight
+      // 3.1415f is slightly below pi, so an angle of exactly +pi yields bin 8.
+      // Wrap it onto bin 0 so the store stays inside this cell (and buffer).
+      angi &= 7;
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1, iangf * grad2);
+          atomicAdd(buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 32, iangf * grad2);
+          atomicAdd(buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1 + 8, iangf * grad2);
+          atomicAdd(buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 40, iangf * grad2);
+          atomicAdd(buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    __syncthreads();
+
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+    // All threads must have read sums[] before it is reused for the second
+    // pass; without this barrier a fast warp could overwrite its slot early.
+    __syncthreads();
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * rsqrtf(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    __syncthreads(); // keep buffer[]/sums[] stable until every thread is done
+  }
+}
+
+// Builds the 128-value descriptor of every SIFT point in one octave, sampling
+// the image through a texture object.
+//
+// Each block processes the points bx = blockIdx.x + fstPts, advancing by
+// gridDim.x, where [fstPts, totPts) is read from d_PointCounter for `octave`
+// and capped at d_MaxNumPoints. idx = ty * 16 + tx addresses the 128 shared
+// histogram slots, so a 16x8 thread block is presumably expected
+// (TODO confirm against the launch site).
+//
+//   texObj       texture sampled with tex2D<float>
+//   d_sift       point array: orientation, scale, xpos and ypos are read,
+//                data[] receives the descriptor, and xpos/ypos/scale are
+//                multiplied by subsampling afterwards
+//   subsampling  factor applied to xpos, ypos and scale once the descriptor
+//                has been written
+//   octave       selects the d_PointCounter entries bounding the point range
+__global__ void ExtractSiftDescriptorsCONST(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave)
+{
+  __shared__ float gauss[16];   // 1-D window weights for the 16 patch positions
+  __shared__ float buffer[128]; // 4x4 spatial cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int tx = threadIdx.x; // patch column, 0..15
+  const int ty = threadIdx.y; // patch row modulo 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    buffer[idx] = 0.0f;
+    __syncthreads(); // also publishes gauss[] before its first use
+
+    // Compute angles and gradients (orientation is stored in degrees)
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = sinf(theta); // cosa -sina
+    float cosa = cosf(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    // Each thread handles rows ty and ty + 8 of the rotated 16x16 patch
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+      // Central differences along the rotated axes
+      float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+                 tex2D<float>(texObj, xpos - cosa, ypos - sina);
+      float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+                 tex2D<float>(texObj, xpos + sina, ypos - cosa);
+      float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf;
+      angf -= angi; // fractional part used as the interpolation weight
+      // 3.1415f is slightly below pi, so atan2f() == +pi (dy == 0, dx < 0)
+      // yields bin 8. Wrap it onto bin 0 so the store stays inside this cell
+      // and inside buffer[].
+      angi &= 7;
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1, iangf * grad2);
+          atomicAdd(buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 32, iangf * grad2);
+          atomicAdd(buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1 + 8, iangf * grad2);
+          atomicAdd(buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 40, iangf * grad2);
+          atomicAdd(buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    __syncthreads();
+
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+    // All threads must have read sums[] before it is reused for the second
+    // pass; without this barrier a fast warp could overwrite its slot early.
+    __syncthreads();
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * rsqrtf(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    __syncthreads(); // keep buffer[]/sums[] stable until every thread is done
+  }
+}
+
+// Builds the 128-value descriptor of a single SIFT point per block, using a
+// shared-memory tree reduction for the two normalisation passes.
+//
+// The block handles point bx = blockIdx.x + fstPts. idx = ty * 16 + tx
+// addresses the 128 shared histogram slots, so a 16x8 thread block is
+// presumably expected (TODO confirm against the launch site).
+//
+//   texObj       texture sampled with tex2D<float>
+//   d_sift       point array: orientation, scale, xpos and ypos are read,
+//                data[] receives the descriptor, and xpos/ypos/scale are
+//                multiplied by subsampling afterwards
+//   fstPts       index of the point handled by block 0
+//   subsampling  factor applied to xpos, ypos and scale once the descriptor
+//                has been written
+__global__ void ExtractSiftDescriptorsOld(cudaTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling)
+{
+  __shared__ float gauss[16];   // 1-D window weights for the 16 patch positions
+  __shared__ float buffer[128]; // 4x4 spatial cells x 8 orientation bins
+  __shared__ float sums[128];   // scratch space for the tree reduction
+
+  const int tx = threadIdx.x; // patch column, 0..15
+  const int ty = threadIdx.y; // patch row modulo 8
+  const int idx = ty * 16 + tx;
+  const int bx = blockIdx.x + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads();
+
+  // Compute angles and gradients (orientation is stored in degrees)
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each thread handles rows ty and ty + 8 of the rotated 16x16 patch
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    // Central differences along the rotated axes
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina);
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    angf -= angi; // fractional part used as the interpolation weight
+    // 3.1415f is slightly below pi, so atan2f() == +pi (dy == 0, dx < 0)
+    // yields bin 8. Wrap it onto bin 0 so the store stays inside this cell
+    // and inside buffer[].
+    angi &= 7;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2);
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2);
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2);
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads();
+
+  // Normalize twice and suppress peaks first time.
+  // First pass: tree-reduce the sum of squares down to sums[0..3].
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  __syncthreads();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  __syncthreads();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  __syncthreads();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  __syncthreads();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  buffer[idx] = buffer[idx] * rsqrtf(tsum1);
+
+  // Clamp large components before the second normalisation
+  if (buffer[idx] > 0.2f)
+    buffer[idx] = 0.2f;
+  __syncthreads();
+  // Second pass: same reduction over the clamped values
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  __syncthreads();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  __syncthreads();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  __syncthreads();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  __syncthreads();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  __syncthreads();
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+
+  float *desc = d_sift[bx].data;
+  desc[idx] = buffer[idx] * rsqrtf(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+// Device helper: builds the 128-value descriptor of point d_sift[bx] using
+// all threads of the calling block.
+//
+// The function contains __syncthreads(), so every thread of the block must
+// call it with the same bx. idx = threadIdx.x addresses the 128 shared
+// histogram slots and data[idx], so a 1-D block of 128 threads is presumably
+// expected (TODO confirm against the callers).
+//
+//   texObj       texture sampled with tex2D<float>
+//   d_sift       point array: orientation, scale, xpos and ypos are read,
+//                data[] receives the descriptor, and xpos/ypos/scale are
+//                multiplied by subsampling afterwards
+//   subsampling  factor applied to xpos, ypos and scale once the descriptor
+//                has been written
+//   octave       not used by this function
+//   bx           index of the point to describe
+__device__ void ExtractSiftDescriptor(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave, int bx)
+{
+  __shared__ float gauss[16];   // 1-D window weights for the 16 patch positions
+  __shared__ float buffer[128]; // 4x4 spatial cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int idx = threadIdx.x;
+  const int tx = idx & 15; // patch column, 0..15
+  const int ty = idx / 16; // patch row modulo 8
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads();
+
+  // Compute angles and gradients (orientation is stored in degrees)
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each thread handles rows ty and ty + 8 of the rotated 16x16 patch
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    // Central differences along the rotated axes
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina);
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    angf -= angi; // fractional part used as the interpolation weight
+    // 3.1415f is slightly below pi, so atan2f() == +pi (dy == 0, dx < 0)
+    // yields bin 8. Wrap it onto bin 0 so the store stays inside this cell
+    // and inside buffer[].
+    angi &= 7;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2);
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2);
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2);
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads();
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+  // All threads must have read sums[] before it is reused for the second
+  // pass; without this barrier a fast warp could overwrite its slot early.
+  __syncthreads();
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * rsqrtf(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+  __syncthreads(); // shared arrays may be reused by the next call after this
+}
+
+// Multiplies xpos, ypos and scale of the first numPts points by `scale`.
+// One thread per point; threads whose global index is >= numPts do nothing.
+__global__ void RescalePositions(SiftPoint *d_sift, int numPts, float scale)
+{
+  const int pointIdx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (pointIdx >= numPts)
+    return; // surplus thread in the last block
+
+  SiftPoint *pt = d_sift + pointIdx;
+  pt->xpos *= scale;
+  pt->ypos *= scale;
+  pt->scale *= scale;
+}
+
+// Estimates the dominant gradient orientation of one SIFT point per block.
+//
+// The block handles point bx = blockIdx.x + fstPts. Threads with tx / 11 < 11
+// (tx < 121) each sample one position of an 11x11 window around the point, so
+// at least 121 threads per block are presumably expected (TODO confirm).
+// The dominant orientation is written to d_Sift[bx].orientation. If the
+// second-highest peak exceeds 0.8 of the highest, a copy of the point with
+// that second orientation is appended at the index returned by
+// atomicInc(d_PointCounter), provided it is below d_MaxNumPoints.
+//
+//   texObj  texture sampled with tex2D<float>
+//   d_Sift  point array (read and written)
+//   fstPts  index of the point handled by block 0
+__global__ void ComputeOrientations(cudaTextureObject_t texObj, SiftPoint *d_Sift, int fstPts)
+{
+  __shared__ float hist[64];  // [0..31] raw bins, later peaks; [32..63] smoothed bins
+  __shared__ float gauss[11]; // 1-D window weights, centred on index 5
+  const int tx = threadIdx.x;
+  const int bx = blockIdx.x + fstPts;
+  float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+  if (tx < 11)
+    gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5));
+  if (tx < 64)
+    hist[tx] = 0.0f;
+  __syncthreads();
+  // Map the thread index to an (xd, yd) offset inside the 11x11 window
+  float xp = d_Sift[bx].xpos - 4.5f;
+  float yp = d_Sift[bx].ypos - 4.5f;
+  int yd = tx / 11;
+  int xd = tx - yd * 11;
+  float xf = xp + xd;
+  float yf = yp + yd;
+  if (yd < 11)
+  {
+    // Central differences, then vote into one of 32 angular bins
+    float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+    float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+    int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+    if (bin > 31) // bin 32 wraps around to bin 0
+      bin = 0;
+    float grad = sqrtf(dx * dx + dy * dy);
+    atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]);
+  }
+  __syncthreads();
+  // Circular neighbour indices within the 32 bins (meaningful for tx < 32)
+  int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+  int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+  if (tx < 32)
+  {
+    // Smooth with a circular 1-4-6-4-1 kernel into the upper half of hist
+    int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+    int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+    hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+  }
+  __syncthreads();
+  if (tx < 32)
+  {
+    // Keep only local maxima of the smoothed histogram in the lower half.
+    // For tx < 32 both x1p and x1m already lie in 0..31, so the guard below
+    // always holds.
+    float v = hist[32 + tx];
+    if(x1p < 32 && x1m < 32)
+      hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+  }
+  __syncthreads();
+  if (tx == 0)
+  {
+    // Thread 0 scans for the two largest peaks (i1 strongest, i2 runner-up)
+    float maxval1 = 0.0;
+    float maxval2 = 0.0;
+    int i1 = -1;
+    int i2 = -1;
+    for (int i = 0; i < 32; i++)
+    {
+      float v = hist[i];
+      if (v > maxval1)
+      {
+        maxval2 = maxval1;
+        maxval1 = v;
+        i2 = i1;
+        i1 = i;
+      }
+      else if (v > maxval2)
+      {
+        maxval2 = v;
+        i2 = i;
+      }
+    }
+    // Refine the peak position from its two smoothed neighbours;
+    // 11.25 = 360 / 32 converts a bin index to degrees.
+    float val1 = hist[32 + ((i1 + 1) & 31)];
+    float val2 = hist[32 + ((i1 + 31) & 31)];
+    float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+    d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+    if (maxval2 > 0.8f * maxval1)
+    {
+      // Strong secondary peak: append a duplicate point carrying it
+      float val1 = hist[32 + ((i2 + 1) & 31)];
+      float val2 = hist[32 + ((i2 + 31) & 31)];
+      float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+      unsigned int idx = atomicInc(d_PointCounter, 0x7fffffff);
+      if (idx < d_MaxNumPoints)
+      {
+        d_Sift[idx].xpos = d_Sift[bx].xpos;
+        d_Sift[idx].ypos = d_Sift[bx].ypos;
+        d_Sift[idx].scale = d_Sift[bx].scale;
+        d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+        d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+        d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+        ;
+        d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+      }
+    }
+  }
+}
+
+// With constant number of blocks
+// Estimates the dominant gradient orientation of every SIFT point in one
+// octave, reading the image from a pitched float buffer.
+//
+// Each block processes the points bx = blockIdx.x + fstPts, advancing by
+// gridDim.x, where [fstPts, totPts) is read from d_PointCounter for `octave`
+// and capped at d_MaxNumPoints. For each point a (2*RAD+1)^2 window is
+// copied to shared memory (coordinates clamped to the image), filtered with
+// a separable 5-tap kernel, and its gradients are voted into LEN angular
+// bins. The dominant orientation (in degrees) is written to
+// d_Sift[bx].orientation; when the second peak exceeds 0.8 of the first, a
+// copy of the point with that orientation is appended at the index returned
+// by atomicInc on d_PointCounter[2 * octave + 1], if below d_MaxNumPoints.
+//
+// The histogram steps index by tx up to LEN - 1 and the Gaussian tables by tx
+// up to WID - 1, so at least LEN threads per block are presumably expected
+// (TODO confirm against the launch site).
+//
+//   image   image samples; pixel (x, y) is read at image[y * p + x]
+//   w, h    image width and height used for clamping
+//   p       row stride of image, counted in floats
+//   d_Sift  point array (read and written)
+//   octave  selects the d_PointCounter entries bounding the point range
+__global__ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave)
+{
+#define RAD 9
+#define WID (2 * RAD + 1)
+#define LEN 32 //%%%% Note: Lowe suggests 36, not 32
+  __shared__ float img[WID][WID], tmp[WID][WID];
+  __shared__ float hist[2 * LEN]; // [0..LEN) work/peaks, [LEN..2*LEN) raw then smoothed bins
+  __shared__ float gaussx[WID], gaussy[WID];
+  const int tx = threadIdx.x;
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    float sc = d_Sift[bx].scale;
+    for (int i = tx; i < 2 * LEN; i += blockDim.x)
+      hist[i] = 0.0f;
+    float xp = d_Sift[bx].xpos;
+    float yp = d_Sift[bx].ypos;
+    int xi = (int)xp;
+    int yi = (int)yp;
+    float xf = xp - xi;
+    float yf = yp - yi;
+    // Load the window around the point, clamping coordinates to the image
+    for (int i = tx; i < WID * WID; i += blockDim.x)
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      int sx = max(min(x - RAD + xi, w - 1), 0);
+      int sy = max(min(y - RAD + yi, h - 1), 0);
+      img[y][x] = image[sy * p + sx];
+    }
+    // 5-tap filter weights; all side taps are zero when sc <= 0.5
+    float fac[5];
+    fac[1] = fac[3] = (sc > 0.5f ? __expf(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[0] = fac[4] = (sc > 0.5f ? __expf(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[2] = 1.0f;
+    float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0
+    if (tx < WID)
+    {
+      // Window weights centred on the sub-pixel point position
+      gaussx[tx] = __expf(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf));
+      gaussy[tx] = __expf(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf));
+    }
+    __syncthreads();
+    // Vertical filter pass: img -> tmp (rows 2 .. WID-3)
+    for (int i = tx; i < (WID - 4) * WID; i += blockDim.x)
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      y += 2;
+      tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) +
+                  fac[0] * (img[y - 2][x] + img[y + 2][x]);
+    }
+    __syncthreads();
+    // Horizontal filter pass: tmp -> img (inner (WID-4)^2 region)
+    for (int i = tx; i < (WID - 4) * (WID - 4); i += blockDim.x)
+    {
+      int y = i / (WID - 4);
+      int x = i - y * (WID - 4);
+      x += 2;
+      y += 2;
+      img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) +
+                  fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]);
+    }
+    __syncthreads();
+    // Gradient voting over the inner (WID-6)^2 region into hist[LEN..2*LEN)
+    for (int i = tx; i < (WID - 6) * (WID - 6); i += blockDim.x)
+    {
+      int y = i / (WID - 6);
+      int x = i - y * (WID - 6);
+      x += 3;
+      y += 3;
+      float dx = img[y][x + 1] - img[y][x - 1];
+      float dy = img[y + 1][x] - img[y - 1][x];
+      int bin = (int)((LEN / 2) * atan2f(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % LEN;
+      float grad = __fsqrt_rn(dx * dx + dy * dy);
+      atomicAdd(&hist[LEN + bin], grad * gaussx[x] * gaussy[y]);
+    }
+    __syncthreads();
+    // Circular neighbour indices within the LEN bins (meaningful for tx < LEN)
+    int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1);
+    int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1);
+    int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2);
+    int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2);
+    // The three steps below each read values that neighbouring threads wrote
+    // in the previous step, so they are separated by block-wide barriers
+    // instead of relying on implicit lock-step execution within a warp.
+    if (tx < LEN)
+      hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) +
+                 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]);
+    __syncthreads();
+    if (tx < LEN)
+      hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) +
+                       0.0f * (hist[x2m] + hist[x2p]);
+    __syncthreads();
+    if (tx < LEN)
+    {
+      // Keep only local maxima of the smoothed histogram in the lower half
+      float val = hist[tx + LEN];
+      hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f);
+    }
+    __syncthreads();
+    if (tx == 0)
+    {
+      // Thread 0 scans for the two largest peaks (i1 strongest, i2 runner-up)
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < LEN; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      // Refine the peak position from its two smoothed neighbours and
+      // convert the bin index to degrees.
+      float val1 = hist[LEN + ((i1 + 1) % LEN)];
+      float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1)
+      {
+        // Strong secondary peak: append a duplicate point carrying it
+        float val1 = hist[LEN + ((i2 + 1) % LEN)];
+        float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = sc;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    // Thread 0 is still reading hist[] above; hold the other threads here so
+    // the next iteration cannot clear hist[] or reload img[] underneath it.
+    __syncthreads();
+  }
+#undef RAD
+#undef WID
+#undef LEN
+}
+
+// With constant number of blocks
+// Estimates the dominant gradient orientation of every SIFT point in one
+// octave, sampling the image through a texture object.
+//
+// Each block processes the points bx = blockIdx.x + fstPts, advancing by
+// gridDim.x, where [fstPts, totPts) is read from d_PointCounter for `octave`
+// and capped at d_MaxNumPoints. Threads with tx / 11 < 11 (tx < 121) each
+// sample one position of an 11x11 window, so at least 121 threads per block
+// are presumably expected (TODO confirm). The dominant orientation is written
+// to d_Sift[bx].orientation; when the second peak exceeds 0.8 of the first, a
+// copy of the point with that orientation is appended at the index returned
+// by atomicInc on d_PointCounter[2 * octave + 1], if below d_MaxNumPoints.
+//
+//   texObj  texture sampled with tex2D<float>
+//   d_Sift  point array (read and written)
+//   octave  selects the d_PointCounter entries bounding the point range
+__global__ void ComputeOrientationsCONST(cudaTextureObject_t texObj, SiftPoint *d_Sift, int octave)
+{
+  __shared__ float hist[64];  // [0..31] raw bins, later peaks; [32..63] smoothed bins
+  __shared__ float gauss[11]; // 1-D window weights, centred on index 5
+  const int tx = threadIdx.x;
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    __syncthreads();
+    // Map the thread index to an (xd, yd) offset inside the 11x11 window
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11;
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11)
+    {
+      // Central differences, then vote into one of 32 angular bins
+      float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+      float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+      int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31) // bin 32 wraps around to bin 0
+        bin = 0;
+      float grad = sqrtf(dx * dx + dy * dy);
+      atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    __syncthreads();
+    // Circular neighbour indices within the 32 bins (meaningful for tx < 32)
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      // Smooth with a circular 1-4-6-4-1 kernel into the upper half of hist
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+    }
+    __syncthreads();
+    if (tx < 32)
+    {
+      // Keep only local maxima of the smoothed histogram in the lower half.
+      // For tx < 32, x1m already lies in 0..31, so the guard always holds.
+      float v = hist[32 + tx];
+      if(x1m < 32)
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+    }
+    __syncthreads();
+    if (tx == 0)
+    {
+      // Thread 0 scans for the two largest peaks (i1 strongest, i2 runner-up)
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      // Refine the peak position from its two smoothed neighbours;
+      // 11.25 = 360 / 32 converts a bin index to degrees.
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+      // Raise the output counter to at least the input point count
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1 && true)
+      {
+        // Strong secondary peak: append a duplicate point carrying it
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    // Keep hist[]/gauss[] untouched until thread 0 has finished with them
+    __syncthreads();
+  }
+}
+
+// With constant number of blocks
+// Computes the orientation and then the descriptor of every SIFT point in one
+// octave within a single kernel.
+//
+// Each block processes the points bx = blockIdx.x + fstPts, advancing by
+// gridDim.x, where [fstPts, totPts) is read from d_PointCounter for `octave`
+// and capped at d_MaxNumPoints. After the orientation step the block calls
+// ExtractSiftDescriptor for bx and, when a secondary-orientation copy was
+// appended at index idx < d_MaxNumPoints, for that copy as well.
+// ExtractSiftDescriptor indexes 128 slots by threadIdx.x, so a 1-D block of
+// 128 threads is presumably expected (TODO confirm against the launch site).
+//
+//   texObj       texture sampled with tex2D<float>
+//   d_Sift       point array (read and written)
+//   subsampling  forwarded to ExtractSiftDescriptor
+//   octave       selects the d_PointCounter entries bounding the point range
+__global__ void OrientAndExtractCONST(cudaTextureObject_t texObj, SiftPoint *d_Sift, float subsampling, int octave)
+{
+  __shared__ float hist[64];  // [0..31] raw bins, later peaks; [32..63] smoothed bins
+  __shared__ float gauss[11]; // 1-D window weights, centred on index 5
+  // Index of the appended duplicate point, shared so that every thread takes
+  // the same branch before the second ExtractSiftDescriptor call.
+  __shared__ unsigned int idx; //%%%%
+  const int tx = threadIdx.x;
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    __syncthreads();
+    // Map the thread index to an (xd, yd) offset inside the 11x11 window
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11;
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11)
+    {
+      // Central differences, then vote into one of 32 angular bins
+      float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+      float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+      int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31) // bin 32 wraps around to bin 0
+        bin = 0;
+      float grad = sqrtf(dx * dx + dy * dy);
+      atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    __syncthreads();
+    // Circular neighbour indices within the 32 bins (meaningful for tx < 32)
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      // Smooth with a circular 1-4-6-4-1 kernel into the upper half of hist
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+    }
+    __syncthreads();
+    if (tx < 32)
+    {
+      // Keep only local maxima of the smoothed histogram in the lower half.
+      // For tx < 32, x1m already lies in 0..31, so the guard always holds.
+      float v = hist[32 + tx];
+      if(x1m < 32)
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+    }
+    __syncthreads();
+    if (tx == 0)
+    {
+      // Thread 0 scans for the two largest peaks (i1 strongest, i2 runner-up)
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      // Refine the peak position from its two smoothed neighbours;
+      // 11.25 = 360 / 32 converts a bin index to degrees.
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+      // Sentinel: no duplicate point unless the branch below overwrites it
+      idx = 0xffffffff; //%%%%
+      // Raise the output counter to at least the input point count
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1)
+      {
+        // Strong secondary peak: append a duplicate point carrying it
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%%
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    // Publishes idx and the orientation values to the whole block
+    __syncthreads();
+    // Descriptor for the original point, then for the duplicate if one was
+    // stored; idx is shared, so the branch is uniform across the block.
+    ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx);    //%%%%
+    if (idx < d_MaxNumPoints)                                          //%%%%
+      ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx); //%%%%
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Subtract two images (multi-scale version)
+///////////////////////////////////////////////////////////////////////////////
+
+// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int ty = threadIdx.y;
+//   if (tx == 0 && ty == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   int ypos = MINMAX_H * blockIdx.y + ty;
+//   if (ypos >= height)
+//     return;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]);
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+
+//   if (Shuffle(maxv, 0) > thresh)
+//   {
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   __syncthreads();
+//   if (ty == 0 && tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+__global__ void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+{ // Kernel: flags strict local extrema in the middle of three consecutive d_Data0 layers, refines them and appends them to d_Sift.
+#define MEMWID (MINMAX_W + 2)
+  __shared__ unsigned short points[2 * MEMWID]; // (x, y) pairs of the candidates collected by this block
+  // Thread 0 of block (0, 0) raises both counters of this octave to at least d_PointCounter[2 * octave - 1].
+  if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+  { // NOTE(review): 2 * octave - 1 evaluates to -1 when octave == 0 -- confirm against callers and the d_PointCounter declaration
+    atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+    atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+  }
+  int tx = threadIdx.x;
+  int block = blockIdx.x / NUM_SCALES; // blockIdx.x packs (horizontal tile, scale), NUM_SCALES scales per tile
+  int scale = blockIdx.x - NUM_SCALES * block;
+  int minx = block * MINMAX_W; // first image column of this tile
+  int maxx = min(minx + MINMAX_W, width);
+  int xpos = minx + tx; // the column actually read is xpos - 1, so tx == 0 covers the column left of the tile
+  int size = pitch * height; // number of elements in one layer
+  int ptr = size * scale + max(min(xpos - 1, width - 1), 0); // column clamped to [0, width - 1] inside layer 'scale'
+  // Pass 1: per-thread maximum |value| of layer scale + 1 over the rows handled by this block.
+  int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); // rows handled here, at most MINMAX_H
+  float maxv = 0.0f;
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float val = d_Data0[yptr1 + 1 * size];
+    maxv = fmaxf(maxv, fabs(val));
+  }
+  // if (tx==0) printf("XXX1\n");
+  if (!__any_sync(0xffffffff, maxv > thresh)) // leave early when no thread in the full vote mask exceeds thresh
+    return;
+  // if (tx==0) printf("XXX2\n");
+  // Pass 2: bit y of ptbits marks row y of this thread's column as a candidate extremum.
+  int ptbits = 0;
+  for (int y = 0; y < yloops; y++)
+  {
+
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float d11 = d_Data0[yptr1 + 1 * size]; // centre sample: layer scale + 1, current row
+    if (__any_sync(0xffffffff, fabs(d11) > thresh)) // skip the neighbourhood loads when no voting thread exceeds thresh on this row
+    {
+      // Naming dLR: L = layer offset from 'scale' (0..2), R = row (0 previous, 1 current, 2 next; rows clamped to the image).
+      int yptr0 = ptr + max(0, ypos - 1) * pitch;
+      int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+      float d01 = d_Data0[yptr1];
+      float d10 = d_Data0[yptr0 + 1 * size];
+      float d12 = d_Data0[yptr2 + 1 * size];
+      float d21 = d_Data0[yptr1 + 2 * size];
+
+      float d00 = d_Data0[yptr0];
+      float d02 = d_Data0[yptr2];
+      float ymin1 = fminf(fminf(d00, d01), d02); // min/max over the three rows of layer 0 in this column
+      float ymax1 = fmaxf(fmaxf(d00, d01), d02);
+      float d20 = d_Data0[yptr0 + 2 * size];
+      float d22 = d_Data0[yptr2 + 2 * size];
+      float ymin3 = fminf(fminf(d20, d21), d22); // min/max over the three rows of layer 2 in this column
+      float ymax3 = fmaxf(fmaxf(d20, d21), d22);
+      float ymin2 = fminf(fminf(ymin1, fminf(fminf(d10, d12), d11)), ymin3); // min/max over all nine samples of this column
+      float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d10, d12), d11)), ymax3);
+      // NOTE(review): ShiftUp/ShiftDown are defined elsewhere; presumably they fetch the value held by the adjacent thread on either side -- confirm.
+      float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+      float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+      float minv = fminf(fminf(nmin2, ymin1), ymin3); // everything compared against d11, d11 itself excluded
+      minv = fminf(fminf(minv, d10), d12);
+      float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); // shadows the pass-1 maxv inside this scope
+      maxv = fmaxf(fmaxf(maxv, d10), d12);
+
+      if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) // only threads 1..MINMAX_W inside the tile may record a point
+        ptbits |= ((d11 < fminf(-thresh, minv)) | (d11 > fmaxf(thresh, maxv))) << y; // strictly below minv and -thresh, or strictly above maxv and thresh
+    }
+  }
+  // Running sum of the per-thread candidate counts; assumes ShiftUp(v, d) yields the value held by thread tx - d -- TODO confirm.
+  unsigned int totbits = __popc(ptbits); // number of candidates found by this thread
+  unsigned int numbits = totbits;
+  for (int d = 1; d < 32; d <<= 1)
+  {
+    unsigned int num = ShiftUp(totbits, d);
+    if (tx >= d)
+      totbits += num;
+  }
+  int pos = totbits - numbits; // running total minus this thread's own count = first slot for this thread
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    if (ptbits & (1 << y) && pos < MEMWID) // candidates that would land at slot MEMWID or later are dropped
+    {
+      points[2 * pos + 0] = xpos - 1;
+      points[2 * pos + 1] = ypos;
+      pos++;
+    }
+  }
+  // NOTE(review): no explicit barrier separates the writes to points[] above from the reads below -- confirm the launch shape / Shuffle make this safe.
+  totbits = Shuffle(totbits, 31); // presumably takes the running total held by thread 31 -- confirm against Shuffle
+  if (tx < totbits) // one thread per stored candidate
+  {
+    int xpos = points[2 * tx + 0];
+    int ypos = points[2 * tx + 1];
+    int ptr = xpos + (ypos + (scale + 1) * height) * pitch; // candidate pixel in layer scale + 1
+    float val = d_Data0[ptr];
+    float *data1 = &d_Data0[ptr];
+    float dxx = 2.0f * val - data1[-1] - data1[1]; // dxx, dyy: twice the centre minus both neighbours along x / y
+    float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+    float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); // mixed term from the four diagonal neighbours
+    float tra = dxx + dyy; // trace of [dxx dxy; dxy dyy]
+    float det = dxx * dyy - dxy * dxy; // determinant of the same 2x2 matrix
+    if (tra * tra < edgeLimit * det) // keep the point only when trace^2 stays below edgeLimit * det
+    {
+      float edge = __fdividef(tra * tra, det);
+      float dx = 0.5f * (data1[1] - data1[-1]); // central differences along x and y
+      float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+      float *data0 = d_Data0 + ptr - height * pitch; // same pixel one layer earlier
+      float *data2 = d_Data0 + ptr + height * pitch; // same pixel one layer later
+      float ds = 0.5f * (data0[0] - data2[0]);
+      float dss = 2.0f * val - data2[0] - data0[0];
+      float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+      float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+      float idxx = dyy * dss - dys * dys; // id*: cofactors of the symmetric matrix [dxx dxy dxs; dxy dyy dys; dxs dys dss]
+      float idxy = dys * dxs - dxy * dss;
+      float idxs = dxy * dys - dyy * dxs;
+      float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); // reciprocal of that matrix's determinant
+      float idyy = dxx * dss - dxs * dxs;
+      float idys = dxy * dxs - dxx * dys;
+      float idss = dxx * dyy - dxy * dxy;
+      float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); // (pdx, pdy, pds) = inverse matrix times (dx, dy, ds)
+      float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+      float pds = idet * (idxs * dx + idys * dy + idss * ds);
+      if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) // any offset outside [-0.5, 0.5]: use per-axis ratios instead
+      {
+        pdx = __fdividef(dx, dxx);
+        pdy = __fdividef(dy, dyy);
+        pds = __fdividef(ds, dss);
+      }
+      float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); // correction added to val for the stored sharpness
+      int maxPts = d_MaxNumPoints;
+      float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); // 2^(scale / NUM_SCALES) * 2^(pds * factor)
+      if (sc >= lowestScale)
+      {
+        atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); // repeats the block-(0,0) update from the top of the kernel before taking a slot
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); // reserve the next output slot
+        idx = (idx >= maxPts ? maxPts - 1 : idx); // slots at or beyond maxPts all collapse onto the last entry
+        d_Sift[idx].xpos = xpos + pdx;
+        d_Sift[idx].ypos = ypos + pdy;
+        d_Sift[idx].scale = sc;
+        d_Sift[idx].sharpness = val + dval;
+        d_Sift[idx].edgeness = edge;
+        d_Sift[idx].subsampling = subsampling;
+      }
+    }
+  }
+}
+
+// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID];
+//   __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID];
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     ymin1[tx] = fminf(fminf(d10, d11), d12);
+//     ymax1[tx] = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     ymin3[tx] = fminf(fminf(d30, d31), d32);
+//     ymax3[tx] = fmaxf(fmaxf(d30, d31), d32);
+//     ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]);
+//     ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]);
+
+//     __syncthreads();
+
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//     __syncthreads();
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+__global__ void LaplaceMultiTex(cudaTextureObject_t texObj, float *d_Result, int width, int pitch, int height, int octave)
+{ // Kernel: filters texObj vertically then horizontally with one coefficient set per threadIdx.y and stores differences of consecutive results.
+  __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; // vertical-pass results, one row of LAPLACE_W + 2 * LAPLACE_R values per scale
+  __shared__ float data2[LAPLACE_W * LAPLACE_S]; // horizontal-pass results, one row of LAPLACE_W values per scale
+  const int tx = threadIdx.x;
+  const int xp = blockIdx.x * LAPLACE_W + tx;
+  const int yp = blockIdx.y; // one image row per block in y
+  const int scale = threadIdx.y; // threadIdx.y selects the coefficient set
+  float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; // coefficients: stride 12 * 16 floats per octave, 16 per scale
+  float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+  float x = xp - 3.5; // presumably the centre of texel xp - 4 with unnormalized coordinates -- confirm against the texture setup
+  float y = yp + 0.5;
+  sdata1[tx] = kernel[0] * tex2D<float>(texObj, x, y) + // symmetric vertical filter: kernel[k] weights the rows at y - k and y + k
+               kernel[1] * (tex2D<float>(texObj, x, y - 1.0) + tex2D<float>(texObj, x, y + 1.0)) +
+               kernel[2] * (tex2D<float>(texObj, x, y - 2.0) + tex2D<float>(texObj, x, y + 2.0)) +
+               kernel[3] * (tex2D<float>(texObj, x, y - 3.0) + tex2D<float>(texObj, x, y + 3.0)) +
+               kernel[4] * (tex2D<float>(texObj, x, y - 4.0) + tex2D<float>(texObj, x, y + 4.0));
+  __syncthreads(); // data1 must be complete before neighbouring entries are read below
+  float *sdata2 = data2 + LAPLACE_W * scale;
+  if (tx < LAPLACE_W) // only the first LAPLACE_W threads produce an output column
+  {
+    sdata2[tx] = kernel[0] * sdata1[tx + 4] + // symmetric horizontal filter; the centre offset 4 and taps 0..8 hard-code a radius of 4
+                 kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) +
+                 kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+                 kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) +
+                 kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]);
+  }
+  __syncthreads(); // data2 of the next scale must be complete before the subtraction below
+  if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) // the last scale has no successor to subtract
+    d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; // this scale minus the next scale (next row of data2)
+}
+
+__global__ void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+{ // Kernel: filters d_Image vertically then horizontally for every scale in turn and stores differences of consecutive scales in d_Result.
+  __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; // vertical-pass results, one row of LAPLACE_W + 2 * LAPLACE_R values per scale
+  const int tx = threadIdx.x;
+  const int xp = blockIdx.x * LAPLACE_W + tx;
+  const int yp = blockIdx.y; // one image row per block in y
+  float *data = d_Image + max(min(xp - LAPLACE_R, width - 1), 0); // column xp - LAPLACE_R clamped to [0, width - 1] (original note: multiply with 4 for max func)
+  float temp[2 * LAPLACE_R + 1]; // vertical window of samples around row yp
+  // Local copy of the first LAPLACE_R + 1 coefficients of every scale, filled in the first pass and reused in the second.
+  float kern[LAPLACE_S][LAPLACE_R + 1];
+  if (xp < (width + 2 * LAPLACE_R)) // threads up to 2 * LAPLACE_R columns past the image width take part in the vertical pass
+  {
+    for (int i = 0; i <= 2 * LAPLACE_R; i++)
+      temp[i] = data[max(0, min(yp + i - LAPLACE_R, height - 1)) * pitch]; // rows yp - LAPLACE_R .. yp + LAPLACE_R clamped to [0, height - 1]
+    for (int scale = 0; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+      float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; // coefficients: stride 12 * 16 floats per octave, 16 per scale
+      for (int i = 0; i <= LAPLACE_R; i++)
+      {
+        kern[scale][i] = kernel[i];
+      }
+      float sum = kern[scale][0] * temp[LAPLACE_R]; // symmetric vertical filter: coefficient j weights rows yp - j and yp + j
+#pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]);
+      buf[tx] = sum;
+    }
+  }
+  __syncthreads(); // buff must be complete before neighbouring entries are read below
+  if (tx < LAPLACE_W && xp < (width + 2 * LAPLACE_R)) // NOTE(review): admits xp >= width, while LaplaceMultiTex stores only for xp < width -- confirm this is intended
+  {
+    int scale = 0;
+    float oldRes = kern[scale][0] * buff[tx + LAPLACE_R]; // horizontal filter result for scale 0, centred on buff[tx + LAPLACE_R]
+
+#pragma unroll
+    for (int j = 1; j <= LAPLACE_R; j++)
+      oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]);
+    // Remaining scales: filter horizontally, then store the difference to the previous scale.
+    for (int scale = 1; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+
+      float res = kern[scale][0] * buf[tx + LAPLACE_R];
+
+#pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]);
+
+      d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes; // layer scale - 1 holds current scale minus previous scale
+      oldRes = res;
+    }
+  }
+}
+
+// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx;
+//   const int yp = blockIdx.y;
+//   float kern[LAPLACE_S][LAPLACE_R + 1];
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   float temp[9];
+//   if (xp < (width + 2 * LAPLACE_R))
+//   {
+//     for (int i = 0; i < 4; i++)
+//       temp[i] = data[max(0, min(yp + i - 4, height - 1)) * pitch];
+//     for (int i = 4; i < 8 + 1; i++)
+//       temp[i] = data[min(yp + i - 4, height - 1) * pitch];
+//     for (int scale = 0; scale < LAPLACE_S; scale++)
+//     {
+//       float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//       for (int i = 0; i <= LAPLACE_R; i++)
+//         kern[scale][i] = kernel[LAPLACE_R - i];
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       buf[tx] = kern[scale][4] * temp[4] +
+//                 kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) +
+//                 kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]);
+//     }
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W / 4 && xp4 < width)
+//   {
+//     float4 b0 = reinterpret_cast<float4 *>(buff)[tx + 0];
+//     float4 b1 = reinterpret_cast<float4 *>(buff)[tx + 1];
+//     float4 b2 = reinterpret_cast<float4 *>(buff)[tx + 2];
+//     float4 old4, new4, dif4;
+//     old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) +
+//              kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x);
+//     old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) +
+//              kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y);
+//     old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) +
+//              kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z);
+//     old4.w = kern[0][4] * b1.w + kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) +
+//              kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w);
+//     for (int scale = 1; scale < LAPLACE_S; scale++)
+//     {
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       float4 b0 = reinterpret_cast<float4 *>(buf)[tx + 0];
+//       float4 b1 = reinterpret_cast<float4 *>(buf)[tx + 1];
+//       float4 b2 = reinterpret_cast<float4 *>(buf)[tx + 2];
+//       new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) +
+//                kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) +
+//                kern[scale][0] * (b0.x + b2.x);
+//       new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) +
+//                kern[scale][2] * (b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) +
+//                kern[scale][0] * (b0.y + b2.y);
+//       new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) +
+//                kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) +
+//                kern[scale][0] * (b0.z + b2.z);
+//       new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) +
+//                kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) +
+//                kern[scale][0] * (b0.w + b2.w);
+//       dif4.x = new4.x - old4.x;
+//       dif4.y = new4.y - old4.y;
+//       dif4.z = new4.z - old4.z;
+//       dif4.w = new4.w - old4.w;
+//       reinterpret_cast<float4 *>(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4;
+//       old4 = new4;
+//     }
+//   }
+// }
+
+// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = LAPLACE_H * blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   float temp[8 + LAPLACE_H], kern[LAPLACE_R + 1];
+//   for (int i = 0; i < 4; i++)
+//     temp[i] = data[max(0, min(yp + i - 4, h)) * pitch];
+//   for (int i = 4; i < 8 + LAPLACE_H; i++)
+//     temp[i] = data[min(yp + i - 4, h) * pitch];
+//   for (int i = 0; i <= LAPLACE_R; i++)
+//     kern[i] = kernel[LAPLACE_R - i];
+//   for (int j = 0; j < LAPLACE_H; j++)
+//   {
+//     sdata1[tx] = kern[4] * temp[4 + j] +
+//                  kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) +
+//                  kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]);
+//     __syncthreads();
+//     float *sdata2 = data2 + LAPLACE_W * scale;
+//     if (tx < LAPLACE_W)
+//     {
+//       sdata2[tx] = kern[4] * sdata1[tx + 4] +
+//                    kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                    kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//     }
+//     __syncthreads();
+//     if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height)
+//       d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+//   }
+// }
+
+// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] +
+//                kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) +
+//                kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) +
+//                kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) +
+//                kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]);
+//   __syncthreads();
+//   float *sdata2 = data2 + LAPLACE_W * scale;
+//   if (tx < LAPLACE_W)
+//   {
+//     sdata2[tx] = kernel[0] * sdata1[tx + 4] +
+//                  kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) +
+//                  kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                  kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) +
+//                  kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width)
+//     d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+// }
+
+__global__ void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height) // 9-tap filter: vertical pass into shared memory, then horizontal pass into d_Result
+{
+  __shared__ float buffer[(LOWPASS_W + 2 * LOWPASS_R) * LOWPASS_H];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx;
+  const int yp = blockIdx.y * LOWPASS_H + ty;
+  float *kernel = d_LowPassKernel; // kernel[4] weights the centre sample, kernel[0] the samples 4 away
+  float *data = d_Image + max(min(xp - 4, width - 1), 0); // source column xp - 4, clamped to [0, width - 1]
+  float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); // slice of the shared buffer used by this threadIdx.y
+  int h = height - 1; // last valid row; vertical taps are clamped to [0, h]
+  if (yp < height)
+    buff[tx] = kernel[4] * data[min(yp, h) * pitch] +
+               kernel[3] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) +
+               kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) +
+               kernel[1] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) +
+               kernel[0] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]);
+  __syncthreads(); // buff[tx .. tx + 8] is read below, so neighbouring threads must have stored their values first
+  if (tx < LOWPASS_W && xp < width && yp < height)
+    d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] +
+                                kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) +
+                                kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]);
+}
+
+__global__ void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height) // low-pass kernel launched by the host-side LowPass() wrapper
+{
+  __shared__ float xrows[16][32]; // intermediate rows, reused as a ring buffer through the "% N" indexing below
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx;
+  const int yp = blockIdx.y * LOWPASS_H + ty;
+  const int N = 16; // ring-buffer modulus; equals the first dimension of xrows
+  float *k = d_LowPassKernel; // k[4] weights the centre sample, k[0] the outermost ones
+  int xl = max(min(xp - 4, width - 1), 0); // source column xp - 4, clamped to [0, width - 1]
+  for (int l = -8; l <= LOWPASS_H; l += 4) // advances 4 rows per pass; NOTE(review): presumably relies on blockDim.y == 4 as in the host launch — confirm
+  {
+    if (l < LOWPASS_H) // load phase: store one more filtered row per threadIdx.y
+    {
+      int yl = max(min(yp + l + 4, height - 1), 0); // source row clamped to [0, height - 1]
+      float val = d_Image[yl * pitch + xl]; // ShiftDown below is defined outside this view (presumably a warp shuffle — TODO confirm)
+      xrows[(l + 8 + ty) % N][tx] = k[4] * ShiftDown(val, 4) +
+                                    k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+                                    k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+                                    k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+                                    k[0] * (ShiftDown(val, 8) + val);
+    }
+    if (l >= 4) // store phase: weighted sum of the 9 buffered rows around ring row (l + ty) % N
+    {
+      int ys = yp + l - 4; // output row written in this pass
+      if (xp < width && ys < height && tx < LOWPASS_W)
+        d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] +
+                                    k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) +
+                                    k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) +
+                                    k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) +
+                                    k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]);
+    }
+    if (l >= 0)
+      __syncthreads(); // l has the same value in every thread, so the whole block reaches this barrier together
+  }
+}
+
+__global__ void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height) // same filter as LowPassBlockOld, with the initial loads split from the main loop
+{
+  __shared__ float xrows[16][32]; // intermediate rows, reused as a ring buffer through the "% N" indexing below
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx;
+  const int yp = blockIdx.y * LOWPASS_H + ty;
+  const int N = 16; // ring-buffer modulus; equals the first dimension of xrows
+  float *k = d_LowPassKernel; // k[4] weights the centre sample, k[0] the outermost ones
+  int xl = max(min(xp - 4, width - 1), 0); // source column xp - 4, clamped to [0, width - 1]
+#pragma unroll
+  for (int l = -8; l < 4; l += 4) // prologue: fills rows ty, ty + 4 and ty + 8 of xrows without writing any output
+  {
+    int ly = l + ty;
+    int yl = max(min(yp + l + 4, height - 1), 0); // source row clamped to [0, height - 1]
+    float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x
+    val = k[4] * ShiftDown(val, 4) +
+          k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+          k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+          k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+          k[0] * (ShiftDown(val, 8) + val);
+    xrows[ly + 8][tx] = val;
+  }
+  __syncthreads(); // prologue rows must be visible before the first vertical sum
+#pragma unroll
+  for (int l = 4; l < LOWPASS_H; l += 4) // main loop: load one more row, then emit the output row 4 rows behind it
+  {
+    int ly = l + ty;
+    int yl = min(yp + l + 4, height - 1); // only the upper clamp is applied here (l >= 4)
+    float val = d_Image[yl * pitch + xl]; // ShiftDown below is defined outside this view (presumably a warp shuffle — TODO confirm)
+    val = k[4] * ShiftDown(val, 4) +
+          k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+          k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+          k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+          k[0] * (ShiftDown(val, 8) + val);
+    xrows[(ly + 8) % N][tx] = val;
+    int ys = yp + l - 4; // output row written in this pass
+    if (xp < width && ys < height && tx < LOWPASS_W)
+      d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                  k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                  k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                  k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                  k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+    __syncthreads(); // reached unconditionally by every thread once per iteration
+  }
+  int ly = LOWPASS_H + ty; // epilogue: final group of output rows, no further loads
+  int ys = yp + LOWPASS_H - 4;
+  if (xp < width && ys < height && tx < LOWPASS_W)
+    d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h
new file mode 100644
index 000000000..52fd52aa4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h
@@ -0,0 +1,80 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTD_H
+#define CUDASIFTD_H
+
+#define NUM_SCALES 5 // LAPLACE_S and the host-side buffers are sized as NUM_SCALES + 3
+
+// Scale down thread block width
+#define SCALEDOWN_W 64 // 60
+
+// Scale down thread block height
+#define SCALEDOWN_H 16 // 8
+
+// Scale up thread block width
+#define SCALEUP_W 64
+
+// Scale up thread block height
+#define SCALEUP_H 8
+
+// Find point thread block width
+#define MINMAX_W 30 // 32
+
+// Find point thread block height
+#define MINMAX_H 8 // 16
+
+// Laplace thread block width
+#define LAPLACE_W 128 // 56
+
+// Laplace rows per thread
+#define LAPLACE_H 4
+
+// Number of laplace scales
+#define LAPLACE_S (NUM_SCALES + 3)
+
+// Laplace filter kernel radius
+#define LAPLACE_R 4
+
+#define LOWPASS_W 24 // 56; output columns covered by one low-pass block
+#define LOWPASS_H 32 // 16; image rows covered by one low-pass block
+#define LOWPASS_R 4 // low-pass filter radius; the host builds 2 * LOWPASS_R + 1 taps
+
+//====================== Number of threads ====================//
+// ScaleDown:               SCALEDOWN_W + 4
+// LaplaceMulti:            LAPLACE_W + 2*LAPLACE_R
+// FindPointsMulti:         MINMAX_W + 2
+// ComputeOrientations:     256
+// ExtractSiftDescriptors:  16 x 8 = 128
+
+//====================== Number of blocks ====================//
+// ScaleDown:               (width/SCALEDOWN_W) * (height/SCALEDOWN_H)
+// LaplaceMulti:            (width/LAPLACE_W) * height
+// FindPointsMulti:         (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H)
+// ComputeOrientations:     512
+// ExtractSiftDescriptors:  512
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu
new file mode 100644
index 000000000..3f1e15fbf
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu
@@ -0,0 +1,593 @@
+//********************************************************//
+// CUDA SIFT extractor by Mårten Björkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <iostream>
+#include <algorithm>
+#include <chrono>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+#include "cudaSiftD.h"
+#include "cudaSiftH.h"
+
+#include "cudaSiftD.cu"
+
+void InitCuda(int devNum) // picks a device index (capped at the last device), calls deviceInit and prints the device properties
+{
+  int deviceCount;
+  safeCall(cudaGetDeviceCount(&deviceCount));
+  if (deviceCount == 0)
+  {
+    std::cerr << "No CUDA devices available" << std::endl;
+    return;
+  }
+  devNum = (devNum > deviceCount - 1) ? deviceCount - 1 : devNum; // an index past the end falls back to the last device
+  deviceInit(devNum);
+  cudaDeviceProp props;
+  safeCall(cudaGetDeviceProperties(&props, devNum));
+  const double peakBandwidth = 2.0 * props.memoryClockRate * (props.memoryBusWidth / 8) / 1.0e6;
+  printf("Device Number: %d\n", devNum);
+  printf("  Device name: %s\n", props.name);
+  printf("  Memory Clock Rate (MHz): %d\n", props.memoryClockRate / 1000);
+  printf("  Clock Freq (MHz): %d\n", props.clockRate / 1000);
+  printf("  Memory Bus Width (bits): %d\n", props.memoryBusWidth);
+  printf("  Peak Memory Bandwidth (GB/s): %.1f\n\n", peakBandwidth);
+}
+
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp)
+{
+  // Reserves one image plus NUM_SCALES + 3 Laplace planes for the base level and for each of the
+  // numOctaves halved levels; row lengths come from iAlignUp(w, 128). Returns the device pointer.
+  const int planes = NUM_SCALES + 3;
+  int levelW = width * (scaleUp ? 2 : 1), levelH = height * (scaleUp ? 2 : 1);
+  int imageFloats = 0, laplaceFloats = 0;
+  for (int level = 0;; level++)
+  {
+    const int rowFloats = iAlignUp(levelW, 128);
+    imageFloats += levelH * rowFloats;
+    laplaceFloats += planes * levelH * rowFloats;
+    if (level >= numOctaves) // the base level and numOctaves halvings have been counted
+      break;
+    levelW /= 2;
+    levelH /= 2;
+  }
+  const int totalFloats = imageFloats + laplaceFloats;
+  float *buffer = NULL;
+  size_t pitchBytes; // output required by cudaMallocPitch, not used afterwards
+#ifdef DEVICE_TIMER
+  auto start_malloc = std::chrono::steady_clock::now();
+#endif
+  safeCall(cudaMallocPitch((void **)&buffer, &pitchBytes, (size_t)4096, (totalFloats + 4095) / 4096 * sizeof(float)));
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_malloc = std::chrono::steady_clock::now();
+  time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  return buffer;
+}
+
+void FreeSiftTempMemory(float *memoryTmp) // passes a non-null device pointer to cudaFree; a null pointer is ignored
+{
+  if (memoryTmp != NULL)
+    safeCall(cudaFree(memoryTmp));
+}
+
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur,
+                 float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory)
+{
+  if (numOctaves < 0 || numOctaves > 7) // kernel[8 * 12 * 16] below has octave slots 0..7 and PrepareLaplaceKernels writes slot numOctaves
+  { printf("ExtractSift: numOctaves must be in [0, 7]\n"); return; }
+  unsigned int *d_PointCounterAddr;
+#ifdef DEVICE_TIMER
+  auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+  safeCall(cudaGetSymbolAddress((void **)&d_PointCounterAddr, d_PointCounter));
+  safeCall(cudaMemset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int))); // zero all 17 counters before extraction
+  safeCall(cudaMemcpyToSymbol(d_MaxNumPoints, &siftData.maxPts, sizeof(int)));
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_memcpy = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  const int nd = NUM_SCALES + 3;
+  int w = img.width * (scaleUp ? 2 : 1);
+  int h = img.height * (scaleUp ? 2 : 1);
+  int p = iAlignUp(w, 128);
+  int width = w, height = h; // full-resolution working size; w and h are halved in the loop below
+  int size = h * p;         // image sizes
+  int sizeTmp = nd * h * p; // laplace buffer sizes
+  for (int i = 0; i < numOctaves; i++)
+  {
+    w /= 2;
+    h /= 2;
+    int p = iAlignUp(w, 128);
+    size += h * p;
+    sizeTmp += nd * h * p;
+  }
+  float *memoryTmp = tempMemory;
+  size += sizeTmp;
+  if (!tempMemory) // no caller-provided scratch buffer: allocate one here and free it before returning
+  {
+    size_t pitch;
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  }
+  float *memorySub = memoryTmp + sizeTmp; // image storage starts after the laplace buffers
+
+  CudaImage lowImg;
+  lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub);
+  float kernel[8 * 12 * 16]; // host staging for d_LaplaceKernel; host-only work, so it is prepared once for both paths
+  PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+  if (!scaleUp)
+  {
+#ifdef DEVICE_TIMER
+    auto start_memcpy1 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpyToSymbol(d_LaplaceKernel, kernel, 8 * 12 * 16 * sizeof(float)));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy1 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy1 - start_memcpy1).count();
+#endif
+    LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime);
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime);
+#ifdef DEVICE_TIMER
+    auto start_memcpy2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy2 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy2 - start_memcpy2).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); // the device counter may exceed maxPts
+  }
+  else
+  {
+    CudaImage upImg;
+    upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp);
+    ScaleUp(upImg, img, totTime);
+    LowPass(lowImg, upImg, fmax(initBlur, 0.001f), totTime); // fmax, matching the non-upscaled path above
+#ifdef DEVICE_TIMER
+    auto start_memcpy3 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpyToSymbol(d_LaplaceKernel, kernel, 8 * 12 * 16 * sizeof(float)));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy3 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy3 - start_memcpy3).count();
+#endif
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime);
+#ifdef DEVICE_TIMER
+    auto start_memcpy4 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy4 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy4 - start_memcpy4).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts);
+    RescalePositions(siftData, 0.5f, totTime); // points were found on the 2x image, so they are scaled by 0.5
+  }
+
+  if (!tempMemory)
+    safeCall(cudaFree(memoryTmp));
+  if (siftData.h_data)
+  {
+#ifdef DEVICE_TIMER
+    auto start_memcpy5 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts, cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy5 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy5 - start_memcpy5).count();
+    printf("Total time for sift extraction =  %.2f us\n\n", totTime);
+#endif
+  }
+  printf("Number of Points after sift extraction =  %d\n\n", siftData.numPts);
+}
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,
+                    float subsampling, float *memoryTmp, float *memorySub, float &totTime)
+{
+  if (numOctaves > 1)
+  {
+    // Smaller octaves first: recurse on a half-size image (storage passed in as memorySub) before handling this level.
+    const int halfW = (int)img.width / 2, halfH = (int)img.height / 2;
+    const int halfPitch = iAlignUp(halfW, 128);
+    CudaImage halfImg;
+    halfImg.Allocate(halfW, halfH, halfPitch, false, totTime, memorySub);
+    ScaleDown(halfImg, img, 0.5f, totTime);
+    const float nextBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f;
+    ExtractSiftLoop(siftData, halfImg, numOctaves - 1, nextBlur, thresh, lowestScale, subsampling * 2.0f,
+                    memoryTmp, memorySub + halfH * halfPitch, totTime);
+  }
+  ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime);
+  return 0;
+}
+
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh,
+                       float lowestScale, float subsampling, float *memoryTmp, float &totTime)
+{
+  // Runs the per-octave stages in order: LaplaceMulti, FindPointsMulti, ComputeOrientations, ExtractSiftDescriptors.
+  const int nd = NUM_SCALES + 3;
+  CudaImage diffImg[nd];
+  int w = img.width;
+  int h = img.height;
+  int p = iAlignUp(w, 128);
+  // Only the first nd - 1 images are given storage, placed back to back in memoryTmp at a stride of p * h floats.
+  for (int i = 0; i < nd - 1; i++)
+    diffImg[i].Allocate(w, h, p, false, totTime, memoryTmp + i * p * h);
+
+  LaplaceMulti(img, diffImg, octave, totTime);
+  FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, totTime);
+  ComputeOrientations(img, siftData, octave, totTime);
+  ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime);
+}
+
+void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev) // sets up data for num SiftPoints, allocating host and/or device storage as requested
+{
+  data.numPts = 0;
+  data.maxPts = num;
+  size_t sz = sizeof(SiftPoint) * (size_t)num; // kept in size_t: an int byte count overflows once it exceeds INT_MAX
+  data.h_data = NULL;
+  if (host)
+    data.h_data = (SiftPoint *)malloc(sz);
+  data.d_data = NULL;
+  if (dev)
+  {
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMalloc((void **)&data.d_data, sz));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count(); // only the device allocation is timed
+#endif
+  }
+}
+
+void FreeSiftData(SiftData &data) // releases the device and host buffers and resets the counters
+{
+  if (data.d_data != NULL)
+    safeCall(cudaFree(data.d_data));
+  data.d_data = NULL;
+  free(data.h_data); // free(NULL) is a no-op, so no guard is needed
+  data.h_data = NULL; // cleared like d_data so that a repeated call cannot double-free
+  data.numPts = 0;
+  data.maxPts = 0;
+}
+
+void PrintSiftData(SiftData &data)
+{
+  if (data.h_data == NULL) // no host copy yet: download the points once and keep the buffer in data.h_data
+  {
+    SiftPoint *hostCopy = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts);
+    safeCall(cudaMemcpy(hostCopy, data.d_data, sizeof(SiftPoint) * data.numPts, cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+    data.h_data = hostCopy;
+  }
+  for (int i = 0; i < data.numPts; i++)
+  {
+    SiftPoint &pt = data.h_data[i];
+    printf("xpos         = %.2f\n", pt.xpos);
+    printf("ypos         = %.2f\n", pt.ypos);
+    printf("scale        = %.2f\n", pt.scale);
+    printf("sharpness    = %.2f\n", pt.sharpness);
+    printf("edgeness     = %.2f\n", pt.edgeness);
+    printf("orientation  = %.2f\n", pt.orientation);
+    printf("score        = %.2f\n", pt.score);
+    float *desc = (float *)&pt.data;
+    for (int row = 0; row < 8; row++) // printed as 8 rows of 16 values, element index row + 8 * col
+    {
+      printf("%s", row == 0 ? "data = " : "       ");
+      for (int col = 0; col < 16; col++)
+      {
+        const float value = desc[row + 8 * col];
+        if (value < 0.05)
+          printf(" .   ");
+        else
+          printf("%.2f ", value);
+      }
+      printf("\n");
+    }
+  }
+  printf("Number of available points: %d\n", data.numPts);
+  printf("Number of allocated points: %d\n", data.maxPts);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Host side master functions
+///////////////////////////////////////////////////////////////////////////////
+
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime) // uploads a 5-tap kernel when variance changes, then launches the ScaleDown kernel; always returns 0.0
+{
+  static float oldVariance = -1.0f; // variance of the kernel uploaded last; persists across calls
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleDown: missing data\n");
+    return 0.0;
+  }
+  if (oldVariance != variance) // rebuild and upload the kernel only when the variance differs from the cached one
+  {
+    float h_Kernel[5];
+    float kernelSum = 0.0f;
+    for (int j = 0; j < 5; j++)
+    {
+      h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); // Gaussian weight centred on j == 2
+      kernelSum += h_Kernel[j];
+    }
+    for (int j = 0; j < 5; j++)
+      h_Kernel[j] /= kernelSum; // normalise so the five weights sum to 1
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpyToSymbol(d_ScaleDownKernel, h_Kernel, 5 * sizeof(float)));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+    oldVariance = variance;
+  }
+#if 0 // disabled alternative launch using ScaleDownDenseShift
+  dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H));
+  dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4);
+  ScaleDownDenseShift<<<blocks, threads>>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+#else
+  dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H));
+  dim3 threads(SCALEDOWN_W + 4); // NOTE(review): 4 threads more than SCALEDOWN_W — presumably halo columns for the 5-tap filter; confirm in the kernel
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  ScaleDown<<<blocks, threads>>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ScaleDown time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+#endif
+  checkMsg("ScaleDown() execution failed\n");
+  return 0.0;
+}
+
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime) // launches the ScaleUp kernel from src into res; always returns 0.0
+{
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleUp: missing data\n");
+    return 0.0;
+  }
+  dim3 blocks(iDivUp(res.width, SCALEUP_W), iDivUp(res.height, SCALEUP_H)); // grid sized from the destination image
+  dim3 threads(SCALEUP_W / 2, SCALEUP_H / 2); // half the tile size per dimension; NOTE(review): presumably each thread writes a 2x2 output patch — confirm in the kernel
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  ScaleUp<<<blocks, threads>>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ScaleUp time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+  checkMsg("ScaleUp() execution failed\n");
+  return 0.0;
+}
+
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime) // launches ComputeOrientationsCONSTNew on src and siftData.d_data; always returns 0.0
+{
+  dim3 blocks(512); // fixed grid size, independent of the number of points
+  dim3 threads(256);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  ComputeOrientationsCONSTNew<<<blocks, threads>>>(src.d_data, src.width, src.pitch, src.height, siftData.d_data, octave);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ComputeOrientationsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+  checkMsg("ComputeOrientations() execution failed\n");
+  return 0.0;
+}
+
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime) // launches ExtractSiftDescriptorsCONSTNew; always returns 0.0
+{
+  dim3 blocks(512); // fixed grid size, independent of the number of points
+  dim3 threads(16, 8);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  ExtractSiftDescriptorsCONSTNew<<<blocks, threads>>>(texObj, pitch, siftData.d_data, subsampling, octave); // texObj is a plain device pointer here (the caller passes img.d_data)
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ExtractSiftDescriptorsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+  checkMsg("ExtractSiftDescriptors() execution failed\n");
+  return 0.0;
+}
+double RescalePositions(SiftData &siftData, float scale, float &totTime) // launches the RescalePositions kernel over siftData.d_data; always returns 0.0
+{
+  if (siftData.numPts <= 0) return 0.0; // no points: skip the launch, since a grid of zero blocks is an invalid configuration
+  dim3 blocks(iDivUp(siftData.numPts, 64));
+  dim3 threads(64);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  RescalePositions<<<blocks, threads>>>(siftData.d_data, siftData.numPts, scale);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+  checkMsg("RescalePositions() execution failed\n");
+  return 0.0;
+}
+
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime) // uploads a Gaussian kernel when scale changes, then launches LowPassBlockOld; always returns 0.0
+{
+  float kernel[2 * LOWPASS_R + 1];
+  static float oldScale = -1.0f; // scale of the kernel uploaded last; persists across calls
+  if (scale != oldScale) // rebuild and upload the kernel only when the scale differs from the cached one
+  {
+    float kernelSum = 0.0f;
+    float ivar2 = 1.0f / (2.0f * scale * scale);
+    for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+    {
+      kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); // Gaussian weight at offset j, stored at index j + LOWPASS_R
+      kernelSum += kernel[j + LOWPASS_R];
+    }
+    for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+      kernel[j + LOWPASS_R] /= kernelSum; // normalise so the weights sum to 1
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpyToSymbol(d_LowPassKernel, kernel, (2 * LOWPASS_R + 1) * sizeof(float)));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+    oldScale = scale;
+  }
+  int width = res.width;
+  int pitch = res.pitch; // NOTE(review): the kernel indexes src with this pitch too — assumes src and res share a pitch; confirm
+  int height = res.height;
+  dim3 blocks(iDivUp(width, LOWPASS_W), iDivUp(height, LOWPASS_H)); //[80,34,1]
+
+  dim3 threads(LOWPASS_W + 2 * LOWPASS_R, 4); //[32,4,1]
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  LowPassBlockOld<<<blocks, threads>>>(src.d_data, res.d_data, width, pitch, height);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LowPassBlock time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+  checkMsg("LowPass() execution failed\n");
+  return 0.0;
+}
+
+//==================== Multi-scale functions ===================//
+
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel)
+{
+  // This call fills octave slot numOctaves; the recursive call fills the lower-numbered slots.
+  if (numOctaves > 1)
+    PrepareLaplaceKernels(numOctaves - 1, (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f, kernel);
+  const float scaleStep = pow(2.0f, 1.0f / NUM_SCALES);
+  float sigma = pow(2.0f, -1.0f / NUM_SCALES);
+  float *octaveTaps = kernel + numOctaves * 12 * 16; // 12 * 16 floats per octave slot, 16 per scale
+  for (int s = 0; s < NUM_SCALES + 3; s++, sigma *= scaleStep)
+  {
+    float *taps = octaveTaps + 16 * s;
+    const float variance = sigma * sigma - initBlur * initBlur;
+    float weightSum = 0.0f;
+    for (int j = 0; j <= LAPLACE_R; j++)
+    {
+      taps[j] = (float)expf(-(double)j * j / 2.0 / variance);
+      weightSum += (j == 0 ? 1 : 2) * taps[j];
+    }
+    // taps[0] counts once and every other tap twice, so dividing makes that weighted sum equal 1.
+    for (int j = 0; j <= LAPLACE_R; j++)
+      taps[j] /= weightSum;
+  }
+}
+
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime) // launches LaplaceMultiMem from baseImage into results[0].d_data; always returns 0.0
+{
+  int width = results[0].width; // geometry is taken from the first result image
+  int pitch = results[0].pitch;
+  int height = results[0].height;
+#if 1
+  dim3 threads(LAPLACE_W + 2 * LAPLACE_R);       //(136)
+  dim3 blocks(iDivUp(width, LAPLACE_W), height); //(15)
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  LaplaceMultiMem<<<blocks, threads>>>(baseImage.d_data, results[0].d_data, width, pitch, height, octave); // only results[0].d_data is passed; NOTE(review): presumably the other result images follow it contiguously — confirm
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LaplaceMultiMem time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+#endif
+  checkMsg("LaplaceMulti() execution failed\n");
+  return 0.0;
+}
+
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime) // launches FindPointsMultiNew; always returns 0.0
+{
+  if (sources->d_data == NULL)
+  {
+    printf("FindPointsMulti: missing data\n");
+    return 0.0;
+  }
+  int w = sources->width; // geometry is taken from the first source image
+  int p = sources->pitch;
+  int h = sources->height;
+#if 1
+  dim3 blocks(iDivUp(w, MINMAX_W) * NUM_SCALES, iDivUp(h, MINMAX_H)); // grid x covers the column tiles once per scale
+  dim3 threads(MINMAX_W + 2);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  FindPointsMultiNew<<<blocks, threads>>>(sources->d_data, siftData.d_data, w, p, h, subsampling,
+                                          lowestScale, thresh, factor, edgeLimit, octave);
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("FindPointsMultiNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // launch + synchronize, in microseconds
+#endif
+#endif
+  checkMsg("FindPointsMulti() execution failed\n");
+  return 0.0;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h
new file mode 100644
index 000000000..95e8384ec
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h
@@ -0,0 +1,50 @@
+
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTH_H
+#define CUDASIFTH_H
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                    float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime);
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale,
+                       float subsampling, float *memoryTmp, float &totTime);
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime);
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime);
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime);
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime);
+double RescalePositions(SiftData &siftData, float scale, float &totTime);
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime);
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel);
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime);
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h
new file mode 100644
index 000000000..d5b3161cc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h
@@ -0,0 +1,155 @@
+#ifndef CUDAUTILS_H
+#define CUDAUTILS_H
+
+#include <cstdio>
+#include <iostream>
+
+#ifdef WIN32
+#include <intrin.h>
+#endif
+
+#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
+#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__)
+#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
+
+// Exits the process with a diagnostic on stderr when a CUDA runtime call did not return cudaSuccess.
+inline void __safeCall(cudaError err, const char *file, const int line)
+{
+  if (err == cudaSuccess)
+    return; // nothing to report
+  fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i : %s.\n", file, line, cudaGetErrorString(err));
+  exit(-1);
+}
+
+inline void __safeThreadSync(const char *file, const int line)
+{
+  cudaError err = cudaDeviceSynchronize();
+  if (cudaSuccess != err)
+  {
+    fprintf(stderr, "threadSynchronize() Driver API error in file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
+    exit(-1);
+  }
+}
+
+// Polls cudaGetLastError(); on failure prints errorMessage plus the call site to stderr and exits.
+inline void __checkMsg(const char *errorMessage, const char *file, const int line)
+{
+  const cudaError_t lastError = cudaGetLastError();
+  if (lastError == cudaSuccess)
+    return;
+  fprintf(stderr, "checkMsg() CUDA error: %s in file <%s>, line %i : %s.\n", errorMessage, file, line, cudaGetErrorString(lastError));
+  exit(-1);
+}
+
+inline bool deviceInit(int dev) // Binds a CUDA device (dev clamped to the valid range); returns false if none is usable.
+{
+  int numDevices;
+  safeCall(cudaGetDeviceCount(&numDevices));
+  if (numDevices == 0)
+  {
+    fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
+    return false;
+  }
+  // Clamp the requested ordinal into [0, numDevices - 1].
+  const int lastDevice = numDevices - 1;
+  dev = (dev < 0) ? 0 : dev;
+  dev = (dev > lastDevice) ? lastDevice : dev;
+  cudaDeviceProp props;
+  safeCall(cudaGetDeviceProperties(&props, dev));
+  if (props.major < 1)
+  {
+    fprintf(stderr, "error: device does not support CUDA.\n");
+    return false;
+  }
+  safeCall(cudaSetDevice(dev));
+  return true;
+}
+
+// Stopwatch built on a pair of CUDA events recorded on one stream.
+// The start event is recorded when the object is constructed.
+class TimerGPU
+{
+public:
+  cudaEvent_t start, stop;
+  cudaStream_t stream;
+  // Creates both events and records the start event on the given stream.
+  TimerGPU(cudaStream_t stream_ = 0) : stream(stream_)
+  {
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+    cudaEventRecord(start, stream);
+  }
+  ~TimerGPU() { cudaEventDestroy(start); cudaEventDestroy(stop); }
+  // Records the stop event, waits for it, then returns what cudaEventElapsedTime reports.
+  float read()
+  {
+    float elapsed;
+    cudaEventRecord(stop, stream);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&elapsed, start, stop);
+    return elapsed;
+  }
+};
+
+// CPU stopwatch based on the x86 time-stamp counter; counter values are scaled down by 2^bits.
+class TimerCPU
+{
+  static const int bits = 10;
+public:
+  long long beg_clock;
+  float freq;
+  TimerCPU(float freq_) : freq(freq_)
+  { // freq = clock frequency in MHz
+    beg_clock = getTSC(bits);
+  }
+  long long getTSC(int bits) // returns the 64-bit time-stamp counter shifted right by `bits`
+  {
+#ifdef WIN32
+    return __rdtsc() / (1LL << bits);
+#else
+    unsigned int low, high;
+    // volatile is required: an asm with outputs and no inputs may otherwise be merged, hoisted or dropped by the optimizer
+    __asm__ __volatile__(".byte 0x0f, 0x31" : "=a"(low), "=d"(high)); // 0x0f 0x31 is the RDTSC opcode
+    return ((long long)high << (32 - bits)) | ((long long)low >> bits);
+#endif
+  }
+  float read() // elapsed time since construction; milliseconds when freq is given in MHz
+  {
+    long long end_clock = getTSC(bits);
+    long long Kcycles = end_clock - beg_clock;
+    float time = (float)(1 << bits) * Kcycles / freq / 1e3f;
+    return time;
+  }
+};
+
+template <typename T> // Warp shuffle-down wrapper: uses the *_sync intrinsic with a full mask from CUDA 9.0 on.
+__device__ __inline__ T ShiftDown(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION < 9000)
+  return __shfl_down(var, delta, width);
+#else
+  return __shfl_down_sync(0xffffffff, var, delta, width);
+#endif
+}
+
+template <typename T> // Warp shuffle-up wrapper: uses the *_sync intrinsic with a full mask from CUDA 9.0 on.
+__device__ __inline__ T ShiftUp(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION < 9000)
+  return __shfl_up(var, delta, width);
+#else
+  return __shfl_up_sync(0xffffffff, var, delta, width);
+#endif
+}
+
+template <typename T> // Indexed warp shuffle wrapper: uses the *_sync intrinsic with a full mask from CUDA 9.0 on.
+__device__ __inline__ T Shuffle(T var, unsigned int lane, int width = 32)
+{
+#if (CUDART_VERSION < 9000)
+  return __shfl(var, lane, width);
+#else
+  return __shfl_sync(0xffffffff, var, lane, width);
+#endif
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp
new file mode 100644
index 000000000..c01e6e7d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <cmath>
+#include <opencv2/core/core.hpp>
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh) // Refines the 9-float row-major homography in place over numLoops least-squares passes; returns the number of points with reprojection error below thresh.
+{
+#ifdef MANAGEDMEM
+  SiftPoint *mpts = data.m_data;
+#else
+  if (data.h_data==NULL)
+    return 0;
+  SiftPoint *mpts = data.h_data;
+#endif
+  float limit = thresh*thresh; // errors below are squared distances, so compare against thresh^2
+  int numPts = data.numPts;
+  cv::Mat M(8, 8, CV_64FC1); // accumulated normal-equation matrix
+  cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1); // A: current 8 homography parameters, X: accumulated right-hand side
+  double Y[8]; // one equation row per matched coordinate
+  for (int i=0;i<8;i++) 
+    A.at<double>(i, 0) = homography[i] / homography[8]; // normalise so the ninth element is 1
+  for (int loop=0;loop<numLoops;loop++) {
+    M = cv::Scalar(0.0);
+    X = cv::Scalar(0.0);
+    for (int i=0;i<numPts;i++) {
+      SiftPoint &pt = mpts[i];
+      if (pt.score<minScore || pt.ambiguity>maxAmbiguity) // ignore weak or ambiguous matches
+	continue;
+      float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0f;
+      float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+      float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+      float err = dx*dx + dy*dy; // squared reprojection error under the current estimate
+      float wei = (err<limit ? 1.0f : 0.0f); //limit / (err + limit);
+      Y[0] = pt.xpos; // row for the x coordinate of the match
+      Y[1] = pt.ypos;
+      Y[2] = 1.0;
+      Y[3] = Y[4] = Y[5] = 0.0;
+      Y[6] = - pt.xpos * pt.match_xpos;
+      Y[7] = - pt.ypos * pt.match_xpos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei); // M += wei * Y * Y^T
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei); // X += wei * Y * target
+      Y[0] = Y[1] = Y[2] = 0.0; // row for the y coordinate of the match
+      Y[3] = pt.xpos;
+      Y[4] = pt.ypos; 
+      Y[5] = 1.0;
+      Y[6] = - pt.xpos * pt.match_ypos;
+      Y[7] = - pt.ypos * pt.match_ypos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei);
+    }
+    cv::solve(M, X, A, cv::DECOMP_CHOLESKY); // new estimate A from M * A = X; the return value of cv::solve is not checked
+  }
+  int numfit = 0;
+  for (int i=0;i<numPts;i++) { // final pass over all points, without the score/ambiguity filter
+    SiftPoint &pt = mpts[i];
+    float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0;
+    float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+    float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+    float err = dx*dx + dy*dy;
+    if (err<limit) 
+      numfit++;
+    pt.match_error = sqrt(err); // stored per point as a distance, not squared
+  }
+  for (int i=0;i<8;i++) 
+    homography[i] = A.at<double>(i); // write the refined parameters back to the caller's array
+  homography[8] = 1.0f;
+  return numfit;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp
new file mode 100644
index 000000000..0edc92d2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp
@@ -0,0 +1,278 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Björkman aka Celebrandil //
+//              celle @ csc.kth.se                       //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <iostream>
+#include <cmath>
+#include <iomanip>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "Utility.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh);
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img);
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography);
+
+double ScaleUp(CudaImage &res, CudaImage &src);
+
+///////////////////////////////////////////////////////////////////////////////
+// Main program
+///////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) // Benchmark driver: argv[1] = CUDA device ordinal, argv[2] = image-set selector; returns the data-verification flag.
+{
+  auto totalProgTimer_start = std::chrono::steady_clock::now();
+  int devNum = 0, imgSet = 0;
+  if (argc > 1)
+    devNum = std::atoi(argv[1]);
+  if (argc > 2)
+    imgSet = std::atoi(argv[2]);
+
+  float totTime = 0.0; // all timing accumulators below are in microseconds
+  float imageInitTime = 0.0;
+  float extractSiftTime = 0.0;
+  float matchingTime = 0.0;
+  float ioReadTime = 0.0;
+  float dataVerificationTime = 0.0;
+
+  // Read images using OpenCV
+  cv::Mat limg, rimg;
+  auto ioRead_start = std::chrono::steady_clock::now();
+  if (imgSet)
+  {
+    cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
+  }
+  else
+  {
+    cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
+  }
+  auto ioRead_stop = std::chrono::steady_clock::now();
+  ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
+
+  unsigned int w = limg.cols; // both images are allocated with the left image's dimensions
+  unsigned int h = limg.rows;
+  std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
+
+  // Initialize CUDA images and download images to device
+  std::cout << "Initializing data..." << std::endl;
+  cudaSetDevice(devNum); // honour the device ordinal from argv[1] (previously hard-coded to device 0)
+  CudaImage img1, img2;
+
+  img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
+  img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
+  img1.Download(imageInitTime);
+  img2.Download(imageInitTime);
+
+  // Extract Sift features from images
+  SiftData siftData1, siftData2;
+  float initBlur = 1.0f;
+  float thresh = (imgSet ? 4.5f : 2.0f); // detection threshold depends on the selected image set
+
+  InitSiftData(siftData1, imageInitTime, 32768, true, true);
+  InitSiftData(siftData2, imageInitTime, 32768, true, true);
+
+  // A bit of benchmarking
+  // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
+  float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
+  for (int i = 0; i < 50; i++) // 50 repetitions of the extraction on both images
+  {
+    float time = 0.0f; // per-call accumulator, reset before each ExtractSift
+    ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+    time = 0.0f;
+    ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+  }
+  FreeSiftTempMemory(memoryTmp);
+
+  // Match Sift features and find a homography
+  for (int i = 0; i < 1; i++)
+    MatchSiftData(siftData1, siftData2, matchingTime);
+  float homography[9];
+  int numMatches;
+  FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
+  int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
+  float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts); // share of the smaller feature set that fits the homography
+
+  std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
+  std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
+            << std::endl;
+
+#ifdef DEVICE_TIMER
+  totTime = imageInitTime + extractSiftTime + matchingTime;
+
+  std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
+  std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
+  std::cout << "Matching time = " << matchingTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  std::cout << "Total Deivce Time = " << totTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+#endif
+
+  // data validation
+  auto dataVerficationTimer_start = std::chrono::steady_clock::now();
+  int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
+  auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
+  dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
+  // // Print out and store summary data
+  // // PrintMatchData(siftData1, siftData2, img1);
+  // cv::imwrite("data/limg_pts.pgm", limg);
+
+  // MatchAll(siftData1, siftData2, homography);
+
+  // Free Sift data from device
+  FreeSiftData(siftData1);
+  FreeSiftData(siftData2);
+
+  auto totalProgTimer_end = std::chrono::steady_clock::now();
+  float totalProgramTime = std::chrono::duration<float, std::micro>(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime; // wall time excluding image reading and verification
+  std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  return data_verification_flag;
+}
+
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography) // Debug dump: for every point of set 1, prints the set-2 points that lie near its homography projection or are its recorded match.
+{
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  int numPts1 = siftData1.numPts;
+  int numPts2 = siftData2.numPts;
+  int numFound = 0;
+#if 1
+  homography[0] = homography[4] = -1.0f; // overwrites the caller's homography[0..7] with fixed values
+  homography[1] = homography[3] = homography[6] = homography[7] = 0.0f;
+  homography[2] = 1279.0f;
+  homography[5] = 959.0f; // NOTE(review): homography[8] is not set here although it is read below - confirm the caller provides it
+#endif
+  for (int i = 0; i < numPts1; i++)
+  {
+    float *data1 = sift1[i].data;
+    std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl;
+    bool found = false;
+    for (int j = 0; j < numPts2; j++)
+    {
+      float *data2 = sift2[j].data;
+      float sum = 0.0f;
+      for (int k = 0; k < 128; k++)
+        sum += data1[k] * data2[k]; // dot product of the two 128-element descriptors
+      float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8];
+      float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos;
+      float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos;
+      float err = dx * dx + dy * dy; // squared distance between the projected point i and point j
+      if (err < 100.0f) // 100.0
+        found = true;
+      if (err < 100.0f || j == sift1[i].match)
+      { // 100.0
+        if (j == sift1[i].match && err < 100.0f) // "*": recorded match and geometrically close
+          std::cout << " *";
+        else if (j == sift1[i].match) // "-": recorded match but not close
+          std::cout << " -";
+        else if (err < 100.0f) // "+": close but not the recorded match
+          std::cout << " +";
+        else
+          std::cout << "  ";
+        std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl;
+      }
+    }
+    std::cout << std::endl;
+    if (found)
+      numFound++; // counts set-1 points with at least one geometrically close set-2 point
+  }
+  std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl;
+  std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%%
+  std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%%
+  std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%%
+}
+
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img) // Draws match lines and per-feature cross markers directly into img.h_data (host pixels are overwritten).
+{
+  int numPts = siftData1.numPts;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  float *h_img = img.h_data;
+  int w = img.width;
+  int h = img.height;
+  std::cout << std::setprecision(3);
+  for (int j = 0; j < numPts; j++)
+  {
+    int k = sift1[j].match;
+    if (sift1[j].match_error < 5) // only matches with a small stored error get a connecting line
+    {
+      float dx = sift2[k].xpos - sift1[j].xpos;
+      float dy = sift2[k].ypos - sift1[j].ypos;
+#if 0
+      if (false && sift1[j].xpos>550 && sift1[j].xpos<600) {
+	std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") ";
+	std::cout << j << ": " << "score=" << sift1[j].score << "  ambiguity=" << sift1[j].ambiguity << "  match=" << k << "  ";
+	std::cout << "scale=" << sift1[j].scale << "  ";
+	std::cout << "error=" << (int)sift1[j].match_error << "  ";
+	std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << "  ";
+	std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl;
+      }
+#endif
+#if 1
+      int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy)); // number of steps = larger axis displacement
+      for (int l = 0; l < len; l++)
+      {
+        int x = (int)(sift1[j].xpos + dx * l / len);
+        int y = (int)(sift1[j].ypos + dy * l / len);
+        h_img[y * w + x] = 255.0f; // NOTE(review): x and y are not range-checked against w and h here
+      }
+#endif
+    }
+    int x = (int)(sift1[j].xpos + 0.5);
+    int y = (int)(sift1[j].ypos + 0.5);
+    int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale))))); // arm length, limited by the distance to each image border
+    int p = y * w + x;
+    p += (w + 1); // first a 0-valued cross, offset one pixel down and right
+    for (int k = 0; k < s; k++)
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f;
+    p -= (w + 1); // then a 255-valued cross at the feature position itself
+    for (int k = 0; k < s; k++)
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f;
+  }
+  std::cout << std::setprecision(6);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu
new file mode 100644
index 000000000..d54978960
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu
@@ -0,0 +1,1530 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <chrono>
+#include <random>
+#include "cudaSift.h"
+#include "cudautils.h"
+
+//================= Device matching functions =====================//
+
+__global__ void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2) // Writes descriptor dot products of sift1[blockIdx.x] against 16 sift2 points per block into corrData.
+{
+  __shared__ float siftPoint[128]; // descriptor of the block's sift1 point
+  __shared__ float sums[16 * 16]; // one partial sum per thread; indexing assumes a 16x16 thread block
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1 = blockIdx.x;
+  const int p2 = blockIdx.y * 16 + ty;
+  const float *ptr1 = sift1[p1].data;
+  const float *ptr2 = sift2[p2].data; // only dereferenced below when p2 < numPts2
+  const int i = 16 * ty + tx;
+  if (ty < 8)
+    siftPoint[i] = ptr1[i]; // rows 0..7 cover indices 0..127
+  __syncthreads();
+  float sum = 0.0f;
+  if (p2 < numPts2)
+    for (int j = 0; j < 8; j++)
+      sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; // each thread handles elements tx, tx+16, ..., tx+112
+  sums[i] = sum;
+  __syncthreads();
+  if (tx < 8)
+    sums[i] += sums[i + 8]; // fold 16 partial sums per row down to 8
+  __syncthreads();
+  if (tx < 4)
+    sums[i] += sums[i + 4]; // ... and down to 4
+  __syncthreads();
+  if (ty == 0)
+  {
+    sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3]; // thread tx of row 0 finishes the sum for row tx
+    corrData[p1 * gridDim.y * 16 + blockIdx.y * 16 + tx] = sum; // corrData row p1 has gridDim.y * 16 columns
+  }
+  __syncthreads();
+}
+
+__global__ void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2) // Computes a 16x16 tile of descriptor dot products per block and stores it in corrData.
+{
+  __shared__ float siftPoints1[16 * 128]; // 16 descriptors from sift1
+  __shared__ float siftPoints2[16 * 128]; // 16 descriptors from sift2
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const float *ptr1 = sift1[min(numPts1 - 1, blockIdx.x * 16 + ty)].data; // indices past the end are clamped to the last point
+  const float *ptr2 = sift2[min(numPts2 - 1, blockIdx.y * 16 + ty)].data;
+  for (int i = 0; i < 8; i++)
+  {
+    siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx]; // row ty loads descriptor ty of each tile, 16 floats per pass
+    siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx];
+  }
+  __syncthreads();
+  const int p1 = blockIdx.x * 16 + ty;
+  const int p2 = blockIdx.y * 16 + tx;
+  const float *pt1 = &siftPoints1[ty * 128];
+  const float *pt2 = &siftPoints2[tx * 128];
+  float sum = 0.0f;
+  for (int i = 0; i < 128; i++)
+  {
+    int itx = (i + tx) & 127; // avoid bank conflicts
+    sum += pt1[itx] * pt2[itx];
+  }
+  if (p1 < numPts1)
+    corrData[p1 * gridDim.y * 16 + p2] = (p2 < numPts2 ? sum : -1.0f); // columns past numPts2 are filled with -1
+}
+
+__global__ void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize) // Per sift1 point: scans its corrData row for the best and second-best score and records the match.
+{
+  __shared__ float maxScore[16 * 16]; // best score seen by each thread (indexing assumes a 16x16 block)
+  __shared__ float maxScor2[16 * 16]; // second-best score seen by each thread
+  __shared__ int maxIndex[16 * 16]; // column index of the best score
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * 16 + tx;
+  int p1 = blockIdx.x * 16 + threadIdx.y;
+  p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); // rows past the end redo the last point instead of exiting before the barriers
+  maxScore[idx] = -1.0f;
+  maxScor2[idx] = -1.0f;
+  maxIndex[idx] = -1;
+  __syncthreads();
+  float *corrs = &corrData[p1 * corrWidth];
+  for (int i = tx; i < corrWidth; i += 16) // each thread scans columns tx, tx+16, ...
+  {
+    float val = corrs[i];
+    if (val > maxScore[idx])
+    {
+      maxScor2[idx] = maxScore[idx];
+      maxScore[idx] = val;
+      maxIndex[idx] = i;
+    }
+    else if (val > maxScor2[idx])
+      maxScor2[idx] = val;
+  }
+  __syncthreads();
+  for (int len = 8; len > 0; len /= 2) // tree reduction across the 16 threads of a row
+  {
+    if (tx < len) // only the lower half of the active range merges; using 8 here for every len raced on shared memory
+    {
+      float val = maxScore[idx + len];
+      int i = maxIndex[idx + len];
+      if (val > maxScore[idx])
+      {
+        maxScor2[idx] = maxScore[idx];
+        maxScore[idx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > maxScor2[idx])
+        maxScor2[idx] = val;
+      float va2 = maxScor2[idx + len]; // the partner's runner-up may also beat ours
+      if (va2 > maxScor2[idx])
+        maxScor2[idx] = va2;
+    }
+    __syncthreads();
+  }
+  if (tx == 0)
+  {
+    sift1[p1].score = maxScore[ty * 16];
+    sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); // ratio of second-best to best score
+    sift1[p1].match = maxIndex[ty * 16];
+    sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos; // NOTE(review): maxIndex stays -1 if no score exceeded -1 - confirm that cannot happen
+    sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos;
+  }
+}
+
+// Version based on suggestion by Nicholas Lin
+__global__ void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) // Per sift1 point: brute-force search of sift2 for the best and second-best descriptor dot product.
+{
+  int block_dim = blockDim.x; // blockDim.x == 16
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1 = blockIdx.x * block_dim + ty; // one sift1 point per thread row
+  const int idx = ty * 16 + tx;
+
+  __shared__ int maxIndex[16 * 16]; // per-thread index of the best sift2 candidate seen so far
+  maxIndex[idx] = 0;
+  __syncthreads();
+
+  float *corrs = NULL; // scratch row in corrData: entries [0,16) hold best scores, [16,32) second-best scores
+  if (p1 < numPts1)
+  {
+    corrs = &corrData[p1 * block_dim * 2];
+    corrs[tx] = 0.0f;
+    corrs[tx + 16] = 0.0f;
+    const float *pt1 = sift1[p1].data;
+    for (int p2 = tx; p2 < numPts2; p2 += 16) // each thread scans sift2 points tx, tx+16, ...
+    {
+      float *pt2 = sift2[p2].data;
+      float sum = 0.0f;
+      for (int i = 0; i < 128; i++)
+        sum += pt1[i] * pt2[i];
+      if (sum > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = sum;
+        maxIndex[idx] = p2;
+      }
+      else if (sum > corrs[tx + 16])
+        corrs[tx + 16] = sum;
+    }
+  }
+  __syncthreads();
+  // Rows with p1 >= numPts1 do no work below but must still reach every __syncthreads():
+  // a barrier inside a branch that only part of the block takes is undefined behaviour.
+  const bool rowValid = (p1 < numPts1);
+  for (int len = 8; len > 0; len /= 2) // tree reduction across the 16 threads of a row
+  {
+    if (rowValid && tx < len)
+    {
+      float val = corrs[tx + len];
+      int i = maxIndex[idx + len];
+      if (val > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > corrs[tx + 16])
+        corrs[tx + 16] = val;
+      float va2 = corrs[tx + 16 + len]; // the partner's runner-up may also beat ours
+      if (va2 > corrs[tx + 16])
+        corrs[tx + 16] = va2;
+    }
+    __syncthreads();
+  }
+  if (rowValid && tx == 0)
+  {
+    sift1[p1].score = corrs[0];
+    sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6); // ratio of second-best to best score
+    sift1[p1].match = maxIndex[ty << 4];
+    sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos;
+  }
+}
+
+#define FMC2W 16
+#define FMC2H 4
+
+__global__ void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) // One block per sift1 point: finds its best and second-best sift2 match by descriptor dot product.
+{
+  __shared__ float siftPoint[128]; // descriptor of this block's sift1 point
+  __shared__ float maxScore[FMC2H]; // best score per thread row
+  __shared__ float maxScor2[FMC2H]; // second-best score per thread row
+  __shared__ int maxIndex[FMC2H]; // sift2 index of the best score per thread row
+  const int p1 = blockIdx.x;
+  if (p1 >= numPts1) // p1 depends only on blockIdx, so the whole block leaves together
+    return;
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC2W + tx;
+  if (idx < FMC2H)
+  {
+    maxScore[idx] = -1.0f;
+    maxScor2[idx] = -1.0f;
+    maxIndex[idx] = 0;
+  }
+  __syncthreads();
+  const float *pt1 = sift1[p1].data;
+  for (int i = idx; i < 128; i += FMC2W * FMC2H)
+    siftPoint[i] = pt1[i];
+  __syncthreads();
+  for (int p2 = ty; p2 < numPts2; p2 += FMC2H) // row ty handles sift2 points ty, ty+FMC2H, ...
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[j] * pt2[j];
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j); // NOTE(review): ShiftDown uses a full 0xffffffff mask, but the trip count of the p2 loop depends on ty - confirm all lanes named by the mask take part
+    if (tx == 0) // only the tx == 0 thread of a row updates that row's running maxima
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  __syncthreads();
+  for (int len = FMC2H / 2; len > 0; len /= 2) // merge the FMC2H per-row results into entry 0
+  {
+    if (ty == 0 && tx < len)
+    {
+      float val = maxScore[tx + len];
+      int p2 = maxIndex[tx + len];
+      if (val > maxScore[tx])
+      {
+        maxScor2[tx] = maxScore[tx];
+        maxScore[tx] = val;
+        maxIndex[tx] = p2;
+      }
+      else if (val > maxScor2[tx])
+        maxScor2[tx] = val;
+      float va2 = maxScor2[tx + len];
+      if (va2 > maxScor2[tx])
+        maxScor2[tx] = va2;
+    }
+    __syncthreads();
+  }
+  if (ty == 0 && tx == 0)
+  {
+    sift1[p1].score = maxScore[0];
+    sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); // ratio of second-best to best score
+    sift1[p1].match = maxIndex[0];
+    sift1[p1].match_xpos = sift2[maxIndex[0]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[0]].ypos;
+  }
+}
+
+__global__ void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) // Each thread row handles one sift1 point and scans all of sift2 for its best and second-best match.
+{
+  __shared__ float siftPoint[128 * FMC2H]; // one sift1 descriptor per thread row
+  __shared__ float maxScore[FMC2H];
+  __shared__ float maxScor2[FMC2H];
+  __shared__ int maxIndex[FMC2H];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  if (tx == 0)
+  {
+    maxScore[ty] = -1.0f;
+    maxScor2[ty] = -1.0f;
+    maxIndex[ty] = 0;
+  }
+  const int p1 = blockIdx.x * FMC2H + ty; // NOTE(review): p1 is never compared with numPts1 (the parameter is unused) - confirm the launch never exceeds it
+  const float *pt1 = sift1[p1].data;
+  for (int j = tx; j < 128; j += FMC2W)
+    siftPoint[128 * ty + j] = pt1[j];
+  __syncthreads();
+  for (int p2 = 0; p2 < numPts2; p2++) // every row visits every sift2 point
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[128 * ty + j] * pt2[j];
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j); // shuffle reduction; the tx == 0 thread below consumes the result
+    if (tx == 0)
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  __syncthreads();
+  if (tx == 0)
+  {
+    sift1[p1].score = maxScore[ty];
+    sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); // ratio of second-best to best score
+    sift1[p1].match = maxIndex[ty];
+    sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos;
+  }
+}
+
+__global__ void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width)
+{
+  // Pitched byte copy: for each of numPts rows, copy `width` bytes from src to dst.
+  // Consecutive rows lie src_pitch bytes apart in src and dst_pitch bytes apart in dst.
+  // No thread or block index is consulted, so every launched thread repeats the full copy.
+  char *rowIn = (char *)src;
+  char *rowOut = (char *)dst;
+  for (int row = 0; row < numPts; ++row)
+  {
+    for (int b = 0; b < width; ++b)
+      rowOut[b] = rowIn[b];
+    rowIn += src_pitch;
+    rowOut += dst_pitch;
+  }
+}
+
+__global__ void
+CleanMatches(SiftPoint *sift1, int numPts1)
+{ // One thread per point (64 per block) zeroes its score; indices past the end are clamped onto the last point.
+  const int clamped = min(blockIdx.x * 64 + threadIdx.x, numPts1 - 1);
+  (sift1 + clamped)->score = 0.0f;
+}
+
+#define M7W 32 // FindMaxCorr10: sift1 points per block (also the tx stride of its load loops)
+#define M7H 32 // FindMaxCorr10: sift2 points per tile
+#define M7R 4 // FindMaxCorr10: sift2 rows scored per computing thread
+#define NRX 2 // FindMaxCorr10: sift1 columns scored per computing thread
+#define NDIM 128 // floats per descriptor, read as NDIM / 4 float4 values
+
+__global__ void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Tiled matcher: a block scores M7W sift1 points against sift2 in tiles of M7H points, keeping the best and second-best dot product per sift1 point.
+  __shared__ float4 buffer1[M7W * NDIM / 4]; // descriptors of this block's sift1 points; reused as reduction scratch after the tile loop
+  __shared__ float4 buffer2[M7H * NDIM / 4]; // descriptors of the current sift2 tile
+  int tx = threadIdx.x;
+  int ty = threadIdx.y;
+  int bp1 = M7W * blockIdx.x; // first sift1 point handled by this block
+  for (int j = ty; j < M7W; j += M7H / M7R)
+  {
+    int p1 = min(bp1 + j, numPts1 - 1); // clamped, so rows past the end re-read the last point
+    for (int d = tx; d < NDIM / 4; d += M7W)
+      buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] = ((float4 *)&sift1[p1].data)[d]; // row j is stored rotated by j float4 slots
+  }
+  // Per-thread running results for the NRX sift1 columns this thread scores.
+  float max_score[NRX];
+  float sec_score[NRX];
+  int index[NRX];
+  for (int i = 0; i < NRX; i++)
+  {
+    max_score[i] = 0.0f;
+    sec_score[i] = 0.0f;
+    index[i] = -1; // stays -1 if no score ever exceeds 0
+  }
+  // Linearise the thread id, then split it into a sift1 column (ix) and a group of M7R sift2 rows (iy).
+  int idx = ty * M7W + tx;
+  int ix = idx % (M7W / NRX);
+  int iy = idx / (M7W / NRX);
+  for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H) // visits only complete M7H-point tiles; a trailing partial tile is not scored
+  {
+    for (int j = ty; j < M7H; j += M7H / M7R)
+    {
+      int p2 = min(bp2 + j, numPts2 - 1);
+      for (int d = tx; d < NDIM / 4; d += M7W)
+        buffer2[j * NDIM / 4 + d] = ((float4 *)&sift2[p2].data)[d];
+    }
+    __syncthreads(); // tile is loaded (on the first pass this barrier also covers the buffer1 load above)
+    // Only the first M7W * M7H / M7R / NRX threads compute; each accumulates an M7R x NRX block of dot products.
+    if (idx < M7W * M7H / M7R / NRX)
+    {
+      float score[M7R][NRX];
+      for (int dy = 0; dy < M7R; dy++)
+        for (int i = 0; i < NRX; i++)
+          score[dy][i] = 0.0f;
+      for (int d = 0; d < NDIM / 4; d++)
+      {
+        float4 v1[NRX];
+        for (int i = 0; i < NRX; i++)
+          v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)]; // same per-row rotation as used at load time
+        for (int dy = 0; dy < M7R; dy++)
+        {
+          float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d];
+          for (int i = 0; i < NRX; i++)
+          {
+            score[dy][i] += v1[i].x * v2.x;
+            score[dy][i] += v1[i].y * v2.y;
+            score[dy][i] += v1[i].z * v2.z;
+            score[dy][i] += v1[i].w * v2.w;
+          }
+        }
+      }
+      for (int dy = 0; dy < M7R; dy++) // fold this tile's scores into the running best / second best
+      {
+        for (int i = 0; i < NRX; i++)
+        {
+          if (score[dy][i] > max_score[i])
+          {
+            sec_score[i] = max_score[i];
+            max_score[i] = score[dy][i];
+            index[i] = min(bp2 + M7R * iy + dy, numPts2 - 1);
+          }
+          else if (score[dy][i] > sec_score[i])
+            sec_score[i] = score[dy][i];
+        }
+      }
+    }
+    __syncthreads(); // all reads of buffer2 finish before the next pass overwrites it
+  }
+  float *scores1 = (float *)buffer1; // buffer1 is reused from here on as three scratch arrays of M7W * M7H / M7R entries
+  float *scores2 = &scores1[M7W * M7H / M7R];
+  int *indices = (int *)&scores2[M7W * M7H / M7R];
+  if (idx < M7W * M7H / M7R / NRX)
+  {
+    for (int i = 0; i < NRX; i++)
+    {
+      scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i];
+      scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i];
+      indices[iy * M7W + (M7W / NRX) * i + ix] = index[i];
+    }
+  }
+  __syncthreads();
+  // Threads with ty == 0 merge the M7H / M7R partial results for sift1 point bp1 + tx and write the match.
+  if (ty == 0)
+  {
+    float max_score = scores1[tx]; // these locals shadow the per-thread arrays declared above
+    float sec_score = scores2[tx];
+    int index = indices[tx];
+    for (int y = 0; y < M7H / M7R; y++)
+      if (index != indices[y * M7W + tx]) // skip partials naming the current best index (row 0 itself on the first pass)
+      {
+        if (scores1[y * M7W + tx] > max_score)
+        {
+          sec_score = max(max_score, sec_score);
+          max_score = scores1[y * M7W + tx];
+          index = indices[y * M7W + tx];
+        }
+        else if (scores1[y * M7W + tx] > sec_score)
+          sec_score = scores1[y * M7W + tx];
+      }
+    sift1[bp1 + tx].score = max_score; // NOTE(review): bp1 + tx is not clamped to numPts1 here, unlike the loads - confirm the array is padded
+    sift1[bp1 + tx].match = index;
+    sift1[bp1 + tx].match_xpos = sift2[index].xpos; // NOTE(review): index can still be -1 (see initialisation) - confirm that cannot happen in practice
+    sift1[bp1 + tx].match_ypos = sift2[index].ypos;
+    sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f);
+  }
+}
+
+#define FMC_GH 512 // FindMaxCorr8/9: sift2 points covered per blockIdx.y
+#define FMC_BW 32 // sift1 points per block
+#define FMC_BH 32 // sift2 points per tile
+#define FMC_BD 16 // float4 slots per point in the shared staging buffers
+#define FMC_TW 1 // sift1 points scored per thread
+#define FMC_TH 4 // sift2 points scored per thread
+#define FMC_NW (FMC_BW / FMC_TW) //  32
+#define FMC_NH (FMC_BH / FMC_TH) //   8
+#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps
+
+__device__ volatile int lock = 0; // global spin lock: thread 0 of a block takes it (atomicCAS) before the block merges its results into sift1
+
+__global__ void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Tiled matcher with double-buffered loads: scores FMC_BW sift1 points against this blockIdx.y's slice of sift2, then merges into sift1 under the global lock.
+  __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 32*16 = 512 float4: two 8-slot halves per sift1 point; later aliased as blksums
+  __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 32*16 = 512 float4: two 8-slot halves per sift2 point
+  //__shared__ float blksums[FMC_BW*FMC_BH];     // 32*32  = 1024
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC_NW + tx; // linear thread id within the block
+  float4 *pts1 = 0, *pts2 = 0; // only set (and only dereferenced) in the first FMC_BW / FMC_BH threads, which do the loading
+  if (idx < FMC_BW)
+  {
+    const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    pts1 = (float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) // one FMC_BH-point tile of sift2 per pass
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1);
+      pts2 = (float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH]; // this thread's dot products for the current tile
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    // Prime the first half-buffer with float4 slots 0 .. FMC_BD / 2 - 1 of every point.
+    if (idx < FMC_BW)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i];
+    if (idx < FMC_BH)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i];
+    __syncthreads();
+    // Each pass stages the next FMC_BD / 2 slots into half b, then flips b and computes on the half staged before.
+    int b = FMC_BD / 2;
+    for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2)
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i];
+
+      b ^= FMC_BD / 2; // switch to the half that was filled before this pass
+      for (int i = 0; i < FMC_BD / 2; i++)
+      {
+        float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+            sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+            sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+            sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+          }
+        }
+      }
+      __syncthreads(); // staging of the other half is complete and reads of this half are done
+    }
+    // Consume the half staged by the final pass of the loop above.
+    b ^= FMC_BD / 2;
+    for (int i = 0; i < FMC_BD / 2; i++)
+    {
+      float4 v1[FMC_TW];
+      for (int ix = 0; ix < FMC_TW; ix++)
+        v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+      for (int iy = 0; iy < FMC_TH; iy++)
+      {
+        float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+        for (int ix = 0; ix < FMC_TW; ix++)
+        {
+          sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+          sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+          sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+          sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+        }
+      }
+    }
+    __syncthreads(); // every read of siftParts1 is done before it is overwritten with sums below
+
+    float *blksums = (float *)siftParts1; // reuse siftParts1 as an FMC_BH x FMC_BW table of scores
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    __syncthreads();
+    if (idx < FMC_BW) // the first FMC_BW threads each scan one column (one sift1 point) for best / second best
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads(); // scan is finished before the next pass overwrites the shared buffers
+  }
+  const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+  if (idx == 0) // thread 0 spins until it acquires the global lock
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads(); // the rest of the block waits here until thread 0 holds the lock
+  if (idx < FMC_BW)
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best score from the stored ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = max(sift1[p1].score, maxScor2); // the stored best may become the new runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // our best does not beat the stored best but does beat its runner-up
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  if (idx == 0)
+    atomicExch((int *)&lock, 0); // release the lock
+}
+
+__global__ void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Tiled matcher (single-buffered): scores FMC_BW sift1 points against this blockIdx.y's slice of sift2, then merges into sift1 under the global lock.
+  __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 32*16 = 512 float4: FMC_BD slots for each of FMC_BW sift1 points
+  __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 32*16 = 512 float4: FMC_BD slots for each of FMC_BH sift2 points
+  __shared__ float blksums[FMC_BW * FMC_BH];     // 32*32  = 1024
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC_NW + tx; // linear thread id within the block
+  float4 *pts1 = 0, *pts2 = 0; // only set (and only dereferenced) in the first FMC_BW / FMC_BH threads, which do the loading
+  if (idx < FMC_BW)
+  {
+    const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    pts1 = (float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) // one FMC_BH-point tile of sift2 per pass
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1);
+      pts2 = (float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH]; // this thread's dot products for the current tile
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    for (int d = 0; d < 32; d += FMC_BD) // process the descriptor FMC_BD float4 slots at a time
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts1[i * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts2[i * FMC_BH + idx] = pts2[d + i];
+      __syncthreads(); // staged slots are visible to the whole block
+
+      for (int i = 0; i < FMC_BD; i++)
+      {
+        float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+            sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+            sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+            sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+          }
+        }
+      }
+      __syncthreads(); // reads are done before the next chunk overwrites the staging buffers
+    }
+    // float *blksums = (float*)siftParts1;
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    __syncthreads();
+    if (idx < FMC_BW) // the first FMC_BW threads each scan one column (one sift1 point) for best / second best
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads(); // scan is finished before blksums is rewritten on the next pass
+  }
+  const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+  if (idx == 0) // thread 0 spins until it acquires the global lock
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads(); // the rest of the block waits here until thread 0 holds the lock
+  if (idx < FMC_BW)
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best score from the stored ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = max(sift1[p1].score, maxScor2); // the stored best may become the new runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // our best does not beat the stored best but does beat its runner-up
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  if (idx == 0)
+    atomicExch((int *)&lock, 0); // release the lock
+}
+
+__global__ void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Tiled matcher: 16 sift1 points per block against this blockIdx.y's 512 sift2 points, 16 at a time; results are merged into sift1 under the global lock.
+  __shared__ float siftParts1[17 * 64]; // sift1 slots stored transposed with a pitch of 17 float4 (one extra to avoid shared conflicts); later aliased as sums
+  __shared__ float siftParts2[16 * 64]; // sift2 slots, 16 float4 per point
+  float4 *pts1 = (float4 *)siftParts1; // float4 views of the two staging buffers
+  float4 *pts2 = (float4 *)siftParts2;
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); // sift1 point this row loads
+  const float4 *p1l4 = (float4 *)sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512 / 16; k++)
+  {
+    const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); // sift2 point this row loads
+    const float4 *p2l4 = (float4 *)sift2[p2l].data;
+#define NUM 4
+    float sum[NUM]; // one accumulator per sift2 point scored by this thread
+    if (ty < (16 / NUM)) // only rows ty < 16 / NUM compute; the other rows just help load
+      for (int l = 0; l < NUM; l++)
+        sum[l] = 0.0f;
+    __syncthreads();
+    for (int i = 0; i < 2; i++) // two chunks per descriptor, at float4 offsets i * 16 + tx
+    {
+      pts1[17 * tx + ty] = p1l4[i * 16 + tx]; // transposed store, pitch 17
+      pts2[16 * ty + tx] = p2l4[i * 16 + tx];
+      __syncthreads();
+      if (ty < (16 / NUM))
+      {
+#pragma unroll
+        for (int j = 0; j < 16; j++)
+        {
+          float4 p1v = pts1[17 * j + tx]; // slot j of sift1 point tx
+#pragma unroll
+          for (int l = 0; l < NUM; l++)
+          {
+            float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; // slot j of sift2 point ty + l * (16 / NUM)
+            sum[l] += p1v.x * p2v.x;
+            sum[l] += p1v.y * p2v.y;
+            sum[l] += p1v.z * p2v.z;
+            sum[l] += p1v.w * p2v.w;
+          }
+        }
+      }
+      __syncthreads(); // reads are done before the next chunk (or the sums table) overwrites the buffers
+    }
+    float *sums = siftParts1; // reuse siftParts1 as a 16 x 16 table of scores
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l];
+    __syncthreads();
+    if (ty == 0) // row 0 scans the 16 sift2 scores of sift1 point tx
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx]; // shadows the accumulator array above
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+  if (tx == 0 && ty == 0) // one thread spins until it acquires the global lock
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads(); // the rest of the block waits here until the lock is held
+  if (ty == 0)
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best score from the stored ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = max(sift1[p1].score, maxScor2); // the stored best may become the new runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // our best does not beat the stored best but does beat its runner-up
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0); // release the lock
+}
+
+__global__ void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Matcher that keeps each sift1 descriptor in registers (one float4 per thread) and reduces dot products with ShiftDown; merged into sift1 under the global lock.
+  //__shared__ float siftParts1[128*16]; // features in columns
+  __shared__ float siftParts2[128 * 16]; // 16 sift2 descriptors of 128 floats (32 float4) each
+  __shared__ float sums[16 * 16]; // sums[16 * i + ty]: score of tile point i against this block's sift1 row ty
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1);
+  float *pt1l = sift1[p1l].data;
+  float4 part1 = reinterpret_cast<float4 *>(pt1l)[tx]; // this thread's float4 slice of its row's sift1 descriptor
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512; k += 16) // 16 sift2 points per pass, 512 per blockIdx.y
+  {
+    const int p2l = min(blockIdx.y * 512 + k + ty, numPts2 - 1);
+    float *pt2l = sift2[p2l].data;
+    reinterpret_cast<float4 *>(siftParts2)[32 * ty + tx] = reinterpret_cast<float4 *>(pt2l)[tx];
+    __syncthreads();
+    for (int i = 0; i < 16; i++)
+    {
+      float4 part2 = reinterpret_cast<float4 *>(siftParts2)[32 * i + tx];
+      float sum = part1.x * part2.x + part1.y * part2.y + part1.z * part2.z + part1.w * part2.w;
+      sum += ShiftDown(sum, 16); // ShiftDown is defined elsewhere; presumably a warp shuffle-down, so tx == 0 ends with the row total - confirm
+      sum += ShiftDown(sum, 8);
+      sum += ShiftDown(sum, 4);
+      sum += ShiftDown(sum, 2);
+      sum += ShiftDown(sum, 1);
+      if (tx == 0)
+        sums[16 * i + ty] = sum;
+    }
+    __syncthreads();
+    if (ty == 0 && tx < 16) // 16 threads each scan the tile's scores for one sift1 point
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads(); // scan is finished before the next pass overwrites siftParts2 and sums
+  }
+  if (tx == 0 && ty == 0) // one thread spins until it acquires the global lock
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads(); // the rest of the block waits here until the lock is held
+  if (ty == 0 && tx < 16)
+  {
+    const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best score from the stored ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = max(sift1[p1].score, maxScor2); // the stored best may become the new runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // our best does not beat the stored best but does beat its runner-up
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0); // release the lock
+}
+
+__global__ void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // Tiled matcher on scalar floats: 16 sift1 points against 16 sift2 points per pass, 16 features at a time; merged into sift1 under the global lock.
+  __shared__ float siftParts1[17 * 16]; // features in columns
+  __shared__ float siftParts2[17 * 16]; // one extra to avoid shared conflicts
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); // sift1 point this row loads
+  const float *pt1l = sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512 / 16; k++)
+  {
+    const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); // sift2 point this row loads
+    const float *pt2l = sift2[p2l].data;
+    float sum = 0.0f; // dot product of sift1 point tx with sift2 point ty (see the inner loop)
+    for (int i = 0; i < 8; i++) // 8 chunks of 16 features
+    {
+      siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose
+      siftParts2[17 * tx + ty] = pt2l[i * 16 + tx];
+      __syncthreads();
+      for (int j = 0; j < 16; j++)
+        sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty];
+      __syncthreads(); // reads are done before the next chunk overwrites the tiles
+    }
+    float *sums = siftParts1; // reuse siftParts1 as a 16 x 16 table of scores (pitch 16)
+    sums[16 * ty + tx] = sum;
+    __syncthreads();
+    if (ty == 0) // row 0 scans the 16 sift2 scores of sift1 point tx
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx]; // shadows the accumulator above
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+  if (tx == 0 && ty == 0) // one thread spins until it acquires the global lock
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads(); // the rest of the block waits here until the lock is held
+  if (ty == 0)
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best score from the stored ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = max(sift1[p1].score, maxScor2); // the stored best may become the new runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // our best does not beat the stored best but does beat its runner-up
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0); // release the lock
+}
+
+template <int size>
+__device__ void InvertMatrix(float elem[size][size], float res[size][size])
+{ // Inverts a size x size matrix into res using an in-place LU factorisation with scaled row pivoting; elem is overwritten.
+  int indx[size]; // indx[j] = row that was swapped into position j during factorisation
+  float b[size]; // right-hand side, then solution column, during substitution
+  float vv[size]; // per-row scale: reciprocal of the row's largest absolute value
+  for (int i = 0; i < size; i++)
+    indx[i] = 0;
+  int imax = 0;
+  float d = 1.0; // sign flips on every row swap; not read afterwards in this function
+  for (int i = 0; i < size; i++)
+  { // find biggest element for each row
+    float big = 0.0;
+    for (int j = 0; j < size; j++)
+    {
+      float temp = fabs(elem[i][j]);
+      if (temp > big)
+        big = temp;
+    }
+    if (big > 0.0)
+      vv[i] = 1.0 / big;
+    else
+      vv[i] = 1e16; // all-zero row: use a huge scale instead of dividing by zero
+  }
+  for (int j = 0; j < size; j++) // factorise one column at a time
+  {
+    for (int i = 0; i < j; i++)
+    {                                   // i<j
+      float sum = elem[i][j];           // i<j (lower left)
+      for (int k = 0; k < i; k++)       // k<i<j
+        sum -= elem[i][k] * elem[k][j]; // i>k (upper right), k<j (lower left)
+      elem[i][j] = sum;                 // i<j (lower left)
+    }
+    float big = 0.0;
+    for (int i = j; i < size; i++)
+    {                                   // i>=j
+      float sum = elem[i][j];           // i>=j (upper right)
+      for (int k = 0; k < j; k++)       // k<j<=i
+        sum -= elem[i][k] * elem[k][j]; // i>k (upper right), k<j (lower left)
+      elem[i][j] = sum;                 // i>=j (upper right)
+      float dum = vv[i] * fabs(sum); // scaled magnitude used to pick the pivot row
+      if (dum >= big)
+      {
+        big = dum;
+        imax = i;
+      }
+    }
+    if (j != imax)
+    { // imax>j
+      for (int k = 0; k < size; k++)
+      {
+        float dum = elem[imax][k]; // upper right and lower left
+        elem[imax][k] = elem[j][k];
+        elem[j][k] = dum;
+      }
+      d = -d;
+      vv[imax] = vv[j];
+    }
+    indx[j] = imax;
+    if (elem[j][j] == 0.0) // j==j (upper right)
+      elem[j][j] = 1e-16; // replace an exactly-zero pivot so the divisions below stay finite
+    if (j != (size - 1))
+    {
+      float dum = 1.0 / elem[j][j];
+      for (int i = j + 1; i < size; i++) // i>j
+        elem[i][j] *= dum;               // i>j (upper right)
+    }
+  }
+  for (int j = 0; j < size; j++) // solve for column j of the inverse, starting from unit vector e_j
+  {
+    for (int k = 0; k < size; k++)
+      b[k] = 0.0;
+    b[j] = 1.0;
+    int ii = -1; // index of the first non-zero entry seen in b; earlier terms are skipped
+    for (int i = 0; i < size; i++) // forward substitution, applying the recorded row swaps
+    {
+      int ip = indx[i];
+      float sum = b[ip];
+      b[ip] = b[i];
+      if (ii != -1)
+        for (int j = ii; j < i; j++) // this j shadows the outer column index
+          sum -= elem[i][j] * b[j]; // i>j (upper right)
+      else if (sum != 0.0)
+        ii = i;
+      b[i] = sum;
+    }
+    for (int i = size - 1; i >= 0; i--) // back substitution
+    {
+      float sum = b[i];
+      for (int j = i + 1; j < size; j++)
+        sum -= elem[i][j] * b[j]; // i<j (lower left)
+      b[i] = sum / elem[i][i];    // i==i (upper right)
+    }
+    for (int i = 0; i < size; i++)
+      res[i][j] = b[i];
+  }
+}
+
+__global__ void ComputeHomographies(float *coord, int *randPts, float *homo,
+                                    int numPts)
+{
+  // One thread per candidate homography: build the 8x8 system from four sampled correspondences
+  // (x1, y1) -> (x2, y2), invert it, and store the eight coefficients with stride numLoops.
+  const int numLoops = blockDim.x * gridDim.x;
+  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  float a[8][8], ia[8][8];
+  float b[8];
+  for (int s = 0; s < 4; s++)
+  {
+    const int pt = randPts[s * numLoops + idx];
+    const float x1 = coord[pt + 0 * numPts];
+    const float y1 = coord[pt + 1 * numPts];
+    const float x2 = coord[pt + 2 * numPts];
+    const float y2 = coord[pt + 3 * numPts];
+    float *rx = a[2 * s + 0]; // equation whose right-hand side is x2
+    float *ry = a[2 * s + 1]; // equation whose right-hand side is y2
+    rx[0] = x1;
+    rx[1] = y1;
+    rx[2] = 1.0;
+    rx[3] = rx[4] = rx[5] = 0.0;
+    rx[6] = -x2 * x1;
+    rx[7] = -x2 * y1;
+    ry[0] = ry[1] = ry[2] = 0.0;
+    ry[3] = x1;
+    ry[4] = y1;
+    ry[5] = 1.0;
+    ry[6] = -y2 * x1;
+    ry[7] = -y2 * y1;
+    b[2 * s + 0] = x2;
+    b[2 * s + 1] = y2;
+  }
+  InvertMatrix<8>(a, ia);
+  __syncthreads();
+  for (int r = 0; r < 8; r++)
+  {
+    float acc = 0.0f;
+    for (int c = 0; c < 8; c++)
+      acc += ia[r][c] * b[c];
+    homo[r * numLoops + idx] = acc;
+  }
+  __syncthreads();
+}
+
+#define TESTHOMO_TESTS 16 // number of tests per block (threads sharing one homography's points), alt. 32, 32
+#define TESTHOMO_LOOPS 16 // number of loops per block (homographies evaluated per block), alt.  8, 16
+
+__global__ void TestHomographies(float *d_coord, float *d_homo,
+                                 int *d_counts, int numPts, float thresh2)
+{
+  // Row ty of the block evaluates one homography; the threads of that row split the points between
+  // them (stride TESTHOMO_TESTS), count those within the error bound, and the row total goes to d_counts.
+  __shared__ float homo[8 * TESTHOMO_LOOPS];
+  __shared__ int cnts[TESTHOMO_TESTS * TESTHOMO_LOOPS];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int numLoops = blockDim.y * gridDim.y;
+  const int idx = blockIdx.y * blockDim.y + tx;
+  if (ty < 8 && tx < TESTHOMO_LOOPS)
+    homo[tx * 8 + ty] = d_homo[idx + ty * numLoops];
+  __syncthreads();
+  const float *h = &homo[ty * 8];
+  const float a0 = h[0], a1 = h[1], a2 = h[2], a3 = h[3];
+  const float a4 = h[4], a5 = h[5], a6 = h[6], a7 = h[7];
+  int inliers = 0;
+  for (int i = tx; i < numPts; i += TESTHOMO_TESTS)
+  {
+    const float x1 = d_coord[i + 0 * numPts];
+    const float y1 = d_coord[i + 1 * numPts];
+    const float x2 = d_coord[i + 2 * numPts];
+    const float y2 = d_coord[i + 3 * numPts];
+    const float nomx = __fmul_rz(a0, x1) + __fmul_rz(a1, y1) + a2;
+    const float nomy = __fmul_rz(a3, x1) + __fmul_rz(a4, y1) + a5;
+    const float deno = __fmul_rz(a6, x1) + __fmul_rz(a7, y1) + 1.0f;
+    const float errx = __fmul_rz(x2, deno) - nomx;
+    const float erry = __fmul_rz(y2, deno) - nomy;
+    const float err2 = __fmul_rz(errx, errx) + __fmul_rz(erry, erry);
+    if (err2 < __fmul_rz(thresh2, __fmul_rz(deno, deno)))
+      inliers++;
+  }
+  int *rowCnts = &cnts[TESTHOMO_TESTS * ty];
+  rowCnts[tx] = inliers;
+  __syncthreads();
+  for (int half = TESTHOMO_TESTS / 2; half > 0; half /= 2)
+  {
+    if (tx < half)
+      rowCnts[tx] += rowCnts[tx + half];
+    __syncthreads();
+  }
+  if (tx < TESTHOMO_LOOPS && ty == 0)
+    d_counts[idx] = cnts[TESTHOMO_TESTS * tx];
+  __syncthreads();
+}
+
+//================= Host matching functions =====================//
+
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{ // RANSAC-style search: samples numLoops 4-point sets from the accepted matches in data, writes the best homography's 8 coefficients and its inlier count.
+  *numMatches = 0;
+  homography[0] = homography[4] = homography[8] = 1.0f; // start from the identity so the early exits leave a valid 3x3 result
+  homography[1] = homography[2] = homography[3] = 0.0f;
+  homography[5] = homography[6] = homography[7] = 0.0f;
+  if (data.d_data == NULL)
+    return 0.0f;
+  SiftPoint *d_sift = data.d_data;
+  numLoops = iDivUp(numLoops, 16) * 16; // round up to a multiple of 16 to match the 16-wide launches below
+  int numPts = data.numPts;
+  if (numPts < 8)
+    return 0.0f;
+  int numPtsUp = iDivUp(numPts, 16) * 16; // padded point count; also the plane stride inside d_coord
+  float *d_coord, *d_homo;
+  int *d_randPts, *h_randPts;
+  int randSize = 4 * sizeof(int) * numLoops;
+  int szFl = sizeof(float);
+  int szPt = sizeof(SiftPoint);
+#ifdef DEVICE_TIMER
+  auto start_malloc = std::chrono::steady_clock::now();
+#endif
+  safeCall(cudaMalloc((void **)&d_coord, 4 * sizeof(float) * numPtsUp));
+  safeCall(cudaMalloc((void **)&d_randPts, randSize));
+  safeCall(cudaMalloc((void **)&d_homo, 8 * sizeof(float) * numLoops));
+
+#ifdef DEVICE_TIMER
+  auto stop_malloc = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  h_randPts = (int *)malloc(randSize);
+  float *h_scores = (float *)malloc(sizeof(float) * numPtsUp);
+  float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp);
+
+  // temp variables are for host memory allocation, device data is transferred to temp
+  float *temp1 = (float *)malloc(szPt * numPtsUp);
+  float *temp2 = (float *)malloc(szPt * numPtsUp);
+
+#ifdef DEVICE_TIMER
+  auto start_memcpy_1 = std::chrono::steady_clock::now();
+#endif
+  // Each copy starts at a member address, so stop after the last point's field: szPt * numPts bytes would run past the array end.
+  safeCall(cudaMemcpy(temp1, &d_sift[0].score, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToHost));
+  safeCall(cudaMemcpy(temp2, &d_sift[0].ambiguity, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToHost));
+
+#ifdef DEVICE_TIMER
+  auto stop_memcpy_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_1 - start_memcpy_1).count();
+#endif
+
+  char *src_score = (char *)temp1;
+  char *src_ambiguity = (char *)temp2;
+  char *dst_score = (char *)h_scores;
+  char *dst_ambiguity = (char *)h_ambiguities;
+  // Gather the strided score / ambiguity fields (one per SiftPoint) into dense host arrays.
+  for (int i = 0; i < numPts; ++i)
+  {
+    memcpy(dst_score, src_score, szFl);
+    memcpy(dst_ambiguity, src_ambiguity, szFl);
+    src_score += szPt;
+    src_ambiguity += szPt;
+    dst_score += szFl;
+    dst_ambiguity += szFl;
+  }
+
+  int *validPts = (int *)malloc(sizeof(int) * numPts); // indices of points that pass both thresholds
+  int numValid = 0;
+  for (int i = 0; i < numPts; i++)
+  {
+    if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity)
+      validPts[numValid++] = i;
+  }
+  free(h_scores);
+  free(h_ambiguities);
+  if (numValid >= 8)
+  {
+    std::random_device rd;
+    uint32_t seed = rd();
+    std::mt19937 rnd(seed);  // mersenne_twister_engine
+    std::uniform_int_distribution<uint32_t> dis(0, UINT32_MAX);
+    for (int i = 0; i < numLoops; i++) // draw four distinct valid points per candidate homography
+    {
+      int p1 = dis(rnd) % numValid;
+      int p2 = dis(rnd) % numValid;
+      int p3 = dis(rnd) % numValid;
+      int p4 = dis(rnd) % numValid;
+      while (p2 == p1)
+        p2 = dis(rnd) % numValid;
+      while (p3 == p1 || p3 == p2)
+        p3 = dis(rnd) % numValid;
+      while (p4 == p1 || p4 == p2 || p4 == p3)
+        p4 = dis(rnd) % numValid;
+      h_randPts[i + 0 * numLoops] = validPts[p1];
+      h_randPts[i + 1 * numLoops] = validPts[p2];
+      h_randPts[i + 2 * numLoops] = validPts[p3];
+      h_randPts[i + 3 * numLoops] = validPts[p4];
+    }
+
+    float *temp3, *temp4, *temp5, *temp6; // device staging copies of the xpos / ypos / match_xpos / match_ypos columns
+#ifdef DEVICE_TIMER
+    auto start_malloc_2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMalloc((void **)&temp3, szPt * numPtsUp));
+    safeCall(cudaMalloc((void **)&temp4, szPt * numPtsUp));
+    safeCall(cudaMalloc((void **)&temp5, szPt * numPtsUp));
+    safeCall(cudaMalloc((void **)&temp6, szPt * numPtsUp));
+#ifdef DEVICE_TIMER
+    auto stop_malloc_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_malloc_2 - start_malloc_2).count();
+#endif
+#ifdef DEVICE_TIMER
+    auto start_memcpy_2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(d_randPts, h_randPts, randSize, cudaMemcpyHostToDevice));
+    safeCall(cudaDeviceSynchronize());
+    safeCall(cudaMemcpy(temp3, &d_sift[0].xpos, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToDevice)); // same bound as above: end at the last point's field
+    safeCall(cudaMemcpy(temp4, &d_sift[0].ypos, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToDevice));
+    safeCall(cudaMemcpy(temp5, &d_sift[0].match_xpos, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToDevice));
+    safeCall(cudaMemcpy(temp6, &d_sift[0].match_ypos, szPt * (numPts - 1) + szFl, cudaMemcpyDeviceToDevice));
+
+    // kernel call to transfer memory from device to device: packs each strided column into one plane of d_coord
+    memcopyKernel<<<1, 1>>>(temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(cudaGetLastError());
+    safeCall(cudaDeviceSynchronize());
+    memcopyKernel<<<1, 1>>>(temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(cudaGetLastError());
+    safeCall(cudaDeviceSynchronize());
+    memcopyKernel<<<1, 1>>>(temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(cudaGetLastError());
+    safeCall(cudaDeviceSynchronize());
+    memcopyKernel<<<1, 1>>>(temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(cudaGetLastError());
+    safeCall(cudaDeviceSynchronize());
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_2 - start_memcpy_2).count();
+#endif
+#ifdef DEVICE_TIMER
+    auto start_kernel_1 = std::chrono::steady_clock::now();
+#endif
+    ComputeHomographies<<<numLoops / 16, 16>>>(d_coord, d_randPts, d_homo, numPtsUp);
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_kernel_1 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count();
+    // printf("ComputeHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count());
+#endif
+    checkMsg("ComputeHomographies() execution failed\n");
+
+    dim3 blocks(1, numLoops / TESTHOMO_LOOPS);
+    dim3 threads(TESTHOMO_TESTS, TESTHOMO_LOOPS);
+#ifdef DEVICE_TIMER
+    auto start_kernel_2 = std::chrono::steady_clock::now();
+#endif
+    TestHomographies<<<blocks, threads>>>(d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh); // d_randPts is reused as the per-homography count output
+    safeCall(cudaDeviceSynchronize()); // NOTE(review): the kernel is given numPtsUp, so it also reads the padding entries of d_coord, which nothing above writes - confirm intended
+#ifdef DEVICE_TIMER
+    auto stop_kernel_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count();
+    // printf("TestHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count());
+#endif
+    checkMsg("TestHomographies() execution failed\n");
+#ifdef DEVICE_TIMER
+    auto start_memcpy_3 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(h_randPts, d_randPts, sizeof(int) * numLoops, cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_3 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_3 - start_memcpy_3).count();
+#endif
+    int maxIndex = -1, maxCount = -1; // candidate with the highest count wins
+    for (int i = 0; i < numLoops; i++)
+      if (h_randPts[i] > maxCount)
+      {
+        maxCount = h_randPts[i];
+        maxIndex = i;
+      }
+
+    *numMatches = maxCount;
+#ifdef DEVICE_TIMER
+    auto start_memcpy_4 = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy2D(homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8, cudaMemcpyDeviceToHost)); // pull the winner's 8 coefficients (stride numLoops) into homography[0..7]
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_4 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_4 - start_memcpy_4).count();
+#endif
+
+    safeCall(cudaFree(temp3));
+    safeCall(cudaFree(temp4));
+    safeCall(cudaFree(temp5));
+    safeCall(cudaFree(temp6));
+
+  }
+
+  free(validPts);
+  free(h_randPts);
+  free(temp1);
+  free(temp2);
+
+  safeCall(cudaFree(d_homo));
+  safeCall(cudaFree(d_randPts));
+  safeCall(cudaFree(d_coord));
+  return matchTime; // time is only accumulated when DEVICE_TIMER is defined; the early exits above return 0 instead
+}
+
+// Runs CleanMatches and FindMaxCorr10 on the SIFT points of data1 and data2, then copies
+// data1's device array back to h_data when that host array exists. matchTime is an in/out
+// total in microseconds that only grows when DEVICE_TIMER is defined. Returns matchTime, or
+// 0.0 early when either set is empty or (without MANAGEDMEM) a device array is NULL.
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime)
+{
+  int numPts1 = data1.numPts;
+  int numPts2 = data2.numPts;
+  if (!numPts1 || !numPts2)
+    return 0.0;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = data1.m_data;
+  SiftPoint *sift2 = data2.m_data;
+#else
+  if (data1.d_data == NULL || data2.d_data == NULL)
+    return 0.0;
+  SiftPoint *sift1 = data1.d_data;
+  SiftPoint *sift2 = data2.d_data;
+#endif
+// Original version with correlation and maximization in two different kernels
+// Global memory requirement: O(N^2)
+#if 0
+  float *d_corrData;
+  int corrWidth = iDivUp(numPts2, 16)*16;
+  int corrSize = sizeof(float)*numPts1*corrWidth;
+  safeCall(cudaMalloc((void **)&d_corrData, corrSize));
+#if 0
+  dim3 blocks1(numPts1, iDivUp(numPts2, 16));
+  dim3 threads1(16, 16); // each block: 1 points x 16 points
+  MatchSiftPoints<<<blocks1, threads1>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#else
+  dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16));
+  dim3 threads(16, 16); // each block: 16 points x 16 points
+  MatchSiftPoints2<<<blocks, threads>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#endif
+  safeCall(cudaDeviceSynchronize());
+  dim3 blocksMax(iDivUp(numPts1, 16));
+  dim3 threadsMax(16, 16);
+  FindMaxCorr<<<blocksMax, threadsMax>>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint));
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+// Version suggested by Nicholas Lin with combined correlation and maximization
+// Global memory requirement: O(N)
+#if 0
+  int block_dim = 16;
+  float *d_corrData;
+  int corrSize = numPts1 * block_dim * 2;
+  safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize));
+  dim3 blocks(iDivUp(numPts1, block_dim));
+  dim3 threads(block_dim, block_dim);
+  FindMaxCorr3<<<blocks, threads >>>(d_corrData, sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr3() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+#if 0
+  dim3 blocksMax(numPts1);
+  dim3 threadsMax(FMC2W, FMC2H);
+  FindMaxCorr2<<<blocksMax, threadsMax>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr2() execution failed\n");
+#endif
+// Combined version with no global memory requirement using one FMC2H points per block
+#if 0
+  dim3 blocksMax2(iDivUp(numPts1, FMC2H));
+  dim3 threadsMax2(FMC2W, FMC2H);
+  FindMaxCorr4<<<blocksMax2, threadsMax2>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr4() execution failed\n");
+#endif
+// Combined version with no global memory requirement using global locks
+#if 1
+  dim3 blocksMax3(iDivUp(numPts1, 16), iDivUp(numPts2, 512)); // overwritten in the mode 10 branch
+  dim3 threadsMax3(16, 16);                                   // overwritten in the mode 10 branch
+#ifdef DEVICE_TIMER
+  auto start_kernel1 = std::chrono::steady_clock::now();
+#endif
+  CleanMatches<<<iDivUp(numPts1, 64), 64>>>(sift1, numPts1);
+  safeCall(cudaGetLastError());
+  safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+  auto stop_kernel1 = std::chrono::steady_clock::now();
+  // Per-call timing tally, declared with its first sample. NOTE(review): never read in this function.
+  float matchSiftDataTime = std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+  matchTime += matchSiftDataTime;
+#endif
+  int mode = 10;
+  // if (mode == 5) // K40c 5.0ms, 1080 Ti 1.2ms, 2080 Ti 0.83ms
+  //   FindMaxCorr5<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // else if (mode == 6)
+  // { // 2080 Ti 0.89ms
+  //   threadsMax3 = dim3(32, 16);
+  //   FindMaxCorr6<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else if (mode == 7) // 2080 Ti 0.50ms
+  //   FindMaxCorr7<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // else if (mode == 8)
+  // { // 2080 Ti 0.45ms
+  //   blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+  //   threadsMax3 = dim3(FMC_NW, FMC_NH);
+  //   FindMaxCorr8<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else if (mode == 9)
+  // { // 2080 Ti 0.46ms
+  //   blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+  //   threadsMax3 = dim3(FMC_NW, FMC_NH);
+  //   FindMaxCorr9<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else
+  if (mode == 10)
+  {
+    blocksMax3 = dim3(iDivUp(numPts1, M7W));
+    threadsMax3 = dim3(M7W, M7H / M7R);
+#ifdef DEVICE_TIMER
+    auto start_kernel2 = std::chrono::steady_clock::now();
+#endif
+    FindMaxCorr10<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_kernel2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+#endif
+  }
+  checkMsg("FindMaxCorr10() execution failed\n");
+#endif
+
+  if (data1.h_data != NULL)
+  {
+    float *h_ptr = &data1.h_data[0].score;
+    float *d_ptr = &data1.d_data[0].score;
+    // The copy starts at the score member, so trim that offset: it then ends with point numPts-1, not past it.
+    const size_t copyBytes = sizeof(SiftPoint) * data1.numPts - (size_t)((char *)h_ptr - (char *)data1.h_data);
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(cudaMemcpy(h_ptr, d_ptr, copyBytes, cudaMemcpyDeviceToHost));
+    safeCall(cudaDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  }
+  return matchTime;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml
new file mode 100644
index 000000000..8a7a15c82
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml
@@ -0,0 +1,10297 @@
+---
+MainSourceFile:  MainSrcFiles_placehold
+Replacements:
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          1342
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          1997
+    Length:          100
+    ReplacementText: 'DPCT_CHECK_ERROR(d_data = (float *)dpct::dpct_malloc(*(size_t *)&pitch, (size_t)(sizeof(float) * width), (size_t)height))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          2113
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          2952
+    Length:          16
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_data, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          2968
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3099
+    Length:          34
+    ReplacementText: 'DPCT_CHECK_ERROR(delete (dpct::image_matrix *)t_data'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3133
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3405
+    Length:          12
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::dpct_memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3491
+    Length:          22
+    ReplacementText: 'dpct::host_to_device'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3514
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Offset:          3631
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1342
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1596
+    Length:          32
+    ReplacementText: 'static dpct::constant_memory<int, 0> d_MaxNumPoints;'
+    ConstantFlag:    DeviceConstant
+    ConstantOffset:  1596
+    InitStr:         ''
+    NewHostVarName:  d_MaxNumPoints_host_ct1
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1629
+    Length:          50
+    ReplacementText: 'dpct::global_memory<unsigned int, 1> d_PointCounter(8 * 2 + 1);'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1680
+    Length:          40
+    ReplacementText: 'static dpct::constant_memory<float, 1> d_ScaleDownKernel(5);'
+    ConstantFlag:    DeviceConstant
+    ConstantOffset:  1680
+    InitStr:         ''
+    NewHostVarName:  d_ScaleDownKernel_host_ct1
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1721
+    Length:          54
+    ReplacementText: 'static dpct::constant_memory<float, 1> d_LowPassKernel(2 * LOWPASS_R + 1);'
+    ConstantFlag:    DeviceConstant
+    ConstantOffset:  1721
+    InitStr:         ''
+    NewHostVarName:  d_LowPassKernel_host_ct1
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          1776
+    Length:          48
+    ReplacementText: 'static dpct::constant_memory<float, 1> d_LaplaceKernel(8 * 12 * 16);'
+    ConstantFlag:    DeviceConstant
+    ConstantOffset:  1776
+    InitStr:         ''
+    NewHostVarName:  d_LaplaceKernel_host_ct1
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2024
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2138
+    Length:          0
+    ReplacementText: ",\n                         const sycl::nd_item<3> &item_ct1,\n                         float const *d_ScaleDownKernel, float *brows"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2260
+    Length:          32
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2310
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2340
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2370
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2418
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2589
+    Length:          30
+    ReplacementText: 'sycl::min(width - 1, sycl::max(0, xp - 2))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2638
+    Length:          31
+    ReplacementText: 'sycl::min(height - 1, sycl::max(0, yp - 2))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2809
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2834
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2852
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2876
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2885
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2919
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          2958
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3227
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3336
+    Length:          0
+    ReplacementText: ",\n                    const sycl::nd_item<3> &item_ct1,\n                    float const *d_ScaleDownKernel, float *irows, float *brows"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3458
+    Length:          32
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3493
+    Length:          32
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3543
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3573
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3603
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3651
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3699
+    Length:          30
+    ReplacementText: 'sycl::min(width - 1, sycl::max(0, xp - 2))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          3748
+    Length:          31
+    ReplacementText: 'sycl::min(height - 1, sycl::max(0, yp - 2))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4002
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4197
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4231
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4270
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4533
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4637
+    Length:          0
+    ReplacementText: ",\n               const sycl::nd_item<3> &item_ct1, float const *d_ScaleDownKernel,\n               float *inrow, float *brow, int *yRead, int *yWrite"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4643
+    Length:          40
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4686
+    Length:          45
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4734
+    Length:          38
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4775
+    Length:          39
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          4862
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5056
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5103
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5482
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5628
+    Length:          32
+    ReplacementText: 'sycl::min(dx2, width / 2 - xStart / 2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5771
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:3: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          5777
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6124
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:4: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6130
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6240
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:5: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6246
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6592
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:6: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6598
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6708
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:7: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          6714
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7061
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:8: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7067
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7177
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:9: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7183
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7529
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:10: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7535
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7639
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:11: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          7645
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8001
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:12: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8007
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8037
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8139
+    Length:          0
+    ReplacementText: ",\n             const sycl::nd_item<3> &item_ct1"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8160
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8190
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8213
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8256
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8345
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8393
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8441
+    Length:          22
+    ReplacementText: 'sycl::min(xl + 1, width - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8478
+    Length:          23
+    ReplacementText: 'sycl::min(yu + 1, height - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8926
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:13: The total declared local variable size in device function ExtractSiftDescriptors exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          8965
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9041
+    Length:          0
+    ReplacementText: ",\n                            const sycl::nd_item<3> &item_ct1, float *gauss,\n                            float *buffer, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9047
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9077
+    Length:          29
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9109
+    Length:          25
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9153
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9194
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9266
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9333
+    Length:          40
+    ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9399
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:92: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9532
+    Length:          11
+    ReplacementText: 'sycl::sin(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9574
+    Length:          11
+    ReplacementText: 'sycl::cos(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9934
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          9998
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          10061
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          10125
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          10213
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          10273
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          10986
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11033
+    Length:          36
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11164
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 32, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11216
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 32, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11416
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 8, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11467
+    Length:          40
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 8, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11603
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 40, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11655
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 40, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11718
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:93: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11889
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          11943
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:94: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12025
+    Length:          38
+    ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12150
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12204
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:95: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12332
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12483
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12545
+    Length:          6
+    ReplacementText: 'sycl::fabs(x)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12568
+    Length:          6
+    ReplacementText: 'sycl::fabs(y)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12576
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1013:96: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12588
+    Length:          43
+    ReplacementText: 'sycl::min(absx, absy) / sycl::max(absx, absy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          12980
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:14: The total declared local variable size in device function ExtractSiftDescriptorsCONSTNew exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13101
+    Length:          0
+    ReplacementText: ",\n                                    const sycl::nd_item<3> &item_ct1,\n                                    int d_MaxNumPoints,\n                                    unsigned int *d_PointCounter, float *gauss,\n                                    float *buffer, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13107
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13137
+    Length:          29
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13169
+    Length:          25
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13213
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13254
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13340
+    Length:          43
+    ReplacementText: 'sycl::native::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13401
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13469
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13666
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13706
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13746
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:15: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13750
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:97: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13889
+    Length:          13
+    ReplacementText: 'sycl::sin(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          13935
+    Length:          13
+    ReplacementText: 'sycl::cos(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          14988
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1013:102: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          15030
+    Length:          29
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          15853
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          15902
+    Length:          36
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16043
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 32, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16097
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 32, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16315
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 8, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16368
+    Length:          40
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 8, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16514
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 40, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16568
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 40, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16635
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:16: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16639
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:98: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16818
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16874
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:17: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16878
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:99: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          16964
+    Length:          38
+    ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17095
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17151
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:18: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17155
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:100: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17289
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17449
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:19: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17453
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:101: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17477
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:20: The total declared local variable size in device function ExtractSiftDescriptorsCONST exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17521
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17597
+    Length:          0
+    ReplacementText: ",\n                                 const sycl::nd_item<3> &item_ct1,\n                                 int d_MaxNumPoints,\n                                 unsigned int *d_PointCounter, float *gauss,\n                                 float *buffer, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17603
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17633
+    Length:          29
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17665
+    Length:          25
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17709
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17750
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17836
+    Length:          40
+    ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17894
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          17962
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18159
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18199
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18239
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:21: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18243
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:103: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18382
+    Length:          11
+    ReplacementText: 'sycl::sin(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18426
+    Length:          11
+    ReplacementText: 'sycl::cos(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18802
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18868
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18933
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          18999
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          19089
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          19151
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          19904
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          19953
+    Length:          36
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20094
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 32, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20148
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 32, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20366
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 8, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20419
+    Length:          40
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 8, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20565
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 40, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20619
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 40, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20686
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:22: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20690
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:104: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20869
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20925
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:23: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          20929
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:105: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21015
+    Length:          38
+    ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21146
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21202
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:24: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21206
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:106: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21340
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21500
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:25: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21504
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:107: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21528
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:26: The total declared local variable size in device function ExtractSiftDescriptorsOld exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21570
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21646
+    Length:          0
+    ReplacementText: ",\n                               const sycl::nd_item<3> &item_ct1, float *gauss,\n                               float *buffer, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21652
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21682
+    Length:          29
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21714
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21760
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21801
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21873
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          21940
+    Length:          40
+    ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22006
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:108: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22139
+    Length:          11
+    ReplacementText: 'sycl::sin(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22181
+    Length:          11
+    ReplacementText: 'sycl::cos(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22541
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22605
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22668
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22732
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22820
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          22880
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          23593
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          23640
+    Length:          36
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          23771
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 32, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          23823
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 32, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24023
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 8, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24074
+    Length:          40
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 8, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24210
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 40, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24262
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 40, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24325
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:109: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24493
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:110: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24572
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:111: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24651
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:112: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24728
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:113: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24805
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:114: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24907
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          24975
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:115: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25091
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:116: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25170
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:117: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25249
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:118: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25326
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:119: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25403
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:120: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25537
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25688
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:27: The total declared local variable size in device function ExtractSiftDescriptor exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25726
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25810
+    Length:          0
+    ReplacementText: ",\n                           const sycl::nd_item<3> &item_ct1, float *gauss,\n                           float *buffer, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25816
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25846
+    Length:          29
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25878
+    Length:          25
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          25923
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26042
+    Length:          40
+    ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26108
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:121: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26241
+    Length:          11
+    ReplacementText: 'sycl::sin(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26283
+    Length:          11
+    ReplacementText: 'sycl::cos(theta)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26643
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26707
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26770
+    Length:          46
+    ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26834
+    Length:          46
+    ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26922
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          26982
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          27695
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          27742
+    Length:          36
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          27873
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 32, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          27925
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 32, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28125
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 8, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28176
+    Length:          40
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 8, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28312
+    Length:          42
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p1 + 40, iangf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28364
+    Length:          41
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(buffer + p2 + 40, angf * grad2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28427
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:122: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28598
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28652
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:123: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28734
+    Length:          38
+    ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28859
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          28913
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:124: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29041
+    Length:          13
+    ReplacementText: 'sycl::rsqrt(tsum2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29191
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:125: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29211
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29286
+    Length:          0
+    ReplacementText: ",\n                      const sycl::nd_item<3> &item_ct1"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29302
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29315
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29328
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29466
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29502
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29559
+    Length:          0
+    ReplacementText: ",\n                         const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n                         unsigned int *d_PointCounter, float *hist, float *gauss"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29565
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29594
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29639
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29669
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29794
+    Length:          35
+    ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          29869
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:126: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30083
+    Length:          34
+    ReplacementText: 'texObj.read(xf + 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30120
+    Length:          34
+    ReplacementText: 'texObj.read(xf - 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30171
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf + 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30208
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf - 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30266
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30350
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30380
+    Length:          51
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&hist[bin], grad * gauss[xd] * gauss[yd])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30439
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:127: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30751
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:128: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          30924
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:129: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          31797
+    Length:          37
+    ReplacementText: 'dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(d_PointCounter, 0x7fffffff)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32319
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:28: The total declared local variable size in device function ComputeOrientationsCONSTNew exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32427
+    Length:          0
+    ReplacementText: ",\n                                 const sycl::nd_item<3> &item_ct1,\n                                 int d_MaxNumPoints,\n                                 unsigned int *d_PointCounter,\n                                 sycl::local_accessor<float, 2> img,\n                                 sycl::local_accessor<float, 2> tmp, float *hist,\n                                 float *gaussx, float *gaussy"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32526
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32575
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32609
+    Length:          42
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32669
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32698
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32766
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32835
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32875
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          32963
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33194
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33277
+    Length:          32
+    ReplacementText: 'sycl::max(sycl::min(x - RAD + xi, w - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33326
+    Length:          32
+    ReplacementText: 'sycl::max(sycl::min(y - RAD + yi, h - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33457
+    Length:          42
+    ReplacementText: 'sycl::native::exp(-1.0f / (2.0f * (sc * sc - 0.25f)))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33544
+    Length:          42
+    ReplacementText: 'sycl::native::exp(-4.0f / (2.0f * (sc * sc - 0.25f)))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33759
+    Length:          52
+    ReplacementText: 'sycl::native::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33832
+    Length:          52
+    ReplacementText: 'sycl::native::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33892
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:29: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33896
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:130: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          33960
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34181
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:30: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34185
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:131: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34255
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34502
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:31: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34506
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:132: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34576
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34814
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34866
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1013:135: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34885
+    Length:          29
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34922
+    Length:          57
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&hist[LEN + bin], grad * gaussx[x] * gaussy[y])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34987
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:32: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          34991
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:133: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          35639
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:33: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          35643
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:134: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          36349
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          36693
+    Length:          54
+    ReplacementText: 'dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], 0x7fffffff)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37273
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:34: The total declared local variable size in device function ComputeOrientationsCONST exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37314
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37371
+    Length:          0
+    ReplacementText: ",\n                              const sycl::nd_item<3> &item_ct1,\n                              int d_MaxNumPoints, unsigned int *d_PointCounter,\n                              float *hist, float *gauss"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37377
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37406
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37451
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37480
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37548
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37617
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37657
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37797
+    Length:          35
+    ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37874
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:35: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          37878
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:136: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38110
+    Length:          34
+    ReplacementText: 'texObj.read(xf + 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38147
+    Length:          34
+    ReplacementText: 'texObj.read(xf - 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38200
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf + 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38237
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf - 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38297
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38387
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38419
+    Length:          51
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&hist[bin], grad * gauss[xd] * gauss[yd])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38478
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:36: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38482
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:137: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38808
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:37: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38812
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:138: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38983
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:38: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          38987
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:139: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          39679
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40014
+    Length:          54
+    ReplacementText: 'dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], 0x7fffffff)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40542
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:39: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40546
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:140: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40604
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40642
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40718
+    Length:          0
+    ReplacementText: ",\n                           const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n                           unsigned int *d_PointCounter, float *gauss,\n                           float *buffer, float *sums, float *hist,\n                           unsigned int &idx"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40724
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40753
+    Length:          27
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40783
+    Length:          28
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40836
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40865
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          40933
+    Length:          51
+    ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41002
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41042
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41168
+    Length:          35
+    ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41245
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:40: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41249
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:141: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41481
+    Length:          34
+    ReplacementText: 'texObj.read(xf + 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41518
+    Length:          34
+    ReplacementText: 'texObj.read(xf - 1.0, yf)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41571
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf + 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41608
+    Length:          34
+    ReplacementText: 'texObj.read(xf, yf - 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41668
+    Length:          14
+    ReplacementText: 'sycl::atan2(dy, dx)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41758
+    Length:          24
+    ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41790
+    Length:          51
+    ReplacementText: 'dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&hist[bin], grad * gauss[xd] * gauss[yd])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41849
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:41: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          41853
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:142: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          42179
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:42: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          42183
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:143: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          42354
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:43: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          42358
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:144: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          43081
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          43395
+    Length:          54
+    ReplacementText: 'dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], 0x7fffffff)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          43930
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:44: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          43934
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:145: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          44016
+    Length:          0
+    ReplacementText: ', item_ct1, gauss, buffer, sums'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          44175
+    Length:          0
+    ReplacementText: ', item_ct1, gauss, buffer, sums'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50387
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:45: The total declared local variable size in device function FindPointsMultiNew exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50584
+    Length:          0
+    ReplacementText: ",\n                        const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n                        unsigned int *d_PointCounter, unsigned short *points"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50620
+    Length:          45
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50673
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50692
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50711
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50737
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50817
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50908
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50935
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          50974
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51051
+    Length:          27
+    ReplacementText: 'sycl::min(minx + MINMAX_W, width)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51160
+    Length:          32
+    ReplacementText: 'sycl::max(sycl::min(xpos - 1, width - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51210
+    Length:          45
+    ReplacementText: 'dpct::min((unsigned int)(height - MINMAX_H * item_ct1.get_group(1)), MINMAX_H)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51343
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51449
+    Length:          22
+    ReplacementText: 'sycl::fmax(maxv, sycl::fabs(val))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51518
+    Length:          37
+    ReplacementText: 'sycl::any_of_group(item_ct1.get_sub_group(), (0xffffffff & (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && maxv > thresh)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51688
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51791
+    Length:          42
+    ReplacementText: 'sycl::any_of_group(item_ct1.get_sub_group(), (0xffffffff & (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && sycl::fabs(d11) > thresh)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51866
+    Length:          16
+    ReplacementText: 'sycl::max(0, ypos - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          51916
+    Length:          25
+    ReplacementText: 'sycl::min(height - 1, ypos + 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52209
+    Length:          27
+    ReplacementText: 'sycl::fmin(sycl::fmin(d00, d01), d02)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52258
+    Length:          27
+    ReplacementText: 'sycl::fmax(sycl::fmax(d00, d01), d02)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52397
+    Length:          27
+    ReplacementText: 'sycl::fmin(sycl::fmin(d20, d21), d22)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52446
+    Length:          27
+    ReplacementText: 'sycl::fmax(sycl::fmax(d20, d21), d22)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52495
+    Length:          55
+    ReplacementText: 'sycl::fmin(sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52572
+    Length:          55
+    ReplacementText: 'sycl::fmax(sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52650
+    Length:          45
+    ReplacementText: 'sycl::fmin(ShiftUp(ymin2, 1, item_ct1), ShiftDown(ymin2, 1, item_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52717
+    Length:          45
+    ReplacementText: 'sycl::fmax(ShiftUp(ymax2, 1, item_ct1), ShiftDown(ymax2, 1, item_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52783
+    Length:          33
+    ReplacementText: 'sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52831
+    Length:          28
+    ReplacementText: 'sycl::fmin(sycl::fmin(minv, d10), d12)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52880
+    Length:          33
+    ReplacementText: 'sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          52928
+    Length:          28
+    ReplacementText: 'sycl::fmax(sycl::fmax(maxv, d10), d12)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53040
+    Length:          20
+    ReplacementText: 'sycl::fmin(-thresh, minv)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53071
+    Length:          19
+    ReplacementText: 'sycl::fmax(thresh, maxv)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53135
+    Length:          14
+    ReplacementText: 'sycl::popcount(ptbits)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53265
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53407
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          53599
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          54163
+    Length:          26
+    ReplacementText: '(tra * tra) / det'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          54807
+    Length:          54
+    ReplacementText: '1.0f / (idxx * dxx + idxy * dxy + idxs * dxs)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55292
+    Length:          19
+    ReplacementText: 'dx / dxx'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55327
+    Length:          19
+    ReplacementText: 'dy / dyy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55362
+    Length:          19
+    ReplacementText: 'ds / dss'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55503
+    Length:          37
+    ReplacementText: 'dpct::pow(2.0f, (float)scale / NUM_SCALES)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55543
+    Length:          19
+    ReplacementText: 'sycl::exp2(pds * factor)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55609
+    Length:          74
+    ReplacementText: 'dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          55712
+    Length:          54
+    ReplacementText: 'dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&d_PointCounter[2 * octave + 0], 0x7fffffff)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68605
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68637
+    Length:          19
+    ReplacementText: 'dpct::image_accessor_ext<float, 2>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68726
+    Length:          0
+    ReplacementText: ",\n                     const sycl::nd_item<3> &item_ct1,\n                     float const *d_LaplaceKernel, float *data1, float *data2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68732
+    Length:          64
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68799
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68863
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68893
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68939
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          68971
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69002
+    Length:          47
+    ReplacementText: 'const_cast<float *>(d_LaplaceKernel + octave * 12 * 16 + scale * 16)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69185
+    Length:          26
+    ReplacementText: 'texObj.read(x, y)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69242
+    Length:          32
+    ReplacementText: 'texObj.read(x, y - 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69277
+    Length:          32
+    ReplacementText: 'texObj.read(x, y + 1.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69341
+    Length:          32
+    ReplacementText: 'texObj.read(x, y - 2.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69376
+    Length:          32
+    ReplacementText: 'texObj.read(x, y + 2.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69440
+    Length:          32
+    ReplacementText: 'texObj.read(x, y - 3.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69475
+    Length:          32
+    ReplacementText: 'texObj.read(x, y + 3.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69539
+    Length:          32
+    ReplacementText: 'texObj.read(x, y - 4.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69574
+    Length:          32
+    ReplacementText: 'texObj.read(x, y + 4.0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          69611
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70010
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70185
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:46: The total declared local variable size in device function LaplaceMultiMem exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70294
+    Length:          0
+    ReplacementText: ",\n                     const sycl::nd_item<3> &item_ct1,\n                     float const *d_LaplaceKernel, float *buff"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70300
+    Length:          63
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70381
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70411
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70457
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70495
+    Length:          38
+    ReplacementText: 'sycl::max(sycl::min(xp - LAPLACE_R, width - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70747
+    Length:          43
+    ReplacementText: 'sycl::max(0, sycl::min(yp + i - LAPLACE_R, height - 1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          70944
+    Length:          47
+    ReplacementText: 'const_cast<float *>(d_LaplaceKernel + octave * 12 * 16 + scale * 16)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          71308
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79297
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79386
+    Length:          0
+    ReplacementText: ",\n             const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,\n             float *buffer"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79392
+    Length:          65
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79475
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79505
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79535
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79581
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79628
+    Length:          15
+    ReplacementText: 'const_cast<float *>(d_LowPassKernel)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79671
+    Length:          30
+    ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79835
+    Length:          10
+    ReplacementText: 'sycl::min(yp, h)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79890
+    Length:          22
+    ReplacementText: 'sycl::max(0, sycl::min(yp - 1, h))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79929
+    Length:          14
+    ReplacementText: 'sycl::min(yp + 1, h)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          79989
+    Length:          22
+    ReplacementText: 'sycl::max(0, sycl::min(yp - 2, h))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80028
+    Length:          14
+    ReplacementText: 'sycl::min(yp + 2, h)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80088
+    Length:          22
+    ReplacementText: 'sycl::max(0, sycl::min(yp - 3, h))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80127
+    Length:          14
+    ReplacementText: 'sycl::min(yp + 3, h)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80187
+    Length:          22
+    ReplacementText: 'sycl::max(0, sycl::min(yp - 4, h))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80226
+    Length:          14
+    ReplacementText: 'sycl::min(yp + 4, h)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80254
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80623
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80720
+    Length:          0
+    ReplacementText: ",\n                     const sycl::nd_item<3> &item_ct1,\n                     float const *d_LowPassKernel,\n                     sycl::local_accessor<float, 2> xrows"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80726
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80775
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80805
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80835
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80881
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80943
+    Length:          15
+    ReplacementText: 'const_cast<float *>(d_LowPassKernel)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          80971
+    Length:          30
+    ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81094
+    Length:          35
+    ReplacementText: 'sycl::max(sycl::min(yp + l + 4, height - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81234
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81298
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81318
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81383
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81403
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81468
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81488
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          81553
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82187
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:47: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82193
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82217
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82311
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,\n                  sycl::local_accessor<float, 2> xrows"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82317
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82366
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82396
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82426
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82472
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82534
+    Length:          15
+    ReplacementText: 'const_cast<float *>(d_LowPassKernel)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82562
+    Length:          30
+    ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82681
+    Length:          35
+    ReplacementText: 'sycl::max(sycl::min(yp + l + 4, height - 1), 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82821
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82859
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82879
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82918
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82938
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82977
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          82997
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83036
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83081
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83192
+    Length:          27
+    ReplacementText: 'sycl::min(yp + l + 4, height - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83296
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83334
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83354
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83393
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83413
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83452
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83472
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          83511
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          84080
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:48: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Offset:          84084
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1344
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1573
+    Length:          23
+    ReplacementText: '#include "cudaSiftD.dp.cpp"'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1653
+    Length:          29
+    ReplacementText: 'DPCT_CHECK_ERROR(nDevices = dpct::dev_mgr::instance().device_count())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1848
+    Length:          14
+    ReplacementText: 'dpct::device_info'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1880
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_device_info'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1904
+    Length:          5
+    ReplacementText: prop
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1911
+    Length:          6
+    ReplacementText: 'dpct::dev_mgr::instance().get_device(devNum)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1918
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          1999
+    Length:          4
+    ReplacementText: 'get_name()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2055
+    Length:          15
+    ReplacementText: 'get_memory_clock_rate()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2122
+    Length:          9
+    ReplacementText: 'get_max_clock_frequency()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2190
+    Length:          14
+    ReplacementText: 'get_memory_bus_width()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2280
+    Length:          15
+    ReplacementText: 'get_memory_clock_rate()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2304
+    Length:          14
+    ReplacementText: 'get_memory_bus_width()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          2967
+    Length:          96
+    ReplacementText: 'DPCT_CHECK_ERROR(memoryTmp = (float *)dpct::dpct_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3077
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3370
+    Length:          19
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3389
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3712
+    Length:          66
+    ReplacementText: 'DPCT_CHECK_ERROR(*((void **)&d_PointCounterAddr) = d_PointCounter.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3778
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3792
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memset'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3852
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3866
+    Length:          18
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3885
+    Length:          14
+    ReplacementText: 'd_MaxNumPoints.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3931
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          3945
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          4750
+    Length:          96
+    ReplacementText: 'DPCT_CHECK_ERROR(memoryTmp = (float *)dpct::dpct_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          4862
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          5418
+    Length:          18
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          5437
+    Length:          15
+    ReplacementText: 'd_LaplaceKernel.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          5490
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          5506
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6037
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6114
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6139
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6155
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6637
+    Length:          21
+    ReplacementText: 'dpct::max(initBlur, 0.001f)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6853
+    Length:          18
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6872
+    Length:          15
+    ReplacementText: 'd_LaplaceKernel.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6925
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          6941
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7420
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7497
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7522
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7538
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7923
+    Length:          19
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          7942
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          8071
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          8151
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          8176
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          8192
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          10586
+    Length:          37
+    ReplacementText: 'DPCT_CHECK_ERROR(data.d_data = (SiftPoint *)sycl::malloc_device(sz, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          10639
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          10922
+    Length:          21
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(data.d_data, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          10943
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          11242
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          11305
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          11330
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          11346
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13143
+    Length:          18
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13162
+    Length:          17
+    ReplacementText: 'd_ScaleDownKernel.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13209
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13225
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13717
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13729
+    Length:          63
+    ReplacementText: '1, iDivUp(src.height, SCALEDOWN_H), iDivUp(src.width, SCALEDOWN_W)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13797
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13810
+    Length:          15
+    ReplacementText: '1, 1, SCALEDOWN_W + 4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          13913
+    Length:          99
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_ScaleDownKernel.init();\n\n      auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr();\n\n      /*\n      DPCT1101:214: 'SCALEDOWN_W + 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> inrow_acc_ct1(sycl::range<1>(68/*SCALEDOWN_W + 4*/), cgh);\n      /*\n      DPCT1101:215: '5 * (SCALEDOWN_W / 2)' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> brow_acc_ct1(sycl::range<1>(160/*5 * (SCALEDOWN_W / 2)*/), cgh);\n      /*\n      DPCT1101:216: 'SCALEDOWN_H + 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<int, 1> yRead_acc_ct1(sycl::range<1>(20/*SCALEDOWN_H + 4*/), cgh);\n      /*\n      DPCT1101:217: 'SCALEDOWN_H + 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<int, 1> yWrite_acc_ct1(sycl::range<1>(20/*SCALEDOWN_H + 4*/), cgh);\n\n      float * res_d_data_ct0 = res.d_data;\n      float * src_d_data_ct1 = src.d_data;\n      int src_width_ct2 = src.width;\n      int src_pitch_ct3 = src.pitch;\n      int src_height_ct4 = src.height;\n      int res_pitch_ct5 = res.pitch;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) {\n          ScaleDown(res_d_data_ct0, src_d_data_ct1, src_width_ct2, src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1, d_ScaleDownKernel_ptr_ct1, inrow_acc_ct1.get_pointer(), brow_acc_ct1.get_pointer(), yRead_acc_ct1.get_pointer(), yWrite_acc_ct1.get_pointer());\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14012
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14025
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14602
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14614
+    Length:          59
+    ReplacementText: '1, iDivUp(res.height, SCALEUP_H), iDivUp(res.width, SCALEUP_W)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14678
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14691
+    Length:          28
+    ReplacementText: '1, SCALEUP_H / 2, SCALEUP_W / 2'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14807
+    Length:          97
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      float * res_d_data_ct0 = res.d_data;\n      float * src_d_data_ct1 = src.d_data;\n      int src_width_ct2 = src.width;\n      int src_pitch_ct3 = src.pitch;\n      int src_height_ct4 = src.height;\n      int res_pitch_ct5 = res.pitch;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) {\n          ScaleUp(res_d_data_ct0, src_d_data_ct1, src_width_ct2, src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1);\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14904
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          14917
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15400
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15412
+    Length:          3
+    ReplacementText: 1, 1, 512
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15420
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15433
+    Length:          3
+    ReplacementText: 1, 1, 256
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15524
+    Length:          119
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_MaxNumPoints.init();\n      d_PointCounter.init();\n\n      auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n      auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n      /*\n      DPCT1101:218: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      /*\n      DPCT1101:219: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 2> img_acc_ct1(sycl::range<2>(19/*WID*/, 19/*WID*/), cgh);\n      /*\n      DPCT1101:220: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      /*\n      DPCT1101:221: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 2> tmp_acc_ct1(sycl::range<2>(19/*WID*/, 19/*WID*/), cgh);\n      /*\n      DPCT1101:222: '2 * LEN' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> hist_acc_ct1(sycl::range<1>(64/*2 * LEN*/), cgh);\n      /*\n      DPCT1101:223: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> gaussx_acc_ct1(sycl::range<1>(19/*WID*/), cgh);\n      /*\n      DPCT1101:224: 'WID' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> gaussy_acc_ct1(sycl::range<1>(19/*WID*/), cgh);\n\n      float * src_d_data_ct0 = src.d_data;\n      int src_width_ct1 = src.width;\n      int src_pitch_ct2 = src.pitch;\n      int src_height_ct3 = src.height;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) {\n          ComputeOrientationsCONSTNew(src_d_data_ct0, src_width_ct1, src_pitch_ct2, src_height_ct3, siftData.d_data, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(), gaussx_acc_ct1.get_pointer(), gaussy_acc_ct1.get_pointer());\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15643
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          15656
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16193
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16205
+    Length:          3
+    ReplacementText: 1, 1, 512
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16213
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16226
+    Length:          5
+    ReplacementText: 1, 8, 16
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16319
+    Length:          104
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_MaxNumPoints.init();\n      d_PointCounter.init();\n\n      auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n      auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n      sycl::local_accessor<float, 1> gauss_acc_ct1(sycl::range<1>(16), cgh);\n      sycl::local_accessor<float, 1> buffer_acc_ct1(sycl::range<1>(128), cgh);\n      sycl::local_accessor<float, 1> sums_acc_ct1(sycl::range<1>(4), cgh);\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n          ExtractSiftDescriptorsCONSTNew(texObj, pitch, siftData.d_data, subsampling, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(), sums_acc_ct1.get_pointer());\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16423
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16436
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16938
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16950
+    Length:          27
+    ReplacementText: '1, 1, iDivUp(siftData.numPts, 64)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16982
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          16995
+    Length:          2
+    ReplacementText: 1, 1, 64
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          17085
+    Length:          78
+    ReplacementText: "dpct::get_in_order_queue().parallel_for(\n    sycl::nd_range<3>(blocks * threads, threads), \n    [=](sycl::nd_item<3> item_ct1) {\n      RescalePositions(siftData.d_data, siftData.numPts, scale, item_ct1);\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          17163
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          17176
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18195
+    Length:          18
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18214
+    Length:          15
+    ReplacementText: 'd_LowPassKernel.get_ptr()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18275
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18291
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18599
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18611
+    Length:          51
+    ReplacementText: '1, iDivUp(height, LOWPASS_H), iDivUp(width, LOWPASS_W)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18680
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18693
+    Length:          28
+    ReplacementText: '1, 4, LOWPASS_W + 2 * LOWPASS_R'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18820
+    Length:          82
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_LowPassKernel.init();\n\n      auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr();\n\n      sycl::local_accessor<float, 2> xrows_acc_ct1(sycl::range<2>(16, 32), cgh);\n\n      float * src_d_data_ct0 = src.d_data;\n      float * res_d_data_ct1 = res.d_data;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n          LowPassBlockOld(src_d_data_ct0, res_d_data_ct1, width, pitch, height, item_ct1, d_LowPassKernel_ptr_ct1, xrows_acc_ct1);\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18902
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          18915
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20407
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20420
+    Length:          25
+    ReplacementText: '1, 1, LAPLACE_W + 2 * LAPLACE_R'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20464
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20476
+    Length:          32
+    ReplacementText: '1, height, iDivUp(width, LAPLACE_W)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20603
+    Length:          103
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_LaplaceKernel.init();\n\n      auto d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr();\n\n      /*\n      DPCT1101:226: '(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<float, 1> buff_acc_ct1(sycl::range<1>(1088/*(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S*/), cgh);\n\n      float * baseImage_d_data_ct0 = baseImage.d_data;\n      float * results_d_data_ct1 = results[0].d_data;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) {\n          LaplaceMultiMem(baseImage_d_data_ct0, results_d_data_ct1, width, pitch, height, octave, item_ct1, d_LaplaceKernel_ptr_ct1, buff_acc_ct1.get_pointer());\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20706
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          20719
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21515
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21527
+    Length:          53
+    ReplacementText: '1, iDivUp(h, MINMAX_H), iDivUp(w, MINMAX_W) * NUM_SCALES'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21585
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21598
+    Length:          12
+    ReplacementText: '1, 1, MINMAX_W + 2'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21698
+    Length:          185
+    ReplacementText: "dpct::get_in_order_queue().submit(\n    [&](sycl::handler &cgh) {\n      d_MaxNumPoints.init();\n      d_PointCounter.init();\n\n      auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n      auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n      /*\n      DPCT1101:227: '2 * MEMWID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n      */\n      sycl::local_accessor<unsigned short, 1> points_acc_ct1(sycl::range<1>(64/*2 * MEMWID*/), cgh);\n\n      float * sources_d_data_ct0 = sources->d_data;\n\n      cgh.parallel_for(\n        sycl::nd_range<3>(blocks * threads, threads), \n        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n          FindPointsMultiNew(sources_d_data_ct0, siftData.d_data, w, p, h, subsampling, lowestScale, thresh, factor, edgeLimit, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, points_acc_ct1.get_pointer());\n        });\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21883
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Offset:          21896
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Offset:          1403
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Offset:          1459
+    Length:          18
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Offset:          1477
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Offset:          3410
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1093:83: The \"0\" device may be not the one intended for use. Adjust the selected device if needed.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Offset:          3412
+    Length:          13
+    ReplacementText: 'dpct::select_device'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1158
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1311
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1420
+    Length:          0
+    ReplacementText: ",\n                     const sycl::nd_item<3> &item_ct1, float *siftPoint,\n                     float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1426
+    Length:          32
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1461
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1510
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1540
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1570
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1599
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1771
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:146: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1936
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:147: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          1997
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:148: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2058
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:149: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2201
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2218
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2253
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:150: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2273
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2383
+    Length:          0
+    ReplacementText: ",\n                      const sycl::nd_item<3> &item_ct1, float *siftPoints1,\n                      float *siftPoints2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2389
+    Length:          39
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2431
+    Length:          39
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2488
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2518
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2559
+    Length:          38
+    ReplacementText: 'dpct::min(numPts1 - 1, (unsigned int)(item_ct1.get_group(2) * 16 + ty))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2633
+    Length:          38
+    ReplacementText: 'dpct::min(numPts2 - 1, (unsigned int)(item_ct1.get_group(1) * 16 + ty))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2841
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:151: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2875
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          2914
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3210
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3266
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3387
+    Length:          0
+    ReplacementText: ",\n                 const sycl::nd_item<3> &item_ct1, float *maxScore,\n                 float *maxScor2, int *maxIndex"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3393
+    Length:          35
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3431
+    Length:          35
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3469
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3520
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3550
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3606
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3624
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          3754
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:152: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          4085
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:153: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          4553
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:49: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          4557
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:154: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          4923
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5029
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, int *maxIndex"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5051
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5100
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5130
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5160
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5224
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5281
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:155: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          5867
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:156: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6387
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:50: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6393
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:157: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6742
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6831
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, float *siftPoint,\n                  float *maxScore, float *maxScor2, int *maxIndex"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6837
+    Length:          32
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6872
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6908
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6944
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          6993
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7055
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7085
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7239
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:158: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7371
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:159: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7650
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          7889
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:160: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8368
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:51: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8372
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:161: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8666
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8755
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, float *siftPoint,\n                  float *maxScore, float *maxScor2, int *maxIndex"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8761
+    Length:          40
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8804
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8840
+    Length:          33
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8876
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8925
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          8955
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          9082
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          9224
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:162: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          9506
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          9745
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:163: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10030
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10390
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10448
+    Length:          0
+    ReplacementText: ', const sycl::nd_item<3> &item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10469
+    Length:          47
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2)), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10623
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:52: The total declared local variable size in device function FindMaxCorr10 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10713
+    Length:          0
+    ReplacementText: ",\n                   const sycl::nd_item<3> &item_ct1, sycl::float4 *buffer1,\n                   sycl::float4 *buffer2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10719
+    Length:          42
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10764
+    Length:          42
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10818
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10842
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10873
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          10946
+    Length:          25
+    ReplacementText: 'sycl::min(bp1 + j, numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11073
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11499
+    Length:          25
+    ReplacementText: 'sycl::min(bp2 + j, numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11611
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11647
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:53: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11651
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:165: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          11906
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12125
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12260
+    Length:          7
+    ReplacementText: 'v1[i].x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12270
+    Length:          4
+    ReplacementText: 'v2.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12304
+    Length:          7
+    ReplacementText: 'v1[i].y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12314
+    Length:          4
+    ReplacementText: 'v2.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12348
+    Length:          7
+    ReplacementText: 'v1[i].z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12358
+    Length:          4
+    ReplacementText: 'v2.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12392
+    Length:          7
+    ReplacementText: 'v1[i].w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12402
+    Length:          4
+    ReplacementText: 'v2.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12693
+    Length:          37
+    ReplacementText: 'sycl::min(bp2 + M7R * iy + dy, numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12857
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:54: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          12861
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:166: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          13292
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:164: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          13597
+    Length:          25
+    ReplacementText: 'sycl::max(max_score, sec_score)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14313
+    Length:          33
+    ReplacementText: 'dpct::global_memory<volatile int, 0> lock(0);'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14348
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:55: The total declared local variable size in device function FindMaxCorr9 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14437
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, volatile int &lock,\n                  sycl::float4 *siftParts1, sycl::float4 *siftParts2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14443
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14509
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14656
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14686
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14737
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14810
+    Length:          43
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14867
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          14991
+    Length:          33
+    ReplacementText: 'sycl::min(FMC_GH, numPts2 - FMC_BH + 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          15093
+    Length:          47
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          15156
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          15536
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:56: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          15540
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:169: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          15983
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16180
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16339
+    Length:          8
+    ReplacementText: 'v1[ix].x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16350
+    Length:          4
+    ReplacementText: 'v2.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16394
+    Length:          8
+    ReplacementText: 'v1[ix].y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16405
+    Length:          4
+    ReplacementText: 'v2.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16449
+    Length:          8
+    ReplacementText: 'v1[ix].z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16460
+    Length:          4
+    ReplacementText: 'v2.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16504
+    Length:          8
+    ReplacementText: 'v1[ix].w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16515
+    Length:          4
+    ReplacementText: 'v2.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16551
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:60: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16557
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:173: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16655
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16842
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          16995
+    Length:          8
+    ReplacementText: 'v1[ix].x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17006
+    Length:          4
+    ReplacementText: 'v2.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17048
+    Length:          8
+    ReplacementText: 'v1[ix].y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17059
+    Length:          4
+    ReplacementText: 'v2.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17101
+    Length:          8
+    ReplacementText: 'v1[ix].z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17112
+    Length:          4
+    ReplacementText: 'v2.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17154
+    Length:          8
+    ReplacementText: 'v1[ix].w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17165
+    Length:          4
+    ReplacementText: 'v2.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17195
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:57: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17199
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:170: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17433
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:58: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17437
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:171: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17692
+    Length:          45
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17822
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:59: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17826
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:172: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17864
+    Length:          43
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17936
+    Length:          29
+    ReplacementText: 'dpct::atomic_compare_exchange_strong<sycl::access::address_space::generic_space>((int *)&lock, 0, 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          17982
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:167: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18155
+    Length:          30
+    ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18531
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:168: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18568
+    Length:          27
+    ReplacementText: 'dpct::atomic_exchange<sycl::access::address_space::generic_space>((int *)&lock, 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18600
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:61: The total declared local variable size in device function FindMaxCorr8 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18689
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, volatile int &lock,\n                  sycl::float4 *siftParts1, sycl::float4 *siftParts2,\n                  float *blksums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18695
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18761
+    Length:          46
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18827
+    Length:          42
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18908
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18938
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          18989
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19062
+    Length:          43
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19119
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19243
+    Length:          33
+    ReplacementText: 'sycl::min(FMC_GH, numPts2 - FMC_BH + 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19345
+    Length:          47
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19408
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19826
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:64: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19832
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:178: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          19905
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20096
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20249
+    Length:          8
+    ReplacementText: 'v1[ix].x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20260
+    Length:          4
+    ReplacementText: 'v2.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20304
+    Length:          8
+    ReplacementText: 'v1[ix].y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20315
+    Length:          4
+    ReplacementText: 'v2.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20359
+    Length:          8
+    ReplacementText: 'v1[ix].z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20370
+    Length:          4
+    ReplacementText: 'v2.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20414
+    Length:          8
+    ReplacementText: 'v1[ix].w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20425
+    Length:          4
+    ReplacementText: 'v2.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20461
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:65: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20467
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:179: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20708
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:62: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20712
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:176: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          20967
+    Length:          45
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21097
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:63: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21101
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:177: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21139
+    Length:          43
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21211
+    Length:          29
+    ReplacementText: 'dpct::atomic_compare_exchange_strong<sycl::access::address_space::generic_space>((int *)&lock, 0, 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21257
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:174: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21430
+    Length:          30
+    ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21806
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:175: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21843
+    Length:          27
+    ReplacementText: 'dpct::atomic_exchange<sycl::access::address_space::generic_space>((int *)&lock, 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21875
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:66: The total declared local variable size in device function FindMaxCorr7 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21964
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, volatile int &lock,\n                  float *siftParts1, float *siftParts2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          21970
+    Length:          37
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22033
+    Length:          37
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22112
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22128
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22151
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22167
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22205
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22235
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22266
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22314
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22330
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22489
+    Length:          48
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22549
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22565
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22709
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:67: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22713
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:182: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22860
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:70: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22866
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:185: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          22990
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23101
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23174
+    Length:          5
+    ReplacementText: 'p1v.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23182
+    Length:          5
+    ReplacementText: 'p2v.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23211
+    Length:          5
+    ReplacementText: 'p1v.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23219
+    Length:          5
+    ReplacementText: 'p2v.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23248
+    Length:          5
+    ReplacementText: 'p1v.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23256
+    Length:          5
+    ReplacementText: 'p2v.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23285
+    Length:          5
+    ReplacementText: 'p1v.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23293
+    Length:          5
+    ReplacementText: 'p2v.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23330
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:71: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23336
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:186: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23506
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:68: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23510
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:183: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23748
+    Length:          47
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23880
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:69: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23884
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:184: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23922
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          23999
+    Length:          29
+    ReplacementText: 'dpct::atomic_compare_exchange_strong<sycl::access::address_space::generic_space>((int *)&lock, 0, 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24045
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:180: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24213
+    Length:          30
+    ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24589
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:181: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24636
+    Length:          27
+    ReplacementText: 'dpct::atomic_exchange<sycl::access::address_space::generic_space>((int *)&lock, 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24668
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24757
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, volatile int &lock,\n                  float *siftParts2, float *sums"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24827
+    Length:          38
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24907
+    Length:          31
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24956
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          24986
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25017
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25092
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25124
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25277
+    Length:          43
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k + ty), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25378
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25433
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25454
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:72: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25458
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:189: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25520
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25552
+    Length:          6
+    ReplacementText: 'sycl::float4'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25606
+    Length:          7
+    ReplacementText: 'part1.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25616
+    Length:          7
+    ReplacementText: 'part2.x()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25626
+    Length:          7
+    ReplacementText: 'part1.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25636
+    Length:          7
+    ReplacementText: 'part2.y()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25646
+    Length:          7
+    ReplacementText: 'part1.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25656
+    Length:          7
+    ReplacementText: 'part2.z()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25666
+    Length:          7
+    ReplacementText: 'part1.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25676
+    Length:          7
+    ReplacementText: 'part2.w()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25715
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25747
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25779
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25811
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25843
+    Length:          0
+    ReplacementText: ', item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25904
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:73: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          25908
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:190: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26157
+    Length:          42
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k + j), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26284
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:74: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26288
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:191: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26346
+    Length:          29
+    ReplacementText: 'dpct::atomic_compare_exchange_strong<sycl::access::address_space::generic_space>((int *)&lock, 0, 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26392
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:187: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26458
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          26630
+    Length:          30
+    ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27006
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:188: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27053
+    Length:          27
+    ReplacementText: 'dpct::atomic_exchange<sycl::access::address_space::generic_space>((int *)&lock, 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27085
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27174
+    Length:          0
+    ReplacementText: ",\n                  const sycl::nd_item<3> &item_ct1, volatile int &lock,\n                  float *siftParts1, float *siftParts2"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27180
+    Length:          37
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27243
+    Length:          37
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27337
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27367
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27398
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27610
+    Length:          48
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27887
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:77: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          27893
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:196: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28011
+    Length:          0
+    ReplacementText: "      /*\n      DPCT1118:78: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n      */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28017
+    Length:          15
+    ReplacementText: "/*\n      DPCT1065:197: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n      */\n      item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28100
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:75: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28104
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:194: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28342
+    Length:          47
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), numPts2 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28474
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:76: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28478
+    Length:          15
+    ReplacementText: "/*\n    DPCT1065:195: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n    */\n    item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28516
+    Length:          38
+    ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28593
+    Length:          29
+    ReplacementText: 'dpct::atomic_compare_exchange_strong<sycl::access::address_space::generic_space>((int *)&lock, 0, 1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28639
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:192: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          28807
+    Length:          30
+    ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          29183
+    Length:          15
+    ReplacementText: "/*\n  DPCT1065:193: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n  */\n  item_ct1.barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          29230
+    Length:          27
+    ReplacementText: 'dpct::atomic_exchange<sycl::access::address_space::generic_space>((int *)&lock, 0)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          29282
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:79: The total declared local variable size in device function InvertMatrix exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          29651
+    Length:          16
+    ReplacementText: 'sycl::fabs(elem[i][j])'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          30547
+    Length:          9
+    ReplacementText: 'sycl::fabs(sum)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          31887
+    Length:          11
+    ReplacementText: "/*\nDPCT1110:80: The total declared local variable size in device function ComputeHomographies exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32010
+    Length:          0
+    ReplacementText: ",\n                                    const sycl::nd_item<3> &item_ct1"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32072
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32101
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32132
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32177
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32190
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          32859
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33036
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33195
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33332
+    Length:          0
+    ReplacementText: ",\n                                 const sycl::nd_item<3> &item_ct1, float *homo,\n                                 int *cnts"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33338
+    Length:          42
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33383
+    Length:          53
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33454
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(2)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33484
+    Length:          11
+    ReplacementText: 'item_ct1.get_local_id(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33515
+    Length:          10
+    ReplacementText: 'item_ct1.get_group(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33528
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33568
+    Length:          10
+    ReplacementText: 'item_ct1.get_local_range(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33581
+    Length:          9
+    ReplacementText: 'item_ct1.get_group_range(1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          33684
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34005
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:198: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34022
+    Length:          19
+    ReplacementText: 'a[0] * x1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34044
+    Length:          19
+    ReplacementText: 'a[1] * y1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34072
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:199: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34089
+    Length:          19
+    ReplacementText: 'a[3] * x1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34111
+    Length:          19
+    ReplacementText: 'a[4] * y1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34139
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:200: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34156
+    Length:          19
+    ReplacementText: 'a[6] * x1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34178
+    Length:          19
+    ReplacementText: 'a[7] * y1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34206
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:201: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34223
+    Length:          19
+    ReplacementText: 'x2 * deno'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34251
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:202: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34268
+    Length:          19
+    ReplacementText: 'y2 * deno'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34296
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:203: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34313
+    Length:          21
+    ReplacementText: 'errx * errx'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34337
+    Length:          21
+    ReplacementText: 'erry * erry'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34360
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1013:204: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34375
+    Length:          41
+    ReplacementText: 'thresh2 * deno * deno'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34494
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34643
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1118:81: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34647
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          34755
+    Length:          15
+    ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          35669
+    Length:          59
+    ReplacementText: 'DPCT_CHECK_ERROR(d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          35742
+    Length:          41
+    ReplacementText: 'DPCT_CHECK_ERROR(d_randPts = (int *)sycl::malloc_device(randSize, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          35797
+    Length:          58
+    ReplacementText: 'DPCT_CHECK_ERROR(d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36492
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36541
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36566
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36580
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36633
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          36658
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38437
+    Length:          44
+    ReplacementText: 'DPCT_CHECK_ERROR(temp3 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38497
+    Length:          44
+    ReplacementText: 'DPCT_CHECK_ERROR(temp4 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38557
+    Length:          44
+    ReplacementText: 'DPCT_CHECK_ERROR(temp5 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38617
+    Length:          44
+    ReplacementText: 'DPCT_CHECK_ERROR(temp6 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38949
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          38990
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39015
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39031
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39070
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39118
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39145
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39161
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39209
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39236
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39252
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39306
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39333
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39349
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39403
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39430
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39496
+    Length:          80
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        float * d_coord_numPtsUp_ct1 = &d_coord[0 * numPtsUp];\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n          [=](sycl::nd_item<3> item_ct1) {\n            memcopyKernel(temp3, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39576
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39578
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1010:205: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39591
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39625
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39655
+    Length:          80
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        float * d_coord_numPtsUp_ct1 = &d_coord[1 * numPtsUp];\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n          [=](sycl::nd_item<3> item_ct1) {\n            memcopyKernel(temp4, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39735
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39737
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1010:206: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39750
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39784
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39814
+    Length:          80
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        float * d_coord_numPtsUp_ct1 = &d_coord[2 * numPtsUp];\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n          [=](sycl::nd_item<3> item_ct1) {\n            memcopyKernel(temp5, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39894
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39896
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1010:207: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39909
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39943
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          39973
+    Length:          80
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        float * d_coord_numPtsUp_ct1 = &d_coord[3 * numPtsUp];\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n          [=](sycl::nd_item<3> item_ct1) {\n            memcopyKernel(temp6, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40053
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40055
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1010:208: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40068
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40102
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40405
+    Length:          80
+    ReplacementText: "dpct::get_in_order_queue().parallel_for(\n      sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) * sycl::range<3>(1, 1, 16), sycl::range<3>(1, 1, 16)), \n      [=](sycl::nd_item<3> item_ct1) {\n        ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1);\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40485
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40500
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40920
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40932
+    Length:          28
+    ReplacementText: '1, numLoops / TESTHOMO_LOOPS, 1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40967
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          40980
+    Length:          30
+    ReplacementText: 1, TESTHOMO_LOOPS, TESTHOMO_TESTS
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41104
+    Length:          92
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        /*\n        DPCT1101:210: '8 * TESTHOMO_LOOPS' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n        */\n        sycl::local_accessor<float, 1> homo_acc_ct1(sycl::range<1>(128/*8 * TESTHOMO_LOOPS*/), cgh);\n        /*\n        DPCT1101:211: 'TESTHOMO_TESTS * TESTHOMO_LOOPS' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n        */\n        sycl::local_accessor<int, 1> cnts_acc_ct1(sycl::range<1>(256/*TESTHOMO_TESTS * TESTHOMO_LOOPS*/), cgh);\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(blocks * threads, threads), \n          [=](sycl::nd_item<3> item_ct1) {\n            TestHomographies(d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh, item_ct1, homo_acc_ct1.get_pointer(), cnts_acc_ct1.get_pointer());\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41196
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41211
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41720
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41775
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41800
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          41816
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42339
+    Length:          12
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::dpct_memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42424
+    Length:          22
+    ReplacementText: 'dpct::device_to_host'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42447
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42463
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42688
+    Length:          15
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp3, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42703
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42719
+    Length:          15
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp4, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42734
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42750
+    Length:          15
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp5, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42765
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42781
+    Length:          15
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp6, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42796
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42884
+    Length:          16
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_homo, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42900
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42914
+    Length:          19
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_randPts, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42933
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42947
+    Length:          17
+    ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_coord, dpct::get_in_order_queue())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          42964
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45682
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45698
+    Length:          41
+    ReplacementText: '1, iDivUp(numPts2, 512), iDivUp(numPts1, 16)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45744
+    Length:          4
+    ReplacementText: 'sycl::range<3>'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45761
+    Length:          6
+    ReplacementText: 1, 16, 16
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45856
+    Length:          57
+    ReplacementText: "dpct::get_in_order_queue().parallel_for(\n    sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), \n    [=](sycl::nd_item<3> item_ct1) {\n      CleanMatches(sift1, numPts1, item_ct1);\n    });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45913
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45915
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1010:209: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45926
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          45958
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47433
+    Length:          26
+    ReplacementText: 'sycl::range<3>(1, 1, iDivUp(numPts1, M7W))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47479
+    Length:          20
+    ReplacementText: 'sycl::range<3>(1, M7H / M7R, M7W)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47587
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1049:82: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47591
+    Length:          74
+    ReplacementText: "dpct::get_in_order_queue().submit(\n      [&](sycl::handler &cgh) {\n        /*\n        DPCT1101:212: 'M7W * NDIM / 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n        */\n        sycl::local_accessor<sycl::float4, 1> buffer1_acc_ct1(sycl::range<1>(1024/*M7W * NDIM / 4*/), cgh);\n        /*\n        DPCT1101:213: 'M7H * NDIM / 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n        */\n        sycl::local_accessor<sycl::float4, 1> buffer2_acc_ct1(sycl::range<1>(1024/*M7H * NDIM / 4*/), cgh);\n\n        cgh.parallel_for(\n          sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), \n          [=](sycl::nd_item<3> item_ct1) {\n            FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1, buffer1_acc_ct1.get_pointer(), buffer2_acc_ct1.get_pointer());\n          });\n      });"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: true
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47665
+    Length:          1
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          47680
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          48409
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          48466
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          48491
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Offset:          48507
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+MainSourceFilesDigest:
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu'
+    Digest:          59bd519d549bdc5cf38176784f41e490
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu'
+    Digest:          504c8e0303973e4f03d7dc8e37014b37
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu'
+    Digest:          bcafa76012a74a51ca2537c0be6c3ff9
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp'
+    Digest:          7ac60955dd21b882e666af92155acd17
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu'
+    Digest:          521f5b6ac7d703c12bf5041028124e6a
+DpctVersion:     18.0.0
+MainHelperFileName: ''
+USMLevel:        ''
+FeatureMap:      {}
+CompileTargets:
+  cudasift:
+    - MigratedFileName: '/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp'
+      CompileOptions:  '-isystem /usr/include/opencv4 -O3 -std=gnu++17 '
+      Compiler:        'c++'
+    - MigratedFileName: './geomFuncs.cpp'
+      CompileOptions:  '-isystem /usr/include/opencv4 -O3 -std=gnu++17 '
+      Compiler:        'c++'
+    - MigratedFileName: './mainSift.cpp.dp.cpp'
+      CompileOptions:  '-isystem /usr/include/opencv4 -O3 -std=c++17 -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) '
+      Compiler:        'c++'
+    - MigratedFileName: './cudaImage.dp.cpp'
+      CompileOptions:  '-O3 -DNVCC '
+      Compiler:        nvcc
+    - MigratedFileName: './cudaSiftH.dp.cpp'
+      CompileOptions:  '-O3 -DNVCC '
+      Compiler:        nvcc
+    - MigratedFileName: './matching.dp.cpp'
+      CompileOptions:  '-O3 -DNVCC '
+      Compiler:        nvcc
+OptionMap:
+  AnalysisScopePath:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA'
+    Specified:       false
+  AsyncHandler:
+    Value:           'false'
+    Specified:       false
+  CommentsEnabled:
+    Value:           'false'
+    Specified:       false
+  CompilationsDir:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build'
+    Specified:       true
+  CtadEnabled:
+    Value:           'false'
+    Specified:       false
+  EnablepProfiling:
+    Value:           'false'
+    Specified:       false
+  ExperimentalFlag:
+    Value:           '0'
+    Specified:       false
+  ExplicitClNamespace:
+    Value:           'false'
+    Specified:       false
+  ExplicitNamespace:
+    Value:           '20'
+    Specified:       false
+  ExtensionDDFlag:
+    Value:           '0'
+    Specified:       false
+  ExtensionDEFlag:
+    Value:           '4294967295'
+    Specified:       false
+  HelperFuncPreferenceFlag:
+    Value:           '0'
+    Specified:       false
+  NDRangeDim:
+    Value:           '3'
+    Specified:       false
+  NoDRYPattern:
+    Value:           'false'
+    Specified:       false
+  NoUseGenericSpace:
+    Value:           ''
+    Specified:       true
+  OptimizeMigration:
+    Value:           'false'
+    Specified:       false
+  ProcessAll:
+    Value:           'false'
+    Specified:       false
+  RuleFile:
+    Value:           ''
+    Specified:       false
+  SyclNamedLambda:
+    Value:           'false'
+    Specified:       false
+  UsmLevel:
+    Value:           '1'
+    Specified:       false
+...
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct
new file mode 100644
index 000000000..f0724e067
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct
@@ -0,0 +1,71 @@
+CC := icpx
+
+LD := $(CC)
+
+#DPCT2001:228: You can link with more library by add them here.
+LIB :=  -lopencv_core -lopencv_imgcodecs
+
+FLAGS := 
+
+ifeq ($(shell which $(CC)),)
+    $(error ERROR - $(CC) compiler not found)
+endif
+
+ROOT_DIR     := $(shell dirname $(shell which $(CC)))
+INCLUDE_SYCL := $(ROOT_DIR)/../include
+INCLUDE_CL   := $(ROOT_DIR)/../include/sycl
+
+TARGET_0_SRC_0 = /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp
+TARGET_0_OBJ_0 = /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o
+TARGET_0_FLAG_0 = -isystem /usr/include/opencv4 -O3 -std=gnu++17 ${FLAGS}
+
+TARGET_0_SRC_1 = ./geomFuncs.cpp
+TARGET_0_OBJ_1 = ./geomFuncs.o
+TARGET_0_FLAG_1 = -isystem /usr/include/opencv4 -O3 -std=gnu++17 ${FLAGS}
+
+TARGET_0_SRC_2 = ./mainSift.cpp.dp.cpp
+TARGET_0_OBJ_2 = ./mainSift.cpp.dp.o
+TARGET_0_FLAG_2 = -isystem /usr/include/opencv4 -O3 -std=c++17 -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS}
+
+TARGET_0_SRC_3 = ./cudaImage.dp.cpp
+TARGET_0_OBJ_3 = ./cudaImage.dp.o
+TARGET_0_FLAG_3 = -O3 -DNVCC ${FLAGS}
+
+TARGET_0_SRC_4 = ./cudaSiftH.dp.cpp
+TARGET_0_OBJ_4 = ./cudaSiftH.dp.o
+TARGET_0_FLAG_4 = -O3 -DNVCC ${FLAGS}
+
+TARGET_0_SRC_5 = ./matching.dp.cpp
+TARGET_0_OBJ_5 = ./matching.dp.o
+TARGET_0_FLAG_5 = -O3 -DNVCC ${FLAGS}
+
+TARGET_0 := cudasift
+
+TARGET :=  ${TARGET_0}
+
+.PHONY:all clean
+OBJS_0 :=  ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5}
+all: $(TARGET)
+$(TARGET_0): $(OBJS_0)
+	$(CC) -fsycl -o $@ $^ $(LIB) 
+
+$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0)
+	c++ -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0)
+
+$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1)
+	c++ -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1)
+
+$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2)
+	c++ -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2)
+
+$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3)
+	$(CC) -fsycl -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3)
+
+$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4)
+	$(CC) -fsycl -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4)
+
+$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5)
+	$(CC) -fsycl -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5)
+
+clean:
+	rm -f  ${OBJS_0} $(TARGET)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp
new file mode 100644
index 000000000..6c230dd44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp
@@ -0,0 +1,83 @@
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <iostream>
+
+#include "Utility.h"
+
+using namespace Utility;
+
+int Utility::RunDataVerification(const int threshold, const float matchPercentage)
+{
+    printf("Performing data verification \n");
+    switch (threshold)
+    {
+    case 1:
+        if (matchPercentage > 20.0f && matchPercentage < 30.0f)
+        {
+            printf("Data verification is SUCCESSFUL. \n\n");
+        }
+        else
+        {
+            printf("Data verification FAILED. \n\n");
+            return -1;
+        }
+        break;
+    case 2:
+        if (matchPercentage > 26.0f && matchPercentage < 38.0f)
+        {
+            printf("Data verification is SUCCESSFUL. \n\n");
+        }
+        else
+        {
+            printf("Data verification FAILED. \n\n");
+            return -1;
+        }
+        break;
+    case 3:
+        if (matchPercentage > 35.0f && matchPercentage < 45.0f)
+        {
+            printf("Data verification is SUCCESSFUL. \n\n");
+        }
+        else
+        {
+            printf("Data verification FAILED. \n\n");
+            return -1;
+        }
+        break;
+    case 4:
+        if (matchPercentage > 40.0f && matchPercentage < 50.0f)
+        {
+            printf("Data verification is SUCCESSFUL. \n\n");
+        }
+        else
+        {
+            printf("Data verification FAILED. \n\n");
+            return -1;
+        }
+        break;
+    default:
+        printf("Threshold values should be in the range [1, 4]. \n\n");
+        return -1;
+    }
+    return 0;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h
new file mode 100644
index 000000000..da09d2d78
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h
@@ -0,0 +1,31 @@
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef UTILITY_H
+#define UTILITY_H
+
+namespace Utility
+{
+    int RunDataVerification(const int thresh, const float matchPercentage);
+
+}
+#endif // UTILITY_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o
new file mode 100644
index 000000000..220855fcd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp
new file mode 100644
index 000000000..e799acdb9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp
@@ -0,0 +1,116 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <cstdio>
+#include <chrono>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+// Integer helpers; b must be non-zero. For a >= 0 and b > 0: round a / b up or down, and round a up or down to a multiple of b.
+int iDivUp(int a, int b) { const int q = a / b; return (a % b == 0) ? q : q + 1; }
+int iDivDown(int a, int b) { const int q = a / b; return q; }
+int iAlignUp(int a, int b) { const int r = a % b; return (r == 0) ? a : a - r + b; }
+int iAlignDown(int a, int b) { const int r = a % b; return a - r; }
+// Binds this image to a w-by-h float buffer, allocating whatever storage the caller did not supply.
+// p is the pitch (in floats) of a caller-supplied devmem; when devmem is NULL, pitched device memory is
+// allocated and pitch is recomputed from it. DEVICE_TIMER builds add the allocation time (us) to totTime.
+void CudaImage::Allocate(int w, int h, int p, bool host, float &totTime, float *devmem, float *hostmem)
+{
+  width = w;
+  height = h;
+  pitch = p;
+  d_data = devmem;
+  h_data = hostmem;
+  t_data = NULL;
+  if (devmem == NULL)
+  {
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    size_t pitchBytes = 0; // real size_t out-parameter; the int member must not be aliased as size_t
+    safeCall(DPCT_CHECK_ERROR(
+        d_data = (float *)dpct::dpct_malloc(pitchBytes, sizeof(float) * width, (size_t)height)));
+    safeCall(DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    std::cout << "Allocate Time is " << std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count() << " us" << std::endl;
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+    pitch = (int)(pitchBytes / sizeof(float)); // the member stores the pitch in floats, not bytes
+    if (d_data == NULL)
+      printf("Failed to allocate device data\n");
+    d_internalAlloc = true;
+  }
+  if (host && hostmem == NULL)
+  {
+    h_data = (float *)malloc(sizeof(float) * pitch * height);
+    h_internalAlloc = true;
+  }
+}
+// Default state: zero dimensions, all three data pointers NULL and both ownership flags cleared.
+CudaImage::CudaImage()
+    : width(0), height(0), pitch(0), h_data(NULL), d_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false)
+{}
+// Frees d_data / h_data only when this object allocated them (see the *_internalAlloc flags) and deletes
+// the image_matrix behind t_data if one is attached.
+CudaImage::~CudaImage()
+{
+  if (d_data != NULL && d_internalAlloc)
+    safeCall(DPCT_CHECK_ERROR(sycl::free(d_data, dpct::get_in_order_queue())));
+  if (h_data != NULL && h_internalAlloc)
+    free(h_data);
+  if (t_data != NULL)
+    safeCall(DPCT_CHECK_ERROR(delete (dpct::image_matrix *)t_data));
+  // The three releases are independent of one another, so the pointers are cleared together at the end.
+  d_data = h_data = t_data = NULL;
+}
+// Copies the host image into the pitched device buffer and waits for the copy to finish. Returns the elapsed
+// time in microseconds (also added to totTime) when DEVICE_TIMER is defined, otherwise 0.0.
+double CudaImage::Download(float &totTime)
+{
+  double downloadTime = 0.0;
+  if (d_data == NULL || h_data == NULL)
+    return downloadTime; // nothing to copy unless both buffers exist
+  const int devPitchBytes = sizeof(float) * pitch;
+  const size_t rowBytes = sizeof(float) * width;
+#ifdef DEVICE_TIMER
+  auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+  // Source rows are tightly packed (width floats each); destination rows are pitch floats apart.
+  safeCall(DPCT_CHECK_ERROR(dpct::dpct_memcpy(d_data, devPitchBytes, h_data, rowBytes, rowBytes, height,
+                                              dpct::host_to_device)));
+  safeCall(DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+  auto stop_memcpy = std::chrono::steady_clock::now();
+  const float elapsedUs = std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+  totTime += elapsedUs;
+  downloadTime = elapsedUs;
+  std::cout << "Download Time is " << downloadTime << " us" << std::endl;
+#endif
+  return downloadTime;
+}
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o
new file mode 100644
index 000000000..c9edccaa2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h
new file mode 100644
index 000000000..737446686
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h
@@ -0,0 +1,38 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+#ifndef CUDAIMAGE_H
+#define CUDAIMAGE_H
+// Image wrapper: dimensions, a host buffer, a device buffer, an optional t_data handle and two ownership flags.
+class CudaImage
+{
+public:
+  int width, height; // image size passed to Allocate
+  int pitch; // row stride of d_data in floats (Allocate divides the allocated byte pitch by sizeof(float))
+  float *h_data; // host buffer; may be caller-supplied
+  float *d_data; // device buffer; may be caller-supplied
+  float *t_data; // the destructor deletes this as a dpct::image_matrix when it is non-NULL
+  bool d_internalAlloc; // set by Allocate when it allocated d_data itself
+  bool h_internalAlloc; // set by Allocate when it allocated h_data itself
+
+public:
+  CudaImage(); // zero-initialises every member
+  CudaImage(const CudaImage&) = delete; // copying is disabled
+  CudaImage& operator=(const CudaImage&) = delete; // copy-assignment is disabled
+  ~CudaImage(); // frees only the buffers flagged as internally allocated, plus t_data
+  void Allocate(int width, int height, int pitch, bool withHost, float &totTime, float *devMem = NULL, float *hostMem = NULL);
+  double Download(float &totTime); // host-to-device copy; returns elapsed microseconds in DEVICE_TIMER builds, else 0.0
+  double Readback(); // NOTE(review): not defined in cudaImage.dp.cpp - confirm where it lives
+  double InitTexture(); // NOTE(review): not defined in cudaImage.dp.cpp - confirm where it lives
+  double CopyToTexture(CudaImage &dst, bool host); // NOTE(review): not defined in cudaImage.dp.cpp - confirm where it lives
+};
+// Free helper declarations; the iDiv*/iAlign* functions are defined in cudaImage.dp.cpp.
+int iDivUp(int a, int b);
+int iDivDown(int a, int b);
+int iAlignUp(int a, int b);
+int iAlignDown(int a, int b);
+void StartTimer(unsigned int *hTimer); // NOTE(review): no definition in cudaImage.dp.cpp - confirm it is still needed
+double StopTimer(unsigned int hTimer); // NOTE(review): no definition in cudaImage.dp.cpp - confirm it is still needed
+
+#endif // CUDAIMAGE_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h
new file mode 100644
index 000000000..00903f8f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h
@@ -0,0 +1,48 @@
+#ifndef CUDASIFT_H
+#define CUDASIFT_H
+
+#include "cudaImage.h"
+// One SIFT feature record. NOTE(review): fields without a comment are not used in the code reviewed here - meanings to be confirmed.
+typedef struct dpct_type_558722
+{
+  float xpos; // multiplied by 'subsampling' at the end of ExtractSiftDescriptors
+  float ypos; // multiplied by 'subsampling' at the end of ExtractSiftDescriptors
+  float scale; // multiplied by 'subsampling' at the end of ExtractSiftDescriptors
+  float sharpness;
+  float edgeness;
+  float orientation; // the descriptor kernels multiply this by 2*pi/360, i.e. treat it as degrees
+  float score;
+  float ambiguity;
+  int match;
+  float match_xpos;
+  float match_ypos;
+  float match_error;
+  float subsampling;
+  float empty[3]; // presumably padding so that data starts after 16 four-byte fields - confirm
+  float data[128]; // descriptor; ExtractSiftDescriptors writes one element per work-item index
+} SiftPoint;
+// Bookkeeping for an array of SiftPoint records; which pointer members exist depends on MANAGEDMEM.
+typedef struct dpct_type_948814
+{
+  int numPts; // Number of available Sift points
+  int maxPts; // Number of allocated Sift points
+#ifdef MANAGEDMEM
+  SiftPoint *m_data; // Managed data
+#else
+  SiftPoint *h_data; // Host (CPU) data
+  SiftPoint *d_data; // Device (GPU) data
+#endif
+} SiftData;
+// Library entry points (defined outside this header). NOTE(review): totTime / matchTime look like in-out timing accumulators - confirm units.
+void InitCuda(int devNum = 0);
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false);
+void FreeSiftTempMemory(float *memoryTmp);
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                 float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0);
+void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true);
+void FreeSiftData(SiftData &data);
+void PrintSiftData(SiftData &data);
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime);
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml
new file mode 100644
index 000000000..961853067
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml
@@ -0,0 +1,91 @@
+---
+MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h'
+Replacements:
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h'
+    Offset:          77
+    Length:          0
+    ReplacementText: ' dpct_type_558722'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h'
+    Offset:          375
+    Length:          0
+    ReplacementText: ' dpct_type_948814'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+MainSourceFilesDigest:
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h'
+    Digest:          3cca4f7dd3623244964a8145ffe4cdbe
+DpctVersion:     18.0.0
+MainHelperFileName: ''
+USMLevel:        ''
+FeatureMap:      {}
+CompileTargets:  {}
+OptionMap:
+  AnalysisScopePath:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA'
+    Specified:       false
+  AsyncHandler:
+    Value:           'false'
+    Specified:       false
+  CommentsEnabled:
+    Value:           'false'
+    Specified:       false
+  CompilationsDir:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build'
+    Specified:       true
+  CtadEnabled:
+    Value:           'false'
+    Specified:       false
+  EnablepProfiling:
+    Value:           'false'
+    Specified:       false
+  ExperimentalFlag:
+    Value:           '0'
+    Specified:       false
+  ExplicitClNamespace:
+    Value:           'false'
+    Specified:       false
+  ExplicitNamespace:
+    Value:           '20'
+    Specified:       false
+  ExtensionDDFlag:
+    Value:           '0'
+    Specified:       false
+  ExtensionDEFlag:
+    Value:           '4294967295'
+    Specified:       false
+  HelperFuncPreferenceFlag:
+    Value:           '0'
+    Specified:       false
+  NDRangeDim:
+    Value:           '3'
+    Specified:       false
+  NoDRYPattern:
+    Value:           'false'
+    Specified:       false
+  NoUseGenericSpace:
+    Value:           ''
+    Specified:       true
+  OptimizeMigration:
+    Value:           'false'
+    Specified:       false
+  ProcessAll:
+    Value:           'false'
+    Specified:       false
+  RuleFile:
+    Value:           ''
+    Specified:       false
+  SyclNamedLambda:
+    Value:           'false'
+    Specified:       false
+  UsmLevel:
+    Value:           '1'
+    Specified:       false
+...
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp
new file mode 100644
index 000000000..9ff5263e9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp
@@ -0,0 +1,2888 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include "cudautils.h"
+#include "cudaSiftD.h"
+#include "cudaSift.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Kernel configuration
+///////////////////////////////////////////////////////////////////////////////
+
+static dpct::constant_memory<int, 0> d_MaxNumPoints; // scalar; ExtractSiftDescriptorsCONSTNew receives it as a plain int argument
+dpct::global_memory<unsigned int, 1> d_PointCounter(8 * 2 + 1); // 17 counters; the only object here without internal linkage
+static dpct::constant_memory<float, 1> d_ScaleDownKernel(5); // 5 entries; the ScaleDown* kernels below read entries 0..2 only
+static dpct::constant_memory<float, 1> d_LowPassKernel(2 * LOWPASS_R + 1); // 2 * LOWPASS_R + 1 entries
+static dpct::constant_memory<float, 1> d_LaplaceKernel(8 * 12 * 16); // 1536 entries
+
+///////////////////////////////////////////////////////////////////////////////
+// Lowpass filter and subsample image
+///////////////////////////////////////////////////////////////////////////////
+void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+                         const sycl::nd_item<3> &item_ct1,
+                         float const *d_ScaleDownKernel, float *brows)
+{
+#define BW (SCALEDOWN_W + 4)
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2)
+#define H2 (SCALEDOWN_H / 2)
+  // Pass 1 stores a horizontally filtered value per work-item in brows (BW floats per row); pass 2 filters vertically and subsamples.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx;
+  const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty;
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  const int xl = sycl::min(width - 1, sycl::max(0, xp - 2)); // source column: shifted left by 2, clamped to the image
+  const int yl = sycl::min(height - 1, sycl::max(0, yp - 2)); // source row: shifted up by 2, clamped to the image
+  if (xp < (width + 4) && yp < (height + 4))
+  {
+    float v = d_Data[yl * pitch + xl]; // ShiftDown is defined elsewhere; presumably it fetches v from a lane further along - confirm
+    brows[BW * ty + tx] =
+        k0 * (v + ShiftDown(v, 4, item_ct1)) +
+        k1 * (ShiftDown(v, 1, item_ct1) + ShiftDown(v, 3, item_ct1)) +
+        k2 * ShiftDown(v, 2, item_ct1);
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // brows must be complete before the vertical pass reads it
+  const int xs = item_ct1.get_group(2) * W2 + tx;
+  const int ys = item_ct1.get_group(1) * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; // every second column and row of brows
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW];
+  }
+}
+// Lowpass-and-halve kernel using two local buffers: irows holds raw pixels (BW per row), brows the horizontally filtered ones (W2 per row).
+void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+                    const sycl::nd_item<3> &item_ct1,
+                    float const *d_ScaleDownKernel, float *irows, float *brows)
+{
+#define BW (SCALEDOWN_W + 4)
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2)
+#define H2 (SCALEDOWN_H / 2)
+
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx;
+  const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty;
+  const int xl = sycl::min(width - 1, sycl::max(0, xp - 2)); // source column: shifted left by 2, clamped to the image
+  const int yl = sycl::min(height - 1, sycl::max(0, yp - 2)); // source row: shifted up by 2, clamped to the image
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  if (xp < (width + 4) && yp < (height + 4))
+    irows[BW * ty + tx] = d_Data[yl * pitch + xl]; // stage the raw pixel
+  item_ct1.barrier(sycl::access::fence_space::local_space); // irows complete before the horizontal pass
+  if (yp < (height + 4) && tx < W2)
+  {
+    float *ptr = &irows[BW * ty + 2 * tx]; // horizontal 5-tap window starting at every second column
+    brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2];
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // brows complete before the vertical pass
+  const int xs = item_ct1.get_group(2) * W2 + tx;
+  const int ys = item_ct1.get_group(1) * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[W2 * (ty * 2) + tx]; // vertical 5-tap window starting at every second row
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2];
+  }
+}
+// Lowpass-and-halve kernel that processes one input row at a time and keeps the last five filtered rows in five brow slots.
+void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+               const sycl::nd_item<3> &item_ct1, float const *d_ScaleDownKernel,
+               float *inrow, float *brow, int *yRead, int *yWrite)
+{
+
+#define dx2 (SCALEDOWN_W / 2)
+  const int tx = item_ct1.get_local_id(2);
+  const int tx0 = tx + 0 * dx2; // tx0..tx4: this work-item's element in each of the five dx2-wide brow slots
+  const int tx1 = tx + 1 * dx2;
+  const int tx2 = tx + 2 * dx2;
+  const int tx3 = tx + 3 * dx2;
+  const int tx4 = tx + 4 * dx2;
+  const int xStart = item_ct1.get_group(2) * SCALEDOWN_W;
+  const int yStart = item_ct1.get_group(1) * SCALEDOWN_H;
+  const int xWrite = xStart / 2 + tx; // output column at half resolution
+  float k0 = d_ScaleDownKernel[0];
+  float k1 = d_ScaleDownKernel[1];
+  float k2 = d_ScaleDownKernel[2];
+  if (tx < SCALEDOWN_H + 4) // the first SCALEDOWN_H + 4 work-items precompute the per-row offsets
+  {
+    int y = yStart + tx - 2;
+    y = (y < 0 ? 0 : y);
+    y = (y >= height ? height - 1 : y);
+    yRead[tx] = y * pitch; // clamped source row offset
+    yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; // destination row offset
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  int xRead = xStart + tx - 2; // source column: shifted left by 2, then clamped to the image
+  xRead = (xRead < 0 ? 0 : xRead);
+  xRead = (xRead >= width ? width - 1 : xRead);
+
+  int maxtx = sycl::min(dx2, width / 2 - xStart / 2); // number of output columns this group may produce
+  for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) // unrolled by five: each section below writes a different brow slot
+  {
+    {
+      inrow[tx] = d_Data[yRead[dy + 0] + xRead];
+      /*
+      DPCT1118:3: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 4 && !(dy & 1))
+          d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]);
+      }
+      /*
+      DPCT1118:4: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 3)) // dy is the same for every work-item, so the barriers inside are reached by the whole group
+    {
+      inrow[tx] = d_Data[yRead[dy + 1] + xRead];
+      /*
+      DPCT1118:5: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 3 && (dy & 1))
+          d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]);
+      }
+      /*
+      DPCT1118:6: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 2))
+    {
+      inrow[tx] = d_Data[yRead[dy + 2] + xRead];
+      /*
+      DPCT1118:7: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 2 && !(dy & 1))
+          d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]);
+      }
+      /*
+      DPCT1118:8: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 1))
+    {
+      inrow[tx] = d_Data[yRead[dy + 3] + xRead];
+      /*
+      DPCT1118:9: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 1 && (dy & 1))
+          d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]);
+      }
+      /*
+      DPCT1118:10: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < SCALEDOWN_H)
+    {
+      inrow[tx] = d_Data[yRead[dy + 4] + xRead];
+      /*
+      DPCT1118:11: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < dx2 && xWrite < width / 2) // selects the same work-items as tx < maxtx, written out in full
+      {
+        brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (!(dy & 1))
+          d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]);
+      }
+      /*
+      DPCT1118:12: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+  }
+}
+// Upsamples by two: every work-item expands one source pixel into a 2x2 output quad (copy plus neighbour averages).
+void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+             const sycl::nd_item<3> &item_ct1)
+{
+  const int lx = item_ct1.get_local_id(2);
+  const int ly = item_ct1.get_local_id(1);
+  const int outX = item_ct1.get_group(2) * SCALEUP_W + 2 * lx;
+  const int outY = item_ct1.get_group(1) * SCALEUP_H + 2 * ly;
+  if (outX >= 2 * width || outY >= 2 * height)
+    return; // this quad falls outside the upscaled image
+  const int srcX = item_ct1.get_group(2) * (SCALEUP_W / 2) + lx;
+  const int srcY = item_ct1.get_group(1) * (SCALEUP_H / 2) + ly;
+  const float *rowTop = d_Data + srcY * pitch;
+  const float *rowBot = d_Data + sycl::min(srcY + 1, height - 1) * pitch; // next row, clamped at the last row
+  const int srcXRight = sycl::min(srcX + 1, width - 1); // next column, clamped at the last column
+  const float vul = rowTop[srcX], vur = rowTop[srcXRight];
+  const float vdl = rowBot[srcX], vdr = rowBot[srcXRight];
+  float *outTop = d_Result + (outY + 0) * newpitch + outX;
+  float *outBot = d_Result + (outY + 1) * newpitch + outX;
+  outTop[0] = vul;
+  outTop[1] = 0.50f * (vul + vur);
+  outBot[0] = 0.50f * (vul + vdl);
+  outBot[1] = 0.25f * (vul + vur + vdl + vdr);
+}
+// Descriptor kernel body: one work-group per SiftPoint (d_sift[group + fstPts]); samples texObj and writes the normalised d_sift[bx].data.
+/*
+DPCT1110:13: The total declared local variable size in device function
+ExtractSiftDescriptors exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/
+void ExtractSiftDescriptors(dpct::image_accessor_ext<float, 2> texObj,
+                            SiftPoint *d_sift, int fstPts, float subsampling,
+                            const sycl::nd_item<3> &item_ct1, float *gauss,
+                            float *buffer, float *sums)
+{
+
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx; // linear work-item index; also the buffer / descriptor element this work-item owns
+  const int bx = item_ct1.get_group(2) + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); // weight centred between samples 7 and 8
+  buffer[idx] = 0.0f; // clear this work-item's histogram slot
+  /*
+  DPCT1065:92: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; // degrees to radians, with pi written as 3.1415f
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  for (int y = ty; y < 16; y += 8) // each work-item handles rows ty and ty + 8 of the 16 x 16 sample grid
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf; // NOTE(review): 3.1415f < pi, so atan2 == pi gives angf just above 8 and angi == 8 (next cell) - confirm
+    int angp = (angi < 7 ? angi + 1 : 0);
+    angf -= angi;
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 32, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 8, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 40, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  /*
+  DPCT1065:93: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2) // NOTE(review): offsets 16..1 and sums[idx / 32] presume 32-wide sub-groups - confirm the launch config
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum; // one partial sum per block of 32 work-items
+  /*
+  DPCT1065:94: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f); // first normalisation, each component capped at 0.2f
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  /*
+  DPCT1065:95: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * sycl::rsqrt(tsum2); // second normalisation produces the stored descriptor component
+  if (idx == 0) // a single work-item rescales the keypoint position and scale
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+// Fast polynomial approximation of atan2(y, x), in radians. Returns 0 for x == y == 0, as atan2 does.
+float FastAtan2(float y, float x)
+{
+  float absx = sycl::fabs(x);
+  float absy = sycl::fabs(y);
+  // With both operands zero the ratio below would be 0/0 = NaN; ExtractSiftDescriptorsCONSTNew converts
+  // the scaled result to an int bin index, which is undefined for NaN, so return the atan2(0, 0) value.
+  if (sycl::max(absx, absy) == 0.0f)
+    return 0.0f;
+  // DPCT1013:96: the rounding mode of this division could not be specified, so accuracy may differ from
+  // the original code (SYCL math built-ins follow the OpenCL C 1.2 rounding rules). Verify the correctness.
+  float a = sycl::min(absx, absy) / sycl::max(absx, absy);
+  float s = a * a;
+  float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a;
+  r = (absy > absx ? 1.57079637f - r : r);
+  r = (x < 0 ? 3.14159274f - r : r);
+  r = (y < 0 ? -r : r);
+  return r;
+}
+
+// __global__ void ExtractSiftDescriptorsCONSTNew(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave)
+/*
+DPCT1110:14: The total declared local variable size in device function
+ExtractSiftDescriptorsCONSTNew exceeds 128 bytes and may cause high register
+pressure. Consult with your hardware vendor to find the total register size
+available and adjust the code, or use smaller sub-group size to avoid high
+register pressure.
+*/
+void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift,
+                                    float subsampling, int octave,
+                                    const sycl::nd_item<3> &item_ct1,
+                                    int d_MaxNumPoints,
+                                    unsigned int *d_PointCounter, float *gauss,
+                                    float *buffer, float *sums)
+{
+  // Writes a normalized descriptor into d_sift[bx].data for each keypoint of `octave`; work-groups stride over the range.
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = sycl::native::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  // Keypoint range for this octave, read from d_PointCounter and capped at d_MaxNumPoints.
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  // if (tx==0 && ty==0)
+  //   printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts);
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+    // bx depends only on the group id, so every work-item of a group runs the same iterations and meets each barrier.
+    buffer[idx] = 0.0f;
+    /*
+    DPCT1118:15: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:97: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    // Compute angles and gradients
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = sycl::sin(theta); // cosa -sina
+    float cosa = sycl::cos(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+
+      // float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+      //            tex2D<float>(texObj, xpos - cosa, ypos - sina);
+      // float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+      //            tex2D<float>(texObj, xpos + sina, ypos - cosa);
+      // NOTE(review): sample coordinates are truncated to int and not clamped to the image -- confirm they stay in range.
+      int xi1 = xpos + cosa;
+      int yi1 = ypos + sina;
+
+      int xi2 = xpos - cosa;
+      int yi2 = ypos - sina;
+
+      float dx = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      xi1 = xpos - sina;
+      yi1 = ypos + cosa;
+
+      xi2 = xpos + sina;
+      yi2 = ypos - cosa;
+
+      float dy = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      /*
+      DPCT1013:102: The rounding mode could not be specified and the generated
+      code may have different accuracy than the original code. Verify the
+      correctness. SYCL math built-in function rounding mode is aligned with
+      OpenCL C 1.2 standard.
+      */
+      float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf; // reaches 8 if FastAtan2 can return +pi, as 4 / 3.1415 > 4 / pi (TODO confirm its range)
+      angf -= angi;    // fractional position between two angle bins
+      angi &= 7;       // wrap bin 8 onto bin 0 so p1 stays inside this cell's 8 angle bins
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 32, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 8, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 40, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    /*
+    DPCT1118:16: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:98: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // NOTE(review): the (idx & 31) == 0 / sums[idx / 32] pattern presumably relies on 32-wide sub-groups in ShiftDown -- confirm.
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    /*
+    DPCT1118:17: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:99: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f);
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    /*
+    DPCT1118:18: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:100: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    /*
+    DPCT1118:19: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:101: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+}
+
+/*
+DPCT1110:20: The total declared local variable size in device function
+ExtractSiftDescriptorsCONST exceeds 128 bytes and may cause high register
+pressure. Consult with your hardware vendor to find the total register size
+available and adjust the code, or use smaller sub-group size to avoid high
+register pressure.
+*/
+void ExtractSiftDescriptorsCONST(dpct::image_accessor_ext<float, 2> texObj,
+                                 SiftPoint *d_sift, float subsampling,
+                                 int octave, const sycl::nd_item<3> &item_ct1,
+                                 int d_MaxNumPoints,
+                                 unsigned int *d_PointCounter, float *gauss,
+                                 float *buffer, float *sums)
+{
+  // Writes a normalized descriptor into d_sift[bx].data for each keypoint of `octave`; work-groups stride over the range.
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  // Keypoint range for this octave, read from d_PointCounter and capped at d_MaxNumPoints.
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  // if (tx==0 && ty==0)
+  //   printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts);
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+    // bx depends only on the group id, so every work-item of a group runs the same iterations and meets each barrier.
+    buffer[idx] = 0.0f;
+    /*
+    DPCT1118:21: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:103: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    // Compute angles and gradients
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = sycl::sin(theta); // cosa -sina
+    float cosa = sycl::cos(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+      float dx = texObj.read(xpos + cosa, ypos + sina) -
+                 texObj.read(xpos - cosa, ypos - sina);
+      float dy = texObj.read(xpos - sina, ypos + cosa) -
+                 texObj.read(xpos + sina, ypos - cosa);
+      float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf; // 8 when atan2 >= 3.1415 (e.g. exactly +pi for dy == +0, dx < 0), as 4 / 3.1415 > 4 / pi
+      angf -= angi;    // fractional position between two angle bins
+      angi &= 7;       // wrap bin 8 onto bin 0 so p1 stays inside this cell's 8 angle bins
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 32, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 8, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p1 + 40, iangf * grad2);
+          dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+              buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    /*
+    DPCT1118:22: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:104: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // NOTE(review): the (idx & 31) == 0 / sums[idx / 32] pattern presumably relies on 32-wide sub-groups in ShiftDown -- confirm.
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    /*
+    DPCT1118:23: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:105: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f);
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    /*
+    DPCT1118:24: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:106: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    /*
+    DPCT1118:25: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:107: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+}
+
+/*
+DPCT1110:26: The total declared local variable size in device function
+ExtractSiftDescriptorsOld exceeds 128 bytes and may cause high register
+pressure. Consult with your hardware vendor to find the total register size
+available and adjust the code, or use smaller sub-group size to avoid high
+register pressure.
+*/
+void ExtractSiftDescriptorsOld(dpct::image_accessor_ext<float, 2> texObj,
+                               SiftPoint *d_sift, int fstPts, float subsampling,
+                               const sycl::nd_item<3> &item_ct1, float *gauss,
+                               float *buffer, float *sums)
+{
+  // Writes the normalized descriptor of keypoint bx (group id + fstPts) into d_sift[bx].data, then rescales its position.
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  const int bx = item_ct1.get_group(2) + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  /*
+  DPCT1065:108: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf; // 8 when atan2 >= 3.1415 (e.g. exactly +pi for dy == +0, dx < 0), as 4 / 3.1415 > 4 / pi
+    angf -= angi;    // fractional position between two angle bins
+    angi &= 7;       // wrap bin 8 onto bin 0 so p1 stays inside this cell's 8 angle bins
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 32, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 8, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 40, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  /*
+  DPCT1065:109: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Normalize twice and suppress peaks first time
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  /*
+  DPCT1065:110: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  /*
+  DPCT1065:111: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  /*
+  DPCT1065:112: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  /*
+  DPCT1065:113: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  /*
+  DPCT1065:114: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  buffer[idx] = buffer[idx] * sycl::rsqrt(tsum1);
+
+  if (buffer[idx] > 0.2f)
+    buffer[idx] = 0.2f;
+  /*
+  DPCT1065:115: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  /*
+  DPCT1065:116: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  /*
+  DPCT1065:117: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  /*
+  DPCT1065:118: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  /*
+  DPCT1065:119: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  /*
+  DPCT1065:120: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+
+  float *desc = d_sift[bx].data;
+  desc[idx] = buffer[idx] * sycl::rsqrt(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+/*
+DPCT1110:27: The total declared local variable size in device function
+ExtractSiftDescriptor exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/
+void ExtractSiftDescriptor(dpct::image_accessor_ext<float, 2> texObj,
+                           SiftPoint *d_sift, float subsampling, int octave,
+                           int bx, const sycl::nd_item<3> &item_ct1,
+                           float *gauss, float *buffer, float *sums)
+{
+  // Writes the normalized descriptor of keypoint bx into d_sift[bx].data, then rescales its position; `octave` is unused.
+  const int idx = item_ct1.get_local_id(2);
+  const int tx = idx & 15; // 0 -> 16
+  const int ty = idx / 16; // 0 -> 8
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  /*
+  DPCT1065:121: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf; // 8 when atan2 >= 3.1415 (e.g. exactly +pi for dy == +0, dx < 0), as 4 / 3.1415 > 4 / pi
+    angf -= angi;    // fractional position between two angle bins
+    angi &= 7;       // wrap bin 8 onto bin 0 so p1 stays inside this cell's 8 angle bins
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 32, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 8, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p1 + 40, iangf * grad2);
+        dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  /*
+  DPCT1065:122: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // NOTE(review): the (idx & 31) == 0 / sums[idx / 32] pattern presumably relies on 32-wide sub-groups in ShiftDown -- confirm.
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  /*
+  DPCT1065:123: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f);
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  /*
+  DPCT1065:124: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+  /*
+  DPCT1065:125: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+}
+
+void RescalePositions(SiftPoint *d_sift, int numPts, float scale,
+                      const sycl::nd_item<3> &item_ct1)
+{ // Multiplies xpos, ypos and scale of each of the first numPts keypoints by `scale`, one keypoint per work-item.
+  const int pointIdx = item_ct1.get_local_id(2) +
+                       item_ct1.get_group(2) * item_ct1.get_local_range(2);
+  if (pointIdx >= numPts)
+    return; // work-items whose index is past numPts do nothing
+  SiftPoint &pt = d_sift[pointIdx];
+  pt.xpos *= scale;
+  pt.ypos *= scale;
+  pt.scale *= scale;
+}
+
+void ComputeOrientations(dpct::image_accessor_ext<float, 2> texObj,
+                         SiftPoint *d_Sift, int fstPts,
+                         const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,
+                         unsigned int *d_PointCounter, float *hist,
+                         float *gauss)
+{
+  // Sets d_Sift[bx].orientation from a 32-bin gradient histogram; a strong second peak appends a copy of the keypoint.
+  const int tx = item_ct1.get_local_id(2);
+  const int bx = item_ct1.get_group(2) + fstPts;
+  float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+  if (tx < 11)
+    gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5));
+  if (tx < 64)
+    hist[tx] = 0.0f;
+  /*
+  DPCT1065:126: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float xp = d_Sift[bx].xpos - 4.5f;
+  float yp = d_Sift[bx].ypos - 4.5f;
+  int yd = tx / 11;      // row of the 11 x 11 sampling window
+  int xd = tx - yd * 11; // column of the 11 x 11 sampling window
+  float xf = xp + xd;
+  float yf = yp + yd;
+  if (yd < 11)
+  {
+    float dx = texObj.read(xf + 1.0f, yf) - texObj.read(xf - 1.0f, yf); // float offsets keep the coordinates single precision
+    float dy = texObj.read(xf, yf + 1.0f) - texObj.read(xf, yf - 1.0f);
+    int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f;
+    if (bin > 31)
+      bin = 0;
+    float grad = sycl::sqrt(dx * dx + dy * dy);
+    dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+        &hist[bin], grad * gauss[xd] * gauss[yd]);
+  }
+  /*
+  DPCT1065:127: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+  int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+  if (tx < 32)
+  {
+    int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+    int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+    hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); // circular 1-4-6-4-1 smoothing
+  }
+  /*
+  DPCT1065:128: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (tx < 32)
+  {
+    float v = hist[32 + tx];
+    if(x1p < 32 && x1m < 32)
+      hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); // keep local maxima of the smoothed histogram only
+  }
+  /*
+  DPCT1065:129: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (tx == 0)
+  {
+    float maxval1 = 0.0f; // largest and second-largest surviving peaks
+    float maxval2 = 0.0f;
+    int i1 = -1;
+    int i2 = -1;
+    for (int i = 0; i < 32; i++)
+    {
+      float v = hist[i];
+      if (v > maxval1)
+      {
+        maxval2 = maxval1;
+        maxval1 = v;
+        i2 = i1;
+        i1 = i;
+      }
+      else if (v > maxval2)
+      {
+        maxval2 = v;
+        i2 = i;
+      }
+    }
+    float val1 = hist[32 + ((i1 + 1) & 31)];
+    float val2 = hist[32 + ((i1 + 31) & 31)];
+    float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+    d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // 11.25 = 360 / 32 per bin
+    if (maxval2 > 0.8f * maxval1)
+    {
+      float val1 = hist[32 + ((i2 + 1) & 31)];
+      float val2 = hist[32 + ((i2 + 31) & 31)];
+      float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+      unsigned int idx = dpct::atomic_fetch_compare_inc<
+          sycl::access::address_space::generic_space>(d_PointCounter,
+                                                      0x7fffffff);
+      if (idx < d_MaxNumPoints)
+      {
+        d_Sift[idx].xpos = d_Sift[bx].xpos;
+        d_Sift[idx].ypos = d_Sift[bx].ypos;
+        d_Sift[idx].scale = d_Sift[bx].scale;
+        d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+        d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+        d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+        // the copy differs from d_Sift[bx] only in its orientation
+        d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+      }
+    }
+  }
+}
+
+// With constant number of blocks: every work-group strides over the points in [fstPts, totPts), builds a smoothed LEN-bin gradient-orientation histogram per point and writes the resulting peak orientation(s) to d_Sift.
+/*
+DPCT1110:28: The total declared local variable size in device function
+ComputeOrientationsCONSTNew exceeds 128 bytes and may cause high register
+pressure. Consult with your hardware vendor to find the total register size
+available and adjust the code, or use smaller sub-group size to avoid high
+register pressure.
+*/
+void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, // image is read as image[y * p + x]; w and h bound the clamped x and y
+                                 SiftPoint *d_Sift, int octave, // d_Sift: point records, read and updated in place
+                                 const sycl::nd_item<3> &item_ct1,
+                                 int d_MaxNumPoints,
+                                 unsigned int *d_PointCounter, // entries 2*octave-1, 2*octave and 2*octave+1 are used
+                                 sycl::local_accessor<float, 2> img, // scratch tile, indexed [y][x] with y, x < WID
+                                 sycl::local_accessor<float, 2> tmp, // second scratch tile, same indexing as img
+                                 float *hist, float *gaussx, float *gaussy) // hist: 2 * LEN floats; gaussx/gaussy: WID floats each
+{
+#define RAD 9 // half-width of the sampled patch
+#define WID (2 * RAD + 1) // side length of the sampled patch
+#define LEN 32 //%%%% Note: Lowe suggests 36, not 32
+
+  const int tx = item_ct1.get_local_id(2);
+
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); // loop start, clamped to d_MaxNumPoints
+  int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); // loop end (exclusive), clamped likewise
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; // one point per work-group per iteration
+       bx += item_ct1.get_group_range(2))
+  {
+
+    float sc = d_Sift[bx].scale;
+    for (int i = tx; i < 2 * LEN; i += item_ct1.get_local_range(2)) // clear both halves of hist
+      hist[i] = 0.0f;
+    float xp = d_Sift[bx].xpos;
+    float yp = d_Sift[bx].ypos;
+    int xi = (int)xp; // truncated integer position
+    int yi = (int)yp;
+    float xf = xp - xi; // remainder left after truncation
+    float yf = yp - yi;
+    for (int i = tx; i < WID * WID; i += item_ct1.get_local_range(2)) // copy the WID x WID patch around (xi, yi) into img
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      int xp = sycl::max(sycl::min(x - RAD + xi, w - 1), 0); // clamp to [0, w - 1]; shadows the outer xp
+      int yp = sycl::max(sycl::min(y - RAD + yi, h - 1), 0); // clamp to [0, h - 1]; shadows the outer yp
+      img[y][x] = image[yp * p + xp];
+    }
+    float fac[5]; // filter weights; only fac[0] and fac[1] are read below
+    fac[1] = fac[3] =
+        (sc > 0.5f ? sycl::native::exp(-1.0f / (2.0f * (sc * sc - 0.25f)))
+                   : 0.0f); // weight of the +/-1 taps, zero when sc <= 0.5
+    fac[0] = fac[4] =
+        (sc > 0.5f ? sycl::native::exp(-4.0f / (2.0f * (sc * sc - 0.25f)))
+                   : 0.0f); // weight of the +/-2 taps, zero when sc <= 0.5
+    fac[2] = 1.0f;
+    float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0
+    if (tx < WID) // per-axis weights exp(i2sigma2 * d * d), d measured from the offsets xf / yf
+    {
+      gaussx[tx] =
+          sycl::native::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf));
+      gaussy[tx] =
+          sycl::native::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf));
+    }
+    /*
+    DPCT1118:29: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:130: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // img, gaussx/gaussy and the cleared hist are written before anyone reads them
+    for (int i = tx; i < (WID - 4) * WID; i += item_ct1.get_local_range(2)) // 5-tap filter along y: img -> tmp, rows 2 .. WID - 3
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      y += 2;
+      tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) +
+                  fac[0] * (img[y - 2][x] + img[y + 2][x]);
+    }
+    /*
+    DPCT1118:30: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:131: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // tmp is complete before the pass along x reads it
+    for (int i = tx; i < (WID - 4) * (WID - 4); // 5-tap filter along x: tmp -> img, interior (WID - 4) x (WID - 4) only
+         i += item_ct1.get_local_range(2))
+    {
+      int y = i / (WID - 4);
+      int x = i - y * (WID - 4);
+      x += 2;
+      y += 2;
+      img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) +
+                  fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]);
+    }
+    /*
+    DPCT1118:31: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:132: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // filtered img is complete before differences are taken
+    for (int i = tx; i < (WID - 6) * (WID - 6); // accumulate the histogram over the interior (WID - 6) x (WID - 6) samples
+         i += item_ct1.get_local_range(2))
+    {
+      int y = i / (WID - 6);
+      int x = i - y * (WID - 6);
+      x += 3;
+      y += 3;
+      float dx = img[y][x + 1] - img[y][x - 1]; // central differences
+      float dy = img[y + 1][x] - img[y - 1][x];
+      int bin =
+          (int)((LEN / 2) * sycl::atan2(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) %
+          LEN; // angle mapped onto LEN bins, rounded, wrapped into [0, LEN)
+      /*
+      DPCT1013:135: The rounding mode could not be specified and the generated
+      code may have different accuracy than the original code. Verify the
+      correctness. SYCL math built-in function rounding mode is aligned with
+      OpenCL C 1.2 standard.
+      */
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+          &hist[LEN + bin], grad * gaussx[x] * gaussy[y]); // raw histogram lives in the upper half of hist
+    }
+    /*
+    DPCT1118:32: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:133: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // all atomic adds into hist have landed
+    int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1); // circular neighbour indices; only used when tx < LEN
+    int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1);
+    int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2);
+    int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2);
+    if (tx < LEN) // NOTE(review): neighbours' hist entries written in this branch are read again below with no barrier in between; presumably relies on these LEN work-items running in lockstep - confirm
+    {
+      hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) +
+                 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]); // first circular smoothing pass: upper half -> lower half
+      hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) +
+                       0.0f * (hist[x2m] + hist[x2p]); // second pass: lower half -> upper half (the +/-2 taps have weight 0)
+      float val = hist[tx + LEN];
+      hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f); // keep local maxima only, zero everything else
+    }
+    /*
+    DPCT1118:33: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:134: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx == 0) // a single work-item selects the peaks and writes the results
+    {
+      float maxval1 = 0.0; // largest and second-largest entries of hist[0 .. LEN)
+      float maxval2 = 0.0;
+      int i1 = -1; // their bin indices; remain -1 if no entry is > 0
+      int i2 = -1;
+      for (int i = 0; i < LEN; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[LEN + ((i1 + 1) % LEN)]; // smoothed values in the bins right and left of the peak
+      float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); // sub-bin peak position from the peak and its two neighbours
+      d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; // negative positions wrap by LEN, then rescale by 360 / LEN
+      dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+          &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); // counter 2*octave+1 is raised to at least counter 2*octave
+      if (maxval2 > 0.8f * maxval1 && true) // second peak above 80% of the first; "&& true" has no effect
+      {
+        float val1 = hist[LEN + ((i2 + 1) % LEN)];
+        float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 1], 0x7fffffff); // presumably returns the pre-increment counter value - confirm against the dpct helper
+        if (idx < d_MaxNumPoints) // write the extra point only if its slot is within capacity
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = sc;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; // same point, second orientation
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+  }
+#undef RAD
+#undef WID
+#undef LEN
+}
+
+// With constant number of blocks: every work-group strides over the points in [fstPts, totPts), samples an 11 x 11 neighbourhood through texObj, builds a smoothed 32-bin gradient-orientation histogram and writes the peak orientation(s) to d_Sift.
+/*
+DPCT1110:34: The total declared local variable size in device function
+ComputeOrientationsCONST exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/
+void ComputeOrientationsCONST(dpct::image_accessor_ext<float, 2> texObj, // image sampled at float coordinates
+                              SiftPoint *d_Sift, int octave, // d_Sift: point records, read and updated in place
+                              const sycl::nd_item<3> &item_ct1,
+                              int d_MaxNumPoints, unsigned int *d_PointCounter, // counter entries 2*octave-1, 2*octave and 2*octave+1 are used
+                              float *hist, float *gauss) // hist: 64 floats; gauss: 11 floats
+{
+
+  const int tx = item_ct1.get_local_id(2); // NOTE(review): the code below assumes at least 121 work-items per group (11 x 11 samples) - confirm at the launch site
+
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); // loop start, clamped to d_MaxNumPoints
+  int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); // loop end (exclusive), clamped likewise
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; // one point per work-group per iteration
+       bx += item_ct1.get_group_range(2))
+  {
+
+    float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5)); // 11 weights centred on index 5
+    if (tx < 64)
+      hist[tx] = 0.0f; // clear both 32-entry halves
+    /*
+    DPCT1118:35: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:136: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // gauss and the cleared hist are written before anyone uses them
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11; // row and column of this work-item inside the 11 x 11 neighbourhood
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11) // i.e. tx < 121
+    {
+      float dx = texObj.read(xf + 1.0f, yf) - texObj.read(xf - 1.0f, yf); // float literals keep the coordinate arithmetic in single precision (fp64 is an optional device feature)
+      float dy = texObj.read(xf, yf + 1.0f) - texObj.read(xf, yf - 1.0f);
+      int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f; // angle mapped onto 32 bins and rounded; can reach 32
+      if (bin > 31)
+        bin = 0; // wrap the top bin around
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+          &hist[bin], grad * gauss[xd] * gauss[yd]); // raw histogram lives in hist[0 .. 32)
+    }
+    /*
+    DPCT1118:36: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:137: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // all atomic adds into hist have landed
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31); // circular neighbour indices; only meaningful when tx < 32
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); // circular 1-4-6-4-1 smoothing: lower half -> upper half
+    }
+    /*
+    DPCT1118:37: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:138: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // smoothed histogram is complete before the maxima test
+    if (tx < 32)
+    {
+      float v = hist[32 + tx];
+      if(x1m < 32) // always true here: for tx < 32, x1m is in [0, 31]
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); // keep local maxima only, zero everything else
+    }
+    /*
+    DPCT1118:38: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:139: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx == 0) // a single work-item selects the peaks and writes the results
+    {
+      float maxval1 = 0.0f; // largest and second-largest entries of hist[0 .. 32)
+      float maxval2 = 0.0f;
+      int i1 = -1; // their bin indices; remain -1 if no entry is > 0
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[32 + ((i1 + 1) & 31)]; // smoothed values in the bins right and left of the peak
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); // sub-bin peak position from the peak and its two neighbours
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // 11.25 = 360 / 32
+      dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+          &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); // counter 2*octave+1 is raised to at least counter 2*octave
+      if (maxval2 > 0.8f * maxval1) // second peak above 80% of the first
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 1], 0x7fffffff); // presumably returns the pre-increment counter value - confirm against the dpct helper
+        if (idx < d_MaxNumPoints) // write the extra point only if its slot is within capacity
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // same point, second orientation
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    /*
+    DPCT1118:39: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:140: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // hist and gauss are reused by the next loop iteration
+  }
+}
+
+// With constant number of blocks: per point, computes the peak orientation(s) from a smoothed 32-bin gradient-orientation histogram sampled through texObj, then calls ExtractSiftDescriptor for the point and for any extra point created for a second peak.
+void OrientAndExtractCONST(dpct::image_accessor_ext<float, 2> texObj, // image sampled at float coordinates
+                           SiftPoint *d_Sift, float subsampling, int octave, // subsampling is only forwarded to ExtractSiftDescriptor
+                           const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,
+                           unsigned int *d_PointCounter, float *gauss, // gauss: 11 floats; counter entries 2*octave-1, 2*octave and 2*octave+1 are used
+                           float *buffer, float *sums, float *hist, // buffer and sums are only forwarded; hist: 64 floats
+                           unsigned int &idx) // written by work-item 0 and read by all after a barrier; presumably work-group-shared storage - confirm at the launch site
+{
+
+   //%%%%
+  const int tx = item_ct1.get_local_id(2); // NOTE(review): the code below assumes at least 121 work-items per group (11 x 11 samples) - confirm at the launch site
+
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); // loop start, clamped to d_MaxNumPoints
+  int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); // loop end (exclusive), clamped likewise
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; // one point per work-group per iteration
+       bx += item_ct1.get_group_range(2))
+  {
+
+    float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); // 4.5 = 2 * 1.5 * 1.5
+    if (tx < 11)
+      gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5)); // 11 weights centred on index 5
+    if (tx < 64)
+      hist[tx] = 0.0f; // clear both 32-entry halves
+    /*
+    DPCT1118:40: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:141: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // gauss and the cleared hist are written before anyone uses them
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11; // row and column of this work-item inside the 11 x 11 neighbourhood
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11) // i.e. tx < 121
+    {
+      float dx = texObj.read(xf + 1.0f, yf) - texObj.read(xf - 1.0f, yf); // float literals keep the coordinate arithmetic in single precision (fp64 is an optional device feature)
+      float dy = texObj.read(xf, yf + 1.0f) - texObj.read(xf, yf - 1.0f);
+      int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f; // angle mapped onto 32 bins and rounded; can reach 32
+      if (bin > 31)
+        bin = 0; // wrap the top bin around
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(
+          &hist[bin], grad * gauss[xd] * gauss[yd]); // raw histogram lives in hist[0 .. 32)
+    }
+    /*
+    DPCT1118:41: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:142: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // all atomic adds into hist have landed
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31); // circular neighbour indices; only meaningful when tx < 32
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); // circular 1-4-6-4-1 smoothing: lower half -> upper half
+    }
+    /*
+    DPCT1118:42: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:143: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // smoothed histogram is complete before the maxima test
+    if (tx < 32)
+    {
+      float v = hist[32 + tx];
+      if(x1m < 32) // always true here: for tx < 32, x1m is in [0, 31]
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); // keep local maxima only, zero everything else
+    }
+    /*
+    DPCT1118:43: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:144: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx == 0) // a single work-item selects the peaks and writes the results
+    {
+      float maxval1 = 0.0f; // largest and second-largest entries of hist[0 .. 32)
+      float maxval2 = 0.0f;
+      int i1 = -1; // their bin indices; remain -1 if no entry is > 0
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[32 + ((i1 + 1) & 31)]; // smoothed values in the bins right and left of the peak
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); // sub-bin peak position from the peak and its two neighbours
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // 11.25 = 360 / 32
+      idx = 0xffffffff; //%%%%
+      dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+          &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); // counter 2*octave+1 is raised to at least counter 2*octave
+      if (maxval2 > 0.8f * maxval1) // second peak above 80% of the first
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%%
+        if (idx < d_MaxNumPoints) // write the extra point only if its slot is within capacity
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // same point, second orientation
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    /*
+    DPCT1118:44: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:145: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // idx and the d_Sift writes of work-item 0 are in place before the calls below
+    ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx, item_ct1,
+                          gauss, buffer, sums);                        //%%%%
+    if (idx < d_MaxNumPoints)                                          //%%%%
+      ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx, item_ct1,
+                            gauss, buffer, sums); //%%%%
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Subtract two images (multi-scale version)
+///////////////////////////////////////////////////////////////////////////////
+
+// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int ty = threadIdx.y;
+//   if (tx == 0 && ty == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   int ypos = MINMAX_H * blockIdx.y + ty;
+//   if (ypos >= height)
+//     return;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]);
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+
+//   if (Shuffle(maxv, 0) > thresh)
+//   {
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   __syncthreads();
+//   if (ty == 0 && tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+/*
+DPCT1110:45: The total declared local variable size in device function
+FindPointsMultiNew exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/ // FindPointsMultiNew: finds samples of d_Data0 that exceed thresh in magnitude and are strict extrema among their neighbours in x, y and the adjacent layers, then refines each candidate and appends it to d_Sift.
+void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, // d_Data0: stacked layers of pitch * height floats each
+                        int height, float subsampling, float lowestScale,
+                        float thresh, float factor, float edgeLimit, int octave,
+                        const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,
+                        unsigned int *d_PointCounter, unsigned short *points) // points: scratch list of (x, y) pairs, up to MEMWID entries
+{
+#define MEMWID (MINMAX_W + 2)
+
+  if (item_ct1.get_group(2) == 0 && item_ct1.get_group(1) == 0 &&
+      item_ct1.get_local_id(2) == 0) // one work-item of group (0, 0) seeds this octave's counters
+  {
+    dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+        &d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+    dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+        &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+  }
+  int tx = item_ct1.get_local_id(2);
+  int block = item_ct1.get_group(2) / NUM_SCALES; // group index along dim 2 encodes (x block, scale)
+  int scale = item_ct1.get_group(2) - NUM_SCALES * block;
+  int minx = block * MINMAX_W;
+  int maxx = sycl::min(minx + MINMAX_W, width);
+  int xpos = minx + tx; // column handled by this work-item is xpos - 1
+  int size = pitch * height; // elements per layer
+  int ptr = size * scale + sycl::max(sycl::min(xpos - 1, width - 1), 0); // layer offset plus column clamped to [0, width - 1]
+
+  int yloops = dpct::min(
+      (unsigned int)(height - MINMAX_H * item_ct1.get_group(1)), MINMAX_H); // rows handled by this group, at most MINMAX_H
+  float maxv = 0.0f;
+  for (int y = 0; y < yloops; y++) // largest |value| of this column in layer scale + 1
+  {
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    int yptr1 = ptr + ypos * pitch;
+    float val = d_Data0[yptr1 + 1 * size];
+    maxv = sycl::fmax(maxv, sycl::fabs(val));
+  }
+  // if (tx==0) printf("XXX1\n");
+  if (!sycl::any_of_group(
+          item_ct1.get_sub_group(),
+          (0xffffffff &
+           (0x1 << item_ct1.get_sub_group().get_local_linear_id())) &&
+              maxv > thresh))
+    return; // no lane of this sub-group has a value above thresh
+  // if (tx==0) printf("XXX2\n");
+
+  int ptbits = 0; // bit y is set when row y of this column is a candidate
+  for (int y = 0; y < yloops; y++)
+  {
+
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    int yptr1 = ptr + ypos * pitch;
+    float d11 = d_Data0[yptr1 + 1 * size]; // sample under test (layer scale + 1)
+    if (sycl::any_of_group(
+            item_ct1.get_sub_group(),
+            (0xffffffff &
+             (0x1 << item_ct1.get_sub_group().get_local_linear_id())) &&
+                sycl::fabs(d11) > thresh)) // skip rows where no lane exceeds thresh
+    {
+
+      int yptr0 = ptr + sycl::max(0, ypos - 1) * pitch; // rows above and below, clamped to the image
+      int yptr2 = ptr + sycl::min(height - 1, ypos + 1) * pitch;
+      float d01 = d_Data0[yptr1];
+      float d10 = d_Data0[yptr0 + 1 * size];
+      float d12 = d_Data0[yptr2 + 1 * size];
+      float d21 = d_Data0[yptr1 + 2 * size];
+
+      float d00 = d_Data0[yptr0];
+      float d02 = d_Data0[yptr2];
+      float ymin1 = sycl::fmin(sycl::fmin(d00, d01), d02); // min/max over three rows in layer scale
+      float ymax1 = sycl::fmax(sycl::fmax(d00, d01), d02);
+      float d20 = d_Data0[yptr0 + 2 * size];
+      float d22 = d_Data0[yptr2 + 2 * size];
+      float ymin3 = sycl::fmin(sycl::fmin(d20, d21), d22); // min/max over three rows in layer scale + 2
+      float ymax3 = sycl::fmax(sycl::fmax(d20, d21), d22);
+      float ymin2 = sycl::fmin(
+          sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3); // min over all nine values of this column
+      float ymax2 = sycl::fmax(
+          sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3); // max over all nine values of this column
+
+      float nmin2 = sycl::fmin(ShiftUp(ymin2, 1, item_ct1),
+                               ShiftDown(ymin2, 1, item_ct1)); // presumably the column minima of the two adjacent lanes - confirm ShiftUp/ShiftDown semantics
+      float nmax2 = sycl::fmax(ShiftUp(ymax2, 1, item_ct1),
+                               ShiftDown(ymax2, 1, item_ct1));
+      float minv = sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3); // min over every neighbour of d11, d11 itself excluded
+      minv = sycl::fmin(sycl::fmin(minv, d10), d12);
+      float maxv = sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3); // max over every neighbour of d11; shadows the outer maxv
+      maxv = sycl::fmax(sycl::fmax(maxv, d10), d12);
+
+      if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) // border lanes only supply neighbour values
+        ptbits |= ((d11 < sycl::fmin(-thresh, minv)) |
+                   (d11 > sycl::fmax(thresh, maxv)))
+                  << y;
+    }
+  }
+
+  unsigned int totbits = sycl::popcount(ptbits); // candidates found by this lane
+  unsigned int numbits = totbits;
+  for (int d = 1; d < 32; d <<= 1) // running sum of counts across lanes, assuming ShiftUp(v, d) yields lane tx - d's value - confirm
+  {
+    unsigned int num = ShiftUp(totbits, d, item_ct1);
+    if (tx >= d)
+      totbits += num;
+  }
+  int pos = totbits - numbits; // first slot in points[] for this lane's candidates
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    if (ptbits & (1 << y) && pos < MEMWID) // candidates beyond MEMWID slots are dropped
+    {
+      points[2 * pos + 0] = xpos - 1;
+      points[2 * pos + 1] = ypos;
+      pos++;
+    }
+  }
+
+  totbits = Shuffle(totbits, 31, item_ct1); // presumably broadcasts lane 31's running total - confirm Shuffle semantics
+  if (tx < totbits) // NOTE(review): points[] entries written above by other work-items are read here with no barrier in between; presumably relies on the Shuffle acting as a sync point - confirm
+  {
+    int xpos = points[2 * tx + 0];
+    int ypos = points[2 * tx + 1];
+    int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+    float val = d_Data0[ptr];
+    float *data1 = &d_Data0[ptr];
+    float dxx = 2.0f * val - data1[-1] - data1[1]; // second differences in x and y (negated)
+    float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+    float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+    float tra = dxx + dyy;
+    float det = dxx * dyy - dxy * dxy;
+    if (tra * tra < edgeLimit * det) // reject candidates whose tra^2 / det ratio reaches edgeLimit
+    {
+      float edge = (tra * tra) / det;
+      float dx = 0.5f * (data1[1] - data1[-1]); // first differences in x, y and across layers
+      float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+      float *data0 = d_Data0 + ptr - height * pitch; // same sample one layer below / above
+      float *data2 = d_Data0 + ptr + height * pitch;
+      float ds = 0.5f * (data0[0] - data2[0]);
+      float dss = 2.0f * val - data2[0] - data0[0];
+      float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+      float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+      float idxx = dyy * dss - dys * dys; // cofactors of the symmetric 3 x 3 matrix [dxx dxy dxs; dxy dyy dys; dxs dys dss]
+      float idxy = dys * dxs - dxy * dss;
+      float idxs = dxy * dys - dyy * dxs;
+      float idet = 1.0f / (idxx * dxx + idxy * dxy + idxs * dxs); // reciprocal of its determinant
+      float idyy = dxx * dss - dxs * dxs;
+      float idys = dxy * dxs - dxx * dys;
+      float idss = dxx * dyy - dxy * dxy;
+      float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); // offsets: inverse matrix applied to (dx, dy, ds)
+      float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+      float pds = idet * (idxs * dx + idys * dy + idss * ds);
+      if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) // any offset beyond half a step: fall back to independent per-axis estimates
+      {
+        pdx = dx / dxx;
+        pdy = dy / dyy;
+        pds = ds / dss;
+      }
+      float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+      int maxPts = d_MaxNumPoints;
+      float sc =
+          dpct::pow(2.0f, (float)scale / NUM_SCALES) * sycl::exp2(pds * factor);
+      if (sc >= lowestScale) // points below lowestScale are discarded
+      {
+        dpct::atomic_fetch_max<sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+        unsigned int idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 0], 0x7fffffff); // presumably returns the pre-increment counter value - confirm against the dpct helper
+        idx = (idx >= maxPts ? maxPts - 1 : idx); // when full, later points overwrite the last slot
+        d_Sift[idx].xpos = xpos + pdx;
+        d_Sift[idx].ypos = ypos + pdy;
+        d_Sift[idx].scale = sc;
+        d_Sift[idx].sharpness = val + dval;
+        d_Sift[idx].edgeness = edge;
+        d_Sift[idx].subsampling = subsampling;
+      }
+    }
+  }
+}
+
+// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID];
+//   __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID];
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     ymin1[tx] = fminf(fminf(d10, d11), d12);
+//     ymax1[tx] = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     ymin3[tx] = fminf(fminf(d30, d31), d32);
+//     ymax3[tx] = fmaxf(fmaxf(d30, d31), d32);
+//     ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]);
+//     ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]);
+
+//     __syncthreads();
+
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//     __syncthreads();
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+void LaplaceMultiTex(dpct::image_accessor_ext<float, 2> texObj, float *d_Result,
+                     int width, int pitch, int height, int octave,
+                     const sycl::nd_item<3> &item_ct1,
+                     float const *d_LaplaceKernel, float *data1, float *data2)
+{
+  // Per work-item: filter texObj vertically (9 taps) into data1, filter data1 horizontally (9 taps) into data2, then store the difference between adjacent scale rows of data2 in d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int xp = item_ct1.get_group(2) * LAPLACE_W + tx;
+  const int yp = item_ct1.get_group(1); // the group index in dimension 1 is used directly as the image row
+  const int scale = item_ct1.get_local_id(1); // the local id in dimension 1 selects the scale
+  float *kernel =
+      const_cast<float *>(d_LaplaceKernel + octave * 12 * 16 + scale * 16); // table stride: 16 floats per scale, 12 * 16 floats per octave
+  float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; // this scale's row of vertically filtered samples
+  float x = xp - 3.5; // i.e. (xp - 4) + 0.5; presumably the 0.5 addresses the texel centre - confirm against the sampler setup
+  float y = yp + 0.5;
+  sdata1[tx] = kernel[0] * texObj.read(x, y) + // kernel[0] weights the centre row, kernel[j] the two rows at distance j
+               kernel[1] * (texObj.read(x, y - 1.0) + texObj.read(x, y + 1.0)) +
+               kernel[2] * (texObj.read(x, y - 2.0) + texObj.read(x, y + 2.0)) +
+               kernel[3] * (texObj.read(x, y - 3.0) + texObj.read(x, y + 3.0)) +
+               kernel[4] * (texObj.read(x, y - 4.0) + texObj.read(x, y + 4.0));
+  item_ct1.barrier(sycl::access::fence_space::local_space); // all sdata1 writes must land before neighbouring entries are read below
+  float *sdata2 = data2 + LAPLACE_W * scale; // this scale's row of fully filtered samples
+  if (tx < LAPLACE_W) // work-items with tx >= LAPLACE_W only contributed sdata1 entries for the horizontal taps
+  {
+    sdata2[tx] = kernel[0] * sdata1[tx + 4] + // centre tap sits at tx + 4, matching the 4-column shift applied to x above
+                 kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) +
+                 kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+                 kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) +
+                 kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]);
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // the next statement reads the sdata2 row written for scale + 1
+  if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) // the last scale has no successor row to subtract
+    d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; // sdata2[tx + LAPLACE_W] is the same column in the row of scale + 1
+}
+
+/*
+DPCT1110:46: The total declared local variable size in device function
+LaplaceMultiMem exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+*/
+void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch,
+                     int height, int octave, const sycl::nd_item<3> &item_ct1,
+                     float const *d_LaplaceKernel, float *buff)
+{
+  // Per work-item: filter one column of d_Image vertically for every scale into buff, then filter buff horizontally and write the difference between consecutive scales to d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int xp = item_ct1.get_group(2) * LAPLACE_W + tx;
+  const int yp = item_ct1.get_group(1);
+  float *data = d_Image + sycl::max(sycl::min(xp - LAPLACE_R, width - 1),
+                                    0); // multiply with 4 for max func
+  float temp[2 * LAPLACE_R + 1];
+  // kern is a private per-work-item copy of the first LAPLACE_R + 1 coefficients of every scale; it is filled in the first phase and reused in the second.
+  float kern[LAPLACE_S][LAPLACE_R + 1];
+  if (xp < (width + 2 * LAPLACE_R))
+  {
+    for (int i = 0; i <= 2 * LAPLACE_R; i++) // gather the 2 * LAPLACE_R + 1 rows centred on yp, with the row index clamped to [0, height - 1]
+      temp[i] =
+          data[sycl::max(0, sycl::min(yp + i - LAPLACE_R, height - 1)) * pitch];
+    for (int scale = 0; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; // this scale's row of buff
+      float *kernel =
+          const_cast<float *>(d_LaplaceKernel + octave * 12 * 16 + scale * 16); // table stride: 16 floats per scale, 12 * 16 floats per octave
+      for (int i = 0; i <= LAPLACE_R; i++)
+      {
+        kern[scale][i] = kernel[i];
+      }
+      float sum = kern[scale][0] * temp[LAPLACE_R]; // centre tap; kern[scale][j] weights the two rows at distance j
+#pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]);
+      buf[tx] = sum;
+    }
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // reached by every work-item (it is outside the guard above); buff must be complete before neighbours are read
+  if (tx < LAPLACE_W && xp < (width + 2 * LAPLACE_R)) // NOTE(review): this admits xp >= width for the d_Result store below; presumably pitch leaves room for it - confirm
+  {
+    int scale = 0;
+    float oldRes = kern[scale][0] * buff[tx + LAPLACE_R];
+    // Horizontal pass for scale 0; its result only seeds oldRes and is never stored on its own.
+#pragma unroll
+    for (int j = 1; j <= LAPLACE_R; j++)
+      oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]);
+    // Each later scale is filtered horizontally the same way and differenced against the previous scale's result.
+    for (int scale = 1; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+      // Same symmetric taps as above, applied to this scale's row of buff.
+      float res = kern[scale][0] * buf[tx + LAPLACE_R];
+      // Loop bounds are compile-time constants, so the unroll request below can be honoured.
+#pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]);
+      // Output plane (scale - 1) receives res(scale) - res(scale - 1), giving LAPLACE_S - 1 planes of height * pitch floats.
+      d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes;
+      oldRes = res;
+    }
+  }
+}
+
+// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx;
+//   const int yp = blockIdx.y;
+//   float kern[LAPLACE_S][LAPLACE_R + 1];
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   float temp[9];
+//   if (xp < (width + 2 * LAPLACE_R))
+//   {
+//     for (int i = 0; i < 4; i++)
+//       temp[i] = data[max(0, min(yp + i - 4, height - 1)) * pitch];
+//     for (int i = 4; i < 8 + 1; i++)
+//       temp[i] = data[min(yp + i - 4, height - 1) * pitch];
+//     for (int scale = 0; scale < LAPLACE_S; scale++)
+//     {
+//       float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//       for (int i = 0; i <= LAPLACE_R; i++)
+//         kern[scale][i] = kernel[LAPLACE_R - i];
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       buf[tx] = kern[scale][4] * temp[4] +
+//                 kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) +
+//                 kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]);
+//     }
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W / 4 && xp4 < width)
+//   {
+//     float4 b0 = reinterpret_cast<float4 *>(buff)[tx + 0];
+//     float4 b1 = reinterpret_cast<float4 *>(buff)[tx + 1];
+//     float4 b2 = reinterpret_cast<float4 *>(buff)[tx + 2];
+//     float4 old4, new4, dif4;
+//     old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) +
+//              kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x);
+//     old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) +
+//              kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y);
+//     old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) +
+//              kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z);
+//     old4.w = kern[0][4] * b1.w + kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) +
+//              kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w);
+//     for (int scale = 1; scale < LAPLACE_S; scale++)
+//     {
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       float4 b0 = reinterpret_cast<float4 *>(buf)[tx + 0];
+//       float4 b1 = reinterpret_cast<float4 *>(buf)[tx + 1];
+//       float4 b2 = reinterpret_cast<float4 *>(buf)[tx + 2];
+//       new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) +
+//                kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) +
+//                kern[scale][0] * (b0.x + b2.x);
+//       new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) +
+//                kern[scale][2] * (b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) +
+//                kern[scale][0] * (b0.y + b2.y);
+//       new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) +
+//                kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) +
+//                kern[scale][0] * (b0.z + b2.z);
+//       new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) +
+//                kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) +
+//                kern[scale][0] * (b0.w + b2.w);
+//       dif4.x = new4.x - old4.x;
+//       dif4.y = new4.y - old4.y;
+//       dif4.z = new4.z - old4.z;
+//       dif4.w = new4.w - old4.w;
+//       reinterpret_cast<float4 *>(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4;
+//       old4 = new4;
+//     }
+//   }
+// }
+
+// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = LAPLACE_H * blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   float temp[8 + LAPLACE_H], kern[LAPLACE_R + 1];
+//   for (int i = 0; i < 4; i++)
+//     temp[i] = data[max(0, min(yp + i - 4, h)) * pitch];
+//   for (int i = 4; i < 8 + LAPLACE_H; i++)
+//     temp[i] = data[min(yp + i - 4, h) * pitch];
+//   for (int i = 0; i <= LAPLACE_R; i++)
+//     kern[i] = kernel[LAPLACE_R - i];
+//   for (int j = 0; j < LAPLACE_H; j++)
+//   {
+//     sdata1[tx] = kern[4] * temp[4 + j] +
+//                  kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) +
+//                  kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]);
+//     __syncthreads();
+//     float *sdata2 = data2 + LAPLACE_W * scale;
+//     if (tx < LAPLACE_W)
+//     {
+//       sdata2[tx] = kern[4] * sdata1[tx + 4] +
+//                    kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                    kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//     }
+//     __syncthreads();
+//     if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height)
+//       d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+//   }
+// }
+
+// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] +
+//                kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) +
+//                kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) +
+//                kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) +
+//                kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]);
+//   __syncthreads();
+//   float *sdata2 = data2 + LAPLACE_W * scale;
+//   if (tx < LAPLACE_W)
+//   {
+//     sdata2[tx] = kernel[0] * sdata1[tx + 4] +
+//                  kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) +
+//                  kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                  kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) +
+//                  kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width)
+//     d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+// }
+
+void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height,
+             const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,
+             float *buffer)
+{
+  // Per work-item: 9-tap vertical filter of d_Image into buffer, barrier, then 9-tap horizontal filter of buffer into d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  float *kernel = const_cast<float *>(d_LowPassKernel); // kernel[4] is the centre tap, kernel[4 - j] weights the samples at distance j
+  float *data = d_Image + sycl::max(sycl::min(xp - 4, width - 1), 0); // source column is xp - 4, clamped to [0, width - 1]
+  float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); // one row of buffer per ty
+  int h = height - 1; // last valid row, used to clamp the vertical taps
+  if (yp < height)
+    buff[tx] = kernel[4] * data[sycl::min(yp, h) * pitch] +
+               kernel[3] * (data[sycl::max(0, sycl::min(yp - 1, h)) * pitch] + // rows above and below are clamped to [0, h]
+                            data[sycl::min(yp + 1, h) * pitch]) +
+               kernel[2] * (data[sycl::max(0, sycl::min(yp - 2, h)) * pitch] +
+                            data[sycl::min(yp + 2, h) * pitch]) +
+               kernel[1] * (data[sycl::max(0, sycl::min(yp - 3, h)) * pitch] +
+                            data[sycl::min(yp + 3, h) * pitch]) +
+               kernel[0] * (data[sycl::max(0, sycl::min(yp - 4, h)) * pitch] +
+                            data[sycl::min(yp + 4, h) * pitch]);
+  item_ct1.barrier(sycl::access::fence_space::local_space); // reached by every work-item (outside the yp guard); buff must be complete before neighbours are read
+  if (tx < LOWPASS_W && xp < width && yp < height) // work-items with tx >= LOWPASS_W only contributed buff entries for the horizontal taps
+    d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] + // centre tap sits at tx + 4, matching the xp - 4 column shift used when loading
+                                kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) +
+                                kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]);
+}
+
+void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height,
+                     const sycl::nd_item<3> &item_ct1,
+                     float const *d_LowPassKernel,
+                     sycl::local_accessor<float, 2> xrows)
+{
+  // Walks the block's rows in steps of 4: each step filters one image row horizontally (via ShiftDown) into xrows, and once enough rows are stored, filters xrows vertically into d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  const int N = 16; // xrows row indices are taken modulo N
+  float *k = const_cast<float *>(d_LowPassKernel); // k[4] is the centre tap, k[4 - j] weights the samples at distance j
+  int xl = sycl::max(sycl::min(xp - 4, width - 1), 0); // source column is xp - 4, clamped to [0, width - 1]
+  for (int l = -8; l <= LOWPASS_H; l += 4)
+  {
+    if (l < LOWPASS_H) // load phase: skipped only on the final iteration (l == LOWPASS_H)
+    {
+      int yl = sycl::max(sycl::min(yp + l + 4, height - 1), 0); // source row, clamped to [0, height - 1]
+      float val = d_Image[yl * pitch + xl];
+      xrows[(l + 8 + ty) % N][tx] =
+          k[4] * ShiftDown(val, 4, item_ct1) + // ShiftDown is defined outside this block; presumably it returns val from the work-item that many lanes ahead - confirm
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    }
+    if (l >= 4) // store phase: starts on the first iteration with l >= 4
+    {
+      int ys = yp + l - 4; // output row produced in this iteration
+      if (xp < width && ys < height && tx < LOWPASS_W)
+        d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] + // vertical taps read xrows rows l - 4 + ty .. l + 4 + ty, modulo N
+                                    k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) +
+                                    k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) +
+                                    k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) +
+                                    k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]);
+    }
+    if (l >= 0) // l depends only on the loop counter, so this condition is identical for every work-item
+      /*
+      DPCT1118:47: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+}
+
+void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height,
+                  const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,
+                  sycl::local_accessor<float, 2> xrows)
+{
+  // Three stages: preload three horizontally filtered rows into xrows, then loop (load one more row + emit one output row), then emit the final output row after the loop.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  const int N = 16; // xrows row indices are taken modulo N in the main loop and the tail
+  float *k = const_cast<float *>(d_LowPassKernel); // k[4] is the centre tap, k[4 - j] weights the samples at distance j
+  int xl = sycl::max(sycl::min(xp - 4, width - 1), 0); // source column is xp - 4, clamped to [0, width - 1]
+#pragma unroll
+  for (int l = -8; l < 4; l += 4) // preload: l = -8, -4, 0
+  {
+    int ly = l + ty;
+    int yl = sycl::max(sycl::min(yp + l + 4, height - 1), 0); // source row clamped at both ends; yp + l + 4 can be negative here
+    float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x
+    val = k[4] * ShiftDown(val, 4, item_ct1) + // ShiftDown is defined outside this block; presumably it returns val from the work-item that many lanes ahead - confirm
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    xrows[ly + 8][tx] = val; // ly + 8 is ty, ty + 4 or ty + 8 here, so no modulo is applied
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // preloaded rows must be visible before the first vertical pass
+#pragma unroll
+  for (int l = 4; l < LOWPASS_H; l += 4)
+  {
+    int ly = l + ty;
+    int yl = sycl::min(yp + l + 4, height - 1); // only the upper clamp is applied; l >= 4 in this loop
+    float val = d_Image[yl * pitch + xl];
+    val = k[4] * ShiftDown(val, 4, item_ct1) +
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    xrows[(ly + 8) % N][tx] = val; // stored at row ly + 8; the vertical taps below read rows ly - 4 .. ly + 4
+    int ys = yp + l - 4; // output row produced in this iteration
+    if (xp < width && ys < height && tx < LOWPASS_W)
+      d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                  k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                  k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                  k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                  k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+    /*
+    DPCT1118:48: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    item_ct1.barrier(sycl::access::fence_space::local_space); // sits outside the store guard above; the loop bounds do not depend on the work-item
+  }
+  int ly = LOWPASS_H + ty; // tail: same vertical pass as in the loop, evaluated for l == LOWPASS_H with no further load
+  int ys = yp + LOWPASS_H - 4;
+  if (xp < width && ys < height && tx < LOWPASS_W)
+    d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h
new file mode 100644
index 000000000..52fd52aa4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h
@@ -0,0 +1,80 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTD_H
+#define CUDASIFTD_H
+
+#define NUM_SCALES 5
+
+// Scale down thread block width
+#define SCALEDOWN_W 64 // 60
+
+// Scale down thread block height
+#define SCALEDOWN_H 16 // 8
+
+// Scale up thread block width
+#define SCALEUP_W 64
+
+// Scale up thread block height
+#define SCALEUP_H 8
+
+// Find point thread block width
+#define MINMAX_W 30 // 32
+
+// Find point thread block height
+#define MINMAX_H 8 // 16
+
+// Laplace thread block width
+#define LAPLACE_W 128 // 56
+
+// Laplace rows per thread
+#define LAPLACE_H 4
+
+// Number of laplace scales
+#define LAPLACE_S (NUM_SCALES + 3)
+
+// Laplace filter kernel radius
+#define LAPLACE_R 4
+
+#define LOWPASS_W 24 // 56
+#define LOWPASS_H 32 // 16
+#define LOWPASS_R 4
+
+//====================== Number of threads ====================//
+// ScaleDown:               SCALEDOWN_W + 4
+// LaplaceMulti:            (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S
+// FindPointsMulti:         MINMAX_W + 2
+// ComputeOrientations:     256
+// ExtractSiftDescriptors:  128 (16 x 8)
+
+//====================== Number of blocks ====================//
+// ScaleDown:               (width/SCALEDOWN_W) * (height/SCALEDOWN_H)
+// LaplaceMulti:            (width+2*LAPLACE_R)/LAPLACE_W * height
+// FindPointsMulti:         (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H)
+// ComputeOrientations:     512 (fixed; not numpts)
+// ExtractSiftDescriptors:  512 (fixed; not numpts)
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp
new file mode 100644
index 000000000..a07783ffc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp
@@ -0,0 +1,878 @@
+//********************************************************//
+// CUDA SIFT extractor by Mårten Björkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <iostream>
+#include <algorithm>
+#include <chrono>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+#include "cudaSiftD.h"
+#include "cudaSiftH.h"
+
+#include "cudaSiftD.dp.cpp"
+
+void InitCuda(int devNum) // Selects device devNum (clamped to a valid index) and prints its properties.
+{
+  int nDevices = 0; // stays 0 ("no devices") if the count query below does not assign it
+  safeCall(
+      DPCT_CHECK_ERROR(nDevices = dpct::dev_mgr::instance().device_count()));
+  if (!nDevices)
+  {
+    std::cerr << "No CUDA devices available" << std::endl;
+    return;
+  }
+  devNum = std::max(0, std::min(nDevices - 1, devNum)); // clamp to [0, nDevices - 1]; a negative index is not a valid device id
+  deviceInit(devNum);
+  dpct::device_info prop;
+  safeCall(DPCT_CHECK_ERROR(dpct::get_device_info(
+      prop, dpct::dev_mgr::instance().get_device(devNum))));
+  printf("Device Number: %d\n", devNum);
+  printf("  Device name: %s\n", prop.get_name());
+  printf("  Memory Clock Rate (MHz): %d\n",
+         prop.get_memory_clock_rate() / 1000); // reported rate divided by 1000 for display
+  printf("  Clock Freq (MHz): %d\n", prop.get_max_clock_frequency() / 1000);
+  printf("  Memory Bus Width (bits): %d\n", prop.get_memory_bus_width());
+  printf("  Peak Memory Bandwidth (GB/s): %.1f\n\n",
+         2.0 * prop.get_memory_clock_rate() * // factor 2.0 presumably accounts for double data rate - TODO confirm
+             (prop.get_memory_bus_width() / 8) / 1.0e6); // bus width converted from bits to bytes
+}
+
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp) // Allocates one scratch buffer sized for every octave image plus its Laplace buffers and returns it.
+{
+  const int nd = NUM_SCALES + 3; // number of Laplace buffers counted per octave
+  int w = width * (scaleUp ? 2 : 1); // dimensions are doubled when scaleUp is set
+  int h = height * (scaleUp ? 2 : 1);
+  int p = iAlignUp(w, 128); // row pitch in floats: width aligned up to 128
+  int size = h * p;         // image sizes
+  int sizeTmp = nd * h * p; // laplace buffer sizes
+  for (int i = 0; i < numOctaves; i++) // add the halved image of every further octave
+  {
+    w /= 2;
+    h /= 2;
+    int p = iAlignUp(w, 128); // shadows the outer p on purpose: pitch of this octave
+    size += h * p;
+    sizeTmp += nd * h * p;
+  }
+  float *memoryTmp = NULL;
+  size_t pitch; // output of the pitched allocation; not used afterwards
+  size += sizeTmp; // total float count: images + Laplace buffers
+
+#ifdef DEVICE_TIMER
+  auto start_malloc = std::chrono::steady_clock::now();
+#endif
+  safeCall(DPCT_CHECK_ERROR(
+      memoryTmp = (float *)dpct::dpct_malloc(
+          pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))); // 4096-byte-wide rows, enough rows to hold `size` floats
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+  auto stop_malloc = std::chrono::steady_clock::now();
+  time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count(); // accumulated in microseconds
+#endif
+  return memoryTmp;
+}
+
+void FreeSiftTempMemory(float *memoryTmp) // Releases a scratch buffer through sycl::free; a null pointer is ignored.
+{
+  if (memoryTmp)
+    safeCall(
+        DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())));
+}
+
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur,
+                 float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory)
+{ // Host entry point: runs the SIFT pipeline on img and stores at most siftData.maxPts points in siftData. totTime accumulates microseconds when DEVICE_TIMER is defined.
+  unsigned int *d_PointCounterAddr;
+#ifdef DEVICE_TIMER
+  auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+  safeCall(DPCT_CHECK_ERROR(*((void **)&d_PointCounterAddr) =
+                                d_PointCounter.get_ptr()));
+  safeCall(DPCT_CHECK_ERROR(
+      dpct::get_in_order_queue()
+          .memset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int)) // zero all 8 * 2 + 1 point counters
+          .wait()));
+  safeCall(DPCT_CHECK_ERROR(
+      dpct::get_in_order_queue()
+          .memcpy(d_MaxNumPoints.get_ptr(), &siftData.maxPts, sizeof(int)) // publish the point capacity to the device
+          .wait()));
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+  auto stop_memcpy = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  const int nd = NUM_SCALES + 3;
+  int w = img.width * (scaleUp ? 2 : 1); // the scaleUp path works on a 2x image
+  int h = img.height * (scaleUp ? 2 : 1);
+  int p = iAlignUp(w, 128);
+  int width = w, height = h; // keep the full-resolution size; w and h are halved below
+  int size = h * p;         // image sizes
+  int sizeTmp = nd * h * p; // laplace buffer sizes
+  for (int i = 0; i < numOctaves; i++) // same size computation as AllocSiftTempMemory
+  {
+    w /= 2;
+    h /= 2;
+    int p = iAlignUp(w, 128);
+    size += h * p;
+    sizeTmp += nd * h * p;
+  }
+  float *memoryTmp = tempMemory; // use the caller-provided scratch buffer when given
+  size += sizeTmp;
+  if (!tempMemory) // otherwise allocate scratch here; it is freed again before returning
+  {
+    size_t pitch;
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(
+        memoryTmp = (float *)dpct::dpct_malloc(
+            pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float))));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  }
+  float *memorySub = memoryTmp + sizeTmp; // image storage starts after the sizeTmp floats of Laplace buffers
+
+  CudaImage lowImg;
+  lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub);
+  if (!scaleUp)
+  {
+    float kernel[8 * 12 * 16]; // host staging buffer for the Laplace kernels
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+#ifdef DEVICE_TIMER
+    auto start_memcpy1 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(d_LaplaceKernel.get_ptr(), kernel,
+                                          8 * 12 * 16 * sizeof(float))
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy1 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy1 - start_memcpy1).count();
+#endif
+    LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime); // blur is floored at 0.001f
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime); // sub-images are placed after lowImg
+#ifdef DEVICE_TIMER
+    auto start_memcpy2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(&siftData.numPts,
+                                          &d_PointCounterAddr[2 * numOctaves], // counter slot read back as the point total
+                                          sizeof(int))
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy2 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy2 - start_memcpy2).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); // clamp to capacity
+  }
+  else
+  {
+    CudaImage upImg;
+    upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp); // upImg borrows the start of the scratch buffer
+    ScaleUp(upImg, img, totTime);
+    LowPass(lowImg, upImg, dpct::max(initBlur, 0.001f), totTime);
+    float kernel[8 * 12 * 16];
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+#ifdef DEVICE_TIMER
+    auto start_memcpy3 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(d_LaplaceKernel.get_ptr(), kernel,
+                                          8 * 12 * 16 * sizeof(float))
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy3 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy3 - start_memcpy3).count();
+#endif
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime);
+#ifdef DEVICE_TIMER
+    auto start_memcpy4 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(&siftData.numPts,
+                                          &d_PointCounterAddr[2 * numOctaves],
+                                          sizeof(int))
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy4 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy4 - start_memcpy4).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); // clamp to capacity
+    RescalePositions(siftData, 0.5f, totTime); // points were found on the 2x image; scale them by 0.5f
+  }
+
+  if (!tempMemory) // only free scratch that was allocated in this call
+    safeCall(
+        DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())));
+  if (siftData.h_data) // mirror the device points into the host buffer when one exists
+  {
+#ifdef DEVICE_TIMER
+    auto start_memcpy5 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(siftData.h_data, siftData.d_data,
+                                          sizeof(SiftPoint) * siftData.numPts)
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy5 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy5 - start_memcpy5).count();
+    printf("Total time for sift extraction =  %.2f us\n\n", totTime);
+#endif
+  }
+  printf("Number of Points after sift extraction =  %d\n\n", siftData.numPts);
+}
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,
+                    float subsampling, float *memoryTmp, float *memorySub, float &totTime)
+{ // Recursively processes the octaves: the half-size image is handled first, then this octave. Always returns 0.
+  int w = img.width;
+  int h = img.height;
+  if (numOctaves > 1) // recursion ends when a single octave is left
+  {
+    CudaImage subImg;
+    int p = iAlignUp(w / 2, 128);
+    subImg.Allocate(w / 2, h / 2, p, false, totTime, memorySub); // half-size image placed at memorySub
+    ScaleDown(subImg, img, 0.5f, totTime);
+    float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; // blur passed on to the half-size octave
+    ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale, subsampling * 2.0f,
+                    memoryTmp, memorySub + (h / 2) * p, totTime); // next level stores its image right after subImg
+  }
+  ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime);
+  return 0;
+}
+
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh,
+                       float lowestScale, float subsampling, float *memoryTmp, float &totTime)
+{ // Runs the per-octave pipeline on img: Laplace images, point detection, orientations, descriptors.
+  const int nd = NUM_SCALES + 3;
+  CudaImage diffImg[nd];
+  int w = img.width;
+  int h = img.height;
+  int p = iAlignUp(w, 128); // row pitch in floats
+  for (int i = 0; i < nd - 1; i++) // only the first nd - 1 images get storage, carved out of memoryTmp
+    diffImg[i].Allocate(w, h, p, false, totTime, memoryTmp + i * p * h);
+
+  LaplaceMulti(img, diffImg, octave, totTime);
+  FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES,
+                  lowestScale / subsampling, subsampling, octave,
+                  totTime);
+  ComputeOrientations(img, siftData, octave, totTime);
+  ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime);
+}
+
+void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev) // Resets data and allocates room for num points on the host and/or the device.
+{
+  data.numPts = 0;
+  data.maxPts = num;
+  size_t sz = sizeof(SiftPoint) * num; // byte count kept in size_t so it is not truncated to int
+  data.h_data = NULL;
+  if (host)
+    data.h_data = (SiftPoint *)malloc(sz);
+  data.d_data = NULL;
+  if (dev)
+  {
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(data.d_data = (SiftPoint *)sycl::malloc_device(
+                                  sz, dpct::get_in_order_queue())));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count(); // accumulated in microseconds
+#endif
+  }
+}
+
+void FreeSiftData(SiftData &data) // Releases the device and host point buffers and resets data to an empty state.
+{
+  if (data.d_data != NULL)
+    safeCall(
+        DPCT_CHECK_ERROR(sycl::free(data.d_data, dpct::get_in_order_queue())));
+  data.d_data = NULL;
+  free(data.h_data); // free(NULL) is a no-op, so no guard is needed
+  data.h_data = NULL; // reset like d_data so a second call cannot double-free
+  data.numPts = 0;
+  data.maxPts = 0;
+}
+
+void PrintSiftData(SiftData &data) // Prints every point in data to stdout, copying the points from the device first if there is no host buffer.
+{
+  SiftPoint *h_data = data.h_data;
+  if (data.h_data == NULL)
+  {
+    h_data = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts); // sized for the full capacity; only numPts entries are copied
+    safeCall(DPCT_CHECK_ERROR(
+        dpct::get_in_order_queue()
+            .memcpy(h_data, data.d_data, sizeof(SiftPoint) * data.numPts)
+            .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+    data.h_data = h_data; // the new host buffer is kept in data, not freed here
+  }
+  for (int i = 0; i < data.numPts; i++)
+  {
+    printf("xpos         = %.2f\n", h_data[i].xpos);
+    printf("ypos         = %.2f\n", h_data[i].ypos);
+    printf("scale        = %.2f\n", h_data[i].scale);
+    printf("sharpness    = %.2f\n", h_data[i].sharpness);
+    printf("edgeness     = %.2f\n", h_data[i].edgeness);
+    printf("orientation  = %.2f\n", h_data[i].orientation);
+    printf("score        = %.2f\n", h_data[i].score);
+    float *siftData = (float *)&h_data[i].data;
+    for (int j = 0; j < 8; j++) // descriptor printed as 8 rows of 16 values
+    {
+      if (j == 0)
+        printf("data = ");
+      else
+        printf("       ");
+      for (int k = 0; k < 16; k++)
+        if (siftData[j + 8 * k] < 0.05) // values below 0.05 are shown as a dot
+          printf(" .   ");
+        else
+          printf("%.2f ", siftData[j + 8 * k]);
+      printf("\n");
+    }
+  }
+  printf("Number of available points: %d\n", data.numPts);
+  printf("Number of allocated points: %d\n", data.maxPts);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Host side master functions
+///////////////////////////////////////////////////////////////////////////////
+
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime) // Host launcher for the ScaleDown kernel (src -> res); always returns 0.0.
+{
+  static float oldVariance = -1.0f; // last variance uploaded; the kernel is re-uploaded only when it changes
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleDown: missing data\n");
+    return 0.0;
+  }
+  if (oldVariance != variance)
+  {
+    float h_Kernel[5];
+    float kernelSum = 0.0f;
+    for (int j = 0; j < 5; j++)
+    {
+      h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); // 5 Gaussian weights centred on j == 2
+      kernelSum += h_Kernel[j];
+    }
+    for (int j = 0; j < 5; j++)
+      h_Kernel[j] /= kernelSum; // normalise so the weights sum to 1
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(
+        dpct::get_in_order_queue()
+            .memcpy(d_ScaleDownKernel.get_ptr(), h_Kernel, 5 * sizeof(float))
+            .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+    oldVariance = variance;
+  }
+#if 0
+  dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H));
+  dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4);
+  ScaleDownDenseShift<<<blocks, threads>>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+#else
+  sycl::range<3> blocks(1, iDivUp(src.height, SCALEDOWN_H),
+                        iDivUp(src.width, SCALEDOWN_W)); // one work-group per SCALEDOWN_W x SCALEDOWN_H tile of src
+  sycl::range<3> threads(1, 1, SCALEDOWN_W + 4);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_ScaleDownKernel.init();
+
+    auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr();
+
+    /*
+    DPCT1101:214: 'SCALEDOWN_W + 4' expression was replaced with a value.
+    Modify the code to use the original expression, provided in comments, if
+    it is correct.
+    */
+    sycl::local_accessor<float, 1> inrow_acc_ct1(
+        sycl::range<1>(68 /*SCALEDOWN_W + 4*/), cgh);
+    /*
+    DPCT1101:215: '5 * (SCALEDOWN_W / 2)' expression was replaced with a
+    value. Modify the code to use the original expression, provided in
+    comments, if it is correct.
+    */
+    sycl::local_accessor<float, 1> brow_acc_ct1(
+        sycl::range<1>(160 /*5 * (SCALEDOWN_W / 2)*/), cgh);
+    /*
+    DPCT1101:216: 'SCALEDOWN_H + 4' expression was replaced with a value.
+    Modify the code to use the original expression, provided in comments, if
+    it is correct.
+    */
+    sycl::local_accessor<int, 1> yRead_acc_ct1(
+        sycl::range<1>(20 /*SCALEDOWN_H + 4*/), cgh);
+    /*
+    DPCT1101:217: 'SCALEDOWN_H + 4' expression was replaced with a value.
+    Modify the code to use the original expression, provided in comments, if
+    it is correct.
+    */
+    sycl::local_accessor<int, 1> yWrite_acc_ct1(
+        sycl::range<1>(20 /*SCALEDOWN_H + 4*/), cgh);
+
+    float *res_d_data_ct0 = res.d_data; // plain copies so the device lambda captures values, not the CudaImage references
+    float *src_d_data_ct1 = src.d_data;
+    int src_width_ct2 = src.width;
+    int src_pitch_ct3 = src.pitch;
+    int src_height_ct4 = src.height;
+    int res_pitch_ct5 = res.pitch;
+
+    cgh.parallel_for(
+        sycl::nd_range<3>(blocks * threads, threads),
+        [=](sycl::nd_item<3> item_ct1) {
+          ScaleDown(res_d_data_ct0, src_d_data_ct1, src_width_ct2,
+                    src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1,
+                    d_ScaleDownKernel_ptr_ct1, inrow_acc_ct1.get_pointer(),
+                    brow_acc_ct1.get_pointer(), yRead_acc_ct1.get_pointer(),
+                    yWrite_acc_ct1.get_pointer());
+        });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ScaleDown time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+#endif
+  checkMsg("ScaleDown() execution failed\n");
+  return 0.0;
+}
+
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime) // Host launcher for the ScaleUp kernel (src -> res); always returns 0.0.
+{
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleUp: missing data\n");
+    return 0.0;
+  }
+  sycl::range<3> blocks(1, iDivUp(res.height, SCALEUP_H),
+                        iDivUp(res.width, SCALEUP_W)); // one work-group per SCALEUP_W x SCALEUP_H tile of res
+  sycl::range<3> threads(1, SCALEUP_H / 2, SCALEUP_W / 2); // half a tile per dimension in work-items
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    float *res_d_data_ct0 = res.d_data; // plain copies so the device lambda captures values, not the CudaImage references
+    float *src_d_data_ct1 = src.d_data;
+    int src_width_ct2 = src.width;
+    int src_pitch_ct3 = src.pitch;
+    int src_height_ct4 = src.height;
+    int res_pitch_ct5 = res.pitch;
+
+    cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                     [=](sycl::nd_item<3> item_ct1) {
+                       ScaleUp(res_d_data_ct0, src_d_data_ct1, src_width_ct2,
+                               src_pitch_ct3, src_height_ct4, res_pitch_ct5,
+                               item_ct1);
+                     });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ScaleUp time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("ScaleUp() execution failed\n");
+  return 0.0;
+}
+
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime) // Host launcher for ComputeOrientationsCONSTNew on src and siftData.d_data; always returns 0.0.
+{
+  sycl::range<3> blocks(1, 1, 512); // fixed launch size: 512 work-groups
+  sycl::range<3> threads(1, 1, 256); // of 256 work-items each
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_MaxNumPoints.init();
+    d_PointCounter.init();
+
+    auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();
+    auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();
+
+    /*
+    DPCT1101:218: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    /*
+    DPCT1101:219: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    sycl::local_accessor<float, 2> img_acc_ct1(
+        sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh);
+    /*
+    DPCT1101:220: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    /*
+    DPCT1101:221: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    sycl::local_accessor<float, 2> tmp_acc_ct1(
+        sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh);
+    /*
+    DPCT1101:222: '2 * LEN' expression was replaced with a value. Modify the
+    code to use the original expression, provided in comments, if it is
+    correct.
+    */
+    sycl::local_accessor<float, 1> hist_acc_ct1(sycl::range<1>(64 /*2 * LEN*/),
+                                                cgh);
+    /*
+    DPCT1101:223: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    sycl::local_accessor<float, 1> gaussx_acc_ct1(sycl::range<1>(19 /*WID*/),
+                                                  cgh);
+    /*
+    DPCT1101:224: 'WID' expression was replaced with a value. Modify the code
+    to use the original expression, provided in comments, if it is correct.
+    */
+    sycl::local_accessor<float, 1> gaussy_acc_ct1(sycl::range<1>(19 /*WID*/),
+                                                  cgh);
+
+    float *src_d_data_ct0 = src.d_data; // plain copies so the device lambda captures values, not the CudaImage reference
+    int src_width_ct1 = src.width;
+    int src_pitch_ct2 = src.pitch;
+    int src_height_ct3 = src.height;
+
+    cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                     [=](sycl::nd_item<3> item_ct1) {
+                       ComputeOrientationsCONSTNew(
+                           src_d_data_ct0, src_width_ct1, src_pitch_ct2,
+                           src_height_ct3, siftData.d_data, octave, item_ct1, // siftData is captured by value; only d_data is read
+                           *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1,
+                           img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(),
+                           gaussx_acc_ct1.get_pointer(),
+                           gaussy_acc_ct1.get_pointer());
+                     });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ComputeOrientationsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("ComputeOrientations() execution failed\n");
+  return 0.0;
+}
+
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime) // Host launcher for ExtractSiftDescriptorsCONSTNew on texObj and siftData.d_data; always returns 0.0.
+{
+  sycl::range<3> blocks(1, 1, 512); // fixed launch size: 512 work-groups
+  sycl::range<3> threads(1, 8, 16); // of 8 x 16 = 128 work-items each
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_MaxNumPoints.init();
+    d_PointCounter.init();
+
+    auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();
+    auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();
+
+    sycl::local_accessor<float, 1> gauss_acc_ct1(sycl::range<1>(16), cgh); // work-group local scratch arrays handed to the kernel
+    sycl::local_accessor<float, 1> buffer_acc_ct1(sycl::range<1>(128), cgh);
+    sycl::local_accessor<float, 1> sums_acc_ct1(sycl::range<1>(4), cgh);
+
+    cgh.parallel_for(
+        sycl::nd_range<3>(blocks * threads, threads),
+        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { // kernel requires a sub-group size of 32
+          ExtractSiftDescriptorsCONSTNew(
+              texObj, pitch, siftData.d_data, subsampling, octave, item_ct1, // siftData is captured by value; only d_data is read
+              *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1,
+              gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(),
+              sums_acc_ct1.get_pointer());
+        });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ExtractSiftDescriptorsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("ExtractSiftDescriptors() execution failed\n");
+  return 0.0;
+}
+double RescalePositions(SiftData &siftData, float scale, float &totTime) // Host launcher for the RescalePositions kernel over the siftData.numPts points in siftData.d_data; always returns 0.0.
+{
+  sycl::range<3> blocks(1, 1, iDivUp(siftData.numPts, 64)); // enough 64-wide work-groups to cover numPts
+  sycl::range<3> threads(1, 1, 64);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().parallel_for(
+      sycl::nd_range<3>(blocks * threads, threads),
+      [=](sycl::nd_item<3> item_ct1) {
+        RescalePositions(siftData.d_data, siftData.numPts, scale, item_ct1);
+      });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("RescalePositions time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("RescalePositions() execution failed\n"); // message now names this function correctly
+  return 0.0;
+}
+
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime) // Host launcher for LowPassBlockOld (src -> res); always returns 0.0.
+{
+  float kernel[2 * LOWPASS_R + 1]; // only filled and read inside the branch below
+  static float oldScale = -1.0f; // last scale uploaded; the kernel is re-uploaded only when it changes
+  if (scale != oldScale)
+  {
+    float kernelSum = 0.0f;
+    float ivar2 = 1.0f / (2.0f * scale * scale);
+    for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+    {
+      kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); // Gaussian weight for offset j
+      kernelSum += kernel[j + LOWPASS_R];
+    }
+    for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+      kernel[j + LOWPASS_R] /= kernelSum; // normalise so the weights sum to 1
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(d_LowPassKernel.get_ptr(), kernel,
+                                          (2 * LOWPASS_R + 1) * sizeof(float))
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+    oldScale = scale;
+  }
+  int width = res.width;
+  int pitch = res.pitch;
+  int height = res.height;
+  sycl::range<3> blocks(1, iDivUp(height, LOWPASS_H),
+                        iDivUp(width, LOWPASS_W)); //[80,34,1]
+
+  sycl::range<3> threads(1, 4, LOWPASS_W + 2 * LOWPASS_R); //[32,4,1]
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_LowPassKernel.init();
+
+    auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr();
+
+    sycl::local_accessor<float, 2> xrows_acc_ct1(sycl::range<2>(16, 32), cgh); // 16 x 32 floats of work-group local storage
+
+    float *src_d_data_ct0 = src.d_data;
+    float *res_d_data_ct1 = res.d_data;
+
+    cgh.parallel_for(
+        sycl::nd_range<3>(blocks * threads, threads),
+        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { // kernel requires a sub-group size of 32
+          LowPassBlockOld(src_d_data_ct0, res_d_data_ct1, width, pitch, height,
+                          item_ct1, d_LowPassKernel_ptr_ct1, xrows_acc_ct1);
+        });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LowPassBlock time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("LowPass() execution failed\n");
+  return 0.0;
+}
+
+//==================== Multi-scale functions ===================//
+
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel)
+{
+  // Recurse first so the tables of the lower octave indices get written too.
+  if (numOctaves > 1)
+    PrepareLaplaceKernels(numOctaves - 1, (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f, kernel);
+  const float scaleStep = pow(2.0f, 1.0f / NUM_SCALES);
+  float curScale = pow(2.0f, -1.0f / NUM_SCALES);
+  for (int s = 0; s < NUM_SCALES + 3; s++, curScale *= scaleStep)
+  {
+    // Row s of this octave's table: octave stride 12 * 16 floats, row stride 16.
+    float *row = kernel + (numOctaves * 12 * 16 + 16 * s);
+    const float variance = curScale * curScale - initBlur * initBlur;
+    float weightSum = 0.0f;
+    for (int t = 0; t <= LAPLACE_R; t++)
+    {
+      row[t] = (float)expf(-(double)t * t / 2.0 / variance);
+      // Non-centre taps count twice (presumably one half of a symmetric kernel is stored).
+      weightSum += (t == 0 ? 1 : 2) * row[t];
+    }
+    for (int t = 0; t <= LAPLACE_R; t++)
+      row[t] /= weightSum;
+  }
+}
+// Launches LaplaceMultiMem on baseImage and results[0] for the given octave; always returns 0.0.
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime)
+{
+  int width = results[0].width; // launch geometry is taken from the first result image
+  int pitch = results[0].pitch;
+  int height = results[0].height;
+#if 1
+  sycl::range<3> threads(1, 1, LAPLACE_W + 2 * LAPLACE_R);    //(136)
+  sycl::range<3> blocks(1, height, iDivUp(width, LAPLACE_W)); //(15)
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_LaplaceKernel.init();
+
+    auto d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr();
+
+    /*
+    DPCT1101:226: '(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S' expression was
+    replaced with a value. Modify the code to use the original expression,
+    provided in comments, if it is correct.
+    */
+    sycl::local_accessor<float, 1> buff_acc_ct1(
+        sycl::range<1>(1088 /*(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S*/), cgh);
+
+    float *baseImage_d_data_ct0 = baseImage.d_data; // raw pointers copied so the kernel lambda can capture them by value
+    float *results_d_data_ct1 = results[0].d_data; // only results[0]'s buffer is handed to the kernel
+
+    cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                     [=](sycl::nd_item<3> item_ct1) {
+                       LaplaceMultiMem(baseImage_d_data_ct0, results_d_data_ct1,
+                                       width, pitch, height, octave, item_ct1,
+                                       d_LaplaceKernel_ptr_ct1,
+                                       buff_acc_ct1.get_pointer());
+                     });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); // block until the kernel has finished
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LaplaceMultiMem time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit + wait, in microseconds
+#endif
+#endif
+  checkMsg("LaplaceMulti() execution failed\n");
+  return 0.0;
+}
+
+// Launches FindPointsMultiNew on sources->d_data and siftData.d_data; always returns 0.0.
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime)
+{
+  if (sources->d_data == NULL)
+  {
+    printf("FindPointsMulti: missing data\n");
+    return 0.0;
+  }
+  int w = sources->width;
+  int p = sources->pitch;
+  int h = sources->height;
+#if 1
+  sycl::range<3> blocks(1, iDivUp(h, MINMAX_H),
+                        iDivUp(w, MINMAX_W) * NUM_SCALES);
+  sycl::range<3> threads(1, 1, MINMAX_W + 2);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+    d_MaxNumPoints.init();
+    d_PointCounter.init();
+
+    auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();
+    auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();
+
+    // DPCT1101:227: 64 stands in for the original '2 * MEMWID' expression.
+    sycl::local_accessor<unsigned short, 1> points_acc_ct1(
+        sycl::range<1>(64 /*2 * MEMWID*/), cgh);
+
+    // Hoist the data pointers into locals so the [=] kernel lambda captures
+    // just the pointers instead of a by-value copy of the whole SiftData.
+    float *sources_d_data_ct0 = sources->d_data;
+    auto siftData_d_data_ct1 = siftData.d_data;
+
+    cgh.parallel_for(
+        sycl::nd_range<3>(blocks * threads, threads),
+        [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {
+          FindPointsMultiNew(sources_d_data_ct0, siftData_d_data_ct1, w, p, h,
+                             subsampling, lowestScale, thresh, factor,
+                             edgeLimit, octave, item_ct1,
+                             *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1,
+                             points_acc_ct1.get_pointer());
+        });
+  });
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("FindPointsMultiNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count())
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+#endif
+  checkMsg("FindPointsMulti() execution failed\n");
+  return 0.0;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o
new file mode 100644
index 000000000..bf1f3616c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h
new file mode 100644
index 000000000..95e8384ec
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h
@@ -0,0 +1,50 @@
+
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTH_H
+#define CUDASIFTH_H
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                    float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime);
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale,
+                       float subsampling, float *memoryTmp, float &totTime);
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime);
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime);
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime);
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime);
+double RescalePositions(SiftData &siftData, float scale, float &totTime);
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime);
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel);
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime);
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift
new file mode 100755
index 000000000..1b6f89c3d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h
new file mode 100644
index 000000000..28a5ce756
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h
@@ -0,0 +1,209 @@
+#ifndef CUDAUTILS_H
+#define CUDAUTILS_H
+
+#define DPCT_COMPAT_RT_VERSION 12020
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <cstdio>
+#include <iostream>
+#include <chrono>
+
+#ifdef WIN32
+#include <intrin.h>
+#endif
+
+#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
+#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__)
+#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
+
+inline void __safeCall(dpct::err0 err, const char *file, const int line)
+{
+  // DPCT_CHECK_ERROR yields 0 on success and non-zero when the call threw.
+  if (err == 0)
+    return;
+  fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i.\n", file, line);
+  std::exit(-1);
+}
+
+inline void __safeThreadSync(const char *file, const int line) try {
+  __safeCall(DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()), file, line);
+}
+catch (sycl::exception const &exc) {
+  std::cerr << exc.what() << "Exception caught at file:" << file
+            << ", line:" << line << std::endl;
+  std::exit(1);
+}
+
+inline void __checkMsg(const char *errorMessage, const char *file, const int line)
+{
+  // DPCT1010:86: intentionally a no-op; SYCL reports errors via exceptions, not a last-error code.
+}
+
+// Makes device `dev` (clamped to the valid index range) the current dpct
+// device. Returns false when no device, or no usable device, is reported.
+inline bool deviceInit(int dev)
+{
+  int numDevices;
+  safeCall(
+      DPCT_CHECK_ERROR(numDevices = dpct::dev_mgr::instance().device_count()));
+  if (numDevices == 0)
+  {
+    fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
+    return false;
+  }
+  dev = (dev < 0) ? 0 : dev;
+  dev = (dev > numDevices - 1) ? numDevices - 1 : dev;
+
+  dpct::device_info props;
+  safeCall(DPCT_CHECK_ERROR(
+      dpct::get_device_info(props, dpct::dev_mgr::instance().get_device(dev))));
+  /*
+  DPCT1005:88: The SYCL device version is different from CUDA Compute
+  Compatibility. You may need to rewrite this code.
+  */
+  const bool unsupported = props.get_major_version() < 1;
+  if (unsupported)
+  {
+    fprintf(stderr, "error: device does not support CUDA.\n");
+    return false;
+  }
+  // DPCT1093:89: "dev" may not be the device intended for use; adjust the
+  // selection here if needed.
+  safeCall(DPCT_CHECK_ERROR(dpct::select_device(dev)));
+  return true;
+}
+
+// Stopwatch for work on a SYCL queue: started by the constructor; read() waits
+// for the queue and returns the elapsed host time in milliseconds.
+class TimerGPU
+{
+public:
+  // NOTE: start/stop are raw pointers released via dpct::destroy_event in the
+  // destructor, so a TimerGPU must not be copied.
+  dpct::event_ptr start, stop;
+  std::chrono::time_point<std::chrono::steady_clock> start_ct1;
+  std::chrono::time_point<std::chrono::steady_clock> stop_ct1;
+  dpct::queue_ptr stream;
+  TimerGPU(dpct::queue_ptr stream_ = &dpct::get_in_order_queue())
+      : stream(stream_)
+  {
+    start = new sycl::event();
+    stop = new sycl::event();
+    // DPCT1012:90: CUDA event timing was migrated to host steady_clock
+    // timestamps; the barrier event only marks the start point in the queue.
+    start_ct1 = std::chrono::steady_clock::now();
+    *start = stream->ext_oneapi_submit_barrier();
+  }
+  ~TimerGPU()
+  {
+    dpct::destroy_event(start);
+    dpct::destroy_event(stop);
+  }
+  // Milliseconds from construction until the work queued on `stream` before
+  // this call has finished.
+  float read()
+  {
+    *stop = stream->ext_oneapi_submit_barrier();
+    stop->wait_and_throw();
+    // DPCT1012:91: take the stop timestamp only AFTER the barrier completed;
+    // sampling it before the wait leaves still-queued device work unmeasured.
+    stop_ct1 = std::chrono::steady_clock::now();
+    float time;
+    time =
+        std::chrono::duration<float, std::milli>(stop_ct1 - start_ct1).count();
+    return time;
+  }
+};
+
+// Stopwatch built on the x86 time-stamp counter; starts at construction.
+class TimerCPU
+{
+  static const int bits = 10; // low counter bits discarded by getTSC()
+public:
+  long long beg_clock;
+  float freq;
+  TimerCPU(float freq_) : freq(freq_)
+  { // freq = clock frequency in MHz
+    beg_clock = getTSC(bits);
+  }
+  // Returns the 64-bit time-stamp counter shifted right by `bits`.
+  long long getTSC(int bits)
+  {
+#ifdef WIN32
+    return __rdtsc() / (1LL << bits);
+#else
+    unsigned int low, high;
+    // Raw RDTSC opcode; volatile keeps the compiler from merging or moving the read.
+    __asm__ __volatile__(".byte 0x0f, 0x31" : "=a"(low), "=d"(high));
+    return ((long long)high << (32 - bits)) | ((long long)low >> bits);
+#endif
+  }
+  // Elapsed time since construction, in milliseconds when freq is in MHz.
+  float read()
+  {
+    const long long Kcycles = getTSC(bits) - beg_clock;
+    return (float)(1 << bits) * Kcycles / freq / 1e3f;
+  }
+};
+
+// Forwards to dpct::shift_sub_group_left on the calling work-item's sub-group
+// (migrated from __shfl_down_sync); `width` defaults to 32.
+template <class T>
+__inline__ T ShiftDown(T var, unsigned int delta,
+                       const sycl::nd_item<3> &item_ct1, int width = 32)
+{
+#if (DPCT_COMPAT_RT_VERSION >= 9000)
+  /*
+  DPCT1023:0: The SYCL sub-group does not support mask options for
+  dpct::shift_sub_group_left. "--use-experimental-features=masked-sub-group-operation"
+  selects the experimental masked helper instead.
+  */
+  /*
+  DPCT1096:225: The right-most work-group dimension of the calling kernel may
+  be less than "32"; dpct::shift_sub_group_left may then return an unexpected
+  result on the CPU device. Keep that dimension a multiple of "32".
+  */
+  return dpct::shift_sub_group_left(item_ct1.get_sub_group(), var, delta,
+                                    width);
+#else
+  return __shfl_down(var, delta, width); // only compiled when DPCT_COMPAT_RT_VERSION < 9000
+#endif
+}
+
+// Forwards to dpct::shift_sub_group_right on the calling work-item's sub-group (migrated from __shfl_up_sync).
+template <class T>
+__inline__ T ShiftUp(T var, unsigned int delta,
+                     const sycl::nd_item<3> &item_ct1, int width = 32)
+{
+#if (DPCT_COMPAT_RT_VERSION >= 9000)
+  /*
+  DPCT1023:1: The SYCL sub-group does not support mask options for
+  dpct::shift_sub_group_right. "--use-experimental-features=masked-sub-group-operation"
+  selects the experimental masked helper instead.
+  */
+  return dpct::shift_sub_group_right(item_ct1.get_sub_group(), var, delta,
+                                     width);
+#else
+  return __shfl_up(var, delta, width); // only compiled when DPCT_COMPAT_RT_VERSION < 9000
+#endif
+}
+
+// Forwards to dpct::select_from_sub_group on the calling work-item's sub-group (migrated from __shfl_sync).
+template <class T>
+__inline__ T Shuffle(T var, unsigned int lane, const sycl::nd_item<3> &item_ct1, int width = 32)
+{
+#if (DPCT_COMPAT_RT_VERSION >= 9000)
+  /*
+  DPCT1023:2: The SYCL sub-group does not support mask options for
+  dpct::select_from_sub_group. "--use-experimental-features=masked-sub-group-operation"
+  selects the experimental masked helper instead.
+  */
+  return dpct::select_from_sub_group(item_ct1.get_sub_group(), var, lane,
+                                     width);
+#else
+  return __shfl(var, lane, width); // only compiled when DPCT_COMPAT_RT_VERSION < 9000
+#endif
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml
new file mode 100644
index 000000000..626cbf84d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml
@@ -0,0 +1,559 @@
+---
+MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/out/cudautils.h'
+Replacements:
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          41
+    Length:          0
+    ReplacementText: "#define DPCT_COMPAT_RT_VERSION 12020\n#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          78
+    Length:          0
+    ReplacementText: "\n#include <chrono>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          323
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          376
+    Length:          165
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          608
+    Length:          0
+    ReplacementText: ' try '
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          612
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          628
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          655
+    Length:          175
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          832
+    Length:          0
+    ReplacementText: "\ncatch (sycl::exception const &exc) {\n  std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n  std::exit(1);\n}"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          919
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1010:86: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          921
+    Length:          11
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          939
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          961
+    Length:          176
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1205
+    Length:          32
+    ReplacementText: 'DPCT_CHECK_ERROR(deviceCount = dpct::dev_mgr::instance().device_count())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1442
+    Length:          14
+    ReplacementText: 'dpct::device_info'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1480
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_device_info'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1504
+    Length:          11
+    ReplacementText: deviceProp
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1517
+    Length:          3
+    ReplacementText: 'dpct::dev_mgr::instance().get_device(dev)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1521
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1524
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1005:88: The SYCL device version is different from CUDA Compute Compatibility. You may need to rewrite this code.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1541
+    Length:          5
+    ReplacementText: 'get_major_version()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1641
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1093:89: The \"dev\" device may be not the one intended for use. Adjust the selected device if needed.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1652
+    Length:          13
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1670
+    Length:          0
+    ReplacementText: ')'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1718
+    Length:          11
+    ReplacementText: 'dpct::event_ptr'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1742
+    Length:          0
+    ReplacementText: "\n  std::chrono::time_point<std::chrono::steady_clock> start_ct1;\n  std::chrono::time_point<std::chrono::steady_clock> stop_ct1;"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1745
+    Length:          12
+    ReplacementText: 'dpct::queue_ptr'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1777
+    Length:          12
+    ReplacementText: 'dpct::queue_ptr'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1800
+    Length:          1
+    ReplacementText: '&dpct::get_in_order_queue()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1829
+    Length:          23
+    ReplacementText: 'start = new sycl::event()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1858
+    Length:          22
+    ReplacementText: 'stop = new sycl::event()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1882
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1012:90: Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1886
+    Length:          30
+    ReplacementText: "start_ct1 = std::chrono::steady_clock::now();\n    *start = stream->ext_oneapi_submit_barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1944
+    Length:          23
+    ReplacementText: 'dpct::destroy_event(start)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          1973
+    Length:          22
+    ReplacementText: 'dpct::destroy_event(stop)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2020
+    Length:          0
+    ReplacementText: "    /*\n    DPCT1012:91: Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.\n    */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2024
+    Length:          29
+    ReplacementText: "stop_ct1 = std::chrono::steady_clock::now();\n    *stop = stream->ext_oneapi_submit_barrier()"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2059
+    Length:          26
+    ReplacementText: 'stop->wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2107
+    Length:          40
+    ReplacementText: 'time = std::chrono::duration<float, std::milli>(stop_ct1 - start_ct1).count()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2857
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2916
+    Length:          0
+    ReplacementText: ",\n                       const sycl::nd_item<3> &item_ct1"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2941
+    Length:          14
+    ReplacementText: DPCT_COMPAT_RT_VERSION
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2965
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1023:0: The SYCL sub-group does not support mask options for dpct::shift_sub_group_left. You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_down_sync.\n  */\n  /*\n  DPCT1096:225: The right-most dimension of the work-group used in the SYCL kernel that calls this function may be less than \"32\". The function \"dpct::shift_sub_group_left\" may return an unexpected result on the CPU device. Modify the size of the work-group to ensure that the value of the right-most dimension is a multiple of \"32\".\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          2974
+    Length:          47
+    ReplacementText: 'dpct::shift_sub_group_left(item_ct1.get_sub_group(), var, delta, width)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3099
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3156
+    Length:          0
+    ReplacementText: ",\n                     const sycl::nd_item<3> &item_ct1"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3181
+    Length:          14
+    ReplacementText: DPCT_COMPAT_RT_VERSION
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3205
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1023:1: The SYCL sub-group does not support mask options for dpct::shift_sub_group_right. You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_up_sync.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3214
+    Length:          45
+    ReplacementText: 'dpct::shift_sub_group_right(item_ct1.get_sub_group(), var, delta, width)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3335
+    Length:          11
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3391
+    Length:          0
+    ReplacementText: ', const sycl::nd_item<3> &item_ct1'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3416
+    Length:          14
+    ReplacementText: DPCT_COMPAT_RT_VERSION
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3440
+    Length:          0
+    ReplacementText: "  /*\n  DPCT1023:2: The SYCL sub-group does not support mask options for dpct::select_from_sub_group. You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_sync.\n  */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Offset:          3449
+    Length:          41
+    ReplacementText: 'dpct::select_from_sub_group(item_ct1.get_sub_group(), var, lane, width)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+MainSourceFilesDigest:
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h'
+    Digest:          127b6c30b6236cc7de6968ae1eff8ccf
+DpctVersion:     18.0.0
+MainHelperFileName: ''
+USMLevel:        ''
+FeatureMap:      {}
+CompileTargets:  {}
+OptionMap:
+  AnalysisScopePath:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA'
+    Specified:       false
+  AsyncHandler:
+    Value:           'false'
+    Specified:       false
+  CommentsEnabled:
+    Value:           'false'
+    Specified:       false
+  CompilationsDir:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build'
+    Specified:       true
+  CtadEnabled:
+    Value:           'false'
+    Specified:       false
+  EnablepProfiling:
+    Value:           'false'
+    Specified:       false
+  ExperimentalFlag:
+    Value:           '0'
+    Specified:       false
+  ExplicitClNamespace:
+    Value:           'false'
+    Specified:       false
+  ExplicitNamespace:
+    Value:           '20'
+    Specified:       false
+  ExtensionDDFlag:
+    Value:           '0'
+    Specified:       false
+  ExtensionDEFlag:
+    Value:           '4294967295'
+    Specified:       false
+  HelperFuncPreferenceFlag:
+    Value:           '0'
+    Specified:       false
+  NDRangeDim:
+    Value:           '3'
+    Specified:       false
+  NoDRYPattern:
+    Value:           'false'
+    Specified:       false
+  NoUseGenericSpace:
+    Value:           ''
+    Specified:       true
+  OptimizeMigration:
+    Value:           'false'
+    Specified:       false
+  ProcessAll:
+    Value:           'false'
+    Specified:       false
+  RuleFile:
+    Value:           ''
+    Specified:       false
+  SyclNamedLambda:
+    Value:           'false'
+    Specified:       false
+  UsmLevel:
+    Value:           '1'
+    Specified:       false
+...
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp
new file mode 100644
index 000000000..c01e6e7d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <cmath>
+#include <opencv2/core/core.hpp>
+#include "cudaSift.h"
+
+// Refines a 3x3 homography (9 floats, row-major, scaled so that h[8] == 1) with
+// numLoops rounds of weighted least squares over the matched points in data.
+// Points with score < minScore or ambiguity > maxAmbiguity are left out of the
+// fit; the rest get weight 1 if their squared error is below thresh^2, else 0.
+// Stores match_error on every point, writes the result back to homography and
+// returns the number of points whose squared error is below thresh^2.
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *mpts = data.m_data;
+#else
+  if (data.h_data==NULL) return 0; // no host-side point array to fit against
+  SiftPoint *mpts = data.h_data;
+#endif
+  float limit = thresh*thresh;
+  int numPts = data.numPts;
+  cv::Mat M(8, 8, CV_64FC1);
+  cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1), Anew;
+  double Y[8];
+  for (int i=0;i<8;i++) A.at<double>(i, 0) = homography[i] / homography[8];
+  for (int loop=0;loop<numLoops;loop++) {
+    M = cv::Scalar(0.0);
+    X = cv::Scalar(0.0);
+    for (int i=0;i<numPts;i++) {
+      SiftPoint &pt = mpts[i];
+      if (pt.score<minScore || pt.ambiguity>maxAmbiguity)
+	continue;
+      float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0f;
+      float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+      float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+      float err = dx*dx + dy*dy;
+      float wei = (err<limit ? 1.0f : 0.0f); //limit / (err + limit);
+      Y[0] = pt.xpos; Y[1] = pt.ypos; Y[2] = 1.0; // coefficients of the x equation
+      Y[3] = Y[4] = Y[5] = 0.0;
+      Y[6] = - pt.xpos * pt.match_xpos; Y[7] = - pt.ypos * pt.match_xpos;
+      for (int c=0;c<8;c++)
+        for (int r=0;r<8;r++)
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei);
+      Y[0] = Y[1] = Y[2] = 0.0; // coefficients of the y equation
+      Y[3] = pt.xpos; Y[4] = pt.ypos; Y[5] = 1.0;
+      Y[6] = - pt.xpos * pt.match_ypos; Y[7] = - pt.ypos * pt.match_ypos;
+      for (int c=0;c<8;c++)
+        for (int r=0;r<8;r++)
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei);
+    }
+    // A failed solve (e.g. no point passed the filters, so M is all zeros) does
+    // not yield a usable solution: stop iterating and keep the last estimate.
+    if (!cv::solve(M, X, Anew, cv::DECOMP_CHOLESKY)) break;
+    Anew.copyTo(A);
+  }
+  int numfit = 0;
+  for (int i=0;i<numPts;i++) {
+    SiftPoint &pt = mpts[i];
+    float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0;
+    float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+    float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+    float err = dx*dx + dy*dy;
+    if (err<limit) numfit++;
+    pt.match_error = sqrt(err);
+  }
+  for (int i=0;i<8;i++)
+    homography[i] = A.at<double>(i);
+  homography[8] = 1.0f;
+  return numfit;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o
new file mode 100644
index 000000000..3d2f10fdb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp
new file mode 100644
index 000000000..ea9b0c280
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp
@@ -0,0 +1,282 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Björkman aka Celebrandil //
+//              celle @ csc.kth.se                       //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <iostream>
+#include <cmath>
+#include <iomanip>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "Utility.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh);
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img);
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography);
+
+double ScaleUp(CudaImage &res, CudaImage &src);
+
+// Benchmark driver. argv[1] = device index (default 0); argv[2] = image set
+// (0: img1.png/img2.png, non-zero: left.pgm/righ.pgm). Extracts SIFT features
+// from both images, matches them and returns Utility::RunDataVerification's flag.
+int main(int argc, char **argv)
+{
+  auto totalProgTimer_start = std::chrono::steady_clock::now();
+  int devNum = 0, imgSet = 0;
+  if (argc > 1)
+    devNum = std::atoi(argv[1]);
+  if (argc > 2)
+    imgSet = std::atoi(argv[2]);
+
+  float totTime = 0.0;
+  float imageInitTime = 0.0;
+  float extractSiftTime = 0.0;
+  float matchingTime = 0.0;
+  float ioReadTime = 0.0;
+  float dataVerificationTime = 0.0;
+
+  // Read images using OpenCV
+  cv::Mat limg, rimg;
+  auto ioRead_start = std::chrono::steady_clock::now();
+  if (imgSet)
+  {
+    cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
+  }
+  else
+  {
+    cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
+  }
+  auto ioRead_stop = std::chrono::steady_clock::now();
+  ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
+
+  if (limg.empty() || rimg.empty())
+  { // a failed imread leaves the matrix empty; stop before any device work
+    std::cerr << "Failed to read input images" << std::endl;
+    return 1;
+  }
+  unsigned int w = limg.cols;
+  unsigned int h = limg.rows;
+  std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
+  // Select the device requested on the command line, then upload the images
+  std::cout << "Initializing data..." << std::endl;
+  dpct::select_device(devNum);
+  CudaImage img1, img2;
+
+  img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
+  img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
+  img1.Download(imageInitTime);
+  img2.Download(imageInitTime);
+
+  // Extract Sift features from images
+  SiftData siftData1, siftData2;
+  float initBlur = 1.0f;
+  float thresh = (imgSet ? 4.5f : 2.0f);
+
+  InitSiftData(siftData1, imageInitTime, 32768, true, true);
+  InitSiftData(siftData2, imageInitTime, 32768, true, true);
+
+  // A bit of benchmarking
+  // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
+  float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
+  for (int i = 0; i < 50; i++)
+  {
+    float time = 0.0f; // set total time to init time
+    ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+    time = 0.0f;
+    ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+  }
+  FreeSiftTempMemory(memoryTmp);
+
+  // Match Sift features and find a homography
+  for (int i = 0; i < 1; i++)
+    MatchSiftData(siftData1, siftData2, matchingTime);
+  float homography[9];
+  int numMatches;
+  FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
+  int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
+  float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
+
+  std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
+  std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
+            << std::endl;
+
+#ifdef DEVICE_TIMER
+  totTime = imageInitTime + extractSiftTime + matchingTime;
+
+  std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
+  std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
+  std::cout << "Matching time = " << matchingTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  std::cout << "Total Deivce Time = " << totTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+#endif
+
+  // data validation
+  auto dataVerficationTimer_start = std::chrono::steady_clock::now();
+  int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
+  auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
+  dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
+  // // Print out and store summary data
+  // // PrintMatchData(siftData1, siftData2, img1);
+  // cv::imwrite("data/limg_pts.pgm", limg);
+
+  // MatchAll(siftData1, siftData2, homography);
+
+  // Free Sift data from device
+  FreeSiftData(siftData1);
+  FreeSiftData(siftData2);
+
+  auto totalProgTimer_end = std::chrono::steady_clock::now();
+  float totalProgramTime = std::chrono::duration<float, std::micro>(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime;
+  std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  return data_verification_flag;
+}
+
+// Debug dump: for every point of siftData1 prints the siftData2 points that are
+// its recorded match or whose position is within squared distance 100 of the
+// point mapped through homography, then the hit count and the homography.
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  const int numPts1 = siftData1.numPts;
+  const int numPts2 = siftData2.numPts;
+  int numFound = 0;
+#if 1
+  // Overwrites entries 0-7 of the caller's homography with fixed values.
+  homography[0] = homography[4] = -1.0f;
+  homography[1] = homography[3] = homography[6] = homography[7] = 0.0f;
+  homography[2] = 1279.0f;
+  homography[5] = 959.0f;
+#endif
+  for (int i = 0; i < numPts1; i++)
+  {
+    SiftPoint &src = sift1[i];
+    std::cout << i << ":" << src.scale << ":" << (int)src.orientation << " " << src.xpos << " " << src.ypos << std::endl;
+    bool found = false; // set once any candidate lands near the projection
+    for (int j = 0; j < numPts2; j++)
+    {
+      SiftPoint &cand = sift2[j];
+      float sum = 0.0f; // dot product of the first 128 floats of both descriptors
+      for (int k = 0; k < 128; k++)
+        sum += src.data[k] * cand.data[k];
+      // Offset between the candidate and src mapped through the homography.
+      float den = homography[6] * src.xpos + homography[7] * src.ypos + homography[8];
+      float dx = (homography[0] * src.xpos + homography[1] * src.ypos + homography[2]) / den - cand.xpos;
+      float dy = (homography[3] * src.xpos + homography[4] * src.ypos + homography[5]) / den - cand.ypos;
+      float err = dx * dx + dy * dy;
+      const bool isNear = err < 100.0f; // 100.0
+      const bool isMatch = (j == src.match);
+      if (isNear)
+        found = true;
+      if (!isNear && !isMatch)
+        continue;
+      // " *": recorded match and near, " -": recorded match only, " +": near only.
+      std::cout << (isMatch ? (isNear ? " *" : " -") : " +");
+      std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << cand.scale << ":" << (int)cand.orientation << " " << cand.xpos << " " << cand.ypos << " " << (int)dx << " " << (int)dy << std::endl;
+    }
+    std::cout << std::endl;
+    if (found)
+      numFound++;
+  }
+  // Summary: hit count followed by the homography, one row of three per line.
+  std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl;
+  for (int row = 0; row < 3; row++)
+    std::cout << homography[3 * row] << " " << homography[3 * row + 1] << " " << homography[3 * row + 2] << std::endl; //%%%
+}
+
+// Debug overlay drawn into img.h_data: for every siftData1 point with
+// match_error < 5 a line of 255s towards its match in siftData2, and for every
+// point a cross of 255s plus a cross of 0s shifted by one row and one column.
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  float *const pixels = img.h_data;
+  const int w = img.width, h = img.height, numPts = siftData1.numPts;
+  // Sets the horizontal and vertical arms (arm pixels each way) around centre.
+  auto paintCross = [pixels, w](int centre, int arm, float value) {
+    for (int k = 0; k < arm; k++)
+      pixels[centre - k] = pixels[centre + k] = pixels[centre - k * w] = pixels[centre + k * w] = value;
+  };
+  std::cout << std::setprecision(3);
+  for (int j = 0; j < numPts; j++)
+  {
+    int k = sift1[j].match;
+    if (sift1[j].match_error < 5)
+    {
+      float dx = sift2[k].xpos - sift1[j].xpos;
+      float dy = sift2[k].ypos - sift1[j].ypos;
+#if 0
+      if (false && sift1[j].xpos>550 && sift1[j].xpos<600) {
+	std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") ";
+	std::cout << j << ": " << "score=" << sift1[j].score << "  ambiguity=" << sift1[j].ambiguity << "  match=" << k << "  ";
+	std::cout << "scale=" << sift1[j].scale << "  ";
+	std::cout << "error=" << (int)sift1[j].match_error << "  ";
+	std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << "  ";
+	std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl;
+      }
+#endif
+#if 1
+      // One sample per unit step along the longer axis of (dx, dy).
+      const auto adx = fabs(dx), ady = fabs(dy);
+      const int len = (int)(adx > ady ? adx : ady);
+      for (int l = 0; l < len; l++)
+        pixels[(int)(sift1[j].ypos + dy * l / len) * w + (int)(sift1[j].xpos + dx * l / len)] = 255.0f;
+#endif
+    }
+    // Arm length is capped by the distances to the image border and by the scale.
+    int x = (int)(sift1[j].xpos + 0.5);
+    int y = (int)(sift1[j].ypos + 0.5);
+    int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale)))));
+    paintCross(y * w + x + (w + 1), s, 0.0f);
+    paintCross(y * w + x, s, 255.0f);
+  }
+  std::cout << std::setprecision(6);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o
new file mode 100644
index 000000000..2bdeed748
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp
new file mode 100644
index 000000000..0a717400f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp
@@ -0,0 +1,2209 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <chrono>
+#include <random>
+#include "cudaSift.h"
+#include "cudautils.h"
+
+//================= Device matching functions =====================//
+
+void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2,
+                     const sycl::nd_item<3> &item_ct1, float *siftPoint,
+                     float *sums)
+{
+  // Dot products of sift1[p1].data with this group's sift2 descriptors, written to corrData (assumes a 16x16 work-group — TODO confirm at the launch site).
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1 = item_ct1.get_group(2); // sift1 index: one point per group along dimension 2
+  const int p2 = item_ct1.get_group(1) * 16 + ty; // sift2 index handled by row ty
+  const float *ptr1 = sift1[p1].data;
+  const float *ptr2 = sift2[p2].data; // only dereferenced below when p2 < numPts2
+  const int i = 16 * ty + tx;
+  if (ty < 8)
+    siftPoint[i] = ptr1[i]; // rows 0-7 stage the sift1 descriptor into siftPoint
+  /*
+  DPCT1065:146: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float sum = 0.0f;
+  if (p2 < numPts2)
+    for (int j = 0; j < 8; j++)
+      sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; // lane tx covers elements tx, tx+16, ..., tx+112
+  sums[i] = sum; // partial sum of lane tx for row ty (stays 0 when p2 >= numPts2)
+  /*
+  DPCT1065:147: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (tx < 8)
+    sums[i] += sums[i + 8]; // fold lanes 8-15 into lanes 0-7
+  /*
+  DPCT1065:148: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (tx < 4)
+    sums[i] += sums[i + 4]; // fold lanes 4-7 into lanes 0-3
+  /*
+  DPCT1065:149: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (ty == 0)
+  { // row 0 finishes the reduction: lane tx adds the four remaining partials of row tx
+    sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3];
+    corrData[p1 * item_ct1.get_group_range(1) * 16 +
+             item_ct1.get_group(1) * 16 + tx] = sum; // row p1, column get_group(1)*16 + tx
+  }
+  /*
+  DPCT1065:150: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+}
+
+void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2,
+                      const sycl::nd_item<3> &item_ct1, float *siftPoints1,
+                      float *siftPoints2)
+{
+  // Fills one tile of corrData with descriptor dot products (assumes a 16x16 work-group — TODO confirm at the launch site).
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const float *ptr1 =
+      sift1[dpct::min(numPts1 - 1, // NOTE(review): int vs unsigned arguments; confirm numPts1 is never 0
+                      (unsigned int)(item_ct1.get_group(2) * 16 + ty))]
+          .data;
+  const float *ptr2 =
+      sift2[dpct::min(numPts2 - 1, // same index limit for the sift2 side
+                      (unsigned int)(item_ct1.get_group(1) * 16 + ty))]
+          .data;
+  for (int i = 0; i < 8; i++)
+  { // row ty copies one 128-float descriptor from each set, 16 floats per iteration
+    siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx];
+    siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx];
+  }
+  /*
+  DPCT1065:151: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  const int p1 = item_ct1.get_group(2) * 16 + ty; // sift1 index of this work-item's output
+  const int p2 = item_ct1.get_group(1) * 16 + tx; // sift2 index of this work-item's output
+  const float *pt1 = &siftPoints1[ty * 128];
+  const float *pt2 = &siftPoints2[tx * 128];
+  float sum = 0.0f;
+  for (int i = 0; i < 128; i++)
+  {
+    int itx = (i + tx) & 127; // avoid bank conflicts
+    sum += pt1[itx] * pt2[itx];
+  }
+  if (p1 < numPts1)
+    corrData[p1 * item_ct1.get_group_range(1) * 16 + p2] =
+        (p2 < numPts2 ? sum : -1.0f); // columns past numPts2 are filled with -1
+}
+
+void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize,
+                 const sycl::nd_item<3> &item_ct1, float *maxScore,
+                 float *maxScor2, int *maxIndex)
+{
+  // Per sift1 point (row ty): best and second-best value of its corrData row, stored as score/ambiguity/match.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * 16 + tx;
+  int p1 = item_ct1.get_group(2) * 16 + item_ct1.get_local_id(1);
+  p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); // rows past the end redo the last point
+  maxScore[idx] = -1.0f;
+  maxScor2[idx] = -1.0f;
+  maxIndex[idx] = -1;
+  /*
+  DPCT1065:152: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float *corrs = &corrData[p1 * corrWidth];
+  for (int i = tx; i < corrWidth; i += 16)
+  { // each lane scans every 16th column and tracks its own best and runner-up
+    float val = corrs[i];
+    if (val > maxScore[idx])
+    {
+      maxScor2[idx] = maxScore[idx];
+      maxScore[idx] = val;
+      maxIndex[idx] = i;
+    }
+    else if (val > maxScor2[idx])
+      maxScor2[idx] = val;
+  }
+  /*
+  DPCT1065:153: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  for (int len = 8; len > 0; len /= 2)
+  {
+    // Only lanes below len take part: a wider test (e.g. tx < 8) would let lanes
+    // len..7 update entries that lanes 0..len-1 read in the same step, a data
+    // race unless the lanes run in lock-step. The barrier stays outside the
+    // branch so every work-item reaches it.
+    if (tx < len)
+    {
+      float val = maxScore[idx + len];
+      int i = maxIndex[idx + len];
+      if (val > maxScore[idx])
+      {
+        maxScor2[idx] = maxScore[idx];
+        maxScore[idx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > maxScor2[idx])
+        maxScor2[idx] = val;
+      float va2 = maxScor2[idx + len];
+      if (va2 > maxScor2[idx])
+        maxScor2[idx] = va2;
+    }
+    /*
+    DPCT1065:154: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  if (tx == 0)
+  { // lane 0 of each row now holds the row-wide result
+    sift1[p1].score = maxScore[ty * 16];
+    sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6);
+    sift1[p1].match = maxIndex[ty * 16];
+    sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos;
+  }
+}
+
+// Version based on suggestion by Nicholas Lin
+// For each sift1 point p1 (one per row ty), scans all sift2 descriptors, keeps
+// the best and second-best dot product in corrData[p1 * 2 * block_dim ...] and
+// writes score, ambiguity and match data for the winner into sift1[p1].
+void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, int *maxIndex)
+{
+  int block_dim = item_ct1.get_local_range(2); // blockDim.x == 16
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1 = item_ct1.get_group(2) * block_dim + ty;
+  const int idx = ty * 16 + tx;
+
+  maxIndex[idx] = 0;
+  /*
+  DPCT1065:155: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  float *corrs = NULL; // stays NULL for rows with p1 >= numPts1
+  if (p1 < numPts1)
+  {
+    corrs = &corrData[p1 * block_dim * 2];
+    corrs[tx] = 0.0f;      // best score seen by lane tx
+    corrs[tx + 16] = 0.0f; // second-best score seen by lane tx
+    const float *pt1 = sift1[p1].data;
+    for (int p2 = tx; p2 < numPts2; p2 += 16)
+    {
+      float *pt2 = sift2[p2].data;
+      float sum = 0.0f;
+      for (int i = 0; i < 128; i++)
+        sum += pt1[i] * pt2[i];
+      if (sum > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = sum;
+        maxIndex[idx] = p2;
+      }
+      else if (sum > corrs[tx + 16])
+        corrs[tx + 16] = sum;
+    }
+  }
+  /*
+  DPCT1065:156: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  for (int len = 8; len > 0; len /= 2)
+  {
+    if (p1 < numPts1 && tx < len)
+    {
+      float val = corrs[tx + len];
+      int i = maxIndex[idx + len];
+      if (val > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > corrs[tx + 16])
+        corrs[tx + 16] = val;
+      float va2 = corrs[tx + 16 + len];
+      if (va2 > corrs[tx + 16])
+        corrs[tx + 16] = va2;
+    }
+    // The barrier sits outside the p1/tx test: rows of the last work-group can
+    // have p1 >= numPts1, and a barrier reached by only part of a work-group
+    // is undefined behaviour in SYCL. The bounds check is therefore folded
+    // into the branch condition instead of wrapping the whole loop.
+    /*
+    DPCT1065:157: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  if (p1 < numPts1 && tx == 0)
+  { // lane 0 holds the row-wide best (corrs[0]) and runner-up (corrs[16])
+    sift1[p1].score = corrs[0];
+    sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6);
+    sift1[p1].match = maxIndex[ty << 4];
+    sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos;
+  }
+}
+
+#define FMC2W 16 // stride of tx in FindMaxCorr2/FindMaxCorr4 (idx = ty * FMC2W + tx); presumably the local range in dimension 2 - confirm at launch site
+#define FMC2H 4  // stride of ty in FindMaxCorr2/FindMaxCorr4 and entry count of the per-row score arrays; presumably the local range in dimension 1 - confirm at launch site
+
+/**
+ * For one sift1 descriptor per work-group, finds the best and second-best
+ * dot-product score among all sift2 descriptors and stores the result in
+ * sift1[p1].
+ *
+ * The work-group index in dimension 2 selects the sift1 point. Each row of
+ * work-items (ty) visits every FMC2H-th sift2 point; the work-items of a row
+ * (tx) split the 128-element dot product and combine it with ShiftDown.
+ *
+ * @param sift1     points to match; score, ambiguity, match, match_xpos and
+ *                  match_ypos of sift1[p1] are overwritten.
+ * @param sift2     candidate points (only read here).
+ * @param numPts1   number of entries in sift1; groups with p1 >= numPts1 exit.
+ * @param numPts2   number of entries in sift2 that are scanned.
+ * @param item_ct1  work-item handle.
+ * @param siftPoint scratch for the 128 floats of the sift1 descriptor, shared
+ *                  by the work-group (presumably local memory - confirm at
+ *                  the launch site).
+ * @param maxScore  per-row best score, at least FMC2H entries.
+ * @param maxScor2  per-row second-best score, at least FMC2H entries.
+ * @param maxIndex  per-row sift2 index of the best score, FMC2H entries.
+ */
+void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, float *siftPoint,
+                  float *maxScore, float *maxScor2, int *maxIndex)
+{
+
+  // p1 depends only on the group index, so this early exit is taken by the
+  // whole work-group and cannot strand work-items at a later barrier.
+  const int p1 = item_ct1.get_group(2);
+  if (p1 >= numPts1)
+    return;
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC2W + tx;
+  // The first FMC2H work-items reset the per-row results.
+  if (idx < FMC2H)
+  {
+    maxScore[idx] = -1.0f;
+    maxScor2[idx] = -1.0f;
+    maxIndex[idx] = 0;
+  }
+  /*
+  DPCT1065:158: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Stage the sift1 descriptor once for the whole work-group.
+  const float *pt1 = sift1[p1].data;
+  for (int i = idx; i < 128; i += FMC2W * FMC2H)
+    siftPoint[i] = pt1[i];
+  /*
+  DPCT1065:159: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // NOTE(review): rows with different ty may run a different number of
+  // iterations when numPts2 is not a multiple of FMC2H, so ShiftDown can be
+  // reached in diverged control flow if several rows share a sub-group -
+  // confirm the sub-group size used at the launch site.
+  for (int p2 = ty; p2 < numPts2; p2 += FMC2H)
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[j] * pt2[j];
+    // Combine the partial sums of the row; only the tx == 0 value is used.
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j, item_ct1);
+    if (tx == 0)
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  /*
+  DPCT1065:160: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Pairwise merge of the FMC2H per-row results into entry 0. The loop bound
+  // is a compile-time constant, so every work-item reaches the barrier.
+  for (int len = FMC2H / 2; len > 0; len /= 2)
+  {
+    if (ty == 0 && tx < len)
+    {
+      float val = maxScore[tx + len];
+      int p2 = maxIndex[tx + len];
+      if (val > maxScore[tx])
+      {
+        maxScor2[tx] = maxScore[tx];
+        maxScore[tx] = val;
+        maxIndex[tx] = p2;
+      }
+      else if (val > maxScor2[tx])
+        maxScor2[tx] = val;
+      float va2 = maxScor2[tx + len];
+      if (va2 > maxScor2[tx])
+        maxScor2[tx] = va2;
+    }
+    /*
+    DPCT1118:51: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:161: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  if (ty == 0 && tx == 0)
+  {
+    sift1[p1].score = maxScore[0];
+    // Float literal: keeps the division in single precision so the kernel
+    // does not need double-precision support on the device, and matches the
+    // 1e-6f used by the other FindMaxCorr kernels in this file.
+    sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6f);
+    sift1[p1].match = maxIndex[0];
+    sift1[p1].match_xpos = sift2[maxIndex[0]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[0]].ypos;
+  }
+}
+
+/**
+ * Matches FMC2H sift1 descriptors per work-group (one per ty row) against
+ * every sift2 descriptor and stores the best / second-best dot-product
+ * result in the corresponding sift1 entry.
+ *
+ * @param sift1     points to match; score, ambiguity, match, match_xpos and
+ *                  match_ypos are overwritten for rows with p1 < numPts1.
+ * @param sift2     candidate points (only read here).
+ * @param numPts1   number of entries in sift1.
+ * @param numPts2   number of entries in sift2 that are scanned.
+ * @param item_ct1  work-item handle.
+ * @param siftPoint scratch for FMC2H descriptors of 128 floats each, shared by
+ *                  the work-group (presumably local memory - confirm at the
+ *                  launch site).
+ * @param maxScore  per-row best score, at least FMC2H entries.
+ * @param maxScor2  per-row second-best score, at least FMC2H entries.
+ * @param maxIndex  per-row sift2 index of the best score, FMC2H entries.
+ */
+void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, float *siftPoint,
+                  float *maxScore, float *maxScor2, int *maxIndex)
+{
+
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  if (tx == 0)
+  {
+    maxScore[ty] = -1.0f;
+    maxScor2[ty] = -1.0f;
+    maxIndex[ty] = 0;
+  }
+  const int p1 = item_ct1.get_group(2) * FMC2H + ty;
+  // The last work-group may cover rows past the end of sift1 when numPts1 is
+  // not a multiple of FMC2H. Such rows cannot return early (they still have
+  // to take part in the barriers and ShiftDown below), so they read a clamped
+  // in-range descriptor and simply skip the final store.
+  const bool validRow = p1 < numPts1;
+  const int p1Load = sycl::min(p1, numPts1 - 1);
+  const float *pt1 = sift1[p1Load].data;
+  for (int j = tx; j < 128; j += FMC2W)
+    siftPoint[128 * ty + j] = pt1[j];
+  /*
+  DPCT1065:162: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  for (int p2 = 0; p2 < numPts2; p2++)
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[128 * ty + j] * pt2[j];
+    // Combine the partial sums of the row; only the tx == 0 value is used.
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j, item_ct1);
+    if (tx == 0)
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  /*
+  DPCT1065:163: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (tx == 0 && validRow)
+  {
+    sift1[p1].score = maxScore[ty];
+    // Float literal: keeps the division in single precision so the kernel
+    // does not need double-precision support on the device, and matches the
+    // 1e-6f used by the other FindMaxCorr kernels in this file.
+    sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6f);
+    sift1[p1].match = maxIndex[ty];
+    sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos;
+  }
+}
+
+/**
+ * Copies numPts rows of `width` bytes from src to dst, where consecutive rows
+ * are src_pitch / dst_pitch bytes apart.
+ *
+ * @param src       start of the first source row.
+ * @param dst       start of the first destination row.
+ * @param src_pitch distance in bytes between consecutive source rows.
+ * @param dst_pitch distance in bytes between consecutive destination rows.
+ * @param numPts    number of rows to copy; nothing is copied when <= 0.
+ * @param width     number of bytes copied per row.
+ */
+void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width)
+{
+  // Pitches and width are byte counts, so walk the data as bytes.
+  const char *d_src = (const char *)src;
+  char *d_dst = (char *)dst;
+
+  for (int i = 0; i < numPts; ++i)
+  {
+    // size_t index: matches the type of width, avoiding the signed/unsigned
+    // comparison and the int overflow for rows longer than INT_MAX bytes.
+    for (size_t j = 0; j < width; ++j)
+    {
+      d_dst[j] = d_src[j];
+    }
+    d_src = d_src + src_pitch;
+    d_dst = d_dst + dst_pitch;
+  }
+}
+
+/**
+ * Resets the score of one sift1 entry per work-item to 0.
+ *
+ * The entry index is group(2) * 64 + local_id(2), clamped to numPts1 - 1, so
+ * work-items past the end all rewrite the last entry with the same value.
+ */
+void CleanMatches(SiftPoint *sift1, int numPts1,
+                  const sycl::nd_item<3> &item_ct1)
+{
+  const unsigned int linearId =
+      (unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2));
+  const int target = dpct::min(linearId, numPts1 - 1);
+  SiftPoint &entry = sift1[target];
+  entry.score = 0.0f;
+}
+
+#define M7W 32   // sift1 points handled per work-group in FindMaxCorr10 (bp1 = M7W * group index)
+#define M7H 32   // sift2 points staged per tile of the outer loop in FindMaxCorr10 (bp2 += M7H)
+#define M7R 4    // sift2 rows accumulated per work-item (dy < M7R); ty strides by M7H / M7R
+#define NRX 2    // sift1 columns accumulated per work-item (i < NRX)
+#define NDIM 128 // floats per descriptor; kernels read it as NDIM / 4 float4 values
+
+/*
+DPCT1110:52: The total declared local variable size in device function
+FindMaxCorr10 exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+*/
+/**
+ * Tiled matcher: each work-group handles M7W consecutive sift1 points and
+ * scans sift2 in tiles of M7H points, keeping the best and second-best
+ * dot-product score per sift1 point.
+ *
+ * Only full tiles are visited (bp2 < numPts2 - M7H + 1), so trailing sift2
+ * points beyond the last full tile are not compared.
+ *
+ * @param sift1    points to match; score, match, match_xpos, match_ypos and
+ *                 ambiguity are overwritten for entries below numPts1.
+ * @param sift2    candidate points (only read here).
+ * @param numPts1  number of entries in sift1.
+ * @param numPts2  number of entries in sift2.
+ * @param item_ct1 work-item handle.
+ * @param buffer1  work-group scratch for M7W descriptors (M7W * NDIM / 4
+ *                 float4 values); reused at the end for the score/index
+ *                 tables. Presumably local memory - confirm at launch site.
+ * @param buffer2  work-group scratch for one sift2 tile (M7H * NDIM / 4
+ *                 float4 values).
+ */
+void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                   const sycl::nd_item<3> &item_ct1, sycl::float4 *buffer1,
+                   sycl::float4 *buffer2)
+{
+
+  int tx = item_ct1.get_local_id(2);
+  int ty = item_ct1.get_local_id(1);
+  int bp1 = M7W * item_ct1.get_group(2);
+  // Stage the sift1 descriptors of this group. Each row j is stored rotated
+  // by j float4 slots; the reads further down apply the same rotation.
+  for (int j = ty; j < M7W; j += M7H / M7R)
+  {
+    int p1 = sycl::min(bp1 + j, numPts1 - 1);
+    for (int d = tx; d < NDIM / 4; d += M7W)
+      buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] =
+          ((sycl::float4 *)&sift1[p1].data)[d];
+  }
+
+  float max_score[NRX];
+  float sec_score[NRX];
+  int index[NRX];
+  for (int i = 0; i < NRX; i++)
+  {
+    max_score[i] = 0.0f;
+    sec_score[i] = 0.0f;
+    index[i] = -1;
+  }
+
+  int idx = ty * M7W + tx;
+  int ix = idx % (M7W / NRX);
+  int iy = idx / (M7W / NRX);
+  // The loop bound depends only on numPts2, so all work-items execute the
+  // same number of iterations and reach both barriers together.
+  for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H)
+  {
+    for (int j = ty; j < M7H; j += M7H / M7R)
+    {
+      int p2 = sycl::min(bp2 + j, numPts2 - 1);
+      for (int d = tx; d < NDIM / 4; d += M7W)
+        buffer2[j * NDIM / 4 + d] = ((sycl::float4 *)&sift2[p2].data)[d];
+    }
+    /*
+    DPCT1118:53: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:165: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    if (idx < M7W * M7H / M7R / NRX)
+    {
+      // Each active work-item accumulates an M7R x NRX block of dot products.
+      float score[M7R][NRX];
+      for (int dy = 0; dy < M7R; dy++)
+        for (int i = 0; i < NRX; i++)
+          score[dy][i] = 0.0f;
+      for (int d = 0; d < NDIM / 4; d++)
+      {
+        sycl::float4 v1[NRX];
+        for (int i = 0; i < NRX; i++)
+          v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)];
+        for (int dy = 0; dy < M7R; dy++)
+        {
+          sycl::float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d];
+          for (int i = 0; i < NRX; i++)
+          {
+            score[dy][i] += v1[i].x() * v2.x();
+            score[dy][i] += v1[i].y() * v2.y();
+            score[dy][i] += v1[i].z() * v2.z();
+            score[dy][i] += v1[i].w() * v2.w();
+          }
+        }
+      }
+      for (int dy = 0; dy < M7R; dy++)
+      {
+        for (int i = 0; i < NRX; i++)
+        {
+          if (score[dy][i] > max_score[i])
+          {
+            sec_score[i] = max_score[i];
+            max_score[i] = score[dy][i];
+            index[i] = sycl::min(bp2 + M7R * iy + dy, numPts2 - 1);
+          }
+          else if (score[dy][i] > sec_score[i])
+            sec_score[i] = score[dy][i];
+        }
+      }
+    }
+    /*
+    DPCT1118:54: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:166: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  // The score/index tables reuse the storage of buffer1.
+  float *scores1 = (float *)buffer1;
+  float *scores2 = &scores1[M7W * M7H / M7R];
+  int *indices = (int *)&scores2[M7W * M7H / M7R];
+  // When the tile loop runs zero times (numPts2 < M7H) no barrier separates
+  // the descriptor stores into buffer1 from the table stores below, which
+  // target the same memory from different work-items. Synchronise first.
+  item_ct1.barrier();
+  if (idx < M7W * M7H / M7R / NRX)
+  {
+    for (int i = 0; i < NRX; i++)
+    {
+      scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i];
+      scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i];
+      indices[iy * M7W + (M7W / NRX) * i + ix] = index[i];
+    }
+  }
+  /*
+  DPCT1065:164: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // The loads above clamp to numPts1 - 1, i.e. the last group may cover
+  // entries past the end of sift1; those must not be written.
+  const int p1Out = bp1 + tx;
+  if (ty == 0 && p1Out < numPts1)
+  {
+    // Merge the M7H / M7R partial results of column tx.
+    float bestScore = scores1[tx];
+    float secondScore = scores2[tx];
+    int bestIndex = indices[tx];
+    for (int y = 0; y < M7H / M7R; y++)
+      if (bestIndex != indices[y * M7W + tx])
+      {
+        if (scores1[y * M7W + tx] > bestScore)
+        {
+          secondScore = sycl::max(bestScore, secondScore);
+          bestScore = scores1[y * M7W + tx];
+          bestIndex = indices[y * M7W + tx];
+        }
+        else if (scores1[y * M7W + tx] > secondScore)
+          secondScore = scores1[y * M7W + tx];
+      }
+    sift1[p1Out].score = bestScore;
+    sift1[p1Out].match = bestIndex;
+    // bestIndex stays -1 when no candidate scored above 0 (or no tile was
+    // scanned); sift2 must not be indexed with it. The match position is
+    // reported as 0 in that case and match == -1 marks "no match".
+    if (bestIndex >= 0)
+    {
+      sift1[p1Out].match_xpos = sift2[bestIndex].xpos;
+      sift1[p1Out].match_ypos = sift2[bestIndex].ypos;
+    }
+    else
+    {
+      sift1[p1Out].match_xpos = 0.0f;
+      sift1[p1Out].match_ypos = 0.0f;
+    }
+    sift1[p1Out].ambiguity = secondScore / (bestScore + 1e-6f);
+  }
+}
+
+#define FMC_GH 512 // upper bound on sift2 points scanned per work-group in FindMaxCorr8/9 (offset = get_group(1) * FMC_GH)
+#define FMC_BW 32  // sift1 points per work-group tile
+#define FMC_BH 32  // sift2 points per tile (k += FMC_BH)
+#define FMC_BD 16  // float4 descriptor parts staged per pass (FindMaxCorr9 stages FMC_BD / 2 at a time)
+#define FMC_TW 1   // sift1 columns accumulated per work-item
+#define FMC_TH 4   // sift2 rows accumulated per work-item
+#define FMC_NW (FMC_BW / FMC_TW) //  32
+#define FMC_NH (FMC_BH / FMC_TH) //   8
+#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps
+
+// Device-global int initialised to 0.
+// NOTE(review): presumably the storage behind the `lock` reference taken by
+// FindMaxCorr7/8/9 (0 = free, 1 = held) - confirm at the launch sites.
+dpct::global_memory<volatile int, 0> lock(0);
+
+/*
+DPCT1110:55: The total declared local variable size in device function
+FindMaxCorr9 exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+*/
+/**
+ * Tiled matcher. Each work-group compares FMC_BW sift1 points (selected by
+ * get_group(2)) with up to FMC_GH sift2 points (selected by get_group(1)) in
+ * tiles of FMC_BH, then merges its best / second-best score into sift1 while
+ * holding `lock`.
+ *
+ * Descriptor parts are staged FMC_BD / 2 float4 values at a time into
+ * alternating halves of siftParts1 / siftParts2: one half is filled while
+ * the other half, filled in the previous step, is consumed.
+ *
+ * @param sift1      points to match; score, ambiguity, match, match_xpos and
+ *                   match_ypos are updated when a better score is found.
+ * @param sift2      candidate points (only read here).
+ * @param numPts1    number of entries in sift1 (indices are clamped to
+ *                   numPts1 - 1).
+ * @param numPts2    number of entries in sift2 (indices are clamped to
+ *                   numPts2 - 1).
+ * @param item_ct1   work-item handle.
+ * @param lock       flag used as a spin lock: acquired with a 0 -> 1
+ *                   compare-exchange and released by storing 0.
+ * @param siftParts1 work-group scratch for sift1 parts; also reused as the
+ *                   FMC_BW x FMC_BH block-sum table (blksums).
+ * @param siftParts2 work-group scratch for sift2 parts.
+ */
+void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, volatile int &lock,
+                  sycl::float4 *siftParts1, sycl::float4 *siftParts2)
+{
+   // NOTE(review): leftover size notes, presumably for siftParts1 and
+   // siftParts2 respectively - confirm against the launch site:
+   // 4*32*8 = 1024
+   // 4*32*8 = 1024
+  //__shared__ float blksums[FMC_BW*FMC_BH];     // 32*32  = 1024
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC_NW + tx;
+  sycl::float4 *pts1 = 0, *pts2 = 0;
+  // The first FMC_BW work-items each own one sift1 descriptor; pts1 stays
+  // null for the others and is only dereferenced under the same condition.
+  if (idx < FMC_BW)
+  {
+    const int p1l = dpct::min(
+        (unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1);
+    pts1 = (sycl::float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  // The bound depends only on numPts2, so every work-item runs the same
+  // number of tiles and reaches the barriers inside the loop together.
+  for (int k = 0; k < sycl::min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l =
+          dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx),
+                    numPts2 - 1);
+      pts2 = (sycl::float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH];
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+
+    // Fill the first half (rows 0 .. FMC_BD / 2 - 1) before the staged loop.
+    if (idx < FMC_BW)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i];
+    if (idx < FMC_BH)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i];
+    /*
+    DPCT1118:56: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:169: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    // b is the row offset of the half being filled; after the toggle below
+    // it is the offset of the half being consumed.
+    int b = FMC_BD / 2;
+    for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2)
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i];
+
+      b ^= FMC_BD / 2;
+      for (int i = 0; i < FMC_BD / 2; i++)
+      {
+        sycl::float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+            sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+            sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+            sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+          }
+        }
+      }
+      /*
+      DPCT1118:60: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:173: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier();
+    }
+
+    // Consume the half that was filled in the last loop iteration.
+    b ^= FMC_BD / 2;
+    for (int i = 0; i < FMC_BD / 2; i++)
+    {
+      sycl::float4 v1[FMC_TW];
+      for (int ix = 0; ix < FMC_TW; ix++)
+        v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+      for (int iy = 0; iy < FMC_TH; iy++)
+      {
+        sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+        for (int ix = 0; ix < FMC_TW; ix++)
+        {
+          sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+          sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+          sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+          sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+        }
+      }
+    }
+    /*
+    DPCT1118:57: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:170: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+
+    // blksums aliases siftParts1; the barrier above ensures all reads of the
+    // staged parts have finished before they are overwritten.
+    float *blksums = (float *)siftParts1;
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    /*
+    DPCT1118:58: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:171: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Work-item idx scans column idx of the block sums and updates its
+    // running best / second-best score for sift1 point idx of this group.
+    if (idx < FMC_BW)
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex =
+              dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j),
+                        numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    /*
+    DPCT1118:59: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:172: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  const int p1 = dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                           numPts1 - 1);
+  // Work-item 0 spins until it switches lock from 0 to 1; the barrier that
+  // follows holds the rest of the group back until the lock is held.
+  // NOTE(review): presumably this serialises groups with different
+  // get_group(1) that update the same sift1 entries - confirm.
+  if (idx == 0)
+    while (dpct::atomic_compare_exchange_strong<
+               sycl::access::address_space::generic_space>((int *)&lock, 0,
+                                                           1) != 0)
+      ;
+  /*
+  DPCT1065:167: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < FMC_BW)
+  {
+    // Recover the previously stored second-best score from ambiguity * score.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  /*
+  DPCT1065:168: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Release the lock once the whole group has finished its updates.
+  if (idx == 0)
+    dpct::atomic_exchange<sycl::access::address_space::generic_space>(
+        (int *)&lock, 0);
+}
+
+/*
+DPCT1110:61: The total declared local variable size in device function
+FindMaxCorr8 exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+*/
+/**
+ * Tiled matcher. Each work-group compares FMC_BW sift1 points (selected by
+ * get_group(2)) with up to FMC_GH sift2 points (selected by get_group(1)) in
+ * tiles of FMC_BH, then merges its best / second-best score into sift1 while
+ * holding `lock`.
+ *
+ * Descriptor parts are staged FMC_BD float4 values per pass into siftParts1 /
+ * siftParts2, and the block sums go to the separate blksums scratch.
+ *
+ * @param sift1      points to match; score, ambiguity, match, match_xpos and
+ *                   match_ypos are updated when a better score is found.
+ * @param sift2      candidate points (only read here).
+ * @param numPts1    number of entries in sift1 (indices are clamped to
+ *                   numPts1 - 1).
+ * @param numPts2    number of entries in sift2 (indices are clamped to
+ *                   numPts2 - 1).
+ * @param item_ct1   work-item handle.
+ * @param lock       flag used as a spin lock: acquired with a 0 -> 1
+ *                   compare-exchange and released by storing 0.
+ * @param siftParts1 work-group scratch for sift1 parts.
+ * @param siftParts2 work-group scratch for sift2 parts.
+ * @param blksums    work-group scratch for the FMC_BW x FMC_BH block sums.
+ */
+void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, volatile int &lock,
+                  sycl::float4 *siftParts1, sycl::float4 *siftParts2,
+                  float *blksums)
+{
+   // NOTE(review): leftover size notes, presumably for siftParts1,
+   // siftParts2 and blksums respectively - confirm against the launch site:
+   // 4*32*8 = 1024
+   // 4*32*8 = 1024
+   // 32*32  = 1024
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC_NW + tx;
+  sycl::float4 *pts1 = 0, *pts2 = 0;
+  // The first FMC_BW work-items each own one sift1 descriptor; pts1 stays
+  // null for the others and is only dereferenced under the same condition.
+  if (idx < FMC_BW)
+  {
+    const int p1l = dpct::min(
+        (unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1);
+    pts1 = (sycl::float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  // The bound depends only on numPts2, so every work-item runs the same
+  // number of tiles and reaches the barriers inside the loop together.
+  for (int k = 0; k < sycl::min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l =
+          dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx),
+                    numPts2 - 1);
+      pts2 = (sycl::float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH];
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    // Process the 32 float4 values of a descriptor FMC_BD at a time.
+    for (int d = 0; d < 32; d += FMC_BD)
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts1[i * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts2[i * FMC_BH + idx] = pts2[d + i];
+      /*
+      DPCT1118:64: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:178: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier();
+
+      for (int i = 0; i < FMC_BD; i++)
+      {
+        sycl::float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          sycl::float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+            sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+            sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+            sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+          }
+        }
+      }
+      /*
+      DPCT1118:65: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:179: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier();
+    }
+    // float *blksums = (float*)siftParts1;
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    /*
+    DPCT1118:62: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:176: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Work-item idx scans column idx of the block sums and updates its
+    // running best / second-best score for sift1 point idx of this group.
+    if (idx < FMC_BW)
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex =
+              dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j),
+                        numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    /*
+    DPCT1118:63: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:177: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  const int p1 = dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                           numPts1 - 1);
+  // Work-item 0 spins until it switches lock from 0 to 1; the barrier that
+  // follows holds the rest of the group back until the lock is held.
+  if (idx == 0)
+    while (dpct::atomic_compare_exchange_strong<
+               sycl::access::address_space::generic_space>((int *)&lock, 0,
+                                                           1) != 0)
+      ;
+  /*
+  DPCT1065:174: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < FMC_BW)
+  {
+    // Recover the previously stored second-best score from ambiguity * score.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  /*
+  DPCT1065:175: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Release the lock once the whole group has finished its updates.
+  if (idx == 0)
+    dpct::atomic_exchange<sycl::access::address_space::generic_space>(
+        (int *)&lock, 0);
+}
+
+/*
+DPCT1110:66: The total declared local variable size in device function
+FindMaxCorr7 exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+*/
+/**
+ * Tiled matcher. Each work-group compares 16 sift1 points (selected by
+ * get_group(2)) with 512 sift2 index slots (selected by get_group(1),
+ * clamped to numPts2 - 1) in 32 tiles of 16, then merges its best /
+ * second-best score into sift1 while holding `lock`.
+ *
+ * @param sift1      points to match; score, ambiguity, match, match_xpos and
+ *                   match_ypos are updated when a better score is found.
+ * @param sift2      candidate points (only read here).
+ * @param numPts1    number of entries in sift1 (indices are clamped to
+ *                   numPts1 - 1).
+ * @param numPts2    number of entries in sift2 (indices are clamped to
+ *                   numPts2 - 1).
+ * @param item_ct1   work-item handle.
+ * @param lock       flag used as a spin lock: acquired with a 0 -> 1
+ *                   compare-exchange and released by storing 0.
+ * @param siftParts1 work-group scratch for sift1 parts, accessed as float4
+ *                   with a row stride of 17; also reused as the 16 x 16
+ *                   table of sums.
+ * @param siftParts2 work-group scratch for sift2 parts, accessed as float4
+ *                   with a row stride of 16.
+ */
+void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, volatile int &lock,
+                  float *siftParts1, float *siftParts2)
+{
+   // features in columns
+   // one extra to avoid shared conflicts
+  sycl::float4 *pts1 = (sycl::float4 *)siftParts1;
+  sycl::float4 *pts2 = (sycl::float4 *)siftParts2;
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  // Row ty of the group loads sift1 point ty of this group's tile.
+  const int p1l =
+      dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1);
+  const sycl::float4 *p1l4 = (sycl::float4 *)sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  // Fixed trip count, so all work-items reach the barriers below together.
+  for (int k = 0; k < 512 / 16; k++)
+  {
+    const int p2l = dpct::min(
+        (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1);
+    const sycl::float4 *p2l4 = (sycl::float4 *)sift2[p2l].data;
+#define NUM 4
+    // Only rows with ty < 16 / NUM accumulate; each covers NUM sift2 rows.
+    float sum[NUM];
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sum[l] = 0.0f;
+    /*
+    DPCT1118:67: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:182: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Each descriptor is consumed in two passes of 16 float4 values.
+    for (int i = 0; i < 2; i++)
+    {
+      // sift1 parts are stored transposed (stride 17), sift2 parts row-major.
+      pts1[17 * tx + ty] = p1l4[i * 16 + tx];
+      pts2[16 * ty + tx] = p2l4[i * 16 + tx];
+      /*
+      DPCT1118:70: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:185: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier();
+      if (ty < (16 / NUM))
+      {
+#pragma unroll
+        for (int j = 0; j < 16; j++)
+        {
+          sycl::float4 p1v = pts1[17 * j + tx];
+#pragma unroll
+          for (int l = 0; l < NUM; l++)
+          {
+            sycl::float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j];
+            sum[l] += p1v.x() * p2v.x();
+            sum[l] += p1v.y() * p2v.y();
+            sum[l] += p1v.z() * p2v.z();
+            sum[l] += p1v.w() * p2v.w();
+          }
+        }
+      }
+      /*
+      DPCT1118:71: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:186: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier();
+    }
+    // sums aliases siftParts1; the barrier closing the loop above ensures the
+    // staged parts are no longer being read.
+    float *sums = siftParts1;
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l];
+    /*
+    DPCT1118:68: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:183: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Row 0 scans column tx of the sums and updates the running best /
+    // second-best score for sift1 point tx of this group.
+    if (ty == 0)
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = dpct::min(
+              (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j),
+              numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    /*
+    DPCT1118:69: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:184: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+  }
+  // The result index uses tx (not ty): row 0 holds one result per column.
+  const int p1 =
+      dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1);
+  // Work-item (0, 0) spins until it switches lock from 0 to 1; the barrier
+  // that follows holds the rest of the group back until the lock is held.
+  if (tx == 0 && ty == 0)
+    while (dpct::atomic_compare_exchange_strong<
+               sycl::access::address_space::generic_space>((int *)&lock, 0,
+                                                           1) != 0)
+      ;
+  /*
+  DPCT1065:180: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (ty == 0)
+  {
+    // Recover the previously stored second-best score from ambiguity * score.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  /*
+  DPCT1065:181: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Release the lock once the whole group has finished its updates.
+  if (tx == 0 && ty == 0)
+    dpct::atomic_exchange<sycl::access::address_space::generic_space>(
+        (int *)&lock, 0);
+}
+
+void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, volatile int &lock,
+                  float *siftParts2, float *sums)
+{
+  /*
+   * Kernel body: for the 16 sift1 points owned by this work-group (selected by
+   * get_group(2)), scores them against a window of 512 sift2 points (selected
+   * by get_group(1)), tracks the best and second-best score per sift1 point,
+   * and then merges the result into sift1[] while holding `lock`.
+   *
+   * sift1, sift2     : point arrays; only .data, .score, .ambiguity, .match,
+   *                    .match_xpos/.match_ypos and .xpos/.ypos are touched here.
+   * numPts1, numPts2 : point counts; every computed index is clamped to count-1.
+   * lock             : int used as a spin lock (0 = free, 1 = taken).
+   * siftParts2, sums : scratch buffers shared by the work-items of the group
+   *                    (presumably work-group local memory - TODO confirm at
+   *                    the launch site, which is not visible here).
+   */
+  // No staging buffer for sift1: each work-item keeps its own float4 slice in part1.
+
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1l =
+      dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1); // sift1 point loaded by row ty, clamped
+  float *pt1l = sift1[p1l].data;
+  sycl::float4 part1 = reinterpret_cast<sycl::float4 *>(pt1l)[tx]; // tx-th group of 4 descriptor floats
+  float maxScore = -1.0f; // best score seen so far
+  float maxScor2 = -1.0f; // second-best score seen so far
+  int maxIndex = 0;       // sift2 index that produced maxScore
+  for (int k = 0; k < 512; k += 16) // walk the 512-point sift2 window in tiles of 16
+  {
+    const int p2l = dpct::min(
+        (unsigned int)(item_ct1.get_group(1) * 512 + k + ty), numPts2 - 1); // sift2 point staged by row ty, clamped
+    float *pt2l = sift2[p2l].data;
+    reinterpret_cast<sycl::float4 *>(siftParts2)[32 * ty + tx] =
+        reinterpret_cast<sycl::float4 *>(pt2l)[tx]; // row ty of the tile, 32 float4 per row
+    /*
+    DPCT1118:72: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:189: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // the whole tile must be staged before anyone reads it
+    for (int i = 0; i < 16; i++) // i = sift2 point within the tile
+    {
+      sycl::float4 part2 =
+          reinterpret_cast<sycl::float4 *>(siftParts2)[32 * i + tx];
+      float sum = part1.x() * part2.x() + part1.y() * part2.y() +
+                  part1.z() * part2.z() + part1.w() * part2.w(); // partial dot product over 4 floats
+      // ShiftDown is defined elsewhere; presumably a sub-group shift-down so that
+      // the tx == 0 work-item ends up with the sum over all tx - TODO confirm.
+      sum += ShiftDown(sum, 16, item_ct1);
+      sum += ShiftDown(sum, 8, item_ct1);
+      sum += ShiftDown(sum, 4, item_ct1);
+      sum += ShiftDown(sum, 2, item_ct1);
+      sum += ShiftDown(sum, 1, item_ct1);
+      if (tx == 0)
+        sums[16 * i + ty] = sum; // score of (sift1 point ty, tile point i)
+    }
+    /*
+    DPCT1118:73: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:190: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // all 16x16 scores must be in sums[] before the scan
+    if (ty == 0 && tx < 16) // one work-item per sift1 point scans that point's 16 scores
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore; // previous best becomes the runner-up
+          maxScore = sum;
+          maxIndex = dpct::min(
+              (unsigned int)(item_ct1.get_group(1) * 512 + k + j), numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    /*
+    DPCT1118:74: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:191: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // scan must finish before the next tile overwrites the scratch buffers
+  }
+  if (tx == 0 && ty == 0) // a single work-item spins until it flips lock from 0 to 1
+    while (dpct::atomic_compare_exchange_strong<
+               sycl::access::address_space::generic_space>((int *)&lock, 0,
+                                                           1) != 0)
+      ;
+  /*
+  DPCT1065:187: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier(); // the rest of the group waits here until the lock is held
+  if (ty == 0 && tx < 16)
+  {
+    const int p1 =
+        dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1); // sift1 point updated by this work-item
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // undo the stored ratio to recover the old runner-up
+    if (maxScore > sift1[p1].score) // new overall best for this point
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2); // old best may now be the runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // best stays, only the runner-up (and so the ratio) changes
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  /*
+  DPCT1065:188: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier(); // all updates are issued before the lock is released
+  if (tx == 0 && ty == 0)
+    dpct::atomic_exchange<sycl::access::address_space::generic_space>(
+        (int *)&lock, 0); // release: store 0 back into lock
+}
+
+void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  const sycl::nd_item<3> &item_ct1, volatile int &lock,
+                  float *siftParts1, float *siftParts2)
+{
+   /*
+    * Kernel body: scores the 16 sift1 points owned by this work-group (selected
+    * by get_group(2)) against a window of 512 sift2 points (selected by
+    * get_group(1)); work-item (tx, ty) accumulates the score of sift1 point tx
+    * against sift2 tile point ty. The per-point best / second-best are then
+    * merged into sift1[] while holding `lock` (0 = free, 1 = taken).
+    *
+    * siftParts1 / siftParts2 are scratch buffers shared by the group
+    * (presumably work-group local memory - TODO confirm at the launch site).
+    * They hold 16 descriptor values per point, stored transposed.
+    */
+   // Rows are addressed with a stride of 17 floats (one more than the 16 used), per the original note to avoid shared-memory conflicts.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1l =
+      dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1); // sift1 point loaded by row ty, clamped
+  const float *pt1l = sift1[p1l].data;
+  float maxScore = -1.0f; // best score seen so far
+  float maxScor2 = -1.0f; // second-best score seen so far
+  int maxIndex = 0;       // sift2 index that produced maxScore
+  for (int k = 0; k < 512 / 16; k++) // 32 tiles of 16 sift2 points
+  {
+    const int p2l = dpct::min(
+        (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1); // sift2 point loaded by row ty, clamped
+    const float *pt2l = sift2[p2l].data;
+    float sum = 0.0f;
+    for (int i = 0; i < 8; i++) // 8 chunks of 16 floats = 128 descriptor values
+    {
+      siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose
+      siftParts2[17 * tx + ty] = pt2l[i * 16 + tx];
+      /*
+      DPCT1118:77: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:196: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier(); // both chunks fully staged before they are read
+      for (int j = 0; j < 16; j++)
+        sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; // value j of sift1 point tx times value j of sift2 point ty
+      /*
+      DPCT1118:78: SYCL group functions and algorithms must be encountered in
+      converged control flow. You may need to adjust the code.
+      */
+      /*
+      DPCT1065:197: Consider replacing sycl::nd_item::barrier() with
+      sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+      performance if there is no access to global memory.
+      */
+      item_ct1.barrier(); // everyone is done reading before the next chunk (or sums) overwrites the buffers
+    }
+    float *sums = siftParts1; // reuses siftParts1 as the 16x16 score table; safe only because of the barrier above
+    sums[16 * ty + tx] = sum; // score of (sift1 point tx, tile point ty)
+    /*
+    DPCT1118:75: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:194: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // all 256 scores written before the scan
+    if (ty == 0) // one work-item per sift1 point scans that point's 16 scores
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx]; // shadows the outer sum, which is no longer needed for this tile
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore; // previous best becomes the runner-up
+          maxScore = sum;
+          maxIndex = dpct::min(
+              (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j),
+              numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    /*
+    DPCT1118:76: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:195: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier(); // scan finished before the next tile reuses siftParts1
+  }
+  const int p1 =
+      dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1); // sift1 point updated by column tx
+  if (tx == 0 && ty == 0) // a single work-item spins until it flips lock from 0 to 1
+    while (dpct::atomic_compare_exchange_strong<
+               sycl::access::address_space::generic_space>((int *)&lock, 0,
+                                                           1) != 0)
+      ;
+  /*
+  DPCT1065:192: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier(); // the rest of the group waits here until the lock is held
+  if (ty == 0)
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // undo the stored ratio to recover the old runner-up
+    if (maxScore > sift1[p1].score) // new overall best for this point
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2); // old best may now be the runner-up
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old) // best stays, only the runner-up (and so the ratio) changes
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  /*
+  DPCT1065:193: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier(); // all updates are issued before the lock is released
+  if (tx == 0 && ty == 0)
+    dpct::atomic_exchange<sycl::access::address_space::generic_space>(
+        (int *)&lock, 0); // release: store 0 back into lock
+}
+
+template <int size>
+/*
+DPCT1110:79: The total declared local variable size in device function
+InvertMatrix exceeds 128 bytes and may cause high register pressure. Consult
+with your hardware vendor to find the total register size available and adjust
+the code, or use smaller sub-group size to avoid high register pressure.
+
+InvertMatrix: writes the inverse of the size x size matrix `elem` into `res`.
+The routine factorises `elem` in place (LU decomposition with row pivoting,
+rows scaled by 1/max|row element| when choosing the pivot) and then solves
+for one unit vector per column of `res`.
+
+  elem : input matrix, indexed elem[row][col]; OVERWRITTEN with the factors
+         (rows are also swapped), so the caller's copy is destroyed.
+  res  : output, receives the inverse column by column.
+
+A zero pivot is replaced by 1e-16 rather than reported, so a singular input
+yields very large values instead of an error.
+*/
+void InvertMatrix(float elem[size][size], float res[size][size])
+{
+  int indx[size]; // indx[j] = row that was swapped into position j
+  float b[size];  // right-hand side / solution vector for one column
+  float vv[size]; // per-row scale factor used when comparing pivot candidates
+  for (int i = 0; i < size; i++)
+    indx[i] = 0;
+  int imax = 0;
+  float d = 1.0; // flips sign on every row swap; never read afterwards in this function
+  for (int i = 0; i < size; i++)
+  { // find biggest element for each row
+    float big = 0.0;
+    for (int j = 0; j < size; j++)
+    {
+      float temp = sycl::fabs(elem[i][j]);
+      if (temp > big)
+        big = temp;
+    }
+    if (big > 0.0)
+      vv[i] = 1.0 / big;
+    else
+      vv[i] = 1e16; // all-zero row: use a huge scale instead of dividing by zero
+  }
+  for (int j = 0; j < size; j++) // process one column at a time
+  {
+    for (int i = 0; i < j; i++)
+    {                                   // i<j
+      float sum = elem[i][j];           // i<j (row < col: above the diagonal)
+      for (int k = 0; k < i; k++)       // k<i<j
+        sum -= elem[i][k] * elem[k][j]; // i>k (below the diagonal), k<j (above the diagonal)
+      elem[i][j] = sum;                 // i<j (above the diagonal)
+    }
+    float big = 0.0;
+    for (int i = j; i < size; i++)
+    {                                   // i>=j
+      float sum = elem[i][j];           // i>=j (on or below the diagonal)
+      for (int k = 0; k < j; k++)       // k<j<=i
+        sum -= elem[i][k] * elem[k][j]; // i>k (below the diagonal), k<j (above the diagonal)
+      elem[i][j] = sum;                 // i>=j (on or below the diagonal)
+      float dum = vv[i] * sycl::fabs(sum); // scaled magnitude of this pivot candidate
+      if (dum >= big)
+      {
+        big = dum;
+        imax = i;
+      }
+    }
+    if (j != imax)
+    { // imax>j
+      for (int k = 0; k < size; k++)
+      {
+        float dum = elem[imax][k]; // swap whole rows imax and j
+        elem[imax][k] = elem[j][k];
+        elem[j][k] = dum;
+      }
+      d = -d;
+      vv[imax] = vv[j]; // keep the scale factor with the row that moved
+    }
+    indx[j] = imax;
+    if (elem[j][j] == 0.0) // j==j (diagonal): avoid dividing by an exact zero pivot
+      elem[j][j] = 1e-16;
+    if (j != (size - 1))
+    {
+      float dum = 1.0 / elem[j][j];
+      for (int i = j + 1; i < size; i++) // i>j
+        elem[i][j] *= dum;               // i>j (below the diagonal): divide by the pivot
+    }
+  }
+  for (int j = 0; j < size; j++) // solve for column j of the inverse
+  {
+    for (int k = 0; k < size; k++)
+      b[k] = 0.0;
+    b[j] = 1.0; // j-th unit vector
+    int ii = -1; // index of the first non-zero entry of b seen so far, -1 until found
+    for (int i = 0; i < size; i++) // forward pass, applying the recorded row swaps
+    {
+      int ip = indx[i];
+      float sum = b[ip];
+      b[ip] = b[i];
+      if (ii != -1)
+        for (int j = ii; j < i; j++) // NOTE: this j shadows the column index of the enclosing loop
+          sum -= elem[i][j] * b[j]; // i>j (below the diagonal)
+      else if (sum != 0.0)
+        ii = i;
+      b[i] = sum;
+    }
+    for (int i = size - 1; i >= 0; i--) // backward pass
+    {
+      float sum = b[i];
+      for (int j = i + 1; j < size; j++) // NOTE: shadows the enclosing column index as well
+        sum -= elem[i][j] * b[j]; // i<j (above the diagonal)
+      b[i] = sum / elem[i][i];    // i==i (diagonal)
+    }
+    for (int i = 0; i < size; i++)
+      res[i][j] = b[i]; // here j is the outer column index again
+  }
+}
+
+/*
+ComputeHomographies: each work-item builds one 8x8 linear system from four
+point correspondences and writes the eight resulting homography coefficients.
+
+  coord   : four planes of numPts floats each, in the order x1, y1, x2, y2.
+  randPts : four planes of numLoops ints; plane c holds the c-th point index
+            chosen for every work-item.
+  homo    : eight planes of numLoops floats; coefficient j of work-item idx
+            is stored at homo[j * numLoops + idx].
+  numPts  : plane stride of coord.
+
+numLoops is the total number of work-items along dimension 2.
+
+DPCT1110:80: The total declared local variable size in device function
+ComputeHomographies exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/
+void ComputeHomographies(float *coord, int *randPts, float *homo, int numPts,
+                         const sycl::nd_item<3> &item_ct1)
+{
+  float lhs[8][8], lhsInv[8][8];
+  float rhs[8];
+  const int groupId = item_ct1.get_group(2);
+  const int localId = item_ct1.get_local_id(2);
+  const int idx = item_ct1.get_local_range(2) * groupId + localId;
+  const int numLoops =
+      item_ct1.get_local_range(2) * item_ct1.get_group_range(2);
+  // Each correspondence contributes two consecutive rows of the system.
+  for (int c = 0; c < 4; c++)
+  {
+    const int pt = randPts[c * numLoops + idx];
+    const float x1 = coord[pt + 0 * numPts];
+    const float y1 = coord[pt + 1 * numPts];
+    const float x2 = coord[pt + 2 * numPts];
+    const float y2 = coord[pt + 3 * numPts];
+    const int rx = 2 * c + 0; // row constraining x2
+    const int ry = 2 * c + 1; // row constraining y2
+
+    lhs[rx][0] = x1;
+    lhs[rx][1] = y1;
+    lhs[rx][2] = 1.0;
+    lhs[rx][5] = 0.0;
+    lhs[rx][4] = 0.0;
+    lhs[rx][3] = 0.0;
+    lhs[rx][6] = -x2 * x1;
+    lhs[rx][7] = -x2 * y1;
+
+    lhs[ry][2] = 0.0;
+    lhs[ry][1] = 0.0;
+    lhs[ry][0] = 0.0;
+    lhs[ry][3] = x1;
+    lhs[ry][4] = y1;
+    lhs[ry][5] = 1.0;
+    lhs[ry][6] = -y2 * x1;
+    lhs[ry][7] = -y2 * y1;
+
+    rhs[rx] = x2;
+    rhs[ry] = y2;
+  }
+  InvertMatrix<8>(lhs, lhsInv);
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  // Coefficients = inverse * right-hand side, one row at a time.
+  for (int row = 0; row < 8; row++)
+  {
+    float acc = 0.0f;
+    for (int col = 0; col < 8; col++)
+      acc += lhsInv[row][col] * rhs[col];
+    homo[row * numLoops + idx] = acc;
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+#define TESTHOMO_TESTS 16 // number of tests per block,  alt. 32, 32
+#define TESTHOMO_LOOPS 16 // number of loops per block,  alt.  8, 16
+
+void TestHomographies(float *d_coord, float *d_homo,
+                                 int *d_counts, int numPts, float thresh2,
+                                 const sycl::nd_item<3> &item_ct1, float *homo,
+                                 int *cnts)
+{
+  /*
+   * Kernel body: for each candidate homography handled by this work-group,
+   * counts how many points satisfy err2 < thresh2 * deno^2 and writes that
+   * count to d_counts.
+   *
+   *   d_coord  : four planes of numPts floats each, in the order x1, y1, x2, y2.
+   *   d_homo   : eight planes of numLoops floats (coefficient-major).
+   *   d_counts : one int per homography, receives the count.
+   *   numPts   : both the plane stride of d_coord and the number of points tested.
+   *   thresh2  : threshold on the squared error, before scaling by deno^2.
+   *   homo     : scratch for 8 coefficients x TESTHOMO_LOOPS homographies.
+   *   cnts     : scratch for TESTHOMO_TESTS x TESTHOMO_LOOPS partial counts.
+   *              (homo / cnts are presumably work-group local memory - TODO
+   *              confirm at the launch site.)
+   *
+   * Work-item (tx, ty) tests homography ty on points tx, tx + TESTHOMO_TESTS, ...
+   */
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = item_ct1.get_group(1) * item_ct1.get_local_range(1) + tx; // global homography index for column tx
+  const int numLoops =
+      item_ct1.get_local_range(1) * item_ct1.get_group_range(1); // total number of homographies
+  if (ty < 8 && tx < TESTHOMO_LOOPS)
+    homo[tx * 8 + ty] = d_homo[idx + ty * numLoops]; // stage coefficient ty of homography tx
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  float a[8];
+  for (int i = 0; i < 8; i++)
+    a[i] = homo[ty * 8 + i]; // private copy of the 8 coefficients of homography ty
+  int cnt = 0;
+  for (int i = tx; i < numPts; i += TESTHOMO_TESTS)
+  {
+    float x1 = d_coord[i + 0 * numPts];
+    float y1 = d_coord[i + 1 * numPts];
+    float x2 = d_coord[i + 2 * numPts];
+    float y2 = d_coord[i + 3 * numPts];
+    /*
+    DPCT1013:198: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float nomx = a[0] * x1 + a[1] * y1 + a[2];
+    /*
+    DPCT1013:199: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float nomy = a[3] * x1 + a[4] * y1 + a[5];
+    /*
+    DPCT1013:200: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float deno = a[6] * x1 + a[7] * y1 + 1.0f; // the ninth coefficient is fixed at 1
+    /*
+    DPCT1013:201: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float errx = x2 * deno - nomx; // error multiplied through by deno, so no division is needed
+    /*
+    DPCT1013:202: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float erry = y2 * deno - nomy;
+    /*
+    DPCT1013:203: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    float err2 = errx * errx + erry * erry;
+    /*
+    DPCT1013:204: The rounding mode could not be specified and the generated
+    code may have different accuracy than the original code. Verify the
+    correctness. SYCL math built-in function rounding mode is aligned with
+    OpenCL C 1.2 standard.
+    */
+    if (err2 < thresh2 * deno * deno) // threshold scaled by deno^2 to match the scaled error
+      cnt++;
+  }
+  int kty = TESTHOMO_TESTS * ty; // start of row ty in cnts
+  cnts[kty + tx] = cnt;
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  int len = TESTHOMO_TESTS / 2;
+  while (len > 0) // halving reduction along tx; len is the same for every work-item
+  {
+    if (tx < len)
+      cnts[kty + tx] += cnts[kty + tx + len];
+    len /= 2;
+    /*
+    DPCT1118:81: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+  if (tx < TESTHOMO_LOOPS && ty == 0)
+    d_counts[idx] = cnts[TESTHOMO_TESTS * tx]; // element 0 of row tx holds the total for homography tx
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+//================= Host matching functions =====================//
+
+/*
+ * FindHomography: estimates a homography from the matches stored in `data`
+ * by random sampling.
+ *
+ * Steps:
+ *   1. Copy the point records to the host and keep the indices of points with
+ *      score > minScore and ambiguity < maxAmbiguity.
+ *   2. If at least 8 such points exist, draw 4 distinct ones per trial
+ *      (numLoops trials, rounded up to a multiple of 16).
+ *   3. ComputeHomographies builds one candidate per trial; TestHomographies
+ *      counts the points that agree with each candidate.
+ *   4. The candidate with the highest count wins.
+ *
+ * Parameters:
+ *   data         - matched SIFT data; only data.d_data and data.numPts are read.
+ *   homography   - out, 9 floats. Initialised to the identity; the first 8 are
+ *                  overwritten with the winning candidate when one is found.
+ *   numMatches   - out, count of the winning candidate (0 if none was computed).
+ *   matchTime    - in/out, accumulates device-related time in microseconds when
+ *                  DEVICE_TIMER is defined.
+ *   numLoops     - requested number of random trials.
+ *   minScore, maxAmbiguity - filter applied in step 1.
+ *   thresh       - error threshold; passed squared to TestHomographies.
+ *
+ * Returns matchTime, or 0 when there is no device data or fewer than 8 points.
+ */
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+  *numMatches = 0;
+  // Start from the identity so every early exit leaves a usable matrix.
+  homography[0] = homography[4] = homography[8] = 1.0f;
+  homography[1] = homography[2] = homography[3] = 0.0f;
+  homography[5] = homography[6] = homography[7] = 0.0f;
+  if (data.d_data == NULL)
+    return 0.0f;
+  SiftPoint *d_sift = data.d_data;
+  numLoops = iDivUp(numLoops, 16) * 16; // kernels below use groups of 16 trials
+  int numPts = data.numPts;
+  if (numPts < 8)
+    return 0.0f;
+  int numPtsUp = iDivUp(numPts, 16) * 16;
+  float *d_coord, *d_homo;
+  int *d_randPts, *h_randPts;
+  int randSize = 4 * sizeof(int) * numLoops;
+  int szFl = sizeof(float);
+  int szPt = sizeof(SiftPoint);
+#ifdef DEVICE_TIMER
+  auto start_malloc = std::chrono::steady_clock::now();
+#endif
+  safeCall(DPCT_CHECK_ERROR(
+      d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp,
+                                             dpct::get_in_order_queue())));
+  safeCall(DPCT_CHECK_ERROR(d_randPts = (int *)sycl::malloc_device(
+                                randSize, dpct::get_in_order_queue())));
+  safeCall(DPCT_CHECK_ERROR(
+      d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops,
+                                            dpct::get_in_order_queue())));
+
+#ifdef DEVICE_TIMER
+  auto stop_malloc = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  h_randPts = (int *)malloc(randSize);
+  float *h_scores = (float *)malloc(sizeof(float) * numPtsUp);
+  float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp);
+
+  // Host staging buffer for the first numPts point records.
+  char *h_points = (char *)malloc(szPt * numPtsUp);
+  // Byte offsets of the two fields inside one record. Address arithmetic only:
+  // the device pointer is never dereferenced on the host.
+  const size_t scoreOffset =
+      (size_t)((const char *)&d_sift[0].score - (const char *)d_sift);
+  const size_t ambiguityOffset =
+      (size_t)((const char *)&d_sift[0].ambiguity - (const char *)d_sift);
+
+#ifdef DEVICE_TIMER
+  auto start_memcpy_1 = std::chrono::steady_clock::now();
+#endif
+
+  // One transfer starting at the array base. Starting the copy at a field
+  // address while still copying szPt * numPts bytes would read past the end
+  // of the numPts records by that field's offset.
+  safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                .memcpy(h_points, d_sift, szPt * numPts)
+                                .wait()));
+
+#ifdef DEVICE_TIMER
+  auto stop_memcpy_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_1 - start_memcpy_1).count();
+#endif
+
+  // Gather the strided score / ambiguity fields into dense host arrays.
+  const char *src_score = h_points + scoreOffset;
+  const char *src_ambiguity = h_points + ambiguityOffset;
+  char *dst_score = (char *)h_scores;
+  char *dst_ambiguity = (char *)h_ambiguities;
+
+  for (int i = 0; i < numPts; ++i)
+  {
+    memcpy(dst_score, src_score, szFl);
+    memcpy(dst_ambiguity, src_ambiguity, szFl);
+    src_score += szPt;
+    src_ambiguity += szPt;
+    dst_score += szFl;
+    dst_ambiguity += szFl;
+  }
+  free(h_points);
+
+  // Indices of the points that pass the score / ambiguity filter.
+  int *validPts = (int *)malloc(sizeof(int) * numPts);
+  int numValid = 0;
+  for (int i = 0; i < numPts; i++)
+  {
+    if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity)
+      validPts[numValid++] = i;
+  }
+  free(h_scores);
+  free(h_ambiguities);
+  if (numValid >= 8)
+  {
+    std::random_device rd;
+    uint32_t seed = rd();
+    std::mt19937 rnd(seed);  // mersenne_twister_engine
+    std::uniform_int_distribution<uint32_t> dis(0, UINT32_MAX);
+    // Four pairwise-distinct valid points per trial; plane c of h_randPts
+    // holds the c-th point of every trial.
+    for (int i = 0; i < numLoops; i++)
+    {
+      int p1 = dis(rnd) % numValid;
+      int p2 = dis(rnd) % numValid;
+      int p3 = dis(rnd) % numValid;
+      int p4 = dis(rnd) % numValid;
+      while (p2 == p1)
+        p2 = dis(rnd) % numValid;
+      while (p3 == p1 || p3 == p2)
+        p3 = dis(rnd) % numValid;
+      while (p4 == p1 || p4 == p2 || p4 == p3)
+        p4 = dis(rnd) % numValid;
+      h_randPts[i + 0 * numLoops] = validPts[p1];
+      h_randPts[i + 1 * numLoops] = validPts[p2];
+      h_randPts[i + 2 * numLoops] = validPts[p3];
+      h_randPts[i + 3 * numLoops] = validPts[p4];
+    }
+
+    float *temp3, *temp4, *temp5, *temp6;
+#ifdef DEVICE_TIMER
+    auto start_malloc_2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(
+        DPCT_CHECK_ERROR(temp3 = (float *)sycl::malloc_device(
+                             szPt * numPtsUp, dpct::get_in_order_queue())));
+    safeCall(
+        DPCT_CHECK_ERROR(temp4 = (float *)sycl::malloc_device(
+                             szPt * numPtsUp, dpct::get_in_order_queue())));
+    safeCall(
+        DPCT_CHECK_ERROR(temp5 = (float *)sycl::malloc_device(
+                             szPt * numPtsUp, dpct::get_in_order_queue())));
+    safeCall(
+        DPCT_CHECK_ERROR(temp6 = (float *)sycl::malloc_device(
+                             szPt * numPtsUp, dpct::get_in_order_queue())));
+#ifdef DEVICE_TIMER
+    auto stop_malloc_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_malloc_2 - start_malloc_2).count();
+#endif
+#ifdef DEVICE_TIMER
+    auto start_memcpy_2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue()
+                                  .memcpy(d_randPts, h_randPts, randSize)
+                                  .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+    // NOTE(review): these four device-side copies also start at a field address
+    // while copying szPt * numPts bytes, so they can read a few bytes past the
+    // numPts records. Left unchanged because the access pattern of
+    // memcopyKernel is not visible from here - confirm and tighten.
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy(
+        temp3, &d_sift[0].xpos, szPt * numPts)));
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy(
+        temp4, &d_sift[0].ypos, szPt * numPts)));
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy(
+        temp5, &d_sift[0].match_xpos, szPt * numPts)));
+    safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy(
+        temp6, &d_sift[0].match_ypos, szPt * numPts)));
+
+    // Kernel calls to transfer memory from device to device: each fills one
+    // plane of d_coord (x1, y1, x2, y2 in that order).
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      float *d_coord_numPtsUp_ct1 = &d_coord[0 * numPtsUp];
+
+      cgh.parallel_for(
+          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
+          [=](sycl::nd_item<3> item_ct1) {
+            memcopyKernel(temp3, d_coord_numPtsUp_ct1, szPt, szFl, numPts,
+                          szFl);
+          });
+    });
+    /*
+    DPCT1010:205: SYCL uses exceptions to report errors and does not use the
+    error codes. The call was replaced with 0. You need to rewrite this code.
+    */
+    safeCall(0);
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      float *d_coord_numPtsUp_ct1 = &d_coord[1 * numPtsUp];
+
+      cgh.parallel_for(
+          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
+          [=](sycl::nd_item<3> item_ct1) {
+            memcopyKernel(temp4, d_coord_numPtsUp_ct1, szPt, szFl, numPts,
+                          szFl);
+          });
+    });
+    /*
+    DPCT1010:206: SYCL uses exceptions to report errors and does not use the
+    error codes. The call was replaced with 0. You need to rewrite this code.
+    */
+    safeCall(0);
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      float *d_coord_numPtsUp_ct1 = &d_coord[2 * numPtsUp];
+
+      cgh.parallel_for(
+          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
+          [=](sycl::nd_item<3> item_ct1) {
+            memcopyKernel(temp5, d_coord_numPtsUp_ct1, szPt, szFl, numPts,
+                          szFl);
+          });
+    });
+    /*
+    DPCT1010:207: SYCL uses exceptions to report errors and does not use the
+    error codes. The call was replaced with 0. You need to rewrite this code.
+    */
+    safeCall(0);
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      float *d_coord_numPtsUp_ct1 = &d_coord[3 * numPtsUp];
+
+      cgh.parallel_for(
+          sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
+          [=](sycl::nd_item<3> item_ct1) {
+            memcopyKernel(temp6, d_coord_numPtsUp_ct1, szPt, szFl, numPts,
+                          szFl);
+          });
+    });
+    /*
+    DPCT1010:208: SYCL uses exceptions to report errors and does not use the
+    error codes. The call was replaced with 0. You need to rewrite this code.
+    */
+    safeCall(0);
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_2 - start_memcpy_2).count();
+#endif
+#ifdef DEVICE_TIMER
+    auto start_kernel_1 = std::chrono::steady_clock::now();
+#endif
+    // One work-item per trial, 16 per work-group.
+    dpct::get_in_order_queue().parallel_for(
+        sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) *
+                              sycl::range<3>(1, 1, 16),
+                          sycl::range<3>(1, 1, 16)),
+        [=](sycl::nd_item<3> item_ct1) {
+          ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1);
+        });
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_kernel_1 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count();
+    // printf("ComputeHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count());
+#endif
+    checkMsg("ComputeHomographies() execution failed\n");
+
+    sycl::range<3> blocks(1, numLoops / TESTHOMO_LOOPS, 1);
+    sycl::range<3> threads(1, TESTHOMO_LOOPS, TESTHOMO_TESTS);
+#ifdef DEVICE_TIMER
+    auto start_kernel_2 = std::chrono::steady_clock::now();
+#endif
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      /*
+      DPCT1101:210: '8 * TESTHOMO_LOOPS' expression was replaced with a value.
+      Modify the code to use the original expression, provided in comments, if
+      it is correct.
+      */
+      sycl::local_accessor<float, 1> homo_acc_ct1(
+          sycl::range<1>(128 /*8 * TESTHOMO_LOOPS*/), cgh);
+      /*
+      DPCT1101:211: 'TESTHOMO_TESTS * TESTHOMO_LOOPS' expression was replaced
+      with a value. Modify the code to use the original expression, provided
+      in comments, if it is correct.
+      */
+      sycl::local_accessor<int, 1> cnts_acc_ct1(
+          sycl::range<1>(256 /*TESTHOMO_TESTS * TESTHOMO_LOOPS*/), cgh);
+
+      // d_randPts is reused as the output buffer for the per-trial counts.
+      // NOTE(review): the point count passed is numPtsUp, so the padding
+      // entries of d_coord beyond numPts are tested as well - confirm intended.
+      cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                       [=](sycl::nd_item<3> item_ct1) {
+                         TestHomographies(d_coord, d_homo, d_randPts, numPtsUp,
+                                          thresh * thresh, item_ct1,
+                                          homo_acc_ct1.get_pointer(),
+                                          cnts_acc_ct1.get_pointer());
+                       });
+    });
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_kernel_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count();
+    // printf("TestHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count());
+#endif
+    checkMsg("TestHomographies() execution failed\n");
+#ifdef DEVICE_TIMER
+    auto start_memcpy_3 = std::chrono::steady_clock::now();
+#endif
+    // Bring back the numLoops counts written by TestHomographies.
+    safeCall(DPCT_CHECK_ERROR(
+        dpct::get_in_order_queue()
+            .memcpy(h_randPts, d_randPts, sizeof(int) * numLoops)
+            .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_3 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_3 - start_memcpy_3).count();
+#endif
+    // Pick the trial with the highest count (first one wins on ties).
+    int maxIndex = -1, maxCount = -1;
+    for (int i = 0; i < numLoops; i++)
+      if (h_randPts[i] > maxCount)
+      {
+        maxCount = h_randPts[i];
+        maxIndex = i;
+      }
+
+    *numMatches = maxCount;
+#ifdef DEVICE_TIMER
+    auto start_memcpy_4 = std::chrono::steady_clock::now();
+#endif
+    // Strided copy of the winner's 8 coefficients (one float from each of the
+    // 8 planes of d_homo) into homography[0..7]; homography[8] stays 1.
+    safeCall(DPCT_CHECK_ERROR(dpct::dpct_memcpy(
+        homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8,
+        dpct::device_to_host)));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_4 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_4 - start_memcpy_4).count();
+#endif
+
+    safeCall(DPCT_CHECK_ERROR(sycl::free(temp3, dpct::get_in_order_queue())));
+    safeCall(DPCT_CHECK_ERROR(sycl::free(temp4, dpct::get_in_order_queue())));
+    safeCall(DPCT_CHECK_ERROR(sycl::free(temp5, dpct::get_in_order_queue())));
+    safeCall(DPCT_CHECK_ERROR(sycl::free(temp6, dpct::get_in_order_queue())));
+  }
+
+  free(validPts);
+  free(h_randPts);
+
+  safeCall(DPCT_CHECK_ERROR(sycl::free(d_homo, dpct::get_in_order_queue())));
+  safeCall(DPCT_CHECK_ERROR(sycl::free(d_randPts, dpct::get_in_order_queue())));
+  safeCall(DPCT_CHECK_ERROR(sycl::free(d_coord, dpct::get_in_order_queue())));
+  return matchTime;
+}
+
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime)
+{ // Matches data1's points against data2's on the device (CleanMatches, then FindMaxCorr10), copies data1 back to its host buffer if present, and returns matchTime.
+  int numPts1 = data1.numPts;
+  int numPts2 = data2.numPts;
+  if (!numPts1 || !numPts2)
+    return 0.0; // nothing to match
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = data1.m_data;
+  SiftPoint *sift2 = data2.m_data;
+#else
+  if (data1.d_data == NULL || data2.d_data == NULL)
+    return 0.0; // no device data to work on
+  SiftPoint *sift1 = data1.d_data;
+  SiftPoint *sift2 = data2.d_data;
+#endif
+
+// Original version with correlation and maximization in two different kernels
+// Global memory requirement: O(N^2)
+#if 0
+  float *d_corrData; 
+  int corrWidth = iDivUp(numPts2, 16)*16;
+  int corrSize = sizeof(float)*numPts1*corrWidth;
+  safeCall(cudaMalloc((void **)&d_corrData, corrSize));
+#if 0 
+  dim3 blocks1(numPts1, iDivUp(numPts2, 16));
+  dim3 threads1(16, 16); // each block: 1 points x 16 points
+  MatchSiftPoints<<<blocks1, threads1>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#else 
+  dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16));
+  dim3 threads(16, 16); // each block: 16 points x 16 points
+  MatchSiftPoints2<<<blocks, threads>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#endif
+  safeCall(cudaDeviceSynchronize());
+  dim3 blocksMax(iDivUp(numPts1, 16));
+  dim3 threadsMax(16, 16);
+  FindMaxCorr<<<blocksMax, threadsMax>>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint));
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+
+// Version suggested by Nicholas Lin with combined correlation and maximization
+// Global memory requirement: O(N)
+#if 0
+  int block_dim = 16;
+  float *d_corrData;
+  int corrSize = numPts1 * block_dim * 2;
+  safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize));
+  dim3 blocks(iDivUp(numPts1, block_dim));
+  dim3 threads(block_dim, block_dim); 
+  FindMaxCorr3<<<blocks, threads >>>(d_corrData, sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr3() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+
+#if 0
+  dim3 blocksMax(numPts1);
+  dim3 threadsMax(FMC2W, FMC2H);
+  FindMaxCorr2<<<blocksMax, threadsMax>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr2() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using one FMC2H points per block
+#if 0
+  dim3 blocksMax2(iDivUp(numPts1, FMC2H));
+  dim3 threadsMax2(FMC2W, FMC2H);
+  FindMaxCorr4<<<blocksMax2, threadsMax2>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr4() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using global locks
+#if 1
+  sycl::range<3> blocksMax3(1, iDivUp(numPts2, 512), iDivUp(numPts1, 16));
+  sycl::range<3> threadsMax3(1, 16, 16);
+#ifdef DEVICE_TIMER
+  auto start_kernel1 = std::chrono::steady_clock::now();
+#endif
+  dpct::get_in_order_queue().parallel_for(
+      sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) *
+                            sycl::range<3>(1, 1, 64),
+                        sycl::range<3>(1, 1, 64)),
+      [=](sycl::nd_item<3> item_ct1) {
+        CleanMatches(sift1, numPts1, item_ct1);
+      });
+  /*
+  DPCT1010:209: SYCL uses exceptions to report errors and does not use the error
+  codes. The call was replaced with 0. You need to rewrite this code.
+  */
+  safeCall(0);
+  safeCall(
+      DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+  auto stop_kernel1 = std::chrono::steady_clock::now();
+  // printf("CleanMatches time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count());
+  matchTime += std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+  float matchSiftDataTime = std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count(); // declared at first use; later stages add to it
+#endif
+  int mode = 10;
+  // if (mode == 5) // K40c 5.0ms, 1080 Ti 1.2ms, 2080 Ti 0.83ms
+  //   FindMaxCorr5<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // else if (mode == 6)
+  // { // 2080 Ti 0.89ms
+  //   threadsMax3 = dim3(32, 16);
+  //   FindMaxCorr6<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else if (mode == 7) // 2080 Ti 0.50ms
+  //   FindMaxCorr7<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // else if (mode == 8)
+  // { // 2080 Ti 0.45ms
+  //   blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+  //   threadsMax3 = dim3(FMC_NW, FMC_NH);
+  //   FindMaxCorr8<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else if (mode == 9)
+  // { // 2080 Ti 0.46ms
+  //   blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+  //   threadsMax3 = dim3(FMC_NW, FMC_NH);
+  //   FindMaxCorr9<<<blocksMax3, threadsMax3>>>(sift1, sift2, numPts1, numPts2);
+  // }
+  // else
+  if (mode == 10)
+  {
+    blocksMax3 = sycl::range<3>(1, 1, iDivUp(numPts1, M7W));
+    threadsMax3 = sycl::range<3>(1, M7H / M7R, M7W);
+#ifdef DEVICE_TIMER
+    auto start_kernel2 = std::chrono::steady_clock::now();
+#endif
+    /*
+    DPCT1049:82: The work-group size passed to the SYCL kernel may exceed the
+    limit. To get the device limit, query info::device::max_work_group_size.
+    Adjust the work-group size if needed.
+    */
+    dpct::get_in_order_queue().submit([&](sycl::handler &cgh) {
+      /*
+      DPCT1101:212: 'M7W * NDIM / 4' expression was replaced with a value.
+      Modify the code to use the original expression, provided in comments, if
+      it is correct.
+      */
+      sycl::local_accessor<sycl::float4, 1> buffer1_acc_ct1(
+          sycl::range<1>(1024 /*M7W * NDIM / 4*/), cgh);
+      /*
+      DPCT1101:213: 'M7H * NDIM / 4' expression was replaced with a value.
+      Modify the code to use the original expression, provided in comments, if
+      it is correct.
+      */
+      sycl::local_accessor<sycl::float4, 1> buffer2_acc_ct1(
+          sycl::range<1>(1024 /*M7H * NDIM / 4*/), cgh);
+
+      cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                       [=](sycl::nd_item<3> item_ct1) {
+                         FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1,
+                                       buffer1_acc_ct1.get_pointer(),
+                                       buffer2_acc_ct1.get_pointer());
+                       });
+    });
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_kernel2 = std::chrono::steady_clock::now();
+    // printf("FindMaxCorr10 time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count());
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+#endif
+  }
+  checkMsg("FindMaxCorr10() execution failed\n");
+#endif
+
+  if (data1.h_data != NULL)
+  {
+    float *h_ptr = &data1.h_data[0].score;
+    float *d_ptr = &data1.d_data[0].score;
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(DPCT_CHECK_ERROR( // the copy starts at 'score', so it is shortened by that offset to stay inside numPts points
+        dpct::get_in_order_queue()
+            .memcpy(h_ptr, d_ptr, sizeof(SiftPoint) * data1.numPts - ((char *)h_ptr - (char *)data1.h_data))
+            .wait()));
+    safeCall(
+        DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()));
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  }
+  return matchTime;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o
new file mode 100644
index 000000000..cd453ccc0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt
new file mode 100644
index 000000000..378b04c35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt
@@ -0,0 +1,108 @@
+#  Modifications Copyright (C) 2023 Intel Corporation
+
+#  Permission is hereby granted, free of charge, to any person obtaining a copy
+#  of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom
+#  the Software is furnished to do so, subject to the following conditions:
+
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+#  OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+#  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+#  OR OTHER DEALINGS IN THE SOFTWARE.
+
+#  SPDX-License-Identifier: MIT
+
+cmake_minimum_required(VERSION 3.10)
+project(cudasift LANGUAGES CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+# Default flag sets: DEF_WL_* is workload specific (HIP platform define), DEF_GENERAL_* holds warning/optimisation flags.
+set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ")
+set(DEF_GENERAL_CXX_FLAGS " -Wall -O3 -Wextra ")
+set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
+# Resolve ROCM_PATH: an already defined ROCM_PATH wins, then the ROCM_PATH environment variable, else /opt/rocm.
+if(NOT DEFINED ROCM_PATH)
+  if(NOT DEFINED ENV{ROCM_PATH})
+    set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which HIP has been installed")
+  else()
+    set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
+  endif()
+endif()
+# Prepend the ROCm locations so find_package(HIP) below can locate the HIP CMake module.
+set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
+set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
+set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES}) # NOTE(review): this is a directory, not a library list; it is not referenced again in this file
+
+option(DEVICE_TIMER "Build using Device Timer" OFF)
+
+find_package(HIP REQUIRED) # REQUIRED already stops configuration when HIP is missing, so the else() branch below is a safety net
+
+if(HIP_FOUND)
+  message(STATUS "Found HIP: " ${HIP_VERSION})
+else()
+  message(FATAL_ERROR "Could not find HIP!")
+endif()
+
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+# Sources and headers of the executable; Utility.cpp comes from the shared ../common directory.
+set(SOURCES
+  ${CMAKE_SOURCE_DIR}/../common/Utility.cpp
+  cudaImage.cpp
+  cudaImage.h
+  cudaSiftH.cpp
+  cudaSiftH.h
+  matching.cpp
+  cudaSiftD.h
+  cudaSift.h
+  cudautils.h
+  geomFuncs.cpp
+  mainSift.cpp
+)
+
+include_directories(
+  ${CMAKE_SOURCE_DIR}/../common/
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific)
+# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
+# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
+if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+  message(FATAL_ERROR "Both  CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
+elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+  message(STATUS "Using DEFAULT compilation flags")
+  set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
+elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+  message(STATUS "OVERRIDING GENERAL compilation flags")
+  set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
+  string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS}) # the workload-specific define is still appended in this mode
+elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
+  message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags") # user flags are taken verbatim; nothing is appended
+endif()
+
+message(STATUS "CXX  Compilation flags to: ${CMAKE_CXX_FLAGS}")
+# DEVICE_TIMER=ON defines the DEVICE_TIMER macro that guards the timing code in the sources.
+if(DEVICE_TIMER)
+  message(STATUS "Enabling Device Timer")
+  add_compile_options(-DDEVICE_TIMER)
+endif()
+
+set(HIP_SEPARABLE_COMPILATION ON)
+set(MY_TARGET_NAME ${PROJECT_NAME}) # expands to "cudasift", the name given to project() above
+set(MY_HIPCC_OPTIONS)
+set(MY_NVCC_OPTIONS)
+set(CMAKE_HIP_ARCHITECTURES OFF)
+# NOTE(review): cuda_sources is never set in this file, so the next call names no source files unless it is passed in from outside - confirm whether SOURCES was intended.
+set_source_files_properties(${cuda_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+hip_add_executable(${MY_TARGET_NAME} ${SOURCES} ${MY_HIPCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
+target_link_libraries(cudasift stdc++ stdc++fs ${OpenCV_LIBS}) # hard-coded target name; currently equal to MY_TARGET_NAME
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp
new file mode 100644
index 000000000..5c4cb0ccf
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp
@@ -0,0 +1,111 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <cstdio>
+#include <chrono>
+#include <hip/hip_runtime.h>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+
+int iDivUp(int a, int b) { const int rem = a % b; return rem ? a / b + 1 : a / b; } // quotient, plus one when a remainder is left
+int iDivDown(int a, int b) { const int quot = a / b; return quot; }                 // plain truncating integer division
+int iAlignUp(int a, int b) { const int rem = a % b; return rem ? a - rem + b : a; } // a moved up to the next multiple of b when not already one
+int iAlignDown(int a, int b) { const int rem = a % b; return a - rem; }             // a with its remainder modulo b removed
+
+void CudaImage::Allocate(int w, int h, int p, bool host, float &time, float *devmem, float *hostmem)
+{ // Adopts the caller's buffers, or allocates pitched device memory (and host memory when 'host' is set) for a w x h float image; 'time' gains the allocation time in us under DEVICE_TIMER.
+  width = w;
+  height = h;
+  pitch = p;
+  d_data = devmem;
+  h_data = hostmem;
+  t_data = NULL;
+  if (devmem == NULL)
+  {
+    size_t pitchBytes = 0; // hipMallocPitch reports the row pitch in bytes through a size_t, so it must not be pointed at the int member
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMallocPitch((void **)&d_data, &pitchBytes, (size_t)(sizeof(float) * width), (size_t)height));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    std::cout << "Allocate time is " << std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count() << " us" << std::endl;
+    time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+    pitch = (int)(pitchBytes / sizeof(float)); // the member keeps the pitch in floats, not bytes
+    if (d_data == NULL)
+      printf("Failed to allocate device data\n");
+    d_internalAlloc = true; // the destructor frees d_data only when this flag is set
+  }
+  if (host && hostmem == NULL)
+  {
+    h_data = (float *)malloc(sizeof(float) * pitch * height);
+    h_internalAlloc = true; // the destructor frees h_data only when this flag is set
+  }
+}
+
+CudaImage::CudaImage() : width(0), height(0), pitch(0), h_data(NULL), d_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false)
+{ // Starts empty: zero geometry, no buffers, nothing owned. Initialisers are listed in member declaration order.
+}
+
+CudaImage::~CudaImage()
+{ // Frees the device/host buffers only if this object allocated them, frees the texture array if any, and clears every pointer.
+  if (d_data != NULL && d_internalAlloc)
+    safeCall(hipFree(d_data));
+  d_data = NULL;
+  if (h_data != NULL && h_internalAlloc)
+    free(h_data);
+  h_data = NULL;
+  if (t_data != NULL)
+    safeCall(hipFreeArray((hipArray *)t_data));
+  t_data = NULL;
+}
+
+double CudaImage::Download(float &time)
+{ // Copies the host image into the device buffer; returns the measured copy time in us (0 unless DEVICE_TIMER) and also adds it to 'time'.
+  double downloadTime = 0.0;
+  int p = sizeof(float) * pitch; // device row pitch in bytes
+  if (d_data != NULL && h_data != NULL)
+  {
+    // Device rows are 'pitch' floats apart, so copy row by row; host rows are read as tightly packed, as the previous flat copy did.
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy2D(d_data, p, h_data, sizeof(float) * width, sizeof(float) * width, height, hipMemcpyHostToDevice));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    downloadTime = std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+    time += downloadTime;
+    std::cout << "Download Time is " << downloadTime << " us" << std::endl;
+#endif
+  }
+  return downloadTime;
+}
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h
new file mode 100644
index 000000000..8f15f8098
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h
@@ -0,0 +1,63 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDAIMAGE_H
+#define CUDAIMAGE_H
+
+#include <hip/hip_runtime.h>
+#define OCML_BASIC_ROUNDED_OPERATIONS
+
+class CudaImage
+{ // Float image with optional host, device and texture storage; the object frees only the buffers it allocated itself.
+public:
+  int width, height; // image dimensions, counted in float elements
+  int pitch;         // row pitch in floats (Allocate converts the byte pitch returned by the device allocator)
+  float *h_data;     // host buffer, or NULL
+  float *d_data;     // device buffer, or NULL
+  float *t_data;     // released with hipFreeArray in the destructor; presumably a texture array set by InitTexture - TODO confirm
+  bool d_internalAlloc; // true when Allocate created d_data, so the destructor must free it
+  bool h_internalAlloc; // true when Allocate created h_data, so the destructor must free it
+
+public:
+  CudaImage();
+  CudaImage(const CudaImage&) = delete;            // non-copyable: a copy would free the same buffers twice
+  CudaImage& operator=(const CudaImage&) = delete;
+  ~CudaImage();
+  void Allocate(int width, int height, int pitch, bool withHost, float &time, float *devMem = NULL, float *hostMem = NULL);
+  double Download(float &time); // copies h_data to d_data (host to device) and returns the measured time
+  double Readback();
+  double InitTexture();
+  double CopyToTexture(CudaImage &dst, bool host);
+};
+
+int iDivUp(int a, int b);
+int iDivDown(int a, int b);
+int iAlignUp(int a, int b);
+int iAlignDown(int a, int b);
+void StartTimer(unsigned int *hTimer);
+double StopTimer(unsigned int hTimer);
+
+#endif // CUDAIMAGE_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h
new file mode 100644
index 000000000..b49f6c503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h
@@ -0,0 +1,48 @@
+#ifndef CUDASIFT_H
+#define CUDASIFT_H
+
+#include "cudaImage.h"
+
+typedef struct
+{ // One SIFT feature: 16 four-byte header fields followed by a 128-float descriptor.
+  float xpos;        // feature position; multiplied by 'subsampling' at the end of ExtractSiftDescriptors
+  float ypos;
+  float scale;       // feature scale; also multiplied by 'subsampling' there
+  float sharpness;
+  float edgeness;
+  float orientation; // in degrees (ExtractSiftDescriptors converts it with 2*pi/360)
+  float score;       // first field of the block that MatchSiftData copies back to the host
+  float ambiguity;
+  int match;         // presumably the index of the matched point in the other set - TODO confirm
+  float match_xpos;
+  float match_ypos;
+  float match_error;
+  float subsampling;
+  float empty[3];    // unused here; presumably padding up to 16 header words - TODO confirm
+  float data[128];   // descriptor values written by the ExtractSiftDescriptors kernels
+} SiftPoint;
+
+typedef struct
+{ // A set of SIFT points; storage is either one managed buffer (MANAGEDMEM) or separate host and device buffers.
+  int numPts; // Number of available Sift points
+  int maxPts; // Number of allocated Sift points
+#ifdef MANAGEDMEM
+  SiftPoint *m_data; // Managed data
+#else
+  SiftPoint *h_data; // Host (CPU) data
+  SiftPoint *d_data; // Device (GPU) data
+#endif
+} SiftData;
+
+void InitCuda(int devNum = 0);
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false);
+void FreeSiftTempMemory(float *memoryTmp);
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                 float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0);
+void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true);
+void FreeSiftData(SiftData &data);
+void PrintSiftData(SiftData &data);
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime);
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp
new file mode 100644
index 000000000..33dd561b8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp
@@ -0,0 +1,2252 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include "hip/hip_runtime.h"
+#include "cudautils.h"
+#include "cudaSiftD.h"
+#include "cudaSift.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Kernel configuration
+///////////////////////////////////////////////////////////////////////////////
+
+__constant__ int d_MaxNumPoints;
+__device__ unsigned int d_PointCounter[8 * 2 + 1];
+__constant__ float d_ScaleDownKernel[5];
+__constant__ float d_LowPassKernel[2 * LOWPASS_R + 1];
+__constant__ float d_LaplaceKernel[8 * 12 * 16];
+
+///////////////////////////////////////////////////////////////////////////////
+// Lowpass filter and subsample image
+///////////////////////////////////////////////////////////////////////////////
+__global__ void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch)
+{ // Halves the image: symmetric 5-tap filter (k0,k1,k2,k1,k0) along x via ShiftDown, then along y from shared memory, keeping every second sample.
+#define BW (SCALEDOWN_W + 4)
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2)
+#define H2 (SCALEDOWN_H / 2)
+  __shared__ float brows[BH * BW]; // horizontally filtered tile; assumes the block is launched BW x BH threads - TODO confirm
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * SCALEDOWN_W + tx;
+  const int yp = blockIdx.y * SCALEDOWN_H + ty;
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  const int xl = min(width - 1, max(0, xp - 2));  // source column, shifted by the 2-pixel apron and clamped to the image
+  const int yl = min(height - 1, max(0, yp - 2)); // source row, same treatment
+  if (xp < (width + 4) && yp < (height + 4))
+  {
+    float v = d_Data[yl * pitch + xl];
+    brows[BW * ty + tx] = k0 * (v + ShiftDown(v, 4)) + k1 * (ShiftDown(v, 1) + ShiftDown(v, 3)) + k2 * ShiftDown(v, 2); // ShiftDown is defined elsewhere; presumably reads v from the lane 1..4 positions higher - TODO confirm
+  }
+  __syncthreads(); // the tile must be complete before the vertical pass reads it
+  const int xs = blockIdx.x * W2 + tx;
+  const int ys = blockIdx.y * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; // top of the 5-row column feeding output (xs, ys)
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW];
+  }
+}
+
+__global__ void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch)
+{ // Halves the image: symmetric 5-tap filter (k0,k1,k2,k1,k0) along x, then along y, keeping every second sample in each direction.
+#define BW (SCALEDOWN_W + 4)
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2)
+#define H2 (SCALEDOWN_H / 2)
+  __shared__ float irows[BH * BW]; // raw input tile with a 2-pixel apron on every side; assumes a BW x BH thread block - TODO confirm
+  __shared__ float brows[BH * W2]; // tile after horizontal filtering and 2:1 column subsampling
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * SCALEDOWN_W + tx;
+  const int yp = blockIdx.y * SCALEDOWN_H + ty;
+  const int xl = min(width - 1, max(0, xp - 2));  // source column, clamped to the image
+  const int yl = min(height - 1, max(0, yp - 2)); // source row, clamped to the image
+  const float k0 = d_ScaleDownKernel[0];
+  const float k1 = d_ScaleDownKernel[1];
+  const float k2 = d_ScaleDownKernel[2];
+  if (xp < (width + 4) && yp < (height + 4))
+    irows[BW * ty + tx] = d_Data[yl * pitch + xl];
+  __syncthreads(); // raw tile complete before the horizontal pass
+  if (yp < (height + 4) && tx < W2)
+  {
+    float *ptr = &irows[BW * ty + 2 * tx]; // five consecutive inputs starting at column 2*tx
+    brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2];
+  }
+  __syncthreads(); // horizontal results complete before the vertical pass
+  const int xs = blockIdx.x * W2 + tx;
+  const int ys = blockIdx.y * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[W2 * (ty * 2) + tx]; // five consecutive rows starting at row 2*ty
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2];
+  }
+}
+
+__global__ void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch)
+{ // Halves the image with the symmetric 5-tap kernel, streaming one input row at a time through a rolling buffer of five filtered rows.
+  __shared__ float inrow[SCALEDOWN_W + 4];      // current input row with a 2-pixel apron; assumes blockDim.x >= SCALEDOWN_W + 4 - TODO confirm
+  __shared__ float brow[5 * (SCALEDOWN_W / 2)]; // five horizontally filtered, half-width rows (slots tx0..tx4)
+  __shared__ int yRead[SCALEDOWN_H + 4];        // per-row input offsets (clamped row index * pitch)
+  __shared__ int yWrite[SCALEDOWN_H + 4];       // per-row output offsets (row index * newpitch)
+#define dx2 (SCALEDOWN_W / 2)
+  const int tx = threadIdx.x;
+  const int tx0 = tx + 0 * dx2;
+  const int tx1 = tx + 1 * dx2;
+  const int tx2 = tx + 2 * dx2;
+  const int tx3 = tx + 3 * dx2;
+  const int tx4 = tx + 4 * dx2;
+  const int xStart = blockIdx.x * SCALEDOWN_W;
+  const int yStart = blockIdx.y * SCALEDOWN_H;
+  const int xWrite = xStart / 2 + tx;
+  float k0 = d_ScaleDownKernel[0];
+  float k1 = d_ScaleDownKernel[1];
+  float k2 = d_ScaleDownKernel[2];
+  if (tx < SCALEDOWN_H + 4)
+  {
+    int y = yStart + tx - 2;
+    y = (y < 0 ? 0 : y);
+    y = (y >= height ? height - 1 : y);
+    yRead[tx] = y * pitch;
+    yWrite[tx] = (yStart + tx - 4) / 2 * newpitch;
+  }
+  __syncthreads(); // offset tables must be filled before any row is fetched
+  int xRead = xStart + tx - 2;
+  xRead = (xRead < 0 ? 0 : xRead);
+  xRead = (xRead >= width ? width - 1 : xRead);
+  // Each loop iteration handles five input rows; every stage below refills one brow slot and, on the rows selected by its dy test, emits one output row.
+  int maxtx = min(dx2, width / 2 - xStart / 2);
+  for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5)
+  {
+    { // stage 0: refill slot 4; an output row is centred on slot 2
+      inrow[tx] = d_Data[yRead[dy + 0] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 4 && !(dy & 1))
+          d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 3))
+    { // stage 1: refill slot 0; an output row is centred on slot 3
+      inrow[tx] = d_Data[yRead[dy + 1] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 3 && (dy & 1))
+          d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 2))
+    { // stage 2: refill slot 1; an output row is centred on slot 4
+      inrow[tx] = d_Data[yRead[dy + 2] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 2 && !(dy & 1))
+          d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]);
+      }
+      __syncthreads();
+    }
+    if (dy < (SCALEDOWN_H + 1))
+    { // stage 3: refill slot 2; an output row is centred on slot 0
+      inrow[tx] = d_Data[yRead[dy + 3] + xRead];
+      __syncthreads();
+      if (tx < maxtx)
+      {
+        brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 1 && (dy & 1))
+          d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]);
+      }
+      __syncthreads();
+    }
+    if (dy < SCALEDOWN_H)
+    { // stage 4: refill slot 3; an output row is centred on slot 1
+      inrow[tx] = d_Data[yRead[dy + 4] + xRead];
+      __syncthreads();
+      if (tx < dx2 && xWrite < width / 2) // same column bound as 'tx < maxtx', written out explicitly
+      {
+        brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (!(dy & 1))
+          d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]);
+      }
+      __syncthreads();
+    }
+  }
+}
+
+__global__ void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch)
+{ // Doubles the image size: each thread expands one source pixel into a 2x2 output patch.
+  const int lx = threadIdx.x;
+  const int ly = threadIdx.y;
+  const int outX = blockIdx.x * SCALEUP_W + 2 * lx;
+  const int outY = blockIdx.y * SCALEUP_H + 2 * ly;
+  if (outX >= 2 * width || outY >= 2 * height)
+    return; // patch lies outside the upscaled image
+  const int srcX = blockIdx.x * (SCALEUP_W / 2) + lx;
+  const int srcY = blockIdx.y * (SCALEUP_H / 2) + ly;
+  const int srcXr = min(srcX + 1, width - 1);  // right neighbour, clamped at the image edge
+  const int srcYd = min(srcY + 1, height - 1); // lower neighbour, clamped at the image edge
+  const float topLeft = d_Data[srcY * pitch + srcX];
+  const float topRight = d_Data[srcY * pitch + srcXr];
+  const float botLeft = d_Data[srcYd * pitch + srcX];
+  const float botRight = d_Data[srcYd * pitch + srcXr];
+  // Output patch: the source pixel, its average with each neighbour, and the average of all four.
+  d_Result[(outY + 0) * newpitch + outX + 0] = topLeft;
+  d_Result[(outY + 0) * newpitch + outX + 1] = 0.50f * (topLeft + topRight);
+  d_Result[(outY + 1) * newpitch + outX + 0] = 0.50f * (topLeft + botLeft);
+  d_Result[(outY + 1) * newpitch + outX + 1] = 0.25f * (topLeft + topRight + botLeft + botRight);
+}
+
+__global__ void ExtractSiftDescriptors(hipTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling)
+{ // One block per feature: samples a rotated 16x16 gradient patch from texObj, accumulates it into a 128-bin histogram, normalises it twice and stores it in d_sift[bx].data.
+  __shared__ float gauss[16];   // 1-D Gaussian weights, applied separably in x and y
+  __shared__ float buffer[128]; // descriptor histogram: 4x4 spatial cells of 8 angle bins
+  __shared__ float sums[4];     // partial sums of squares, one per group of 32 threads
+
+  const int tx = threadIdx.x; // 0 -> 16
+  const int ty = threadIdx.y; // 0 -> 8
+  const int idx = ty * 16 + tx;
+  const int bx = blockIdx.x + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina);
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; // gradient angle mapped from [-pi, pi] to roughly [0, 8]
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    int angp = (angi < 7 ? angi + 1 : 0); // next angle bin, wrapping back to bin 0
+    angf -= angi;
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2);
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2);
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2);
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads();
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i); // NOTE(review): ShiftDown is defined elsewhere; this tree reduction spans 32 consecutive threads - confirm it holds on 64-wide wavefronts
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); // unit-normalised value, clamped to 0.2 to suppress peaks
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * rsqrtf(tsum2); // second normalisation, applied after the clamp
+  if (idx == 0)
+  { // a single thread rescales the feature's position and scale by 'subsampling'
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+// Fast polynomial approximation of atan2(y, x), in radians.
+// The ratio min(|x|,|y|) / max(|x|,|y|) lies in [0, 1]; an odd polynomial
+// approximates atan on that interval and the result is then unfolded into the
+// correct octant and quadrant from the signs and relative sizes of x and y.
+//
+//   y, x    - ordinate and abscissa of the vector
+//   returns - approximate angle of (x, y); 0 when both inputs are zero
+__device__ float FastAtan2(float y, float x)
+{
+  float absx = abs(x);
+  float absy = abs(y);
+  float hi = max(absx, absy);
+  // x == y == 0 used to evaluate 0/0 = NaN, which then propagated into every
+  // histogram bin touched by the caller. Use a ratio of 0 instead so the result
+  // is 0, like atan2f(+0, +0). NaN inputs still propagate (hi == 0 is false).
+  float a = (hi == 0.0f ? 0.0f : __fdiv_rn(min(absx, absy), hi));
+  float s = a * a;
+  float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a;
+  r = (absy > absx ? 1.57079637f - r : r); // |y| > |x|: reflect about pi/4
+  r = (x < 0 ? 3.14159274f - r : r);       // left half-plane
+  r = (y < 0 ? -r : r);                    // lower half-plane
+  return r;
+}
+
+// Computes the 128-element descriptor of every point of one octave, sampling a
+// plain float image through a raw pointer (no texture unit).
+//
+//   texObj      - despite its name, a float image pointer; samples are read at
+//                 texObj[y * pitch + x] with coordinates truncated to int
+//   pitch       - row stride of that image, in floats
+//   d_sift      - point array: orientation, scale, xpos, ypos are read; data[]
+//                 is written; xpos, ypos, scale are multiplied by subsampling
+//   subsampling - factor applied to xpos/ypos/scale after data[] is written
+//   octave      - selects the point range [d_PointCounter[2*octave-1],
+//                 d_PointCounter[2*octave+1]), both clamped to d_MaxNumPoints
+//
+// Indexing assumes a 16x8 thread block (idx spans buffer[0..127]); each block
+// strides over the point range by gridDim.x.
+// NOTE(review): the image reads are not bounds-checked - presumably the caller
+// guarantees the rotated 16x16 window lies inside the image; confirm.
+__global__ void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift, float subsampling, int octave)
+{
+  __shared__ float gauss[16];   // 1-D Gaussian window weights
+  __shared__ float buffer[128]; // 4x4 cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int tx = threadIdx.x; // 0 -> 16
+  const int ty = threadIdx.y; // 0 -> 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = __expf(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    buffer[idx] = 0.0f;
+    __syncthreads(); // also publishes gauss[] on the first iteration
+
+    // Compute angles and gradients
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = __sinf(theta); // cosa -sina
+    float cosa = __cosf(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    // Each thread handles two rows (y = ty and y = ty + 8) of the 16x16 window.
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+
+      // Central difference along the rotated x axis
+      int xi1 = xpos + cosa;
+      int yi1 = ypos + sina;
+
+      int xi2 = xpos - cosa;
+      int yi2 = ypos - sina;
+
+      float dx = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      // Central difference along the rotated y axis
+      xi1 = xpos - sina;
+      yi1 = ypos + cosa;
+
+      xi2 = xpos + sina;
+      yi2 = ypos - cosa;
+
+      float dy = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+
+      float grad = gauss[y] * gauss[tx] * __fsqrt_rn(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf;
+      angf -= angi; // fractional part, taken before the bin index is wrapped
+      // FastAtan2 can return pi while the scaling divides by 3.1415 < pi, so
+      // angf can reach 8. Wrap that onto bin 0 (the same direction as -pi)
+      // instead of spilling into the next cell or past the end of buffer[].
+      if (angi > 7)
+        angi = 0;
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1, iangf * grad2);
+          atomicAdd(buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 32, iangf * grad2);
+          atomicAdd(buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1 + 8, iangf * grad2);
+          atomicAdd(buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 40, iangf * grad2);
+          atomicAdd(buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    __syncthreads();
+
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+    // Every thread must have read the first-pass partials before any warp
+    // overwrites sums[] with its second-pass partial.
+    __syncthreads();
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * rsqrtf(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    __syncthreads(); // buffer[]/sums[] are reused by the next point
+  }
+}
+
+// Computes the 128-element descriptor of every point of one octave, sampling
+// the image through a texture object.
+//
+//   texObj      - texture sampled with tex2D<float> at rotated window positions
+//   d_sift      - point array: orientation, scale, xpos, ypos are read; data[]
+//                 is written; xpos, ypos, scale are multiplied by subsampling
+//   subsampling - factor applied to xpos/ypos/scale after data[] is written
+//   octave      - selects the point range [d_PointCounter[2*octave-1],
+//                 d_PointCounter[2*octave+1]), both clamped to d_MaxNumPoints
+//
+// Indexing assumes a 16x8 thread block (idx spans buffer[0..127]); each block
+// strides over the point range by gridDim.x.
+__global__ void ExtractSiftDescriptorsCONST(hipTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave)
+{
+  __shared__ float gauss[16];   // 1-D Gaussian window weights
+  __shared__ float buffer[128]; // 4x4 cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int tx = threadIdx.x; // 0 -> 16
+  const int ty = threadIdx.y; // 0 -> 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    buffer[idx] = 0.0f;
+    __syncthreads(); // also publishes gauss[] on the first iteration
+
+    // Compute angles and gradients
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = sinf(theta); // cosa -sina
+    float cosa = cosf(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+    // Each thread handles two rows (y = ty and y = ty + 8) of the 16x16 window.
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+      float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+                 tex2D<float>(texObj, xpos - cosa, ypos - sina);
+      float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+                 tex2D<float>(texObj, xpos + sina, ypos - cosa);
+      float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf;
+      angf -= angi; // fractional part, taken before the bin index is wrapped
+      // atan2f can return pi while the scaling divides by 3.1415 < pi, so angf
+      // can reach 8. Wrap that onto bin 0 (the same direction as -pi) instead
+      // of spilling into the next cell or past the end of buffer[].
+      if (angi > 7)
+        angi = 0;
+      int angp = (angi < 7 ? angi + 1 : 0);
+      float iangf = 1.0f - angf;
+
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1, iangf * grad2);
+          atomicAdd(buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 32, iangf * grad2);
+          atomicAdd(buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          atomicAdd(buffer + p1 + 8, iangf * grad2);
+          atomicAdd(buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          atomicAdd(buffer + p1 + 40, iangf * grad2);
+          atomicAdd(buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    __syncthreads();
+
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+    // Every thread must have read the first-pass partials before any warp
+    // overwrites sums[] with its second-pass partial.
+    __syncthreads();
+
+    sum = tsum1 * tsum1;
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    __syncthreads();
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * rsqrtf(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    __syncthreads(); // buffer[]/sums[] are reused by the next point
+  }
+}
+
+// Older descriptor kernel: one thread block per point, with the sum of squares
+// reduced through shared memory instead of ShiftDown.
+//
+//   texObj      - texture sampled with tex2D<float> at rotated window positions
+//   d_sift      - point array: orientation, scale, xpos, ypos are read; data[]
+//                 is written; xpos, ypos, scale are multiplied by subsampling
+//   fstPts      - index of the point handled by block 0 (block b handles
+//                 d_sift[b + fstPts])
+//   subsampling - factor applied to xpos/ypos/scale after data[] is written
+//
+// Indexing assumes a 16x8 thread block (idx spans buffer[0..127]).
+__global__ void ExtractSiftDescriptorsOld(hipTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling)
+{
+  __shared__ float gauss[16];   // 1-D Gaussian window weights
+  __shared__ float buffer[128]; // 4x4 cells x 8 orientation bins
+  __shared__ float sums[128];   // scratch for the tree reduction
+
+  const int tx = threadIdx.x; // 0 -> 16
+  const int ty = threadIdx.y; // 0 -> 8
+  const int idx = ty * 16 + tx;
+  const int bx = blockIdx.x + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each thread handles two rows (y = ty and y = ty + 8) of the 16x16 window.
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina);
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    angf -= angi; // fractional part, taken before the bin index is wrapped
+    // atan2f can return pi while the scaling divides by 3.1415 < pi, so angf
+    // can reach 8. Wrap that onto bin 0 (the same direction as -pi) instead of
+    // spilling into the next cell or past the end of buffer[].
+    if (angi > 7)
+      angi = 0;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2);
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2);
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2);
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads();
+
+  // Normalize twice and suppress peaks first time
+  // First pass: tree reduction of the squared histogram down to sums[0..3].
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  __syncthreads();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  __syncthreads();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  __syncthreads();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  __syncthreads();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  buffer[idx] = buffer[idx] * rsqrtf(tsum1);
+
+  // Clamp large components, then renormalize with a second reduction.
+  if (buffer[idx] > 0.2f)
+    buffer[idx] = 0.2f;
+  __syncthreads();
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  __syncthreads();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  __syncthreads();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  __syncthreads();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  __syncthreads();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  __syncthreads();
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+
+  float *desc = d_sift[bx].data;
+  desc[idx] = buffer[idx] * rsqrtf(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+// Block-cooperative helper that computes the 128-element descriptor of the
+// single point d_sift[bx]. It contains __syncthreads(), so every thread of the
+// block must call it with the same bx.
+//
+//   texObj      - texture sampled with tex2D<float> at rotated window positions
+//   d_sift      - point array: orientation, scale, xpos, ypos of entry bx are
+//                 read; data[] is written; xpos, ypos, scale are multiplied by
+//                 subsampling
+//   subsampling - factor applied to xpos/ypos/scale after data[] is written
+//   octave      - not used by this function
+//   bx          - index of the point to describe
+//
+// threadIdx.x is used directly as the index into buffer[128] and data[];
+// presumably the caller launches a 1-D block of 128 threads - confirm.
+__device__ void ExtractSiftDescriptor(hipTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave, int bx)
+{
+  __shared__ float gauss[16];   // 1-D Gaussian window weights
+  __shared__ float buffer[128]; // 4x4 cells x 8 orientation bins
+  __shared__ float sums[4];     // one partial sum of squares per 32 threads
+
+  const int idx = threadIdx.x;
+  const int tx = idx & 15; // 0 -> 16
+  const int ty = idx / 16; // 0 -> 8
+  if (ty == 0)
+    gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  __syncthreads();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sinf(theta); // cosa -sina
+  float cosa = cosf(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each thread handles two rows (y = ty and y = ty + 8) of the 16x16 window.
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = tex2D<float>(texObj, xpos + cosa, ypos + sina) -
+               tex2D<float>(texObj, xpos - cosa, ypos - sina);
+    float dy = tex2D<float>(texObj, xpos - sina, ypos + cosa) -
+               tex2D<float>(texObj, xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    angf -= angi; // fractional part, taken before the bin index is wrapped
+    // atan2f can return pi while the scaling divides by 3.1415 < pi, so angf
+    // can reach 8. Wrap that onto bin 0 (the same direction as -pi) instead of
+    // spilling into the next cell or past the end of buffer[].
+    if (angi > 7)
+      angi = 0;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1, iangf * grad2);
+        atomicAdd(buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 32, iangf * grad2);
+        atomicAdd(buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        atomicAdd(buffer + p1 + 8, iangf * grad2);
+        atomicAdd(buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        atomicAdd(buffer + p1 + 40, iangf * grad2);
+        atomicAdd(buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  __syncthreads();
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f);
+  // Every thread must have read the first-pass partials before any warp
+  // overwrites sums[] with its second-pass partial.
+  __syncthreads();
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  __syncthreads();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * rsqrtf(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+  __syncthreads(); // shared arrays may be reused by a following call
+}
+
+// Multiplies xpos, ypos and scale of the first numPts entries of d_sift by
+// `scale`, one thread per point.
+__global__ void RescalePositions(SiftPoint *d_sift, int numPts, float scale)
+{
+  const int pointIdx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (pointIdx >= numPts)
+    return; // surplus thread past the end of the point list
+
+  SiftPoint &pt = d_sift[pointIdx];
+  pt.xpos *= scale;
+  pt.ypos *= scale;
+  pt.scale *= scale;
+}
+
+// Assigns gradient orientations to one point per thread block
+// (block b handles d_Sift[b + fstPts]).
+//
+// An 11x11 grid of texture samples around the point votes into a 32-bin
+// histogram, which is smoothed and reduced to its local maxima. The strongest
+// peak is written to the point's orientation. If the runner-up exceeds 80% of
+// it, a copy of the point with that second orientation is appended at the slot
+// obtained from atomicInc(d_PointCounter), provided the slot is below
+// d_MaxNumPoints.
+__global__ void ComputeOrientations(hipTextureObject_t texObj, SiftPoint *d_Sift, int fstPts)
+{
+  __shared__ float hist[64]; // [0,32): raw bins, later peaks; [32,64): smoothed bins
+  __shared__ float gauss[11];
+  const int tid = threadIdx.x;
+  const int pt = blockIdx.x + fstPts;
+  // Thread tid covers cell (row, col) of the 11x11 grid; rows >= 11 sit idle.
+  const int row = tid / 11;
+  const int col = tid - row * 11;
+
+  const float i2sigma2 = -1.0f / (4.5f * d_Sift[pt].scale * d_Sift[pt].scale);
+  if (tid < 64)
+    hist[tid] = 0.0f;
+  if (tid < 11)
+    gauss[tid] = exp(i2sigma2 * (tid - 5) * (tid - 5));
+  __syncthreads();
+
+  // Gradient vote of each grid cell
+  if (row < 11)
+  {
+    const float xf = d_Sift[pt].xpos - 4.5f + col;
+    const float yf = d_Sift[pt].ypos - 4.5f + row;
+    const float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+    const float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+    int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+    if (bin > 31)
+      bin = 0;
+    const float grad = sqrtf(dx * dx + dy * dy);
+    atomicAdd(&hist[bin], grad * gauss[col] * gauss[row]);
+  }
+  __syncthreads();
+
+  // Circular neighbours of bin tid (only meaningful for tid < 32)
+  const int prev1 = (tid + 31) & 31;
+  const int next1 = (tid + 1) & 31;
+  if (tid < 32)
+  {
+    const int prev2 = (tid + 30) & 31;
+    const int next2 = (tid + 2) & 31;
+    hist[tid + 32] = 6.0f * hist[tid] + 4.0f * (hist[prev1] + hist[next1]) + (hist[prev2] + hist[next2]);
+  }
+  __syncthreads();
+  if (tid < 32)
+  {
+    // Keep only local maxima of the smoothed histogram
+    const float v = hist[32 + tid];
+    hist[tid] = (v > hist[32 + prev1] && v >= hist[32 + next1] ? v : 0.0f);
+  }
+  __syncthreads();
+
+  // No barrier follows, so everything but thread 0 can leave here.
+  if (tid != 0)
+    return;
+
+  // Find the two strongest peaks
+  float best = 0.0;
+  float second = 0.0;
+  int bestBin = -1;
+  int secondBin = -1;
+  for (int i = 0; i < 32; i++)
+  {
+    const float v = hist[i];
+    if (v > best)
+    {
+      second = best;
+      secondBin = bestBin;
+      best = v;
+      bestBin = i;
+    }
+    else if (v > second)
+    {
+      second = v;
+      secondBin = i;
+    }
+  }
+
+  // Parabolic refinement of the main peak; 11.25 degrees per bin
+  {
+    const float val1 = hist[32 + ((bestBin + 1) & 31)];
+    const float val2 = hist[32 + ((bestBin + 31) & 31)];
+    const float peak = bestBin + 0.5f * (val1 - val2) / (2.0f * best - val1 - val2);
+    d_Sift[pt].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+  }
+
+  if (!(second > 0.8f * best))
+    return;
+
+  // Secondary peak is strong enough: duplicate the point with that orientation
+  const float val1 = hist[32 + ((secondBin + 1) & 31)];
+  const float val2 = hist[32 + ((secondBin + 31) & 31)];
+  const float peak = secondBin + 0.5f * (val1 - val2) / (2.0f * second - val1 - val2);
+  const unsigned int slot = atomicInc(d_PointCounter, 0x7fffffff);
+  if (slot < d_MaxNumPoints)
+  {
+    d_Sift[slot].xpos = d_Sift[pt].xpos;
+    d_Sift[slot].ypos = d_Sift[pt].ypos;
+    d_Sift[slot].scale = d_Sift[pt].scale;
+    d_Sift[slot].sharpness = d_Sift[pt].sharpness;
+    d_Sift[slot].edgeness = d_Sift[pt].edgeness;
+    d_Sift[slot].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+    d_Sift[slot].subsampling = d_Sift[pt].subsampling;
+  }
+}
+
+// With constant number of blocks
+//
+// Assigns gradient orientations to the points of one octave, reading the image
+// through a raw pointer and doing all filtering in shared memory.
+//
+//   image  - float image, addressed as image[y * p + x]
+//   w, h   - image width and height used to clamp the sampling coordinates
+//   p      - row stride of the image, in floats
+//   d_Sift - point array: xpos, ypos, scale are read; orientation is written;
+//            a copy of the point may be appended for a strong second peak
+//   octave - selects the point range [d_PointCounter[2*octave-1],
+//            d_PointCounter[2*octave+0]), both clamped to d_MaxNumPoints; new
+//            points are allocated from d_PointCounter[2*octave+1]
+//
+// Each block strides over the point range by gridDim.x. The code needs at
+// least LEN threads in x (the histogram stages use tx < LEN).
+__global__ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave)
+{
+#define RAD 9
+#define WID (2 * RAD + 1)
+#define LEN 32 //%%%% Note: Lowe suggests 36, not 32
+  __shared__ float img[WID][WID], tmp[WID][WID];
+  __shared__ float hist[2 * LEN];
+  __shared__ float gaussx[WID], gaussy[WID];
+  const int tx = threadIdx.x;
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x)
+  {
+
+    float sc = d_Sift[bx].scale;
+    for (int i = tx; i < 2 * LEN; i += blockDim.x)
+      hist[i] = 0.0f;
+    float xp = d_Sift[bx].xpos;
+    float yp = d_Sift[bx].ypos;
+    int xi = (int)xp;
+    int yi = (int)yp;
+    float xf = xp - xi;
+    float yf = yp - yi;
+    // Load the WID x WID patch around the point, clamping at the image border.
+    for (int i = tx; i < WID * WID; i += blockDim.x)
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      int sx = max(min(x - RAD + xi, w - 1), 0);
+      int sy = max(min(y - RAD + yi, h - 1), 0);
+      img[y][x] = image[sy * p + sx];
+    }
+    // 5-tap blur weights; only fac[0] and fac[1] are used below.
+    float fac[5];
+    fac[1] = fac[3] = (sc > 0.5f ? __expf(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[0] = fac[4] = (sc > 0.5f ? __expf(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[2] = 1.0f;
+    float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0
+    if (tx < WID)
+    {
+      gaussx[tx] = __expf(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf));
+      gaussy[tx] = __expf(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf));
+    }
+    __syncthreads();
+    // Vertical blur pass: img -> tmp
+    for (int i = tx; i < (WID - 4) * WID; i += blockDim.x)
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      y += 2;
+      tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) +
+                  fac[0] * (img[y - 2][x] + img[y + 2][x]);
+    }
+    __syncthreads();
+    // Horizontal blur pass: tmp -> img
+    for (int i = tx; i < (WID - 4) * (WID - 4); i += blockDim.x)
+    {
+      int y = i / (WID - 4);
+      int x = i - y * (WID - 4);
+      x += 2;
+      y += 2;
+      img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) +
+                  fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]);
+    }
+    __syncthreads();
+    // Gradient votes into the upper half of hist[]
+    for (int i = tx; i < (WID - 6) * (WID - 6); i += blockDim.x)
+    {
+      int y = i / (WID - 6);
+      int x = i - y * (WID - 6);
+      x += 3;
+      y += 3;
+      float dx = img[y][x + 1] - img[y][x - 1];
+      float dy = img[y + 1][x] - img[y - 1][x];
+      int bin = (int)((LEN / 2) * atan2f(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % LEN;
+      float grad = __fsqrt_rn(dx * dx + dy * dy);
+      atomicAdd(&hist[LEN + bin], grad * gaussx[x] * gaussy[y]);
+    }
+    __syncthreads();
+    int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1);
+    int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1);
+    int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2);
+    int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2);
+    // The three stages below each read values that neighbouring threads wrote
+    // in the previous stage, so they are separated by barriers instead of
+    // relying on the threads running in lockstep.
+    if (tx < LEN) // first smoothing: upper half -> lower half
+      hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) +
+                 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]);
+    __syncthreads();
+    if (tx < LEN) // second smoothing: lower half -> upper half
+      hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) +
+                       0.0f * (hist[x2m] + hist[x2p]);
+    __syncthreads();
+    if (tx < LEN) // keep only local maxima, stored in the lower half
+    {
+      float val = hist[tx + LEN];
+      hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f);
+    }
+    __syncthreads();
+    if (tx == 0)
+    {
+      // Find the two strongest peaks
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < LEN; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      // Parabolic refinement of the main peak, converted to degrees
+      float val1 = hist[LEN + ((i1 + 1) % LEN)];
+      float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1 && true)
+      {
+        // Strong second peak: append a copy of the point with that orientation
+        float val1 = hist[LEN + ((i2 + 1) % LEN)];
+        float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = sc;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    // Thread 0 is still reading hist[] above; hold the other threads back so
+    // the next iteration does not clear hist[] underneath it.
+    __syncthreads();
+  }
+#undef RAD
+#undef WID
+#undef LEN
+}
+
+// With constant number of blocks
+//
+// Texture-based orientation assignment for the points of one octave. The range
+// [d_PointCounter[2*octave-1], d_PointCounter[2*octave+0]) (both clamped to
+// d_MaxNumPoints) is processed with each block striding by gridDim.x. A point
+// whose second histogram peak exceeds 80% of the first is duplicated at a slot
+// taken from d_PointCounter[2*octave+1].
+__global__ void ComputeOrientationsCONST(hipTextureObject_t texObj, SiftPoint *d_Sift, int octave)
+{
+  __shared__ float hist[64]; // [0,32): raw bins, later peaks; [32,64): smoothed bins
+  __shared__ float gauss[11];
+  const int tid = threadIdx.x;
+  // Thread tid covers cell (row, col) of the 11x11 grid; rows >= 11 sit idle.
+  const int row = tid / 11;
+  const int col = tid - row * 11;
+  // Circular neighbours of bin tid (only meaningful for tid < 32)
+  const int prev1 = (tid + 31) & 31;
+  const int next1 = (tid + 1) & 31;
+
+  const int firstPt = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  const int endPt = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int pt = blockIdx.x + firstPt; pt < endPt; pt += gridDim.x)
+  {
+    const float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[pt].scale * d_Sift[pt].scale);
+    if (tid < 64)
+      hist[tid] = 0.0f;
+    if (tid < 11)
+      gauss[tid] = exp(i2sigma2 * (tid - 5) * (tid - 5));
+    __syncthreads();
+
+    // Gradient vote of each grid cell
+    if (row < 11)
+    {
+      const float xf = d_Sift[pt].xpos - 4.5f + col;
+      const float yf = d_Sift[pt].ypos - 4.5f + row;
+      const float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+      const float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+      int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31)
+        bin = 0;
+      const float grad = sqrtf(dx * dx + dy * dy);
+      atomicAdd(&hist[bin], grad * gauss[col] * gauss[row]);
+    }
+    __syncthreads();
+
+    if (tid < 32)
+    {
+      const int prev2 = (tid + 30) & 31;
+      const int next2 = (tid + 2) & 31;
+      hist[tid + 32] = 6.0f * hist[tid] + 4.0f * (hist[prev1] + hist[next1]) + (hist[prev2] + hist[next2]);
+    }
+    __syncthreads();
+    if (tid < 32)
+    {
+      // Keep only local maxima of the smoothed histogram
+      const float v = hist[32 + tid];
+      hist[tid] = (v > hist[32 + prev1] && v >= hist[32 + next1] ? v : 0.0f);
+    }
+    __syncthreads();
+
+    if (tid == 0)
+    {
+      // Find the two strongest peaks
+      float best = 0.0;
+      float second = 0.0;
+      int bestBin = -1;
+      int secondBin = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        const float v = hist[i];
+        if (v > best)
+        {
+          second = best;
+          secondBin = bestBin;
+          best = v;
+          bestBin = i;
+        }
+        else if (v > second)
+        {
+          second = v;
+          secondBin = i;
+        }
+      }
+
+      // Parabolic refinement of the main peak; 11.25 degrees per bin
+      {
+        const float val1 = hist[32 + ((bestBin + 1) & 31)];
+        const float val2 = hist[32 + ((bestBin + 31) & 31)];
+        const float peak = bestBin + 0.5f * (val1 - val2) / (2.0f * best - val1 - val2);
+        d_Sift[pt].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+      }
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+
+      if (second > 0.8f * best)
+      {
+        // Strong second peak: append a copy of the point with that orientation
+        const float val1 = hist[32 + ((secondBin + 1) & 31)];
+        const float val2 = hist[32 + ((secondBin + 31) & 31)];
+        const float peak = secondBin + 0.5f * (val1 - val2) / (2.0f * second - val1 - val2);
+        const unsigned int slot = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff);
+        if (slot < d_MaxNumPoints)
+        {
+          d_Sift[slot].xpos = d_Sift[pt].xpos;
+          d_Sift[slot].ypos = d_Sift[pt].ypos;
+          d_Sift[slot].scale = d_Sift[pt].scale;
+          d_Sift[slot].sharpness = d_Sift[pt].sharpness;
+          d_Sift[slot].edgeness = d_Sift[pt].edgeness;
+          d_Sift[slot].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          d_Sift[slot].subsampling = d_Sift[pt].subsampling;
+        }
+      }
+    }
+    // hist[] and gauss[] are rewritten at the top of the next iteration
+    __syncthreads();
+  }
+}
+
+/* With constant number of blocks.
+ *
+ * Fused kernel: for every point of the octave it first assigns the orientation
+ * (same histogram scheme as ComputeOrientationsCONST, with 4.5 in place of
+ * 2*1.5*1.5 in the Gaussian) and then immediately extracts the descriptor by
+ * calling ExtractSiftDescriptor.
+ *
+ *   texObj      - texture sampled for the gradients and passed on to the
+ *                 descriptor helper
+ *   d_Sift      - point array; orientation is written here, the rest by
+ *                 ExtractSiftDescriptor
+ *   subsampling - forwarded to ExtractSiftDescriptor
+ *   octave      - selects the range [d_PointCounter[2*octave-1],
+ *                 d_PointCounter[2*octave+0]), clamped to d_MaxNumPoints; new
+ *                 points are allocated from d_PointCounter[2*octave+1]
+ *
+ * If the second histogram peak exceeds 80% of the first, a copy of the point
+ * is appended and its slot is handed to the whole block through the shared
+ * variable idx, so that a descriptor is extracted for the copy too.
+ * NOTE(review): ExtractSiftDescriptor indexes its 128-entry buffer with
+ * threadIdx.x, so this kernel presumably runs with 128 threads - confirm.
+ */
+__global__ void OrientAndExtractCONST(hipTextureObject_t texObj, SiftPoint *d_Sift, float subsampling, int octave)
+{
+  __shared__ float hist[64]; // [0,32): raw bins, later peaks; [32,64): smoothed bins
+  __shared__ float gauss[11];
+  __shared__ unsigned int idx; //%%%% slot of the duplicated point, 0xffffffff if none
+  const int tx = threadIdx.x;
+
+  int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) // each block strides over the points
+  {
+
+    float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    __syncthreads();
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11; // row of the 11x11 sampling grid
+    int xd = tx - yd * 11; // column of the 11x11 sampling grid
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11) // threads beyond the 121 grid cells do not vote
+    {
+      float dx = tex2D<float>(texObj, xf + 1.0, yf) - tex2D<float>(texObj, xf - 1.0, yf);
+      float dy = tex2D<float>(texObj, xf, yf + 1.0) - tex2D<float>(texObj, xf, yf - 1.0);
+      int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31) // wrap the top of the range back onto bin 0
+        bin = 0;
+      float grad = sqrtf(dx * dx + dy * dy);
+      atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    __syncthreads();
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31); // circular neighbours, used only when tx < 32
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); // smoothed bins go to the upper half
+    }
+    __syncthreads();
+    if (tx < 32)
+    {
+      float v = hist[32 + tx];
+      hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); // keep local maxima only
+    }
+    __syncthreads();
+    if (tx == 0) // a single thread picks the peaks and updates the point list
+    {
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < 32; i++) // track the two largest peaks
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); // parabolic refinement of the peak position
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // 11.25 degrees per bin
+      idx = 0xffffffff; //%%%% "no duplicate" marker; fails the idx < d_MaxNumPoints test below
+      atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1)
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%% reserve a slot for the duplicate
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    __syncthreads(); // makes idx and the new orientation visible to the whole block
+    ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx);    //%%%% descriptor of the original point
+    if (idx < d_MaxNumPoints)                                          //%%%% idx is shared, so the whole block takes the same branch
+      ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx); //%%%% descriptor of the duplicate
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Detect scale-space extrema (FindPointsMulti*) and subtract adjacent scales (LaplaceMulti*), multi-scale versions
+///////////////////////////////////////////////////////////////////////////////
+
+// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int ty = threadIdx.y;
+//   if (tx == 0 && ty == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   int ypos = MINMAX_H * blockIdx.y + ty;
+//   if (ypos >= height)
+//     return;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]);
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+
+//   if (Shuffle(maxv, 0) > thresh)
+//   {
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   __syncthreads();
+//   if (ty == 0 && tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+__global__ void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+{ // Kernel: finds extrema over (x, y, scale) in the plane stack d_Data0, refines them and appends the accepted ones to d_Sift.
+#define MEMWID (MINMAX_W + 2)
+  __shared__ unsigned short points[2 * MEMWID]; // (x, y) pairs of the candidates found by this block
+  // d_Data0 is read as consecutive planes of pitch * height floats; planes scale, scale + 1 and scale + 2 are compared.
+  if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+  { // Raise both counters of this octave to at least d_PointCounter[2 * octave - 1]. NOTE(review): that index is -1 when octave == 0 - confirm the array layout / callers.
+    atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+    atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+  }
+  int tx = threadIdx.x;
+  int block = blockIdx.x / NUM_SCALES; // horizontal tile index
+  int scale = blockIdx.x - NUM_SCALES * block; // i.e. blockIdx.x % NUM_SCALES: lowest of the three planes compared
+  int minx = block * MINMAX_W; // first image column of the tile
+  int maxx = min(minx + MINMAX_W, width); // one past the last column of the tile, clipped to the image width
+  int xpos = minx + tx; // this thread reads column xpos - 1, so tx == 0 covers the column left of the tile
+  int size = pitch * height; // elements per plane
+  int ptr = size * scale + max(min(xpos - 1, width - 1), 0); // offset of the (clamped) column in plane 'scale', row 0
+  // Pass 1: largest |value| of this thread's column in the middle plane (scale + 1) over the rows of this block.
+  int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); // rows handled here: at most MINMAX_H, fewer when the image ends first
+  float maxv = 0.0f;
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float val = d_Data0[yptr1 + 1 * size];
+    maxv = fmaxf(maxv, fabs(val));
+  }
+  // if (tx==0) printf("XXX1\n");
+  // if (!__any_sync(0xffffffff, maxv > thresh))
+  if (!__any(maxv > thresh)) // warp vote: no participating lane exceeds thresh, so there is nothing to detect
+    return;
+  // if (tx==0) printf("XXX2\n");
+  // Pass 2: per row, compare the middle-plane sample with its surroundings; bit y of ptbits marks a hit in row y.
+  int ptbits = 0;
+  for (int y = 0; y < yloops; y++)
+  {
+    // Naming below: d<p><r> with p = plane offset from 'scale' (0, 1, 2) and r = row (0 above, 1 current, 2 below).
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float d11 = d_Data0[yptr1 + 1 * size]; // the sample under test
+    // if (__any_sync(0xffffffff, fabs(d11) > thresh))
+    if (__any(fabs(d11) > thresh)) // skip rows in which no lane has a response above thresh
+    {
+
+      int yptr0 = ptr + max(0, ypos - 1) * pitch; // row above, clamped to the first row
+      int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; // row below, clamped to the last row
+      float d01 = d_Data0[yptr1];
+      float d10 = d_Data0[yptr0 + 1 * size];
+      float d12 = d_Data0[yptr2 + 1 * size];
+      float d21 = d_Data0[yptr1 + 2 * size];
+
+      float d00 = d_Data0[yptr0];
+      float d02 = d_Data0[yptr2];
+      float ymin1 = fminf(fminf(d00, d01), d02); // min / max over the three rows of plane 0
+      float ymax1 = fmaxf(fmaxf(d00, d01), d02);
+      float d20 = d_Data0[yptr0 + 2 * size];
+      float d22 = d_Data0[yptr2 + 2 * size];
+      float ymin3 = fminf(fminf(d20, d21), d22); // min / max over the three rows of plane 2
+      float ymax3 = fmaxf(fmaxf(d20, d21), d22);
+      float ymin2 = fminf(fminf(ymin1, fminf(fminf(d10, d12), d11)), ymin3); // min / max over all nine samples of this column
+      float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d10, d12), d11)), ymax3);
+      // NOTE(review): ShiftUp/ShiftDown are defined elsewhere; presumably they fetch the value of lane tx - 1 / tx + 1, making nmin2/nmax2 the extrema of the two adjacent columns - confirm.
+      float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+      float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+      float minv = fminf(fminf(nmin2, ymin1), ymin3);
+      minv = fminf(fminf(minv, d10), d12); // adds the samples directly above and below d11; d11 itself is excluded
+      float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); // local maxv: shadows the pass-1 variable of the same name
+      maxv = fmaxf(fmaxf(maxv, d10), d12);
+      // Only lanes 1..MINMAX_W whose column lies inside the tile record a hit: d11 below min(-thresh, minv) or above max(thresh, maxv).
+      if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+        ptbits |= ((d11 < fminf(-thresh, minv)) | (d11 > fmaxf(thresh, maxv))) << y;
+    }
+  }
+  // Compaction: a scan over the per-lane hit counts gives each lane the first slot it may write in points[].
+  unsigned int totbits = __popc(ptbits); // number of hits found by this lane
+  unsigned int numbits = totbits;
+  for (int d = 1; d < 32; d <<= 1) // doubling distances 1, 2, 4, 8, 16; presumably an inclusive prefix sum (depends on ShiftUp) - confirm
+  {
+    unsigned int num = ShiftUp(totbits, d);
+    if (tx >= d)
+      totbits += num;
+  }
+  int pos = totbits - numbits; // running total minus own count = first slot of this lane
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    if (ptbits & (1 << y) && pos < MEMWID) // hits that would land beyond MEMWID slots are dropped
+    {
+      points[2 * pos + 0] = xpos - 1;
+      points[2 * pos + 1] = ypos;
+      pos++;
+    }
+  }
+  // NOTE(review): no barrier separates the writes to points[] above from the reads below; presumably the block is a single warp running in lock-step - confirm.
+  totbits = Shuffle(totbits, 31); // presumably broadcasts lane 31's running total, i.e. the hit count of the whole block - confirm
+  if (tx < totbits) // one lane per stored candidate
+  {
+    int xpos = points[2 * tx + 0];
+    int ypos = points[2 * tx + 1];
+    int ptr = xpos + (ypos + (scale + 1) * height) * pitch; // offset of the candidate in the middle plane (scale + 1)
+    float val = d_Data0[ptr];
+    float *data1 = &d_Data0[ptr];
+    float dxx = 2.0f * val - data1[-1] - data1[1]; // 2 * centre - left - right
+    float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; // 2 * centre - above - below
+    float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); // from the four diagonal neighbours
+    float tra = dxx + dyy; // trace of [dxx dxy; dxy dyy]
+    float det = dxx * dyy - dxy * dxy; // determinant of the same matrix
+    if (tra * tra < edgeLimit * det) // edge test: keep only points whose tra^2 stays below edgeLimit * det
+    {
+      float edge = __fdividef(tra * tra, det); // fast-division intrinsic; stored as edgeness
+      float dx = 0.5f * (data1[1] - data1[-1]); // central differences in x and y
+      float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+      float *data0 = d_Data0 + ptr - height * pitch; // same pixel, one plane lower (scale)
+      float *data2 = d_Data0 + ptr + height * pitch; // same pixel, one plane higher (scale + 2)
+      float ds = 0.5f * (data0[0] - data2[0]); // half difference across planes (lower minus higher)
+      float dss = 2.0f * val - data2[0] - data0[0];
+      float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+      float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+      float idxx = dyy * dss - dys * dys; // idxx..idss: cofactors of the symmetric matrix [dxx dxy dxs; dxy dyy dys; dxs dys dss]
+      float idxy = dys * dxs - dxy * dss;
+      float idxs = dxy * dys - dyy * dxs;
+      float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); // reciprocal of that matrix's determinant
+      float idyy = dxx * dss - dxs * dxs;
+      float idys = dxy * dxs - dxx * dys;
+      float idss = dxx * dyy - dxy * dxy;
+      float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); // (pdx, pdy, pds) = inverse(matrix) * (dx, dy, ds): sub-sample offsets
+      float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+      float pds = idet * (idxs * dx + idys * dy + idss * ds);
+      if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+      { // an offset beyond half a sample: fall back to independent per-axis estimates
+        pdx = __fdividef(dx, dxx);
+        pdy = __fdividef(dy, dyy);
+        pds = __fdividef(ds, dss);
+      }
+      float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); // correction added to val to form the stored sharpness
+      int maxPts = d_MaxNumPoints;
+      float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); // 2^(scale / NUM_SCALES) * 2^(pds * factor)
+      if (sc >= lowestScale) // points with a smaller scale are discarded
+      {
+        atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); // same counter floor as at kernel entry, re-applied per point
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); // previous counter value = output slot
+        idx = (idx >= maxPts ? maxPts - 1 : idx); // past capacity, every further point overwrites the last slot
+        d_Sift[idx].xpos = xpos + pdx;
+        d_Sift[idx].ypos = ypos + pdy;
+        d_Sift[idx].scale = sc;
+        d_Sift[idx].sharpness = val + dval;
+        d_Sift[idx].edgeness = edge;
+        d_Sift[idx].subsampling = subsampling;
+      }
+    }
+  }
+}
+
+// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     float ymin1 = fminf(fminf(d10, d11), d12);
+//     float ymax1 = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     float ymin3 = fminf(fminf(d30, d31), d32);
+//     float ymax3 = fmaxf(fmaxf(d30, d31), d32);
+//     float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3);
+//     float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3);
+
+//     float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+//     float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(nmin2, ymin1), ymin3);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID];
+//   __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID];
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+//     float d20 = d_Data0[yptr0 + 1 * size];
+//     float d21 = d_Data0[yptr1 + 1 * size];
+//     float d22 = d_Data0[yptr2 + 1 * size];
+//     float d31 = d_Data0[yptr1 + 2 * size];
+//     float d11 = d_Data0[yptr1];
+
+//     float d10 = d_Data0[yptr0];
+//     float d12 = d_Data0[yptr2];
+//     ymin1[tx] = fminf(fminf(d10, d11), d12);
+//     ymax1[tx] = fmaxf(fmaxf(d10, d11), d12);
+//     float d30 = d_Data0[yptr0 + 2 * size];
+//     float d32 = d_Data0[yptr2 + 2 * size];
+//     ymin3[tx] = fminf(fminf(d30, d31), d32);
+//     ymax3[tx] = fmaxf(fmaxf(d30, d31), d32);
+//     ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]);
+//     ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]);
+
+//     __syncthreads();
+
+//     if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+//     {
+//       if (d21 < -thresh)
+//       {
+//         float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]);
+//         minv = fminf(fminf(minv, d20), d22);
+//         if (d21 < minv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//       if (d21 > thresh)
+//       {
+//         float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]);
+//         maxv = fmaxf(fmaxf(maxv, d20), d22);
+//         if (d21 > maxv)
+//         {
+//           int pos = atomicInc(&cnt, MEMWID - 1);
+//           points[3 * pos + 0] = xpos - 1;
+//           points[3 * pos + 1] = ypos;
+//           points[3 * pos + 2] = scale;
+//         }
+//       }
+//     }
+//     __syncthreads();
+//   }
+//   if (tx < cnt)
+//   {
+//     int xpos = points[3 * tx + 0];
+//     int ypos = points[3 * tx + 1];
+//     int scale = points[3 * tx + 2];
+//     int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+//     float val = d_Data0[ptr];
+//     float *data1 = &d_Data0[ptr];
+//     float dxx = 2.0f * val - data1[-1] - data1[1];
+//     float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+//     float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+//     float tra = dxx + dyy;
+//     float det = dxx * dyy - dxy * dxy;
+//     if (tra * tra < edgeLimit * det)
+//     {
+//       float edge = __fdividef(tra * tra, det);
+//       float dx = 0.5f * (data1[1] - data1[-1]);
+//       float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+//       float *data0 = d_Data0 + ptr - height * pitch;
+//       float *data2 = d_Data0 + ptr + height * pitch;
+//       float ds = 0.5f * (data0[0] - data2[0]);
+//       float dss = 2.0f * val - data2[0] - data0[0];
+//       float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+//       float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+//       float idxx = dyy * dss - dys * dys;
+//       float idxy = dys * dxs - dxy * dss;
+//       float idxs = dxy * dys - dyy * dxs;
+//       float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+//       float idyy = dxx * dss - dxs * dxs;
+//       float idys = dxy * dxs - dxx * dys;
+//       float idss = dxx * dyy - dxy * dxy;
+//       float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+//       float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+//       float pds = idet * (idxs * dx + idys * dy + idss * ds);
+//       if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+//       {
+//         pdx = __fdividef(dx, dxx);
+//         pdy = __fdividef(dy, dyy);
+//         pds = __fdividef(ds, dss);
+//       }
+//       float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+//       int maxPts = d_MaxNumPoints;
+//       float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+//       if (sc >= lowestScale)
+//       {
+//         unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+//         idx = (idx >= maxPts ? maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+__global__ void LaplaceMultiTex(hipTextureObject_t texObj, float *d_Result, int width, int pitch, int height, int octave)
+{
+  // Texture variant: threadIdx.y selects a scale; a symmetric 9-tap filter runs vertically, then horizontally, and adjacent scales are subtracted.
+  __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+  __shared__ float data2[LAPLACE_W * LAPLACE_S];
+  const int lane = threadIdx.x, level = threadIdx.y, row = blockIdx.y;
+  const int col = blockIdx.x * LAPLACE_W + lane;
+  const float *taps = d_LaplaceKernel + octave * 12 * 16 + level * 16;
+  float *vert = data1 + (LAPLACE_W + 2 * LAPLACE_R) * level;
+  float *horz = data2 + LAPLACE_W * level;
+  const float u = col - 3.5, v = row + 0.5;
+  float acc = taps[0] * tex2D<float>(texObj, u, v); // vertical pass: centre tap, then the four symmetric pairs
+  acc += taps[1] * (tex2D<float>(texObj, u, v - 1.0) + tex2D<float>(texObj, u, v + 1.0));
+  acc += taps[2] * (tex2D<float>(texObj, u, v - 2.0) + tex2D<float>(texObj, u, v + 2.0));
+  acc += taps[3] * (tex2D<float>(texObj, u, v - 3.0) + tex2D<float>(texObj, u, v + 3.0));
+  acc += taps[4] * (tex2D<float>(texObj, u, v - 4.0) + tex2D<float>(texObj, u, v + 4.0));
+  vert[lane] = acc;
+  __syncthreads();
+  if (lane < LAPLACE_W) // horizontal pass over the shared row, centred four entries to the right of this lane
+  {
+    float sum = taps[0] * vert[lane + 4];
+    sum += taps[1] * (vert[lane + 3] + vert[lane + 5]);
+    sum += taps[2] * (vert[lane + 2] + vert[lane + 6]);
+    sum += taps[3] * (vert[lane + 1] + vert[lane + 7]);
+    sum += taps[4] * (vert[lane + 0] + vert[lane + 8]);
+    horz[lane] = sum;
+  }
+  __syncthreads();
+  if (lane < LAPLACE_W && level < LAPLACE_S - 1 && col < width) // this scale minus the next one, stored in plane 'level'
+    d_Result[level * height * pitch + row * pitch + col] = horz[lane] - horz[lane + LAPLACE_W];
+}
+
+__global__ void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+{
+  // Difference-of-scales filter reading the image from linear memory: each block handles a
+  // LAPLACE_W-wide strip of row blockIdx.y, applies LAPLACE_S symmetric filters (vertical pass
+  // into shared memory, then horizontal pass) and stores, for every pair of consecutive
+  // scales, filtered(scale) - filtered(scale - 1) into plane scale - 1 of d_Result.
+  __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+  const int rowLen = LAPLACE_W + 2 * LAPLACE_R; // shared-memory entries per scale
+  const int lane = threadIdx.x;
+  const int col = blockIdx.x * LAPLACE_W + lane;
+  const int row = blockIdx.y;
+  float taps[LAPLACE_S][LAPLACE_R + 1]; // per-thread copy of the filter weights
+  float column[2 * LAPLACE_R + 1];      // vertical neighbourhood of this thread's pixel
+  if (col < (width + 2 * LAPLACE_R))
+  {
+    // Source column is shifted left by LAPLACE_R and clamped to the image.
+    const float *src = d_Image + max(min(col - LAPLACE_R, width - 1), 0);
+    for (int k = 0; k <= 2 * LAPLACE_R; k++)
+      column[k] = src[max(0, min(row + k - LAPLACE_R, height - 1)) * pitch];
+    for (int s = 0; s < LAPLACE_S; s++)
+    {
+      const float *weights = d_LaplaceKernel + octave * 12 * 16 + s * 16;
+      for (int k = 0; k <= LAPLACE_R; k++)
+        taps[s][k] = weights[k];
+      // Vertical pass: centre tap plus symmetric pairs.
+      float acc = taps[s][0] * column[LAPLACE_R];
+#pragma unroll
+      for (int k = 1; k <= LAPLACE_R; k++)
+        acc += taps[s][k] * (column[LAPLACE_R - k] + column[LAPLACE_R + k]);
+      buff[rowLen * s + lane] = acc;
+    }
+  }
+  __syncthreads();
+
+  // Only the first LAPLACE_W lanes that map to a real image column produce output;
+  // no barrier follows, so the remaining lanes can leave here.
+  if (lane >= LAPLACE_W || col >= width)
+    return;
+
+  float prev = 0.0f; // horizontally filtered value of the previous scale
+  for (int s = 0; s < LAPLACE_S; s++)
+  {
+    // Horizontal pass over the shared row of scale s, centred on this lane.
+    const float *centre = buff + rowLen * s + lane + LAPLACE_R;
+    float cur = taps[s][0] * centre[0];
+#pragma unroll
+    for (int k = 1; k <= LAPLACE_R; k++)
+      cur += taps[s][k] * (centre[-k] + centre[k]);
+    // Scale 0 only seeds prev; every later scale writes its difference to plane s - 1.
+    if (s > 0)
+      d_Result[(s - 1) * height * pitch + row * pitch + col] = cur - prev;
+    prev = cur;
+  }
+}
+
+// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx;
+//   const int yp = blockIdx.y;
+//   float kern[LAPLACE_S][LAPLACE_R + 1];
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   float temp[9];
+//   if (xp < (width + 2 * LAPLACE_R))
+//   {
+//     for (int i = 0; i < 4; i++)
+//       temp[i] = data[max(0, min(yp + i - 4, height - 1)) * pitch];
+//     for (int i = 4; i < 8 + 1; i++)
+//       temp[i] = data[min(yp + i - 4, height - 1) * pitch];
+//     for (int scale = 0; scale < LAPLACE_S; scale++)
+//     {
+//       float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//       for (int i = 0; i <= LAPLACE_R; i++)
+//         kern[scale][i] = kernel[LAPLACE_R - i];
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       buf[tx] = kern[scale][4] * temp[4] +
+//                 kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) +
+//                 kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]);
+//     }
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W / 4 && xp4 < width)
+//   {
+//     float4 b0 = reinterpret_cast<float4 *>(buff)[tx + 0];
+//     float4 b1 = reinterpret_cast<float4 *>(buff)[tx + 1];
+//     float4 b2 = reinterpret_cast<float4 *>(buff)[tx + 2];
+//     float4 old4, new4, dif4;
+//     old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) +
+//              kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x);
+//     old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) +
+//              kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y);
+//     old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) +
+//              kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z);
+//     old4.w = kern[0][4] * b1.w + kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) +
+//              kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w);
+//     for (int scale = 1; scale < LAPLACE_S; scale++)
+//     {
+//       float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//       float4 b0 = reinterpret_cast<float4 *>(buf)[tx + 0];
+//       float4 b1 = reinterpret_cast<float4 *>(buf)[tx + 1];
+//       float4 b2 = reinterpret_cast<float4 *>(buf)[tx + 2];
+//       new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) +
+//                kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) +
+//                kern[scale][0] * (b0.x + b2.x);
+//       new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) +
+//                kern[scale][2] * (b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) +
+//                kern[scale][0] * (b0.y + b2.y);
+//       new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) +
+//                kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) +
+//                kern[scale][0] * (b0.z + b2.z);
+//       new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) +
+//                kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) +
+//                kern[scale][0] * (b0.w + b2.w);
+//       dif4.x = new4.x - old4.x;
+//       dif4.y = new4.y - old4.y;
+//       dif4.z = new4.z - old4.z;
+//       dif4.w = new4.w - old4.w;
+//       reinterpret_cast<float4 *>(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4;
+//       old4 = new4;
+//     }
+//   }
+// }
+
+// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = LAPLACE_H * blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   float temp[8 + LAPLACE_H], kern[LAPLACE_R + 1];
+//   for (int i = 0; i < 4; i++)
+//     temp[i] = data[max(0, min(yp + i - 4, h)) * pitch];
+//   for (int i = 4; i < 8 + LAPLACE_H; i++)
+//     temp[i] = data[min(yp + i - 4, h) * pitch];
+//   for (int i = 0; i <= LAPLACE_R; i++)
+//     kern[i] = kernel[LAPLACE_R - i];
+//   for (int j = 0; j < LAPLACE_H; j++)
+//   {
+//     sdata1[tx] = kern[4] * temp[4 + j] +
+//                  kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) +
+//                  kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]);
+//     __syncthreads();
+//     float *sdata2 = data2 + LAPLACE_W * scale;
+//     if (tx < LAPLACE_W)
+//     {
+//       sdata2[tx] = kern[4] * sdata1[tx + 4] +
+//                    kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                    kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//     }
+//     __syncthreads();
+//     if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height)
+//       d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+//   }
+// }
+
+// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave)
+// {
+//   __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S];
+//   __shared__ float data2[LAPLACE_W * LAPLACE_S];
+//   const int tx = threadIdx.x;
+//   const int xp = blockIdx.x * LAPLACE_W + tx;
+//   const int yp = blockIdx.y;
+//   const int scale = threadIdx.y;
+//   float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16;
+//   float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+//   float *data = d_Image + max(min(xp - 4, width - 1), 0);
+//   int h = height - 1;
+//   sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] +
+//                kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) +
+//                kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) +
+//                kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) +
+//                kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]);
+//   __syncthreads();
+//   float *sdata2 = data2 + LAPLACE_W * scale;
+//   if (tx < LAPLACE_W)
+//   {
+//     sdata2[tx] = kernel[0] * sdata1[tx + 4] +
+//                  kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) +
+//                  kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) +
+//                  kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) +
+//                  kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]);
+//   }
+//   __syncthreads();
+//   if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width)
+//     d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W];
+// }
+
+__global__ void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height) // 9-tap vertical pass followed by a 9-tap horizontal pass over d_Image, written to d_Result; weights come from d_LowPassKernel
+{
+  __shared__ float buffer[(LOWPASS_W + 2 * LOWPASS_R) * LOWPASS_H]; // vertically filtered values, one slice of LOWPASS_W + 2 * LOWPASS_R floats per threadIdx.y
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx; // output column associated with this thread
+  const int yp = blockIdx.y * LOWPASS_H + ty; // image row handled by this thread
+  float *kernel = d_LowPassKernel; // kernel[4] weights the centre sample, kernel[4 - d] the two samples at distance d
+  float *data = d_Image + max(min(xp - 4, width - 1), 0); // the thread reads column xp - 4, clamped to [0, width - 1]
+  float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); // this thread row's slice of the shared buffer
+  int h = height - 1; // last valid row index, used to clamp the vertical taps
+  if (yp < height) // vertical pass over rows yp - 4 .. yp + 4, each clamped to [0, h]
+    buff[tx] = kernel[4] * data[min(yp, h) * pitch] +
+               kernel[3] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) +
+               kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) +
+               kernel[1] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) +
+               kernel[0] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]);
+  __syncthreads(); // the horizontal pass below reads buff entries written by other threads of the same row
+  if (tx < LOWPASS_W && xp < width && yp < height) // horizontal pass: only threads with tx < LOWPASS_W write; buff[tx + 4] is the centre sample (column xp)
+    d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] +
+                                kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) +
+                                kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]);
+}
+
+__global__ void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height) // low-pass filter that keeps horizontally filtered rows in a shared ring buffer and filters them vertically; this is the kernel the host LowPass() launches
+{
+  __shared__ float xrows[16][32]; // ring of N horizontally filtered rows, 32 columns each
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx; // output column associated with this thread
+  const int yp = blockIdx.y * LOWPASS_H + ty; // first output row of this thread
+  const int N = 16; // number of ring slots (first dimension of xrows)
+  float *k = d_LowPassKernel; // k[4] weights the centre sample, k[4 - d] the two samples at distance d
+  int xl = max(min(xp - 4, width - 1), 0); // the thread loads column xp - 4, clamped to [0, width - 1]
+  for (int l = -8; l <= LOWPASS_H; l += 4) // rows advance 4 per step, matching the 4 thread rows the host launch uses
+  {
+    if (l < LOWPASS_H) // load phase (skipped on the final step): filter image row yp + l + 4 horizontally
+    {
+      int yl = max(min(yp + l + 4, height - 1), 0); // source row clamped to the image
+      float val = d_Image[yl * pitch + xl];
+      xrows[(l + 8 + ty) % N][tx] = k[4] * ShiftDown(val, 4) + // NOTE(review): ShiftDown is defined elsewhere; presumably it returns val from the lane that many positions higher — confirm
+                                    k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+                                    k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+                                    k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+                                    k[0] * (ShiftDown(val, 8) + val);
+    }
+    if (l >= 4) // output phase: vertical filter centred on ring slot (l + ty) % N, which holds image row ys
+    {
+      int ys = yp + l - 4; // output row produced in this step
+      if (xp < width && ys < height && tx < LOWPASS_W)
+        d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] +
+                                    k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) +
+                                    k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) +
+                                    k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) +
+                                    k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]);
+    }
+    if (l >= 0) // no barrier after the l = -8 and l = -4 steps: xrows is first read at l == 4, after the barrier of the l == 0 step
+      __syncthreads();
+  }
+}
+
+__global__ void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height) // ring-buffer low-pass filter organised as a prefill loop, a steady-state loop and a final output step (appears to mirror LowPassBlockOld — confirm)
+{
+  __shared__ float xrows[16][32]; // ring of N horizontally filtered rows, 32 columns each
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int xp = blockIdx.x * LOWPASS_W + tx; // output column associated with this thread
+  const int yp = blockIdx.y * LOWPASS_H + ty; // first output row of this thread
+  const int N = 16; // number of ring slots (first dimension of xrows)
+  float *k = d_LowPassKernel; // k[4] weights the centre sample, k[4 - d] the two samples at distance d
+  int xl = max(min(xp - 4, width - 1), 0); // the thread loads column xp - 4, clamped to [0, width - 1]
+#pragma unroll
+  for (int l = -8; l < 4; l += 4) // prefill: three steps (l = -8, -4, 0) store rows into slots ly + 8 without producing output
+  {
+    int ly = l + ty;
+    int yl = max(min(yp + l + 4, height - 1), 0); // source row clamped to the image at both ends
+    float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x
+    val = k[4] * ShiftDown(val, 4) + // NOTE(review): ShiftDown is defined elsewhere; presumably it returns val from the lane that many positions higher — confirm
+          k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+          k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+          k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+          k[0] * (ShiftDown(val, 8) + val);
+    xrows[ly + 8][tx] = val; // ly + 8 is non-negative here, so no modulo is applied
+  }
+  __syncthreads(); // prefilled rows must be visible before the first vertical filter
+#pragma unroll
+  for (int l = 4; l < LOWPASS_H; l += 4) // steady state: store one more filtered row, then emit output row yp + l - 4
+  {
+    int ly = l + ty;
+    int yl = min(yp + l + 4, height - 1); // only the upper clamp is applied in this loop
+    float val = d_Image[yl * pitch + xl];
+    val = k[4] * ShiftDown(val, 4) +
+          k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) +
+          k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) +
+          k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) +
+          k[0] * (ShiftDown(val, 8) + val);
+    xrows[(ly + 8) % N][tx] = val; // slot indices wrap around the N-entry ring
+    int ys = yp + l - 4; // output row produced in this step
+    if (xp < width && ys < height && tx < LOWPASS_W) // vertical filter centred on slot ly % N
+      d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                  k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                  k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                  k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                  k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+    __syncthreads(); // rows stored in this step are read by other threads in the next one
+  }
+  int ly = LOWPASS_H + ty; // final step: no further row is loaded, only the last output row is written
+  int ys = yp + LOWPASS_H - 4;
+  if (xp < width && ys < height && tx < LOWPASS_W)
+    d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h
new file mode 100644
index 000000000..0d38fe57e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h
@@ -0,0 +1,58 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//  
+
+#ifndef CUDASIFTD_H
+#define CUDASIFTD_H
+
+#define NUM_SCALES      5 // NOTE(review): presumably the number of scales per octave; LAPLACE_S below is derived from it — confirm
+
+// Scale down thread block width
+#define SCALEDOWN_W    64 // 60
+
+// Scale down thread block height
+#define SCALEDOWN_H    16 // 8
+
+// Scale up thread block width
+#define SCALEUP_W      64
+
+// Scale up thread block height
+#define SCALEUP_H       8
+
+// Find point thread block width
+#define MINMAX_W       30 //32
+
+// Find point thread block height
+#define MINMAX_H        8 //16
+
+// Laplace thread block width
+#define LAPLACE_W     128 // 56
+
+// Laplace rows per thread
+#define LAPLACE_H       4
+
+// Number of laplace scales
+#define LAPLACE_S   (NUM_SCALES+3)
+
+// Laplace filter kernel radius
+#define LAPLACE_R       4
+
+#define LOWPASS_W      24 //56 -- output columns per low-pass block (the host grid uses iDivUp(width, LOWPASS_W))
+#define LOWPASS_H      32 //16 -- output rows per low-pass block (the host grid uses iDivUp(height, LOWPASS_H))
+#define LOWPASS_R       4 // low-pass filter kernel radius; the filter has 2 * LOWPASS_R + 1 taps
+
+//====================== Number of threads ====================//
+// ScaleDown:               SCALEDOWN_W + 4
+// LaplaceMulti:            (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S
+// FindPointsMulti:         MINMAX_W + 2
+// ComputeOrientations:     128
+// ExtractSiftDescriptors:  256
+
+//====================== Number of blocks ====================//
+// ScaleDown:               (width/SCALEDOWN_W) * (height/SCALEDOWN_H)
+// LaplaceMulti:            (width+2*LAPLACE_R)/LAPLACE_W * height
+// FindPointsMulti:         (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H)
+// ComputeOrientations:     numpts
+// ExtractSiftDescriptors:  numpts
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp
new file mode 100644
index 000000000..f9c0d9676
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp
@@ -0,0 +1,651 @@
+//********************************************************//
+// CUDA SIFT extractor by Mårten Björkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include "hip/hip_runtime.h"
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <iostream>
+#include <algorithm>
+#include <chrono>
+#include <hip/hip_runtime.h>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+#include "cudaSiftD.h"
+#include "cudaSiftH.h"
+
+#include "cudaSiftD.cpp"
+
+// Picks device devNum (capped at the last available device), initialises it via deviceInit() and prints its properties.
+void InitCuda(int devNum)
+{
+  int nDevices = 0; // stays 0 when the query below fails, so the count is never read uninitialised
+  if (hipGetDeviceCount(&nDevices) != hipSuccess || nDevices <= 0)
+  {
+    std::cerr << "No CUDA devices available" << std::endl;
+    return;
+  }
+  devNum = std::min(nDevices - 1, devNum);
+  deviceInit(devNum);
+  hipDeviceProp_t prop;
+  hipGetDeviceProperties(&prop, devNum);
+  printf("Device Number: %d\n", devNum);
+  printf("  Device name: %s\n", prop.name);
+  printf("  Memory Clock Rate (MHz): %d\n", prop.memoryClockRate / 1000);
+  printf("  Clock Freq (MHz): %d\n", prop.clockRate / 1000);
+  printf("  Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
+  printf("  Peak Memory Bandwidth (GB/s): %.1f\n\n",
+         2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
+}
+
+// Sizes and allocates the device scratch buffer (images plus Laplace buffers for every pyramid level); the allocation time is added to `time` when DEVICE_TIMER is defined.
+float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp)
+{
+  const int numLevels = NUM_SCALES + 3;
+  const int upFactor = scaleUp ? 2 : 1;
+  int levelW = width * upFactor;
+  int levelH = height * upFactor;
+  int imageFloats = levelH * iAlignUp(levelW, 128); // image sizes
+  int laplaceFloats = numLevels * imageFloats;      // laplace buffer sizes
+  for (int octave = 0; octave < numOctaves; ++octave)
+  {
+    levelW /= 2;
+    levelH /= 2;
+    const int levelFloats = levelH * iAlignUp(levelW, 128);
+    imageFloats += levelFloats;
+    laplaceFloats += numLevels * levelFloats;
+  }
+  // The buffer is requested as rows of 4096 bytes; numRows rows cover every float counted above.
+  const size_t numRows = (imageFloats + laplaceFloats + 4095) / 4096 * sizeof(float);
+  float *memoryTmp = NULL;
+  size_t pitch;
+
+#ifdef DEVICE_TIMER
+  auto allocBegin = std::chrono::steady_clock::now();
+#endif
+  hipMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, numRows);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto allocEnd = std::chrono::steady_clock::now();
+  time += std::chrono::duration<float, std::micro>(allocEnd - allocBegin).count();
+#endif
+  return memoryTmp;
+}
+
+// Releases the device buffer memoryTmp with hipFree; a null pointer is ignored.
+void FreeSiftTempMemory(float *memoryTmp)
+{
+  if (memoryTmp != NULL) hipFree(memoryTmp);
+}
+
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur,
+                 float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory)
+{ // Host driver: resets the device point counters, lays out scratch memory, low-pass filters img (after a ScaleUp step when scaleUp is set), runs ExtractSiftLoop, then reads back the point count and, if a host buffer exists, the points. Timed sections are added to totTime only when DEVICE_TIMER is defined.
+  unsigned int *d_PointCounterAddr; // receives the device address of the d_PointCounter symbol
+  // Zero the 8 * 2 + 1 counters behind d_PointCounter and copy siftData.maxPts into d_MaxNumPoints.
+#ifdef DEVICE_TIMER
+  auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+  hipGetSymbolAddress((void **)&d_PointCounterAddr, HIP_SYMBOL(d_PointCounter));
+  hipMemset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int));
+  hipMemcpyToSymbol(HIP_SYMBOL(d_MaxNumPoints), &siftData.maxPts, sizeof(int));
+  hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+  auto stop_memcpy = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  // Scratch layout in floats: sizeTmp for the Laplace buffers (nd per pyramid level), followed by the images; size ends up as the total of both.
+  const int nd = NUM_SCALES + 3;
+  int w = img.width * (scaleUp ? 2 : 1);
+  int h = img.height * (scaleUp ? 2 : 1);
+  int p = iAlignUp(w, 128);
+  int width = w, height = h; // full-resolution dimensions, kept because w and h are halved in the loop below
+  int size = h * p;         // image sizes
+  int sizeTmp = nd * h * p; // laplace buffer sizes
+  for (int i = 0; i < numOctaves; i++)
+  {
+    w /= 2;
+    h /= 2;
+    int p = iAlignUp(w, 128); // shadows the outer p: row length of this pyramid level
+    size += h * p;
+    sizeTmp += nd * h * p;
+  }
+  float *memoryTmp = tempMemory; // caller-provided scratch, or NULL
+  size += sizeTmp;
+  if (!tempMemory) // no scratch supplied: allocate it here (released again near the end of this function)
+  {
+    size_t pitch;
+
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  }
+  float *memorySub = memoryTmp + sizeTmp; // the image area starts after the Laplace buffers
+
+  CudaImage lowImg;
+  lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub); // NOTE(review): presumably wraps memorySub rather than allocating — confirm in CudaImage::Allocate
+  if (!scaleUp) // path without upscaling
+  {
+    float kernel[8 * 12 * 16];
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+    // Upload the prepared Laplace kernels to the d_LaplaceKernel symbol.
+#ifdef DEVICE_TIMER
+    auto start_memcpy1 = std::chrono::steady_clock::now();
+#endif
+    hipMemcpyToSymbol(HIP_SYMBOL(d_LaplaceKernel), kernel, 8 * 12 * 16 * sizeof(float), 0, hipMemcpyHostToDevice);
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy1 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy1 - start_memcpy1).count();
+#endif
+    LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime); // blur img into lowImg; the scale is floored at 0.001
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime); // sub-image scratch starts right after lowImg's height * pitch floats
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), hipMemcpyDeviceToHost)); // the point count is read from counter slot 2 * numOctaves
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy2 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy2 - start_memcpy2).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); // clamp the reported count to maxPts
+  }
+  else
+  {
+    CudaImage upImg;
+    upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp); // upImg is given the start of the scratch buffer
+    ScaleUp(upImg, img, totTime);
+    LowPass(lowImg, upImg, max(initBlur, 0.001f), totTime); // blur the upscaled image into lowImg; the scale is floored at 0.001
+    float kernel[8 * 12 * 16];
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+    // Upload the prepared Laplace kernels to the d_LaplaceKernel symbol.
+#ifdef DEVICE_TIMER
+    auto start_memcpy3 = std::chrono::steady_clock::now();
+#endif
+    hipMemcpyToSymbol(HIP_SYMBOL(d_LaplaceKernel), kernel, 8 * 12 * 16 * sizeof(float), 0, hipMemcpyHostToDevice);
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy3 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy3 - start_memcpy3).count();
+#endif
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), totTime); // lowestScale is doubled on this path
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy4 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), hipMemcpyDeviceToHost)); // the point count is read from counter slot 2 * numOctaves
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy4 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy4 - start_memcpy4).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); // clamp the reported count to maxPts
+    RescalePositions(siftData, 0.5f, totTime); // NOTE(review): the 0.5 factor presumably maps results back to the original resolution — confirm against the RescalePositions kernel
+  }
+
+  if (!tempMemory) // release the scratch only if it was allocated in this call
+    safeCall(hipFree(memoryTmp));
+#ifdef MANAGEDMEM
+  safeCall(hipDeviceSynchronize());
+#else
+  if (siftData.h_data) // copy the numPts extracted points into the host buffer when one exists
+  {
+#ifdef DEVICE_TIMER
+    auto start_memcpy5 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts, hipMemcpyDeviceToHost));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy5 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy5 - start_memcpy5).count();
+    printf("Total time for sift extraction = %0.2f us %d \n\n", totTime, siftData.numPts);
+#endif
+  }
+#endif
+  printf("Number of Points after sift extraction =  %d\n\n", siftData.numPts);
+}
+
+// Recursive pyramid walk: half-size octaves are built with ScaleDown and processed first, then ExtractSiftOctave runs on img itself. Always returns 0.
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,
+                    float subsampling, float *memoryTmp, float *memorySub, float &totTime)
+{
+  const int srcW = img.width, srcH = img.height;
+  if (numOctaves > 1)
+  {
+    const int halfW = srcW / 2, halfH = srcH / 2;
+    const int halfPitch = iAlignUp(halfW, 128);
+    CudaImage halfImg;
+    halfImg.Allocate(halfW, halfH, halfPitch, false, totTime, memorySub);
+    ScaleDown(halfImg, img, 0.5f, totTime);
+    const float nextInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f;
+    ExtractSiftLoop(siftData, halfImg, numOctaves - 1, nextInitBlur, thresh, lowestScale, subsampling * 2.0f,
+                    memoryTmp, memorySub + halfH * halfPitch, totTime);
+  }
+  ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime);
+  return 0;
+}
+
+// Processes one octave: sets up nd - 1 difference images over memoryTmp, then runs LaplaceMulti, FindPointsMulti, ComputeOrientations and ExtractSiftDescriptors in that order.
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh,
+                       float lowestScale, float subsampling, float *memoryTmp, float &totTime)
+{
+  const int nd = NUM_SCALES + 3;
+  CudaImage diffImg[nd];
+  int w = img.width;
+  int h = img.height;
+  int p = iAlignUp(w, 128);
+  for (int i = 0; i < nd - 1; i++) // image i is handed the i-th slice of p * h floats of memoryTmp
+    diffImg[i].Allocate(w, h, p, false, totTime, memoryTmp + i * p * h);
+
+  // The former baseBlur / diffScale locals were computed here but never read, so they are gone.
+  LaplaceMulti(img, diffImg, octave, totTime);
+  FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, totTime);
+  ComputeOrientations(img, siftData, octave, totTime);
+  ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime);
+}
+
+// Resets data to hold up to num points and allocates the host buffer (if host) and the device buffer (if dev); device allocation time is added to `time` when DEVICE_TIMER is defined.
+void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev)
+{
+  data.numPts = 0;
+  data.maxPts = num;
+  const size_t sz = sizeof(SiftPoint) * num; // kept as size_t: storing the byte count in an int truncated it for large num
+  data.h_data = NULL;
+  if (host)
+    data.h_data = (SiftPoint *)malloc(sz);
+  data.d_data = NULL;
+  if (dev)
+  {
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMalloc((void **)&data.d_data, sz));
+    hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count();
+#endif
+  }
+}
+
+void FreeSiftData(SiftData &data)
+{ // Releases the point buffers held by data and resets its counters; the non-managed pointers are cleared so a repeated call is harmless.
+#ifdef MANAGEDMEM
+  safeCall(hipFree(data.m_data));
+#else
+  if (data.d_data != NULL)
+    safeCall(hipFree(data.d_data));
+  data.d_data = NULL;
+  free(data.h_data);  // free(NULL) is a no-op, so no guard is needed
+  data.h_data = NULL; // previously left dangling, unlike d_data, which made a second call a double free
+#endif
+  data.numPts = 0;
+  data.maxPts = 0;
+}
+
+// Prints every point in data (scalar fields, then the descriptor as 8 rows of 16 values) followed by the point counts.
+void PrintSiftData(SiftData &data)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *points = data.m_data;
+#else
+  SiftPoint *points = data.h_data;
+  if (points == NULL)
+  {
+    // No host copy yet: allocate room for maxPts points, fetch numPts of them from the device and keep the buffer in data.
+    points = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts);
+    safeCall(hipMemcpy(points, data.d_data, sizeof(SiftPoint) * data.numPts, hipMemcpyDeviceToHost));
+    hipDeviceSynchronize();
+    data.h_data = points;
+  }
+#endif
+  for (int idx = 0; idx < data.numPts; ++idx)
+  {
+    SiftPoint &pt = points[idx];
+    printf("xpos         = %.2f\n", pt.xpos);
+    printf("ypos         = %.2f\n", pt.ypos);
+    printf("scale        = %.2f\n", pt.scale);
+    printf("sharpness    = %.2f\n", pt.sharpness);
+    printf("edgeness     = %.2f\n", pt.edgeness);
+    printf("orientation  = %.2f\n", pt.orientation);
+    printf("score        = %.2f\n", pt.score);
+    float *desc = (float *)&pt.data;
+    for (int row = 0; row < 8; ++row)
+    {
+      printf("%s", row == 0 ? "data = " : "       ");
+      for (int col = 0; col < 16; ++col)
+        if (desc[row + 8 * col] < 0.05)
+          printf(" .   ");
+        else
+          printf("%.2f ", desc[row + 8 * col]);
+      printf("\n");
+    }
+  }
+  printf("Number of available points: %d\n", data.numPts);
+  printf("Number of allocated points: %d\n", data.maxPts);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Host side master functions
+///////////////////////////////////////////////////////////////////////////////
+
+// Launches the ScaleDown kernel from src into res, re-uploading the 5 normalised filter weights only when variance differs from the previous call. Always returns 0.0.
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime)
+{
+  static float cachedVariance = -1.0f; // variance whose weights were uploaded last
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleDown: missing data\n");
+    return 0.0;
+  }
+  if (cachedVariance != variance)
+  {
+    float weights[5];
+    float weightSum = 0.0f;
+    for (int tap = 0; tap < 5; ++tap)
+    {
+      weights[tap] = (float)expf(-(double)(tap - 2) * (tap - 2) / 2.0 / variance);
+      weightSum += weights[tap];
+    }
+    for (int tap = 0; tap < 5; ++tap)
+      weights[tap] /= weightSum;
+
+    // Copy the normalised weights into the d_ScaleDownKernel symbol.
+#ifdef DEVICE_TIMER
+    auto copyBegin = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpyToSymbol(HIP_SYMBOL(d_ScaleDownKernel), weights, 5 * sizeof(float)));
+    hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+    auto copyEnd = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(copyEnd - copyBegin).count();
+#endif
+    cachedVariance = variance;
+  }
+  // The first branch is compiled out; the second one is the launch actually used.
+#if 0
+  dim3 grid(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H));
+  dim3 block(SCALEDOWN_W + 4, SCALEDOWN_H + 4);
+  hipLaunchKernelGGL(ScaleDownDenseShift, grid, block, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+#else
+  dim3 grid(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H));
+  dim3 block(SCALEDOWN_W + 4);
+#ifdef DEVICE_TIMER
+  auto launchBegin = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(ScaleDown, grid, block, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto launchEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(launchEnd - launchBegin).count();
+#endif
+#endif
+  checkMsg("ScaleDown() execution failed\n");
+  return 0.0;
+}
+
+// Launches the ScaleUp kernel from src into res and waits for it; kernel time is added to totTime when DEVICE_TIMER is defined. Always returns 0.0.
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime)
+{
+  if (res.d_data == NULL || src.d_data == NULL)
+  {
+    printf("ScaleUp: missing data\n");
+    return 0.0;
+  }
+  // The grid is sized from the result image; each block has half of SCALEUP_W x SCALEUP_H threads per dimension.
+  dim3 grid(iDivUp(res.width, SCALEUP_W), iDivUp(res.height, SCALEUP_H));
+  dim3 block(SCALEUP_W / 2, SCALEUP_H / 2);
+
+#ifdef DEVICE_TIMER
+  auto launchBegin = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(ScaleUp, grid, block, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto launchEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(launchEnd - launchBegin).count();
+#endif
+
+  checkMsg("ScaleUp() execution failed\n");
+  return 0.0;
+}
+
+// Launches ComputeOrientationsCONSTNew on 512 blocks of 256 threads for the given octave and waits for it. Always returns 0.0.
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime)
+{
+  dim3 grid(512);
+  dim3 block(256);
+
+#ifdef DEVICE_TIMER
+  auto launchBegin = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(ComputeOrientationsCONSTNew, grid, block, 0, 0, src.d_data, src.width, src.pitch, src.height, siftData.d_data, octave);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto launchEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(launchEnd - launchBegin).count();
+#endif
+
+  checkMsg("ComputeOrientations() execution failed\n");
+  return 0.0;
+}
+
+// Launches ExtractSiftDescriptorsCONSTNew on 512 blocks of 16 x 8 threads for the given octave and waits for it. Always returns 0.0.
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime)
+{
+  dim3 grid(512);
+  dim3 block(16, 8);
+
+#ifdef DEVICE_TIMER
+  auto launchBegin = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(ExtractSiftDescriptorsCONSTNew, grid, block, 0, 0, texObj, pitch, siftData.d_data, subsampling, octave);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto launchEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(launchEnd - launchBegin).count();
+#endif
+
+  checkMsg("ExtractSiftDescriptors() execution failed\n");
+  return 0.0;
+}
+
+// Launches the RescalePositions kernel over the numPts points in siftData.d_data with the given scale (64 threads per block). Always returns 0.0.
+double RescalePositions(SiftData &siftData, float scale, float &totTime)
+{
+  if (siftData.numPts <= 0)
+    return 0.0; // nothing to rescale; also avoids launching a grid with zero blocks
+  dim3 blocks(iDivUp(siftData.numPts, 64));
+  dim3 threads(64);
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(RescalePositions, blocks, threads, 0, 0, siftData.d_data, siftData.numPts, scale);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+  checkMsg("RescalePositions() execution failed\n");
+  return 0.0;
+}
+
+// Low-pass filters src into res with a (2 * LOWPASS_R + 1)-tap kernel built from `scale`; the taps are re-uploaded only when scale differs from the previous call. Always returns 0.0.
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime)
+{
+  float taps[2 * LOWPASS_R + 1];
+  static float cachedScale = -1.0f; // scale whose taps were uploaded last
+  if (scale != cachedScale)
+  {
+    float tapSum = 0.0f;
+    const float invTwoVar = 1.0f / (2.0f * scale * scale);
+    for (int off = -LOWPASS_R; off <= LOWPASS_R; ++off)
+    {
+      taps[off + LOWPASS_R] = (float)expf(-(double)off * off * invTwoVar);
+      tapSum += taps[off + LOWPASS_R];
+    }
+    for (int idx = 0; idx <= 2 * LOWPASS_R; ++idx)
+      taps[idx] /= tapSum;
+
+    // Copy the normalised taps into the d_LowPassKernel symbol.
+#ifdef DEVICE_TIMER
+    auto copyBegin = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpyToSymbol(HIP_SYMBOL(d_LowPassKernel), taps, sizeof(taps)));
+    hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+    auto copyEnd = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(copyEnd - copyBegin).count();
+#endif
+    cachedScale = scale;
+  }
+  // Geometry is taken from the result image: one block per LOWPASS_W x LOWPASS_H output tile.
+  int width = res.width;
+  int pitch = res.pitch;
+  int height = res.height;
+  dim3 grid(iDivUp(width, LOWPASS_W), iDivUp(height, LOWPASS_H));
+#if 1
+  dim3 block(LOWPASS_W + 2 * LOWPASS_R, 4); // [32,4,1] with the current macro values
+
+#ifdef DEVICE_TIMER
+  auto launchBegin = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(LowPassBlockOld, grid, block, 0, 0, src.d_data, res.d_data, width, pitch, height);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto launchEnd = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(launchEnd - launchBegin).count();
+#endif
+#else
+  // Compiled-out alternative: the plain LowPass kernel with LOWPASS_H thread rows.
+  dim3 block(LOWPASS_W + 2 * LOWPASS_R, LOWPASS_H);
+  hipLaunchKernelGGL(LowPass, grid, block, 0, 0, src.d_data, res.d_data, width, pitch, height);
+#endif
+  checkMsg("LowPass() execution failed\n");
+  return 0.0;
+}
+
+//==================== Multi-scale functions ===================//
+
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel)
+{
+  // Lower octaves are filled first, each from its own derived initial blur.
+  if (numOctaves > 1)
+    PrepareLaplaceKernels(numOctaves - 1, (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f, kernel);
+  const float diffScale = pow(2.0f, 1.0f / NUM_SCALES);
+  float scale = pow(2.0f, -1.0f / NUM_SCALES);
+  for (int i = 0; i < NUM_SCALES + 3; i++, scale *= diffScale)
+  {
+    // Coefficients 0..LAPLACE_R of kernel i within this octave's slot of the table.
+    float *taps = &kernel[numOctaves * 12 * 16 + 16 * i];
+    const float variance = scale * scale - initBlur * initBlur;
+    float tapSum = 0.0f;
+    for (int j = 0; j <= LAPLACE_R; j++)
+    {
+      taps[j] = (float)expf(-(double)j * j / 2.0 / variance);
+      // Every tap except j == 0 is weighted twice in the normalisation sum.
+      tapSum += (j == 0 ? 1 : 2) * taps[j];
+    }
+    for (int j = 0; j <= LAPLACE_R; j++)
+      taps[j] /= tapSum;
+  }
+}
+
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime)
+{
+  int width = results[0].width;
+  int pitch = results[0].pitch;
+  int height = results[0].height;
+#if 1
+  dim3 threads(LAPLACE_W + 2 * LAPLACE_R);       //(136)
+  dim3 blocks(iDivUp(width, LAPLACE_W), height); //(15)
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(LaplaceMultiMem, blocks, threads, 0, 0, baseImage.d_data, results[0].d_data, width, pitch, height, octave);
+  hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LaplaceMultiMem time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+#endif
+  checkMsg("LaplaceMulti() execution failed\n");
+  return 0.0;
+}
+
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime)
+{
+  if (sources->d_data == NULL)
+  {
+    printf("FindPointsMulti: missing data\n");
+    return 0.0;
+  }
+  int w = sources->width;
+  int p = sources->pitch;
+  int h = sources->height;
+#if 1
+  dim3 blocks(iDivUp(w, MINMAX_W) * NUM_SCALES, iDivUp(h, MINMAX_H));
+  dim3 threads(MINMAX_W + 2);
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(FindPointsMultiNew, blocks, threads, 0, 0, sources->d_data, siftData.d_data, w, p, h, subsampling,
+                     lowestScale, thresh, factor, edgeLimit, octave);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("FindPointsMultiNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count();
+#endif
+#endif
+  checkMsg("FindPointsMulti() execution failed\n");
+  return 0.0;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h
new file mode 100644
index 000000000..6fdbafda9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h
@@ -0,0 +1,49 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTH_H
+#define CUDASIFTH_H
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                    float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime);
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale,
+                       float subsampling, float *memoryTmp, float &totTime);
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime);
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime);
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime);
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime);
+double RescalePositions(SiftData &siftData, float scale, float &totTime);
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime);
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel);
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime);
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h
new file mode 100644
index 000000000..f56f135a1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h
@@ -0,0 +1,151 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDAUTILS_H
+#define CUDAUTILS_H
+
+#include <cstdio>
+#include <iostream>
+
+#ifdef WIN32
+#include <intrin.h>
+#endif
+
+#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
+#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__)
+#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
+
+inline void __safeCall(hipError_t err, const char *file, const int line)
+{
+  if (hipSuccess != err)
+  {
+    fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i : %s.\n", file, line, hipGetErrorString(err));
+    exit(-1);
+  }
+}
+
+inline void __safeThreadSync(const char *file, const int line)
+{
+  hipError_t err = hipDeviceSynchronize();
+  if (hipSuccess != err)
+  {
+    fprintf(stderr, "threadSynchronize() Driver API error in file '%s' in line %i : %s.\n", file, line, hipGetErrorString(err));
+    exit(-1);
+  }
+}
+
+inline void __checkMsg(const char *errorMessage, const char *file, const int line)
+{
+  hipError_t err = hipGetLastError();
+  if (hipSuccess != err)
+  {
+    fprintf(stderr, "checkMsg() CUDA error: %s in file <%s>, line %i : %s.\n", errorMessage, file, line, hipGetErrorString(err));
+    exit(-1);
+  }
+}
+
+inline bool deviceInit(int dev)
+{
+  int deviceCount;
+  safeCall(hipGetDeviceCount(&deviceCount));
+  if (deviceCount == 0)
+  {
+    fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
+    return false;
+  }
+  if (dev < 0)
+    dev = 0;
+  if (dev > deviceCount - 1)
+    dev = deviceCount - 1;
+  hipDeviceProp_t deviceProp;
+  safeCall(hipGetDeviceProperties(&deviceProp, dev));
+  if (deviceProp.major < 1)
+  {
+    fprintf(stderr, "error: device does not support CUDA.\n");
+    return false;
+  }
+  safeCall(hipSetDevice(dev));
+  return true;
+}
+
+class TimerCPU
+{
+  static const int bits = 10;
+
+public:
+  long long beg_clock;
+  float freq;
+  TimerCPU(float freq_) : freq(freq_)
+  { // freq = clock frequency in MHz
+    beg_clock = getTSC(bits);
+  }
+  long long getTSC(int bits)
+  {
+#ifdef WIN32
+    return __rdtsc() / (1LL << bits);
+#else
+    unsigned int low, high;
+    __asm__(".byte 0x0f, 0x31"
+            : "=a"(low), "=d"(high));
+    return ((long long)high << (32 - bits)) | ((long long)low >> bits);
+#endif
+  }
+  float read()
+  {
+    long long end_clock = getTSC(bits);
+    long long Kcycles = end_clock - beg_clock;
+    float time = (float)(1 << bits) * Kcycles / freq / 1e3f;
+    return time;
+  }
+};
+
+template <class T>
+__device__ __inline__ T ShiftDown(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+  return __shfl_down_sync(0xffffffff, var, delta, width);
+#else
+  return __shfl_down(var, delta, width);
+#endif
+}
+
+template <class T>
+__device__ __inline__ T ShiftUp(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+  return __shfl_up_sync(0xffffffff, var, delta, width);
+#else
+  return __shfl_up(var, delta, width);
+#endif
+}
+
+template <class T>
+__device__ __inline__ T Shuffle(T var, unsigned int lane, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+  return __shfl_sync(0xffffffff, var, lane, width);
+#else
+  return __shfl(var, lane, width);
+#endif
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp
new file mode 100644
index 000000000..c01e6e7d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <cmath>
+#include <opencv2/core/core.hpp>
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *mpts = data.m_data;
+#else
+  if (data.h_data==NULL)
+    return 0;
+  SiftPoint *mpts = data.h_data;
+#endif
+  float limit = thresh*thresh;
+  int numPts = data.numPts;
+  cv::Mat M(8, 8, CV_64FC1);
+  cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1);
+  double Y[8];
+  for (int i=0;i<8;i++) 
+    A.at<double>(i, 0) = homography[i] / homography[8];
+  for (int loop=0;loop<numLoops;loop++) {
+    M = cv::Scalar(0.0);
+    X = cv::Scalar(0.0);
+    for (int i=0;i<numPts;i++) {
+      SiftPoint &pt = mpts[i];
+      if (pt.score<minScore || pt.ambiguity>maxAmbiguity)
+	continue;
+      float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0f;
+      float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+      float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+      float err = dx*dx + dy*dy;
+      float wei = (err<limit ? 1.0f : 0.0f); //limit / (err + limit);
+      Y[0] = pt.xpos;
+      Y[1] = pt.ypos;
+      Y[2] = 1.0;
+      Y[3] = Y[4] = Y[5] = 0.0;
+      Y[6] = - pt.xpos * pt.match_xpos;
+      Y[7] = - pt.ypos * pt.match_xpos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei);
+      Y[0] = Y[1] = Y[2] = 0.0;
+      Y[3] = pt.xpos;
+      Y[4] = pt.ypos; 
+      Y[5] = 1.0;
+      Y[6] = - pt.xpos * pt.match_ypos;
+      Y[7] = - pt.ypos * pt.match_ypos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei);
+    }
+    cv::solve(M, X, A, cv::DECOMP_CHOLESKY);
+  }
+  int numfit = 0;
+  for (int i=0;i<numPts;i++) {
+    SiftPoint &pt = mpts[i];
+    float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0;
+    float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+    float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+    float err = dx*dx + dy*dy;
+    if (err<limit) 
+      numfit++;
+    pt.match_error = sqrt(err);
+  }
+  for (int i=0;i<8;i++) 
+    homography[i] = A.at<double>(i);
+  homography[8] = 1.0f;
+  return numfit;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp
new file mode 100644
index 000000000..28c04a3f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp
@@ -0,0 +1,280 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Björkman aka Celebrandil //
+//              celle @ csc.kth.se                       //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <iostream>
+#include <cmath>
+#include <iomanip>
+#include <chrono>
+#include <hip/hip_runtime.h>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "Utility.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh);
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img);
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography);
+
+double ScaleUp(CudaImage &res, CudaImage &src);
+
+///////////////////////////////////////////////////////////////////////////////
+// Main program
+///////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv)
+{
+  auto totalProgTimer_start = std::chrono::steady_clock::now();
+  int devNum = 0, imgSet = 0; // argv[1]: device index, argv[2]: non-zero selects the left/righ .pgm pair
+  if (argc > 1)
+    devNum = std::atoi(argv[1]);
+  if (argc > 2)
+    imgSet = std::atoi(argv[2]);
+
+  float totTime = 0.0;
+  float imageInitTime = 0.0;
+  float extractSiftTime = 0.0;
+  float matchingTime = 0.0;
+  float ioReadTime = 0.0;
+  float dataVerificationTime = 0.0;
+
+  // Read images using OpenCV
+  cv::Mat limg, rimg;
+  auto ioRead_start = std::chrono::steady_clock::now();
+  if (imgSet)
+  {
+    cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
+  }
+  else
+  {
+    cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
+  }
+  auto ioRead_stop = std::chrono::steady_clock::now();
+  ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
+  // A failed imread leaves the matrix empty; stop here instead of continuing with a 0x0 image
+  if (limg.empty() || rimg.empty())
+  {
+    std::cerr << "Failed to read input images from ../../inputData/" << std::endl;
+    return 1;
+  }
+
+  unsigned int w = limg.cols;
+  unsigned int h = limg.rows;
+  std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
+
+  // Select the device requested on the command line, then set up the images and download them to the device
+  std::cout << "Initializing data..." << std::endl;
+  if (hipSetDevice(devNum) != hipSuccess)
+  {
+    std::cerr << "Failed to select HIP device " << devNum << std::endl;
+    return 1;
+  }
+
+  CudaImage img1, img2;
+  img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
+  img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
+  img1.Download(imageInitTime);
+  img2.Download(imageInitTime);
+
+  // Extract Sift features from images
+  SiftData siftData1, siftData2;
+  float initBlur = 1.0f;
+  float thresh = (imgSet ? 4.5f : 2.0f);
+
+  InitSiftData(siftData1, imageInitTime, 32768, true, true);
+  InitSiftData(siftData2, imageInitTime, 32768, true, true);
+
+  // A bit of benchmarking
+  // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
+  float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
+
+  for (int i = 0; i < 50; i++)
+  {
+    float time = 0.0f;
+    ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+    time = 0.0f;
+    ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+    extractSiftTime += time;
+  }
+  FreeSiftTempMemory(memoryTmp);
+
+  // Match Sift features and find a homography
+  MatchSiftData(siftData1, siftData2, matchingTime);
+  float homography[9];
+  int numMatches;
+  FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
+  int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
+  float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
+
+  std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
+  std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n" << std::endl;
+
+  totTime = imageInitTime + extractSiftTime + matchingTime;
+
+#ifdef DEVICE_TIMER
+  std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
+  std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
+  std::cout << "Matching time = " << matchingTime / 1000 << " ms" << "\n" << std::endl;
+  std::cout << "Total Time = " << totTime / 1000 << " ms" << "\n" << std::endl;
+#endif
+  // data validation
+  auto dataVerficationTimer_start = std::chrono::steady_clock::now();
+  int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
+  auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
+  dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
+  // // Print out and store summary data
+  // // PrintMatchData(siftData1, siftData2, img1);
+  // cv::imwrite("data/limg_pts.pgm", limg);
+
+  // MatchAll(siftData1, siftData2, homography);
+
+  // Free Sift data from device
+  FreeSiftData(siftData1);
+  FreeSiftData(siftData2);
+
+  auto totalProgTimer_end = std::chrono::steady_clock::now();
+  float totalProgramTime = std::chrono::duration<float, std::micro>(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime;
+  std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms" << "\n" << std::endl;
+  return data_verification_flag;
+}
+
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography)
+{
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  int numPts1 = siftData1.numPts;
+  int numPts2 = siftData2.numPts;
+  int numFound = 0;
+#if 1
+  homography[0] = homography[4] = -1.0f;
+  homography[1] = homography[3] = homography[6] = homography[7] = 0.0f;
+  homography[2] = 1279.0f;
+  homography[5] = 959.0f;
+#endif
+  for (int i = 0; i < numPts1; i++)
+  {
+    float *data1 = sift1[i].data;
+    std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl;
+    bool found = false;
+    for (int j = 0; j < numPts2; j++)
+    {
+      float *data2 = sift2[j].data;
+      float sum = 0.0f;
+      for (int k = 0; k < 128; k++)
+        sum += data1[k] * data2[k];
+      float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8];
+      float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos;
+      float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos;
+      float err = dx * dx + dy * dy;
+      if (err < 100.0f) // 100.0
+        found = true;
+      if (err < 100.0f || j == sift1[i].match)
+      { // 100.0
+        if (j == sift1[i].match && err < 100.0f)
+          std::cout << " *";
+        else if (j == sift1[i].match)
+          std::cout << " -";
+        else if (err < 100.0f)
+          std::cout << " +";
+        else
+          std::cout << "  ";
+        std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl;
+      }
+    }
+    std::cout << std::endl;
+    if (found)
+      numFound++;
+  }
+  std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl;
+  std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%%
+  std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%%
+  std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%%
+}
+
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img)
+{
+  int numPts = siftData1.numPts;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  float *h_img = img.h_data;
+  int w = img.width;
+  int h = img.height;
+  std::cout << std::setprecision(3);
+  for (int j = 0; j < numPts; j++)
+  {
+    int k = sift1[j].match;
+    if (sift1[j].match_error < 5)
+    {
+      float dx = sift2[k].xpos - sift1[j].xpos;
+      float dy = sift2[k].ypos - sift1[j].ypos;
+#if 0
+      if (false && sift1[j].xpos>550 && sift1[j].xpos<600) {
+	std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") ";
+	std::cout << j << ": " << "score=" << sift1[j].score << "  ambiguity=" << sift1[j].ambiguity << "  match=" << k << "  ";
+	std::cout << "scale=" << sift1[j].scale << "  ";
+	std::cout << "error=" << (int)sift1[j].match_error << "  ";
+	std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << "  ";
+	std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl;
+      }
+#endif
+#if 1
+      int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy));
+      for (int l = 0; l < len; l++)
+      {
+        int x = (int)(sift1[j].xpos + dx * l / len);
+        int y = (int)(sift1[j].ypos + dy * l / len);
+        h_img[y * w + x] = 255.0f;
+      }
+#endif
+    }
+    int x = (int)(sift1[j].xpos + 0.5);
+    int y = (int)(sift1[j].ypos + 0.5);
+    int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale)))));
+    int p = y * w + x;
+    p += (w + 1);
+    for (int k = 0; k < s; k++)
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f;
+    p -= (w + 1);
+    for (int k = 0; k < s; k++)
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f;
+  }
+  std::cout << std::setprecision(6);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp
new file mode 100644
index 000000000..bdfad49ea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp
@@ -0,0 +1,1540 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include "hip/hip_runtime.h"
+#include <chrono>
+#include <random>
+#include "cudaSift.h"
+#include "cudautils.h"
+
+#define OCML_BASIC_ROUNDED_OPERATIONS
+
+//================= Device matching functions =====================//
+
+__global__ void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2)
+{
+  __shared__ float siftPoint[128];
+  __shared__ float sums[16 * 16];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1 = blockIdx.x;
+  const int p2 = blockIdx.y * 16 + ty;
+  const float *ptr1 = sift1[p1].data;
+  const float *ptr2 = sift2[p2].data;
+  const int i = 16 * ty + tx;
+  if (ty < 8)
+    siftPoint[i] = ptr1[i];
+  __syncthreads();
+  float sum = 0.0f;
+  if (p2 < numPts2)
+    for (int j = 0; j < 8; j++)
+      sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx];
+  sums[i] = sum;
+  __syncthreads();
+  if (tx < 8)
+    sums[i] += sums[i + 8];
+  __syncthreads();
+  if (tx < 4)
+    sums[i] += sums[i + 4];
+  __syncthreads();
+  if (ty == 0)
+  {
+    sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3];
+    corrData[p1 * gridDim.y * 16 + blockIdx.y * 16 + tx] = sum;
+  }
+  __syncthreads();
+}
+
+__global__ void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize)
+{ // For each sift1 point (one per threadIdx.y row) finds the best and second-best value in its corrData row and stores score/ambiguity/match.
+  __shared__ float maxScore[16 * 16];
+  __shared__ float maxScor2[16 * 16];
+  __shared__ int maxIndex[16 * 16];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * 16 + tx; // indexing assumes 16 threads in x
+  int p1 = blockIdx.x * 16 + threadIdx.y;
+  p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); // rows past the end are clamped onto the last point
+  maxScore[idx] = -1.0f;
+  maxScor2[idx] = -1.0f;
+  maxIndex[idx] = -1;
+  __syncthreads();
+  float *corrs = &corrData[p1 * corrWidth];
+  for (int i = tx; i < corrWidth; i += 16) // each lane scans every 16th entry of the row
+  {
+    float val = corrs[i];
+    if (val > maxScore[idx])
+    {
+      maxScor2[idx] = maxScore[idx];
+      maxScore[idx] = val;
+      maxIndex[idx] = i;
+    }
+    else if (val > maxScor2[idx])
+      maxScor2[idx] = val;
+  }
+  __syncthreads();
+  for (int len = 8; len > 0; len /= 2) // merge the 16 per-lane results of a row, halving the window each step
+  {
+    if (tx < len) // only the lower half merges; a wider guard lets lanes rewrite entries other lanes read in this step
+    {
+      float val = maxScore[idx + len];
+      int i = maxIndex[idx + len];
+      if (val > maxScore[idx])
+      {
+        maxScor2[idx] = maxScore[idx];
+        maxScore[idx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > maxScor2[idx])
+        maxScor2[idx] = val;
+      float va2 = maxScor2[idx + len];
+      if (va2 > maxScor2[idx])
+        maxScor2[idx] = va2;
+    }
+    __syncthreads();
+  }
+  if (tx == 0) // lane 0 of each row holds the merged result
+  {
+    sift1[p1].score = maxScore[ty * 16];
+    sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); // second-best over best; 1e-6 avoids a zero divisor
+    sift1[p1].match = maxIndex[ty * 16];
+    sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos;
+  }
+}
+
+// Version based on suggestion by Nicholas Lin
+__global__ void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{ // For each sift1 point (one per threadIdx.y row) scans all sift2 descriptors and stores best score, ambiguity and match.
+  int block_dim = blockDim.x; // blockDim.x == 16
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1 = blockIdx.x * block_dim + ty;
+  const int idx = ty * 16 + tx;
+
+  __shared__ int maxIndex[16 * 16];
+  maxIndex[idx] = 0;
+  __syncthreads();
+
+  float *corrs = NULL; // per-point scratch in corrData: [0,16) best per lane, [16,32) second best per lane
+  if (p1 < numPts1)
+  {
+    corrs = &corrData[p1 * block_dim * 2];
+    corrs[tx] = 0.0f;
+    corrs[tx + 16] = 0.0f;
+    const float *pt1 = sift1[p1].data;
+    for (int p2 = tx; p2 < numPts2; p2 += 16) // each lane handles every 16th sift2 point
+    {
+      float *pt2 = sift2[p2].data;
+      float sum = 0.0f;
+      for (int i = 0; i < 128; i++) // dot product over the 128 descriptor entries
+        sum += pt1[i] * pt2[i];
+      if (sum > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = sum;
+        maxIndex[idx] = p2;
+      }
+      else if (sum > corrs[tx + 16])
+        corrs[tx + 16] = sum;
+    }
+  }
+  __syncthreads();
+  // Merge the 16 per-lane results of each row. The row-validity test is part of the
+  // per-thread condition so that every thread of the block reaches the barrier below;
+  // p1 depends on threadIdx.y, so a barrier under `if (p1 < numPts1)` would be divergent.
+  for (int len = 8; len > 0; len /= 2)
+  {
+    if (p1 < numPts1 && tx < len)
+    {
+      float val = corrs[tx + len];
+      int i = maxIndex[idx + len];
+      if (val > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > corrs[tx + 16])
+        corrs[tx + 16] = val;
+      float va2 = corrs[tx + 16 + len];
+      if (va2 > corrs[tx + 16])
+        corrs[tx + 16] = va2;
+    }
+    __syncthreads();
+  }
+  if (p1 < numPts1 && tx == 0)
+  {
+    sift1[p1].score = corrs[0];
+    sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6);
+    sift1[p1].match = maxIndex[ty << 4];
+    sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos;
+  }
+}
+
+// Thread-block geometry shared by FindMaxCorr2 and FindMaxCorr4.
+#define FMC2W 16 // x extent: lanes that split one 128-float dot product (stride of the j loops)
+#define FMC2H 4  // y extent: rows per block; sizes the per-row maxScore/maxScor2/maxIndex arrays
+
+// Matches one sift1 descriptor per thread block (p1 = blockIdx.x) against every
+// sift2 descriptor and writes the best score, the second-best/best ratio
+// ("ambiguity") and the matched point's index and position into sift1[p1].
+// Indexing implies FMC2W lanes in x cooperating on each dot product and FMC2H
+// rows in y, each row walking sift2 with stride FMC2H.
+__global__ void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float siftPoint[128]; // the block's sift1 descriptor
+  __shared__ float maxScore[FMC2H]; // best score seen by each row
+  __shared__ float maxScor2[FMC2H]; // second-best score seen by each row
+  __shared__ int maxIndex[FMC2H];   // sift2 index of each row's best score
+  const int p1 = blockIdx.x;
+  // p1 depends only on blockIdx, so the whole block leaves together and no
+  // thread is left waiting at a barrier below.
+  if (p1 >= numPts1)
+    return;
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC2W + tx;
+  if (idx < FMC2H)
+  {
+    maxScore[idx] = -1.0f;
+    maxScor2[idx] = -1.0f;
+    maxIndex[idx] = 0;
+  }
+  __syncthreads();
+  const float *pt1 = sift1[p1].data;
+  for (int i = idx; i < 128; i += FMC2W * FMC2H)
+    siftPoint[i] = pt1[i];
+  __syncthreads();
+  for (int p2 = ty; p2 < numPts2; p2 += FMC2H)
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    // Each lane accumulates every FMC2W-th product of the 128-float dot product.
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[j] * pt2[j];
+    // ShiftDown is defined elsewhere; presumably a warp shuffle-down so that
+    // lane 0 ends up with the full sum - TODO confirm.
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j);
+    if (tx == 0)
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  __syncthreads();
+  // Merge the FMC2H per-row results into entry 0 by pairwise reduction.
+  for (int len = FMC2H / 2; len > 0; len /= 2)
+  {
+    if (ty == 0 && tx < len)
+    {
+      float val = maxScore[tx + len];
+      int p2 = maxIndex[tx + len];
+      if (val > maxScore[tx])
+      {
+        maxScor2[tx] = maxScore[tx];
+        maxScore[tx] = val;
+        maxIndex[tx] = p2;
+      }
+      else if (val > maxScor2[tx])
+        maxScor2[tx] = val;
+      // The partner's second-best may still beat the merged second-best.
+      float va2 = maxScor2[tx + len];
+      if (va2 > maxScor2[tx])
+        maxScor2[tx] = va2;
+    }
+    __syncthreads();
+  }
+  if (ty == 0 && tx == 0)
+  {
+    sift1[p1].score = maxScore[0];
+    sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); // epsilon guards the division
+    sift1[p1].match = maxIndex[0];
+    // NOTE(review): maxIndex stays 0 when numPts2 == 0, so sift2[0] is read even then.
+    sift1[p1].match_xpos = sift2[maxIndex[0]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[0]].ypos;
+  }
+}
+
+// Matches FMC2H sift1 descriptors per block (row ty handles point
+// blockIdx.x * FMC2H + ty) against every sift2 descriptor, and writes the best
+// score, the second-best/best ratio ("ambiguity") and the matched point's
+// index and position into sift1[p1].
+//
+// Rows whose point index is >= numPts1 (last block when numPts1 is not a
+// multiple of FMC2H) neither read nor write sift1; they still run the loops so
+// that every thread reaches the barriers and the ShiftDown calls.
+__global__ void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float siftPoint[128 * FMC2H]; // one 128-float sift1 descriptor per row
+  __shared__ float maxScore[FMC2H];        // best score per row
+  __shared__ float maxScor2[FMC2H];        // second-best score per row
+  __shared__ int maxIndex[FMC2H];          // sift2 index of the best score per row
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  if (tx == 0)
+  {
+    maxScore[ty] = -1.0f;
+    maxScor2[ty] = -1.0f;
+    maxIndex[ty] = 0;
+  }
+  const int p1 = blockIdx.x * FMC2H + ty;
+  const bool valid = p1 < numPts1;
+  // Out-of-range rows stage zeros instead of reading past the end of sift1.
+  for (int j = tx; j < 128; j += FMC2W)
+    siftPoint[128 * ty + j] = valid ? sift1[p1].data[j] : 0.0f;
+  __syncthreads();
+  for (int p2 = 0; p2 < numPts2; p2++)
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    // Each lane accumulates every FMC2W-th product of the 128-float dot product.
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[128 * ty + j] * pt2[j];
+    // ShiftDown is defined elsewhere; presumably a warp shuffle-down so that
+    // lane 0 ends up with the full sum - TODO confirm.
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j);
+    if (tx == 0)
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+  __syncthreads();
+  if (valid && tx == 0)
+  {
+    sift1[p1].score = maxScore[ty];
+    sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); // epsilon guards the division
+    sift1[p1].match = maxIndex[ty];
+    sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos;
+  }
+}
+
+// Copies numPts rows of `width` bytes each from src to dst. Consecutive rows
+// start src_pitch bytes apart in src and dst_pitch bytes apart in dst.
+// The kernel uses no thread or block index, so every launched thread performs
+// the entire copy.
+// NOTE(review): presumably launched with a single thread - confirm at the call site.
+__global__ void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width)
+{
+  const char *d_src = (const char *)src;
+  char *d_dst = (char *)dst;
+
+  for (int i = 0; i < numPts; ++i)
+  {
+    // size_t counter: `width` is size_t, so an int counter would be compared
+    // signed-against-unsigned and would overflow for widths above INT_MAX.
+    for (size_t j = 0; j < width; ++j)
+    {
+      d_dst[j] = d_src[j];
+    }
+    d_src = d_src + src_pitch;
+    d_dst = d_dst + dst_pitch;
+  }
+}
+
+// Resets the match score of sift1 points to zero, one point per thread with
+// 64 points per block. Threads whose index runs past the end are clamped onto
+// the last point, so they simply rewrite the same zero.
+__global__ void
+CleanMatches(SiftPoint *sift1, int numPts1)
+{
+  const int lastPt = numPts1 - 1;
+  const int p1 = min(blockIdx.x * 64 + threadIdx.x, lastPt);
+  SiftPoint *target = &sift1[p1];
+  target->score = 0.0f;
+}
+
+// Tiling constants for FindMaxCorr10.
+#define M7W 32   // sift1 descriptors staged per block (buffer1) and row width of the thread index
+#define M7H 32   // sift2 descriptors staged per tile (buffer2)
+#define M7R 4    // sift2 descriptors scored by each computing thread per tile
+#define NRX 2    // sift1 descriptors scored by each computing thread
+#define NDIM 128 // floats per descriptor (NDIM / 4 float4 elements)
+
+// Tiled matcher: each block stages M7W sift1 descriptors in shared memory once,
+// then streams sift2 through shared memory M7H descriptors at a time. Each
+// computing thread scores NRX sift1 descriptors against M7R sift2 descriptors
+// per tile and tracks best / second-best score and best index in registers.
+// The per-thread results are finally merged per sift1 point and written to
+// sift1 (score, match, match_xpos/ypos, ambiguity = second-best / best).
+__global__ void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float4 buffer1[M7W * NDIM / 4]; // sift1 tile, rows stored rotated (see below)
+  __shared__ float4 buffer2[M7H * NDIM / 4]; // current sift2 tile
+  int tx = threadIdx.x;
+  int ty = threadIdx.y;
+  int bp1 = M7W * blockIdx.x; // first sift1 point handled by this block
+  for (int j = ty; j < M7W; j += M7H / M7R)
+  {
+    int p1 = min(bp1 + j, numPts1 - 1); // clamp so the load stays inside sift1
+    // Row j is stored rotated by j float4 slots; the read further down applies
+    // the same rotation. Presumably this spreads shared-memory accesses across
+    // banks - TODO confirm.
+    for (int d = tx; d < NDIM / 4; d += M7W)
+      buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] = ((float4 *)&sift1[p1].data)[d];
+  }
+
+  // Per-thread running results for its NRX sift1 points. Scores start at 0.0f
+  // and index at -1, so only strictly positive scores register.
+  float max_score[NRX];
+  float sec_score[NRX];
+  int index[NRX];
+  for (int i = 0; i < NRX; i++)
+  {
+    max_score[i] = 0.0f;
+    sec_score[i] = 0.0f;
+    index[i] = -1;
+  }
+
+  int idx = ty * M7W + tx;
+  int ix = idx % (M7W / NRX); // which sift1 pair this thread scores
+  int iy = idx / (M7W / NRX); // which group of M7R sift2 rows this thread scores
+  // NOTE(review): the bound only admits full tiles of M7H points; a trailing
+  // numPts2 % M7H remainder is not visited by this loop - confirm sift2 is padded.
+  for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H)
+  {
+    for (int j = ty; j < M7H; j += M7H / M7R)
+    {
+      int p2 = min(bp2 + j, numPts2 - 1);
+      for (int d = tx; d < NDIM / 4; d += M7W)
+        buffer2[j * NDIM / 4 + d] = ((float4 *)&sift2[p2].data)[d];
+    }
+    __syncthreads();
+
+    // Only the first M7W * M7H / M7R / NRX threads compute; the rest just help load.
+    if (idx < M7W * M7H / M7R / NRX)
+    {
+      float score[M7R][NRX];
+      for (int dy = 0; dy < M7R; dy++)
+        for (int i = 0; i < NRX; i++)
+          score[dy][i] = 0.0f;
+      for (int d = 0; d < NDIM / 4; d++)
+      {
+        float4 v1[NRX];
+        // Undo the per-row rotation applied when buffer1 was filled.
+        for (int i = 0; i < NRX; i++)
+          v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)];
+        for (int dy = 0; dy < M7R; dy++)
+        {
+          float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d];
+          for (int i = 0; i < NRX; i++)
+          {
+            score[dy][i] += v1[i].x * v2.x;
+            score[dy][i] += v1[i].y * v2.y;
+            score[dy][i] += v1[i].z * v2.z;
+            score[dy][i] += v1[i].w * v2.w;
+          }
+        }
+      }
+      for (int dy = 0; dy < M7R; dy++)
+      {
+        for (int i = 0; i < NRX; i++)
+        {
+          if (score[dy][i] > max_score[i])
+          {
+            sec_score[i] = max_score[i];
+            max_score[i] = score[dy][i];
+            index[i] = min(bp2 + M7R * iy + dy, numPts2 - 1);
+          }
+          else if (score[dy][i] > sec_score[i])
+            sec_score[i] = score[dy][i];
+        }
+      }
+    }
+    __syncthreads();
+  }
+  // buffer1 is no longer needed as descriptor storage; reuse it for the
+  // per-thread results: M7H / M7R rows of M7W entries each.
+  // NOTE(review): if the tile loop above runs zero times there is no barrier
+  // between the buffer1 loads and these stores.
+  float *scores1 = (float *)buffer1;
+  float *scores2 = &scores1[M7W * M7H / M7R];
+  int *indices = (int *)&scores2[M7W * M7H / M7R];
+  if (idx < M7W * M7H / M7R / NRX)
+  {
+    for (int i = 0; i < NRX; i++)
+    {
+      scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i];
+      scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i];
+      indices[iy * M7W + (M7W / NRX) * i + ix] = index[i];
+    }
+  }
+  __syncthreads();
+
+  // Row 0 merges the M7H / M7R partial results for sift1 point bp1 + tx.
+  if (ty == 0)
+  {
+    float max_score = scores1[tx];
+    float sec_score = scores2[tx];
+    int index = indices[tx];
+    // Entries that name the same sift2 point as the current best are skipped.
+    // NOTE(review): only row 0's second-best (scores2[tx]) is ever read; the
+    // other rows contribute their best score only.
+    for (int y = 0; y < M7H / M7R; y++)
+      if (index != indices[y * M7W + tx])
+      {
+        if (scores1[y * M7W + tx] > max_score)
+        {
+          sec_score = max(max_score, sec_score);
+          max_score = scores1[y * M7W + tx];
+          index = indices[y * M7W + tx];
+        }
+        else if (scores1[y * M7W + tx] > sec_score)
+          sec_score = scores1[y * M7W + tx];
+      }
+    // NOTE(review): bp1 + tx is not clamped to numPts1 here, and index is still
+    // -1 when no positive score was found, which makes sift2[index] an
+    // out-of-range read - confirm the buffers are padded / inputs guarantee a match.
+    sift1[bp1 + tx].score = max_score;
+    sift1[bp1 + tx].match = index;
+    sift1[bp1 + tx].match_xpos = sift2[index].xpos;
+    sift1[bp1 + tx].match_ypos = sift2[index].ypos;
+    sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f);
+  }
+}
+
+// Tiling constants for FindMaxCorr8 / FindMaxCorr9.
+#define FMC_GH 512 // sift2 points covered by one blockIdx.y slice
+#define FMC_BW 32  // sift1 points per block
+#define FMC_BH 32  // sift2 points per tile
+#define FMC_BD 16  // float4 slots per point in the shared staging buffers
+#define FMC_TW 1   // sift1 points scored per thread
+#define FMC_TH 4   // sift2 points scored per thread
+#define FMC_NW (FMC_BW / FMC_TW) //  32
+#define FMC_NH (FMC_BH / FMC_TH) //   8
+#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps
+
+// Global spin lock (0 = free, 1 = held). FindMaxCorr5..9 take it with atomicCAS
+// and release it with atomicExch around their read-modify-write of sift1.
+__device__ volatile int lock = 0;
+
+// Tiled matcher with double-buffered staging. A block scores FMC_BW sift1
+// points against the slice of sift2 selected by blockIdx.y (FMC_GH points,
+// FMC_BH at a time). Each thread accumulates FMC_TW x FMC_TH dot products.
+// Because several blocks (different blockIdx.y) may update the same sift1
+// point, the final merge into sift1 is done under the global `lock`.
+// The merge compares against the score already stored in sift1, so the stored
+// scores are presumably reset (e.g. by CleanMatches) beforehand - TODO confirm.
+__global__ void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 32*16 = 512 float4: two halves of FMC_BD/2 slots per point
+  __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 32*16 = 512 float4: two halves of FMC_BD/2 slots per point
+  // blksums (32*32 floats) is not a separate array here; it aliases siftParts1 further down
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC_NW + tx;
+  float4 *pts1 = 0, *pts2 = 0;
+  // Only the first FMC_BW threads own a sift1 pointer and do the staging loads.
+  if (idx < FMC_BW)
+  {
+    const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    pts1 = (float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1); // clamp to stay inside sift2
+      pts2 = (float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH];
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+
+    // Prologue: load the first FMC_BD/2 float4 of every descriptor into half 0.
+    if (idx < FMC_BW)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i];
+    if (idx < FMC_BH)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i];
+    __syncthreads();
+
+    // b is the slot offset of the half being filled; XOR with FMC_BD/2 flips
+    // it to the half that was filled in the previous step.
+    int b = FMC_BD / 2;
+    for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2)
+    {
+      // Load the next chunk into half b ...
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i];
+
+      // ... while accumulating from the other half, loaded one step earlier.
+      b ^= FMC_BD / 2;
+      for (int i = 0; i < FMC_BD / 2; i++)
+      {
+        float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+            sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+            sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+            sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+          }
+        }
+      }
+      __syncthreads();
+    }
+
+    // Epilogue: accumulate the last chunk, which the final loop pass loaded.
+    b ^= FMC_BD / 2;
+    for (int i = 0; i < FMC_BD / 2; i++)
+    {
+      float4 v1[FMC_TW];
+      for (int ix = 0; ix < FMC_TW; ix++)
+        v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+      for (int iy = 0; iy < FMC_TH; iy++)
+      {
+        float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+        for (int ix = 0; ix < FMC_TW; ix++)
+        {
+          sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+          sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+          sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+          sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+        }
+      }
+    }
+    __syncthreads();
+
+    // Reuse siftParts1 as a FMC_BH x FMC_BW table of scores (row = sift2 point
+    // within the tile, column = sift1 point within the block).
+    float *blksums = (float *)siftParts1;
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    __syncthreads();
+    // Thread idx < FMC_BW scans column idx and updates the running best / second-best.
+    if (idx < FMC_BW)
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+  // Thread 0 spins until it owns the global lock; the barrier then holds the
+  // rest of the block back until the lock is taken.
+  if (idx == 0)
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads();
+  if (idx < FMC_BW)
+  {
+    // Recover the second-best score already stored for this point from its ambiguity ratio.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      // New best: the previous best becomes a candidate for second-best.
+      maxScor2 = max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  // NOTE(review): there is no __threadfence() before the release - confirm the
+  // sift1 writes above are visible to the next block that takes the lock.
+  if (idx == 0)
+    atomicExch((int *)&lock, 0);
+}
+
+// Tiled matcher, single-buffered. A block scores FMC_BW sift1 points against
+// the slice of sift2 selected by blockIdx.y (FMC_GH points, FMC_BH at a time),
+// staging FMC_BD float4 of every descriptor in shared memory per step. Each
+// thread accumulates FMC_TW x FMC_TH dot products. The final merge into sift1
+// is done under the global `lock` because blocks with different blockIdx.y may
+// update the same sift1 point.
+__global__ void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 32*16 = 512 float4: staged chunk of the sift1 points
+  __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 32*16 = 512 float4: staged chunk of the sift2 points
+  __shared__ float blksums[FMC_BW * FMC_BH];     // 32*32  = 1024 scores of the current tile
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int idx = ty * FMC_NW + tx;
+  float4 *pts1 = 0, *pts2 = 0;
+  // Only the first FMC_BW threads own a sift1 pointer and do the staging loads.
+  if (idx < FMC_BW)
+  {
+    const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    pts1 = (float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1); // clamp to stay inside sift2
+      pts2 = (float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH];
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    // Walk the 32 float4 of a descriptor in chunks of FMC_BD: stage, sync, accumulate, sync.
+    for (int d = 0; d < 32; d += FMC_BD)
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts1[i * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts2[i * FMC_BH + idx] = pts2[d + i];
+      __syncthreads();
+
+      for (int i = 0; i < FMC_BD; i++)
+      {
+        float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+            sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+            sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+            sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+          }
+        }
+      }
+      __syncthreads();
+    }
+    // blksums is a dedicated shared array in this variant (it does not alias siftParts1).
+    // Layout: row = sift2 point within the tile, column = sift1 point within the block.
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+    __syncthreads();
+    // Thread idx < FMC_BW scans column idx and updates the running best / second-best.
+    if (idx < FMC_BW)
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+  // Thread 0 spins until it owns the global lock; the barrier then holds the
+  // rest of the block back until the lock is taken.
+  if (idx == 0)
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads();
+  if (idx < FMC_BW)
+  {
+    // Recover the second-best score already stored for this point from its ambiguity ratio.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      // New best: the previous best becomes a candidate for second-best.
+      maxScor2 = max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  // NOTE(review): there is no __threadfence() before the release - confirm the
+  // sift1 writes above are visible to the next block that takes the lock.
+  if (idx == 0)
+    atomicExch((int *)&lock, 0);
+}
+
+// Tiled matcher: a block scores 16 sift1 points (blockIdx.x * 16 + 0..15)
+// against the 512-point slice of sift2 selected by blockIdx.y, 16 sift2 points
+// per step. Descriptors are staged as float4 in two halves of 16 float4 each;
+// the first 16 / NUM rows of threads each accumulate NUM dot products. The
+// final merge into sift1 is done under the global `lock`.
+// The indexing (17 * tx + ty, 16 * ty + tx) is written for tx, ty in 0..15.
+__global__ void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float siftParts1[17 * 64]; // sift1 chunk as float4, row stride 17 (one extra slot of padding)
+  __shared__ float siftParts2[16 * 64]; // sift2 chunk as float4, row stride 16
+  float4 *pts1 = (float4 *)siftParts1;
+  float4 *pts2 = (float4 *)siftParts2;
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); // sift1 point this row loads (clamped)
+  const float4 *p1l4 = (float4 *)sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512 / 16; k++)
+  {
+    const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); // sift2 point this row loads (clamped)
+    const float4 *p2l4 = (float4 *)sift2[p2l].data;
+#define NUM 4 // sift2 points scored per computing thread
+    float sum[NUM];
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sum[l] = 0.0f;
+    __syncthreads();
+    // Two passes of 16 float4 cover the 32 float4 (128 floats) of a descriptor.
+    for (int i = 0; i < 2; i++)
+    {
+      pts1[17 * tx + ty] = p1l4[i * 16 + tx]; // stored transposed: slot tx, point ty
+      pts2[16 * ty + tx] = p2l4[i * 16 + tx]; // stored as-is: point ty, slot tx
+      __syncthreads();
+      if (ty < (16 / NUM))
+      {
+#pragma unroll
+        for (int j = 0; j < 16; j++)
+        {
+          float4 p1v = pts1[17 * j + tx]; // slot j of sift1 point tx
+#pragma unroll
+          for (int l = 0; l < NUM; l++)
+          {
+            float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; // slot j of sift2 point ty + l * (16 / NUM)
+            sum[l] += p1v.x * p2v.x;
+            sum[l] += p1v.y * p2v.y;
+            sum[l] += p1v.z * p2v.z;
+            sum[l] += p1v.w * p2v.w;
+          }
+        }
+      }
+      __syncthreads();
+    }
+    // Reuse siftParts1 as a 16 x 16 score table: row = sift2 point, column = sift1 point.
+    float *sums = siftParts1;
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l];
+    __syncthreads();
+    // Row 0: thread tx scans column tx and updates the running best / second-best.
+    if (ty == 0)
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+  // Thread (0,0) spins until it owns the global lock; the barrier then holds
+  // the rest of the block back until the lock is taken.
+  if (tx == 0 && ty == 0)
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads();
+  if (ty == 0)
+  {
+    // Recover the second-best score already stored for this point from its ambiguity ratio.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      // New best: the previous best becomes a candidate for second-best.
+      maxScor2 = max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  // NOTE(review): there is no __threadfence() before the release - confirm the
+  // sift1 writes above are visible to the next block that takes the lock.
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0);
+}
+
+// Matcher that keeps each thread's float4 slice of a sift1 descriptor in a
+// register: row ty handles sift1 point blockIdx.x * 16 + ty and lane tx holds
+// float4 number tx of it. sift2 points (the 512-point slice selected by
+// blockIdx.y) are staged 16 at a time in shared memory; the per-lane partial
+// products are summed with ShiftDown. The final merge into sift1 is done under
+// the global `lock`.
+// The float4 indexing (32 * ty + tx) and the ShiftDown offsets 16..1 are
+// written for 32 lanes in x; presumably the block is 32 x 16 - TODO confirm.
+__global__ void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  // sift1 is not staged in shared memory in this variant (see part1 below)
+  __shared__ float siftParts2[128 * 16]; // 16 sift2 descriptors of 128 floats each
+  __shared__ float sums[16 * 16];        // score table: row = sift2 point, column = sift1 point
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); // clamp to stay inside sift1
+  float *pt1l = sift1[p1l].data;
+  float4 part1 = reinterpret_cast<float4 *>(pt1l)[tx];
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512; k += 16)
+  {
+    const int p2l = min(blockIdx.y * 512 + k + ty, numPts2 - 1); // clamp to stay inside sift2
+    float *pt2l = sift2[p2l].data;
+    reinterpret_cast<float4 *>(siftParts2)[32 * ty + tx] = reinterpret_cast<float4 *>(pt2l)[tx];
+    __syncthreads();
+    for (int i = 0; i < 16; i++)
+    {
+      float4 part2 = reinterpret_cast<float4 *>(siftParts2)[32 * i + tx];
+      float sum = part1.x * part2.x + part1.y * part2.y + part1.z * part2.z + part1.w * part2.w;
+      // ShiftDown is defined elsewhere; presumably a warp shuffle-down so that
+      // lane 0 ends up with the sum over all 32 lanes - TODO confirm.
+      sum += ShiftDown(sum, 16);
+      sum += ShiftDown(sum, 8);
+      sum += ShiftDown(sum, 4);
+      sum += ShiftDown(sum, 2);
+      sum += ShiftDown(sum, 1);
+      if (tx == 0)
+        sums[16 * i + ty] = sum;
+    }
+    __syncthreads();
+    // Row 0: thread tx < 16 scans column tx and updates the running best / second-best.
+    if (ty == 0 && tx < 16)
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  // Thread (0,0) spins until it owns the global lock; the barrier then holds
+  // the rest of the block back until the lock is taken.
+  if (tx == 0 && ty == 0)
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads();
+  if (ty == 0 && tx < 16)
+  {
+    const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+    // Recover the second-best score already stored for this point from its ambiguity ratio.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      // New best: the previous best becomes a candidate for second-best.
+      maxScor2 = max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  // NOTE(review): there is no __threadfence() before the release - confirm the
+  // sift1 writes above are visible to the next block that takes the lock.
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0);
+}
+
+// Tiled matcher on scalar floats: a block scores 16 sift1 points
+// (blockIdx.x * 16 + 0..15) against the 512-point slice of sift2 selected by
+// blockIdx.y, 16 sift2 points per step. Thread (tx, ty) computes the dot
+// product of sift1 point tx with sift2 point ty, 16 floats at a time. The
+// final merge into sift1 is done under the global `lock`.
+// The indexing (17 * tx + ty, 16 * ty + tx) is written for tx, ty in 0..15.
+__global__ void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+  __shared__ float siftParts1[17 * 16]; // sift1 chunk, transposed: row = feature, column = point
+  __shared__ float siftParts2[17 * 16]; // sift2 chunk, same layout; row stride 17 adds one padding slot
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); // sift1 point this row loads (clamped)
+  const float *pt1l = sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512 / 16; k++)
+  {
+    const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); // sift2 point this row loads (clamped)
+    const float *pt2l = sift2[p2l].data;
+    float sum = 0.0f;
+    // Eight chunks of 16 floats cover the 128-float descriptor.
+    for (int i = 0; i < 8; i++)
+    {
+      siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose
+      siftParts2[17 * tx + ty] = pt2l[i * 16 + tx];
+      __syncthreads();
+      for (int j = 0; j < 16; j++)
+        sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; // feature j of sift1 point tx and sift2 point ty
+      __syncthreads();
+    }
+    // Reuse siftParts1 as a 16 x 16 score table: row = sift2 point, column = sift1 point.
+    float *sums = siftParts1;
+    sums[16 * ty + tx] = sum;
+    __syncthreads();
+    // Row 0: thread tx scans column tx and updates the running best / second-best.
+    if (ty == 0)
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1);
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+    __syncthreads();
+  }
+  const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+  // Thread (0,0) spins until it owns the global lock; the barrier then holds
+  // the rest of the block back until the lock is taken.
+  if (tx == 0 && ty == 0)
+    while (atomicCAS((int *)&lock, 0, 1) != 0)
+      ;
+  __syncthreads();
+  if (ty == 0)
+  {
+    // Recover the second-best score already stored for this point from its ambiguity ratio.
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+    if (maxScore > sift1[p1].score)
+    {
+      // New best: the previous best becomes a candidate for second-best.
+      maxScor2 = max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  __syncthreads();
+  // NOTE(review): there is no __threadfence() before the release - confirm the
+  // sift1 writes above are visible to the next block that takes the lock.
+  if (tx == 0 && ty == 0)
+    atomicExch((int *)&lock, 0);
+}
+
+// Inverts a size x size matrix by LU decomposition with scaled partial
+// pivoting, then solves for each column of the identity by forward and back
+// substitution.
+//   elem - input matrix; overwritten in place with its row-permuted LU factors
+//   res  - receives the inverse, filled one column per solve
+// A pivot that is exactly zero is replaced by 1e-16, so a singular input
+// produces very large values instead of a division by zero.
+template <int size>
+__device__ void InvertMatrix(float elem[size][size], float res[size][size])
+{
+  int perm[size];       // perm[c]: row exchanged into position c while factoring
+  float rhs[size];      // right-hand side, then solution, of the current column solve
+  float rowScale[size]; // reciprocal of the largest magnitude in each row
+  for (int r = 0; r < size; r++)
+    perm[r] = 0;
+  int pivotRow = 0;
+  float parity = 1.0; // flips on every row exchange; kept for parity with the factorization, not read afterwards
+
+  // Implicit scaling: remember 1 / max|entry| per row.
+  for (int r = 0; r < size; r++)
+  {
+    float largest = 0.0;
+    for (int c = 0; c < size; c++)
+    {
+      float mag = fabs(elem[r][c]);
+      if (mag > largest)
+        largest = mag;
+    }
+    rowScale[r] = (largest > 0.0) ? 1.0 / largest : 1e16;
+  }
+
+  // Factor column by column.
+  for (int c = 0; c < size; c++)
+  {
+    // Entries above the diagonal of column c.
+    for (int r = 0; r < c; r++)
+    {
+      float acc = elem[r][c];
+      for (int k = 0; k < r; k++)
+        acc -= elem[r][k] * elem[k][c];
+      elem[r][c] = acc;
+    }
+    // Entries on and below the diagonal; track the best scaled pivot candidate.
+    float best = 0.0;
+    for (int r = c; r < size; r++)
+    {
+      float acc = elem[r][c];
+      for (int k = 0; k < c; k++)
+        acc -= elem[r][k] * elem[k][c];
+      elem[r][c] = acc;
+      float merit = rowScale[r] * fabs(acc);
+      if (merit >= best)
+      {
+        best = merit;
+        pivotRow = r;
+      }
+    }
+    // Bring the pivot row into position c.
+    if (pivotRow != c)
+    {
+      for (int k = 0; k < size; k++)
+      {
+        float held = elem[pivotRow][k];
+        elem[pivotRow][k] = elem[c][k];
+        elem[c][k] = held;
+      }
+      parity = -parity;
+      rowScale[pivotRow] = rowScale[c];
+    }
+    perm[c] = pivotRow;
+    if (elem[c][c] == 0.0)
+      elem[c][c] = 1e-16;
+    // Divide the sub-diagonal part of the column by the pivot.
+    if (c != (size - 1))
+    {
+      float inv = 1.0 / elem[c][c];
+      for (int r = c + 1; r < size; r++)
+        elem[r][c] *= inv;
+    }
+  }
+
+  // Solve A * x = e_col for every column of the identity.
+  for (int col = 0; col < size; col++)
+  {
+    for (int k = 0; k < size; k++)
+      rhs[k] = 0.0;
+    rhs[col] = 1.0;
+    // Forward substitution, applying the recorded row exchanges on the fly.
+    // firstNonZero skips the leading zeros of the right-hand side.
+    int firstNonZero = -1;
+    for (int r = 0; r < size; r++)
+    {
+      int src = perm[r];
+      float acc = rhs[src];
+      rhs[src] = rhs[r];
+      if (firstNonZero != -1)
+      {
+        for (int k = firstNonZero; k < r; k++)
+          acc -= elem[r][k] * rhs[k];
+      }
+      else if (acc != 0.0)
+        firstNonZero = r;
+      rhs[r] = acc;
+    }
+    // Back substitution.
+    for (int r = size - 1; r >= 0; r--)
+    {
+      float acc = rhs[r];
+      for (int k = r + 1; k < size; k++)
+        acc -= elem[r][k] * rhs[k];
+      rhs[r] = acc / elem[r][r];
+    }
+    for (int r = 0; r < size; r++)
+      res[r][col] = rhs[r];
+  }
+}
+
+// Each thread builds one candidate homography from four point correspondences.
+//   coord   - four planes of numPts floats each: x1, y1, x2, y2
+//   randPts - four planes of numLoops point indices; thread idx uses entry idx of each
+//   homo    - eight planes of numLoops floats; thread idx writes entry idx of each
+// numLoops is the total number of launched threads (blockDim.x * gridDim.x).
+// The four correspondences give an 8x8 linear system whose solution, obtained
+// through the explicit inverse, holds the eight homography coefficients.
+__global__ void ComputeHomographies(float *coord, int *randPts, float *homo,
+                                    int numPts)
+{
+  float sys[8][8], sysInv[8][8];
+  float rhs[8];
+  const int numLoops = blockDim.x * gridDim.x;
+  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+
+  // Two equations (one for x2, one for y2) per sampled correspondence.
+  for (int s = 0; s < 4; s++)
+  {
+    const int pt = randPts[s * numLoops + idx];
+    const float x1 = coord[pt + 0 * numPts];
+    const float y1 = coord[pt + 1 * numPts];
+    const float x2 = coord[pt + 2 * numPts];
+    const float y2 = coord[pt + 3 * numPts];
+
+    float *eqX = sys[2 * s + 0];
+    float *eqY = sys[2 * s + 1];
+
+    eqX[0] = x1;
+    eqX[1] = y1;
+    eqX[2] = 1.0f;
+    eqX[3] = 0.0f;
+    eqX[4] = 0.0f;
+    eqX[5] = 0.0f;
+    eqX[6] = -x2 * x1;
+    eqX[7] = -x2 * y1;
+
+    eqY[0] = 0.0f;
+    eqY[1] = 0.0f;
+    eqY[2] = 0.0f;
+    eqY[3] = x1;
+    eqY[4] = y1;
+    eqY[5] = 1.0f;
+    eqY[6] = -y2 * x1;
+    eqY[7] = -y2 * y1;
+
+    rhs[2 * s + 0] = x2;
+    rhs[2 * s + 1] = y2;
+  }
+
+  InvertMatrix<8>(sys, sysInv);
+  __syncthreads();
+
+  // Coefficient r = row r of the inverse times the right-hand side.
+  for (int r = 0; r < 8; r++)
+  {
+    float acc = 0.0f;
+    for (int c = 0; c < 8; c++)
+      acc += sysInv[r][c] * rhs[c];
+    homo[r * numLoops + idx] = acc;
+  }
+  __syncthreads();
+}
+
+// Block geometry for TestHomographies.
+#define TESTHOMO_TESTS 16 // number of tests per block,  alt. 32, 32 (stride of the point loop and width of each row of counters)
+#define TESTHOMO_LOOPS 16 // number of loops per block,  alt.  8, 16 (homographies staged in shared memory per block)
+
+// Counts, for every candidate homography, how many point correspondences it
+// maps within the error threshold.
+//   d_coord  - four planes of numPts floats each: x1, y1, x2, y2
+//   d_homo   - eight planes of numLoops coefficients (numLoops = blockDim.y * gridDim.y)
+//   d_counts - receives one count per homography
+//   thresh2  - error threshold, compared against the squared reprojection
+//              error after scaling by the squared projective denominator
+// A block stages TESTHOMO_LOOPS homographies; row ty tests homography ty of
+// the block and its lanes split the points with stride TESTHOMO_TESTS.
+// All products use __fmul_rn (round-to-nearest); an earlier variant of the
+// loop used __fmul_rz in the same places.
+__global__ void TestHomographies(float *d_coord, float *d_homo,
+                                 int *d_counts, int numPts, float thresh2)
+{
+  __shared__ float homo[8 * TESTHOMO_LOOPS];
+  __shared__ int cnts[TESTHOMO_TESTS * TESTHOMO_LOOPS];
+  const int tx = threadIdx.x;
+  const int ty = threadIdx.y;
+  const int numLoops = blockDim.y * gridDim.y;
+  const int idx = blockIdx.y * blockDim.y + tx;
+
+  // Stage the block's homographies: thread (tx, ty) fetches coefficient ty of homography tx.
+  if (ty < 8 && tx < TESTHOMO_LOOPS)
+    homo[tx * 8 + ty] = d_homo[idx + ty * numLoops];
+  __syncthreads();
+
+  // Private copy of the eight coefficients of this row's homography.
+  float h[8];
+  for (int c = 0; c < 8; c++)
+    h[c] = homo[ty * 8 + c];
+
+  int inliers = 0;
+  for (int pt = tx; pt < numPts; pt += TESTHOMO_TESTS)
+  {
+    const float x1 = d_coord[pt + 0 * numPts];
+    const float y1 = d_coord[pt + 1 * numPts];
+    const float x2 = d_coord[pt + 2 * numPts];
+    const float y2 = d_coord[pt + 3 * numPts];
+    const float nomx = __fmul_rn(h[0], x1) + __fmul_rn(h[1], y1) + h[2];
+    const float nomy = __fmul_rn(h[3], x1) + __fmul_rn(h[4], y1) + h[5];
+    const float deno = __fmul_rn(h[6], x1) + __fmul_rn(h[7], y1) + 1.0f;
+    // Errors are kept multiplied by deno, which avoids a division.
+    const float errx = __fmul_rn(x2, deno) - nomx;
+    const float erry = __fmul_rn(y2, deno) - nomy;
+    const float err2 = __fmul_rn(errx, errx) + __fmul_rn(erry, erry);
+    const float limit = __fmul_rn(thresh2, __fmul_rn(deno, deno));
+    if (err2 < limit)
+      inliers++;
+  }
+
+  // Sum the per-lane counts of each row by pairwise reduction.
+  int *rowCnts = &cnts[TESTHOMO_TESTS * ty];
+  rowCnts[tx] = inliers;
+  __syncthreads();
+  for (int len = TESTHOMO_TESTS / 2; len > 0; len /= 2)
+  {
+    if (tx < len)
+      rowCnts[tx] += rowCnts[tx + len];
+    __syncthreads();
+  }
+
+  // Row 0 writes out the totals: lane tx stores the count of homography tx.
+  if (ty == 0 && tx < TESTHOMO_LOOPS)
+    d_counts[idx] = cnts[TESTHOMO_TESTS * tx];
+  __syncthreads();
+}
+
+//================= Host matching functions =====================//
+
+// Estimates a homography from the matched SIFT points in `data` by sampling random 4-point sets.
+//   data       - points whose score, ambiguity, position and match_xpos/match_ypos fields are read
+//   homography - out: 9 floats, reset to identity; elements 0..7 are overwritten with the best candidate
+//   numMatches - out: highest per-candidate count reported by TestHomographies (0 if nothing is estimated)
+//   matchTime  - in/out: accumulates device alloc/copy/kernel time in microseconds (DEVICE_TIMER builds only)
+//   numLoops   - number of random candidates; rounded up to a multiple of 16
+//   minScore, maxAmbiguity - a point is eligible only if score > minScore and ambiguity < maxAmbiguity
+//   thresh     - error tolerance; passed squared to TestHomographies
+// Always returns 0.0; timing is reported through matchTime only.
+double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+  *numMatches = 0;
+  homography[0] = homography[4] = homography[8] = 1.0f;
+  homography[1] = homography[2] = homography[3] = 0.0f;
+  homography[5] = homography[6] = homography[7] = 0.0f;
+#ifdef MANAGEDMEM
+  SiftPoint *d_sift = data.m_data;
+#else
+  if (data.d_data == NULL)
+    return 0.0f;
+  SiftPoint *d_sift = data.d_data;
+#endif
+  numLoops = iDivUp(numLoops, 16) * 16;
+  int numPts = data.numPts;
+  if (numPts < 8)
+    return 0.0f;
+  int numPtsUp = iDivUp(numPts, 16) * 16;
+  float *d_coord, *d_homo;
+  int *d_randPts, *h_randPts;
+  int randSize = 4 * sizeof(int) * numLoops;
+  int szFl = sizeof(float);
+  int szPt = sizeof(SiftPoint);
+  // Bytes from `field` (a member of d_sift[0]) to the end of point numPts-1; keeps the strided copies in bounds.
+  auto bytesFrom = [&](const void *field) { return (size_t)szPt * numPts - (size_t)((const char *)field - (const char *)d_sift); };
+
+#ifdef DEVICE_TIMER
+  auto start_malloc_1 = std::chrono::steady_clock::now();
+#endif
+  safeCall(hipMalloc((void **)&d_coord, 4 * sizeof(float) * numPtsUp));
+  safeCall(hipMalloc((void **)&d_randPts, randSize));
+  safeCall(hipMalloc((void **)&d_homo, 8 * sizeof(float) * numLoops));
+#ifdef DEVICE_TIMER
+  auto stop_malloc_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_malloc_1 - start_malloc_1).count();
+#endif
+
+  h_randPts = (int *)malloc(randSize);
+  float *h_scores = (float *)malloc(sizeof(float) * numPtsUp);
+  float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp);
+
+  // temp variables are for host memory allocation, device data is transferred to temp
+  float *temp1 = (float *)malloc(szPt * numPtsUp);
+  float *temp2 = (float *)malloc(szPt * numPtsUp);
+
+#ifdef DEVICE_TIMER
+  auto start_memcpy_1 = std::chrono::steady_clock::now();
+#endif
+  safeCall(hipMemcpy(temp1, &d_sift[0].score, bytesFrom(&d_sift[0].score), hipMemcpyDeviceToHost));
+  safeCall(hipMemcpy(temp2, &d_sift[0].ambiguity, bytesFrom(&d_sift[0].ambiguity), hipMemcpyDeviceToHost));
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto stop_memcpy_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_1 - start_memcpy_1).count();
+#endif
+
+  char *src_score = (char *)temp1;
+  char *src_ambiguity = (char *)temp2;
+  char *dst_score = (char *)h_scores;
+  char *dst_ambiguity = (char *)h_ambiguities;
+
+  for (int i = 0; i < numPts; ++i)
+  {
+    memcpy(dst_score, src_score, szFl);
+    memcpy(dst_ambiguity, src_ambiguity, szFl);
+
+    src_score += szPt;
+    src_ambiguity += szPt;
+    dst_score += szFl;
+    dst_ambiguity += szFl;
+  }
+  // The host staging copies are not used past this point (they were previously leaked).
+  free(temp1);
+  free(temp2);
+
+  int *validPts = (int *)malloc(sizeof(int) * numPts);
+  int numValid = 0;
+  for (int i = 0; i < numPts; i++)
+  {
+    if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity)
+      validPts[numValid++] = i;
+  }
+  free(h_scores);
+  free(h_ambiguities);
+  if (numValid >= 8)
+  {
+    std::random_device rd;
+    uint32_t seed = rd();
+    std::mt19937 rnd(seed);  // mersenne_twister_engine
+    std::uniform_int_distribution<uint32_t> dis(0, UINT32_MAX);
+    for (int i = 0; i < numLoops; i++)
+    {
+      int p1 = dis(rnd) % numValid;
+      int p2 = dis(rnd) % numValid;
+      int p3 = dis(rnd) % numValid;
+      int p4 = dis(rnd) % numValid;
+      while (p2 == p1)
+        p2 = dis(rnd) % numValid;
+      while (p3 == p1 || p3 == p2)
+        p3 = dis(rnd) % numValid;
+      while (p4 == p1 || p4 == p2 || p4 == p3)
+        p4 = dis(rnd) % numValid;
+      h_randPts[i + 0 * numLoops] = validPts[p1];
+      h_randPts[i + 1 * numLoops] = validPts[p2];
+      h_randPts[i + 2 * numLoops] = validPts[p3];
+      h_randPts[i + 3 * numLoops] = validPts[p4];
+    }
+
+    float *temp3, *temp4, *temp5, *temp6;
+#ifdef DEVICE_TIMER
+    auto start_malloc_2 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMalloc((void **)&temp3, szPt * numPtsUp));
+    safeCall(hipMalloc((void **)&temp4, szPt * numPtsUp));
+    safeCall(hipMalloc((void **)&temp5, szPt * numPtsUp));
+    safeCall(hipMalloc((void **)&temp6, szPt * numPtsUp));
+#ifdef DEVICE_TIMER
+    auto stop_malloc_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_malloc_2 - start_malloc_2).count();
+    auto start_memcpy_2 = std::chrono::steady_clock::now();
+#endif
+
+    safeCall(hipMemcpy(d_randPts, h_randPts, randSize, hipMemcpyHostToDevice));
+    hipDeviceSynchronize();
+    safeCall(hipMemcpy(temp3, &d_sift[0].xpos, bytesFrom(&d_sift[0].xpos), hipMemcpyDeviceToDevice));
+    safeCall(hipMemcpy(temp4, &d_sift[0].ypos, bytesFrom(&d_sift[0].ypos), hipMemcpyDeviceToDevice));
+    safeCall(hipMemcpy(temp5, &d_sift[0].match_xpos, bytesFrom(&d_sift[0].match_xpos), hipMemcpyDeviceToDevice));
+    safeCall(hipMemcpy(temp6, &d_sift[0].match_ypos, bytesFrom(&d_sift[0].match_ypos), hipMemcpyDeviceToDevice));
+    hipDeviceSynchronize();
+
+    // kernel call to transfer memory from device to device
+    hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(hipDeviceSynchronize());
+    hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(hipDeviceSynchronize());
+    hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(hipDeviceSynchronize());
+    hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl);
+    safeCall(hipDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_2 - start_memcpy_2).count();
+    auto start_kernel_1 = std::chrono::steady_clock::now();
+#endif
+    hipLaunchKernelGGL(ComputeHomographies, numLoops / 16, 16, 0, 0, d_coord, d_randPts, d_homo, numPtsUp);
+    safeCall(hipDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_kernel_1 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count();
+#endif
+    checkMsg("ComputeHomographies() execution failed\n");
+    dim3 blocks(1, numLoops / TESTHOMO_LOOPS);
+    dim3 threads(TESTHOMO_TESTS, TESTHOMO_LOOPS);
+
+#ifdef DEVICE_TIMER
+    auto start_kernel_2 = std::chrono::steady_clock::now();
+#endif
+    hipLaunchKernelGGL(TestHomographies, blocks, threads, 0, 0, d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh);
+    safeCall(hipDeviceSynchronize());
+#ifdef DEVICE_TIMER
+    auto stop_kernel_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count();
+#endif
+    checkMsg("TestHomographies() execution failed\n");
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy_3 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy(h_randPts, d_randPts, sizeof(int) * numLoops, hipMemcpyDeviceToHost));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_3 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_3 - start_memcpy_3).count();
+#endif
+
+    int maxIndex = -1, maxCount = -1;
+    for (int i = 0; i < numLoops; i++)
+      if (h_randPts[i] > maxCount)
+      {
+        maxCount = h_randPts[i];
+        maxIndex = i;
+      }
+
+    *numMatches = maxCount;
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy_4 = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy2D(homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8, hipMemcpyDeviceToHost));
+    hipDeviceSynchronize();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_4 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_4 - start_memcpy_4).count();
+#endif
+    // Release the per-call device staging buffers (previously leaked on every call).
+    safeCall(hipFree(temp3));
+    safeCall(hipFree(temp4));
+    safeCall(hipFree(temp5));
+    safeCall(hipFree(temp6));
+  }
+
+  free(validPts);
+  free(h_randPts);
+
+  safeCall(hipFree(d_homo));
+  safeCall(hipFree(d_randPts));
+  safeCall(hipFree(d_coord));
+  return 0.0;
+}
+
+// Matches the SIFT points of data1 against data2 on the device.
+//   data1, data2 - point sets; returns early if either is empty or (non-managed builds) lacks device data
+//   matchTime    - in/out: accumulates kernel and copy time in microseconds (DEVICE_TIMER builds only)
+// Runs CleanMatches and then FindMaxCorr10 (mode is fixed to 10; other modes and the `#if 0` sections are
+// retained alternatives), then copies data1's device points back to its host mirror when one exists.
+// Returns the caller's accumulated matchTime; this call's own share is tracked in matchSiftDataTime only.
+double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime)
+{
+  float matchSiftDataTime = 0.0;
+  int numPts1 = data1.numPts;
+  int numPts2 = data2.numPts;
+  if (!numPts1 || !numPts2)
+    return 0.0;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = data1.m_data;
+  SiftPoint *sift2 = data2.m_data;
+#else
+  if (data1.d_data == NULL || data2.d_data == NULL)
+    return 0.0f;
+  SiftPoint *sift1 = data1.d_data;
+  SiftPoint *sift2 = data2.d_data;
+#endif
+
+// Original version with correlation and maximization in two different kernels
+// Global memory requirement: O(N^2)
+#if 0
+  float *d_corrData;
+  int corrWidth = iDivUp(numPts2, 16)*16;
+  int corrSize = sizeof(float)*numPts1*corrWidth;
+  safeCall(hipMalloc((void **)&d_corrData, corrSize));
+#if 0 // K40c 10.9ms, 1080 Ti 3.8ms
+  dim3 blocks1(numPts1, iDivUp(numPts2, 16));
+  dim3 threads1(16, 16); // each block: 1 points x 16 points
+  hipLaunchKernelGGL(MatchSiftPoints, blocks1, threads1, 0, 0, sift1, sift2, d_corrData, numPts1, numPts2);
+#else // K40c 7.6ms, 1080 Ti 1.4ms
+  dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16));
+  dim3 threads(16, 16); // each block: 16 points x 16 points
+  // hipLaunchKernelGGL(MatchSiftPoints2, blocks, threads, 0, 0, sift1, sift2, d_corrData, numPts1, numPts2);
+#endif
+  safeCall(hipDeviceSynchronize());
+  dim3 blocksMax(iDivUp(numPts1, 16));
+  dim3 threadsMax(16, 16);
+  hipLaunchKernelGGL(FindMaxCorr, blocksMax, threadsMax, 0, 0, d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint));
+  safeCall(hipDeviceSynchronize());
+  checkMsg("FindMaxCorr() execution failed\n");
+  safeCall(hipFree(d_corrData));
+#endif
+
+// Version suggested by Nicholas Lin with combined correlation and maximization
+// Global memory requirement: O(N)
+#if 0 // K40c 51.2ms, 1080 Ti 9.6ms
+  int block_dim = 16;
+  float *d_corrData;
+  int corrSize = numPts1 * block_dim * 2;
+  safeCall(hipMalloc((void **)&d_corrData, sizeof(float) * corrSize));
+  dim3 blocks(iDivUp(numPts1, block_dim));
+  dim3 threads(block_dim, block_dim);
+  hipLaunchKernelGGL(FindMaxCorr3, blocks, threads , 0, 0, d_corrData, sift1, sift2, numPts1, numPts2);
+  safeCall(hipDeviceSynchronize());
+  checkMsg("FindMaxCorr3() execution failed\n");
+  safeCall(hipFree(d_corrData));
+#endif
+
+// Combined version with no global memory requirement using one 1 point per block
+#if 0 // K40c 8.9ms, 1080 Ti 2.1ms, 2080 Ti 1.0ms
+  dim3 blocksMax(numPts1);
+  dim3 threadsMax(FMC2W, FMC2H);
+  hipLaunchKernelGGL(FindMaxCorr2, blocksMax, threadsMax, 0, 0, sift1, sift2, numPts1, numPts2);
+  safeCall(hipDeviceSynchronize());
+  checkMsg("FindMaxCorr2() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using one FMC2H points per block
+#if 0 // K40c 9.2ms, 1080 Ti 1.3ms, 2080 Ti 1.1ms
+  dim3 blocksMax2(iDivUp(numPts1, FMC2H));
+  dim3 threadsMax2(FMC2W, FMC2H);
+  hipLaunchKernelGGL(FindMaxCorr4, blocksMax2, threadsMax2, 0, 0, sift1, sift2, numPts1, numPts2);
+  safeCall(hipDeviceSynchronize());
+  checkMsg("FindMaxCorr4() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using global locks
+#if 1
+  dim3 blocksMax3(iDivUp(numPts1, 16), iDivUp(numPts2, 512));
+  dim3 threadsMax3(16, 16);
+#ifdef DEVICE_TIMER
+  auto start_kernel1 = std::chrono::steady_clock::now();
+#endif
+  hipLaunchKernelGGL(CleanMatches, iDivUp(numPts1, 64), 64, 0, 0, sift1, numPts1);
+  hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+  auto stop_kernel1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+  matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+#endif
+  int mode = 10;
+  if (mode == 5)
+    hipLaunchKernelGGL(FindMaxCorr5, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+  else if (mode == 6)
+  {
+    threadsMax3 = dim3(32, 16);
+    hipLaunchKernelGGL(FindMaxCorr6, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+  }
+  else if (mode == 7)
+    hipLaunchKernelGGL(FindMaxCorr7, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+  else if (mode == 8)
+  {
+    blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+    threadsMax3 = dim3(FMC_NW, FMC_NH);
+    hipLaunchKernelGGL(FindMaxCorr8, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+  }
+  else if (mode == 9)
+  {
+    blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH));
+    threadsMax3 = dim3(FMC_NW, FMC_NH);
+    hipLaunchKernelGGL(FindMaxCorr9, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+  }
+  else if (mode == 10)
+  {
+    blocksMax3 = dim3(iDivUp(numPts1, M7W));
+    threadsMax3 = dim3(M7W, M7H / M7R);
+#ifdef DEVICE_TIMER
+    auto start_kernel2 = std::chrono::steady_clock::now();
+#endif
+    hipLaunchKernelGGL(FindMaxCorr10, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2);
+    hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+    auto stop_kernel2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+#endif
+  }
+  checkMsg("FindMaxCorr10() execution failed\n");
+#endif
+  if (data1.h_data != NULL)
+  {
+    float *h_ptr = &data1.h_data[0].score;
+    float *d_ptr = &data1.d_data[0].score;
+    // The copy starts at point 0's `score`, so it must stop that many bytes short of
+    // numPts whole points; otherwise it runs past the end of both the host and device arrays.
+    size_t copyBytes = sizeof(SiftPoint) * data1.numPts - (size_t)((char *)h_ptr - (char *)data1.h_data);
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    safeCall(hipMemcpy(h_ptr, d_ptr, copyBytes, hipMemcpyDeviceToHost));
+    hipDeviceSynchronize();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  }
+  return matchTime;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/LICENSE b/third-party-programs/Velocity-Bench/cudaSift/LICENSE
new file mode 100644
index 000000000..bee8393e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/LICENSE
@@ -0,0 +1,21 @@
+Modifications Copyright (C) 2023 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+SPDX-License-Identifier: MIT
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/README.md b/third-party-programs/Velocity-Bench/cudaSift/README.md
new file mode 100755
index 000000000..f0e66b284
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/README.md
@@ -0,0 +1,92 @@
+# CudaSift
+CudaSift - SIFT features with SYCL, CUDA & HIP
+
+# Building CudaSift
+**To build cuda version**
+
+mkdir build && cd build
+
+//For A100 Machine
+
+cmake ../ -DUSE_SM=80
+
+//For H100 Machine
+
+cmake ../ -DUSE_SM=90
+
+make
+
+**To build SYCL version**
+
+mkdir build
+
+cd build
+
+//Update the path for OpenCV_DIR (pass -DOpenCV_DIR=/path/to/opencv to cmake if OpenCV is not found)
+
+CXX=icpx cmake ../ -DGPU_AOT=pvc
+
+make -sj
+
+**To build SYCL version on NVIDIA Backend**
+
+source /path/to/clang/
+
+mkdir build && cd build
+
+//For A100 Machine
+
+CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=80 
+
+//For H100 Machine
+
+CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=90
+
+make -sj
+
+**To build SYCL version on AMD Backend**
+
+source /path/to/clang/
+
+mkdir build && cd build
+
+//For MI-100 Machine
+
+CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx908
+
+//For MI-250 Machine
+
+CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx90a
+
+make -sj
+
+**To build HIP version**
+
+mkdir build && cd build
+
+CXX=hipcc cmake ../ -DROCM_PATH=/path/to/rocm 
+For example: CXX=hipcc cmake ../ -DROCM_PATH=/opt/rocm-5.4.3
+
+make -sj
+
+# Running CudaSift
+
+**To run SYCL version**
+
+./cudaSift
+
+**To run SYCL on NVIDIA Backend**
+
+./cudaSift
+
+**To run SYCL on AMD Backend**
+
+ONEAPI_DEVICE_SELECTOR=hip:* ./cudaSift
+
+**To run cuda version**
+
+./cudasift
+
+**To run hip version**
+
+./cudasift
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt
new file mode 100644
index 000000000..536654df9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt
@@ -0,0 +1,153 @@
+#  Modifications Copyright (C) 2023 Intel Corporation
+
+#  Permission is hereby granted, free of charge, to any person obtaining a copy
+#  of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom
+#  the Software is furnished to do so, subject to the following conditions:
+
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+#  OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+#  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+#  OR OTHER DEALINGS IN THE SOFTWARE.
+
+#  SPDX-License-Identifier: MIT
+
+cmake_minimum_required(VERSION 3.10)
+project(cudaSift LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17) # SYCL code requires this
+set(CMAKE_CXX_STANDARD_REQUIRED ON) # Enable modern C++ standards
+set(CMAKE_CXX_EXTENSIONS OFF) # Use -std, not -gnu
+
+option(GPU_AOT "Build AOT for Intel GPU" OFF)
+option(USE_NVIDIA_BACKEND "Build for NVIDIA backend" OFF)
+option(USE_AMDHIP_BACKEND "Build for AMD HIP backend" OFF)
+set(USE_SM "" CACHE STRING "Streaming multiprocessor architecture for the NVIDIA backend, e.g. 80 or 90")
+set(OpenCV_DIR "" CACHE PATH "Directory containing OpenCVConfig.cmake")
+option(DEVICE_TIMER "Build using Device Timer" OFF)
+
+# Find OpenCV, you may need to set OpenCV_DIR variable
+# to the absolute path to the directory containing OpenCVConfig.cmake file
+# via the command line or GUI
+find_package(OpenCV REQUIRED)
+
+# If the package has been found, several variables will
+# be set, you can find the full list with descriptions
+# in the OpenCVConfig.cmake file.
+# Print some message showing some of them
+message(STATUS "OpenCV library status:")
+message(STATUS "    version: ${OpenCV_VERSION}")
+message(STATUS "    libraries: ${OpenCV_LIBS}")
+message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
+
+# Fail early: the NVIDIA branch below builds --cuda-gpu-arch=sm_${USE_SM}, which is invalid without USE_SM
+if(USE_NVIDIA_BACKEND AND NOT GPU_AOT AND NOT USE_SM)
+    message(FATAL_ERROR "USE_NVIDIA_BACKEND requires -DUSE_SM=<arch>, e.g. -DUSE_SM=80")
+endif()
+
+set(SOURCES
+    ${CMAKE_SOURCE_DIR}/../common/Utility.cpp
+    cudaImage.dp.cpp
+    cudaImage.h
+    cudaSiftH.dp.cpp
+    cudaSiftH.h
+    matching.dp.cpp
+    cudaSiftD.h
+    cudaSift.h
+    geomFuncs.cpp
+    mainSift.cpp
+)
+
+include_directories(
+    ${CMAKE_SOURCE_DIR}/../common/
+    ${CMAKE_SOURCE_DIR}
+    ${OpenCV_INCLUDE_DIRS}
+)
+
+if(DEVICE_TIMER)
+    message(STATUS "Enabling Device Timer")
+    add_compile_options(-DDEVICE_TIMER)
+endif()
+
+if(USE_NVIDIA_BACKEND)
+    message(STATUS "Nvidia backend")
+    add_compile_options(-DUSE_NVIDIA_BACKEND)
+endif()
+
+if(USE_AMDHIP_BACKEND)
+    message(STATUS "AMD backend")
+    add_compile_options(-DUSE_AMDHIP_BACKEND)
+endif()
+
+# Compiler flag policy (exactly one of the three cases below applies):
+#   default                             - general + workload-specific (WL) defaults defined here
+#   -DOVERRIDE_GENERAL_CXX_FLAGS="..."  - replaces the general flags only, WL flags are kept
+#   -DCMAKE_CXX_FLAGS="..."             - replaces both the general and the WL flags
+# Supplying both overrides at once is ambiguous and is rejected.
+set(USE_DEFAULT_FLAGS ON)
+
+set(DEF_INTEL_WL_CXX_FLAGS  " ")
+set(DEF_NVIDIA_WL_CXX_FLAGS " ")
+set(DEF_AMD_WL_CXX_FLAGS    " -D__HIP_PLATFORM_AMD__ ")
+
+set(DEF_INTEL_GENERAL_CXX_FLAGS  " -O3 -fsycl -ffast-math ")
+set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ")
+set(DEF_AMD_GENERAL_CXX_FLAGS    " -O3 -fsycl -ffast-math ")
+
+# Resolve the per-backend flag strings; the backend selection further down picks one of them
+if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+    message(FATAL_ERROR "Both  CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
+elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
+    message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
+    set(INTEL_GPU_CXX_FLAGS  "${CMAKE_CXX_FLAGS}")
+    set(NVIDIA_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    set(AMD_GPU_CXX_FLAGS    "${CMAKE_CXX_FLAGS}")
+elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
+    message(STATUS "OVERRIDING GENERAL compilation flags")
+    set(INTEL_GPU_CXX_FLAGS  "${OVERRIDE_GENERAL_CXX_FLAGS}    ${DEF_INTEL_WL_CXX_FLAGS}")
+    set(NVIDIA_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}    ${DEF_NVIDIA_WL_CXX_FLAGS}")
+    set(AMD_GPU_CXX_FLAGS    "${OVERRIDE_GENERAL_CXX_FLAGS}    ${DEF_AMD_WL_CXX_FLAGS}")
+else()
+    message(STATUS "Using DEFAULT compilation flags")
+    set(INTEL_GPU_CXX_FLAGS  "${DEF_INTEL_GENERAL_CXX_FLAGS}   ${DEF_INTEL_WL_CXX_FLAGS}")
+    set(NVIDIA_GPU_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS}  ${DEF_NVIDIA_WL_CXX_FLAGS}")
+    set(AMD_GPU_CXX_FLAGS    "${DEF_AMD_GENERAL_CXX_FLAGS}     ${DEF_AMD_WL_CXX_FLAGS}")
+endif()
+
+# Backend selection: Intel AOT when GPU_AOT is set, else NVIDIA, else AMD HIP, else the default Intel backend
+if(GPU_AOT)
+    message(STATUS "Enabling INTEL backend")
+    set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}")
+    # Quoted so a custom GPU_AOT value is compared as one literal string, never split or dereferenced
+    if("${GPU_AOT}" STREQUAL "pvc" OR "${GPU_AOT}" STREQUAL "PVC")
+        message(STATUS "Enabling Intel GPU AOT compilation for ${GPU_AOT}")
+        string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64_gen -Xs \"-device 0x0bd5 -revision_id 0x2f\" -Xs \"-options -ze-opt-large-register-file\" ")
+    else()
+        message(STATUS "Using custom AOT compilation flag ${GPU_AOT}")
+        string(APPEND CMAKE_CXX_FLAGS " ${GPU_AOT} ") # User should be aware of advanced AOT compilation flags
+    endif()
+elseif(USE_NVIDIA_BACKEND)
+    message(STATUS "Enabling NVIDIA backend")
+    set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}")
+    string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_${USE_SM}")
+elseif(USE_AMDHIP_BACKEND)
+    message(STATUS "Enabling AMD HIP backend for ${USE_AMDHIP_BACKEND} AMD architecture")
+    set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}")
+    string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMDHIP_BACKEND}  ")
+else()
+    message(STATUS "Enabling INTEL backend")
+    set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}")
+endif()
+
+# Output the compiler flags that were constructed for visual inspection
+message(STATUS "Compilation flags set to: ${CMAKE_CXX_FLAGS}")
+add_executable(${PROJECT_NAME} ${SOURCES})
+target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} stdc++ stdc++fs)
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp
new file mode 100644
index 000000000..4c0a97d04
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp
@@ -0,0 +1,109 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <cstdio>
+
+#include "infra/memory.hpp"
+#include "cudautils.h"
+#include "cudaImage.h"
+
+int iDivUp(int a, int b) { return a / b + ((a % b != 0) ? 1 : 0); } // quotient, plus one when there is a remainder
+int iDivDown(int a, int b) { return a / b; }                         // plain truncating integer division
+int iAlignUp(int a, int b) { const int rem = a % b; return (rem != 0) ? (a - rem + b) : a; } // next multiple of b unless already aligned
+int iAlignDown(int a, int b) { return (a / b) * b; }                 // same value as a - a % b
+
+// Records the image geometry, adopts caller-supplied buffers and allocates whichever of them is missing.
+void CudaImage::Allocate(int w, int h, int p, bool host, sycl::queue &q_ct, float &time, float *devmem, float *hostmem)
+{
+  width = w;
+  height = h;
+  pitch = p;
+  d_data = devmem;
+  h_data = hostmem;
+  if (devmem == nullptr)
+  {
+#ifdef DEVICE_TIMER
+    const auto allocBegin = std::chrono::steady_clock::now();
+#endif
+    d_data = (float *)infra::sift_malloc(pitch, (size_t)(sizeof(float) * width), (size_t)height, q_ct);
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    const float allocUs = std::chrono::duration<float, std::micro>(std::chrono::steady_clock::now() - allocBegin).count();
+    std::cout << "Allocate Time is " << allocUs << " us" << std::endl;
+    time += allocUs;
+#endif
+    pitch /= sizeof(float); // NOTE(review): presumably sift_malloc stored a byte pitch in `pitch`; this converts it to floats - confirm
+    if (d_data == nullptr)
+      printf("Failed to allocate device data\n");
+    d_internalAlloc = true;
+  }
+  if (host && hostmem == nullptr)
+  {
+    h_data = (float *)malloc(sizeof(float) * pitch * height);
+    h_internalAlloc = true;
+  }
+}
+
+CudaImage::CudaImage() // starts empty: zero geometry, no buffers, nothing owned
+    : width(0), height(0), pitch(0), h_data(nullptr), d_data(nullptr), d_internalAlloc(false), h_internalAlloc(false)
+{}
+
+CudaImage::~CudaImage()
+{
+  // Only buffers flagged as internally allocated are released; exceptions from the device free are logged.
+  if (d_internalAlloc && d_data != nullptr)
+  {
+    try { safeCall((sycl::free(d_data, infra::get_default_queue()), 0)); }
+    catch (std::exception const &e) { std::cerr << e.what() << '\n'; }
+  }
+  d_data = nullptr;
+  if (h_internalAlloc && h_data != nullptr)
+    free(h_data);
+  h_data = nullptr;
+}
+
+// Transfers the image with infra::host_to_device (h_data into d_data, despite the name) and waits on q_ct.
+// Returns the copy time in microseconds in DEVICE_TIMER builds (also added to `time`), otherwise 0.0.
+double CudaImage::Download(sycl::queue &q_ct, float &time)
+{
+  double elapsedUs = 0.0;
+  if (d_data == nullptr || h_data == nullptr)
+    return elapsedUs; // nothing to transfer unless both buffers exist
+  const int devPitchBytes = sizeof(float) * pitch;
+#ifdef DEVICE_TIMER
+  const auto copyBegin = std::chrono::steady_clock::now();
+#endif
+  infra::sift_memcpy(d_data, devPitchBytes, h_data, sizeof(float) * width, sizeof(float) * width, height, infra::host_to_device, q_ct);
+  q_ct.wait();
+#ifdef DEVICE_TIMER
+  const auto copyEnd = std::chrono::steady_clock::now();
+  elapsedUs = std::chrono::duration<float, std::micro>(copyEnd - copyBegin).count();
+  time += elapsedUs;
+  std::cout << "Download Time is " << elapsedUs << " us" << std::endl;
+#endif
+  return elapsedUs;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h
new file mode 100644
index 000000000..0ce1a922e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h
@@ -0,0 +1,59 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDAIMAGE_H
+#define CUDAIMAGE_H
+
+#include <stddef.h>
+#include <sycl/sycl.hpp>
+
+// Host/device float image pair with optional ownership of either buffer; non-copyable.
+class CudaImage
+{
+public:
+  int width, height;    // image size in float elements
+  size_t pitch;         // row stride in floats (Allocate divides the device pitch by sizeof(float))
+  float *h_data;        // host buffer, null when absent
+  float *d_data;        // device buffer, null when absent
+  bool d_internalAlloc; // set by Allocate when it allocated d_data; the destructor then frees it
+  bool h_internalAlloc; // set by Allocate when it allocated h_data; the destructor then frees it
+
+  CudaImage();
+  CudaImage(const CudaImage &) = delete;
+  CudaImage &operator=(const CudaImage &) = delete;
+  ~CudaImage();
+  void Allocate(int width, int height, int pitch, bool withHost, sycl::queue &q_ct, float &totTime, float *devMem = nullptr, float *hostMem = nullptr);
+  double Download(sycl::queue &q_ct, float &totTime);
+};
+
+int iDivUp(int a, int b);     // a / b, plus one when a % b != 0
+int iDivDown(int a, int b);   // a / b (truncating integer division)
+int iAlignUp(int a, int b);   // a raised to the next multiple of b; a itself when a % b == 0
+int iAlignDown(int a, int b); // a - a % b
+void StartTimer(unsigned int *hTimer); // NOTE(review): no definition is visible in cudaImage.dp.cpp - confirm one exists
+double StopTimer(unsigned int hTimer); // NOTE(review): same as StartTimer - no definition visible in cudaImage.dp.cpp
+
+#endif // CUDAIMAGE_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h
new file mode 100644
index 000000000..8bdada3ec
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h
@@ -0,0 +1,87 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFT_H
+#define CUDASIFT_H
+
+#include "cudaImage.h"
+
+struct rawImg_data
+{
+  float *raw_d_data; // base pointer of the pixel buffer addressed by read()
+  int pitch;         // row stride, in floats, that read() multiplies the row index by
+  // Record the row stride used by read().
+  void set_pitch(int pitch)
+  {
+    this->pitch = pitch;
+  }
+  // Return the pixel at (xf, yf); both coordinates are truncated toward zero, no bounds check is made.
+  float read(float xf, float yf)
+  {
+    const int col = static_cast<int>(xf);
+    const int row = static_cast<int>(yf);
+    return raw_d_data[row * pitch + col];
+  }
+};
+
+typedef struct // One SIFT key point record as read and written by the kernels in cudaSiftD.dp.cpp
+{
+  float xpos;        // x position; multiplied by the subsampling factor in the descriptor kernels
+  float ypos;        // y position; multiplied by the subsampling factor in the descriptor kernels
+  float scale;       // key point scale; multiplied by the subsampling factor in the descriptor kernels
+  float sharpness;   // copied to secondary-orientation points by the orientation kernels
+  float edgeness;    // copied to secondary-orientation points by the orientation kernels
+  float orientation; // dominant gradient orientation in degrees (descriptor kernels convert it with 2*pi/360)
+  float score;       // presumably filled in by matching -- not written by the kernels reviewed here
+  float ambiguity;   // presumably filled in by matching -- not written by the kernels reviewed here
+  int match;         // presumably the index of the matched point -- TODO confirm in the matching code
+  float match_xpos;  // presumably x position of the matched point -- TODO confirm
+  float match_ypos;  // presumably y position of the matched point -- TODO confirm
+  float match_error; // presumably the match residual -- TODO confirm
+  float subsampling; // copied to secondary-orientation points by the orientation kernels
+  float empty[3];    // not referenced by the kernels reviewed here; presumably padding -- TODO confirm
+  float data[128];   // 128-element descriptor written by the ExtractSiftDescriptor* kernels
+} SiftPoint;
+
+typedef struct // Bookkeeping for an array of SiftPoint records
+{
+  int numPts; // Number of available Sift points
+  int maxPts; // Number of allocated Sift points
+#ifdef MANAGEDMEM
+  SiftPoint *m_data; // Managed data
+#else // separate host and device arrays when MANAGEDMEM is not defined
+  SiftPoint *h_data; // Host (CPU) data
+  SiftPoint *d_data; // Device (GPU) data
+#endif
+} SiftData;
+
+void InitCuda(sycl::queue &q_ct, int devNum = 0); // devNum defaults to 0
+float *AllocSiftTempMemory(int width, int height, int numOctaves, sycl::queue &q_ct, float &totTime, bool scaleUp = false); // presumably paired with FreeSiftTempMemory -- TODO confirm
+void FreeSiftTempMemory(float *memoryTmp, sycl::queue &q_ct);
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, sycl::queue &q_ct, float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0); // tempMemory defaults to null; presumably scratch is then allocated internally -- verify
+void InitSiftData(SiftData &data, sycl::queue &q_ct, float &totTime, int num = 1024, bool host = false, bool dev = true); // defaults: num = 1024, host = false, dev = true
+void FreeSiftData(SiftData &data, sycl::queue &q_ct);
+void PrintSiftData(SiftData &data, sycl::queue &q_ct);
+double MatchSiftData(SiftData &data1, SiftData &data2, sycl::queue &q_ct, float &time); // meaning of the returned double is not stated in this header
+double FindHomography(SiftData &data, float *homography, int *numMatches, sycl::queue &q_ct, float &time, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f); // homography and numMatches are presumably outputs -- TODO confirm
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp
new file mode 100644
index 000000000..dc4ff5c01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp
@@ -0,0 +1,1247 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+
+#include "infra/infra.hpp"
+#include "cudautils.h"
+#include "cudaSiftD.h"
+#include "cudaSift.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Kernel configuration
+///////////////////////////////////////////////////////////////////////////////
+
+infra::constant_memory<int, 0> d_MaxNumPoints;                       // scalar; the kernels below take a same-named int parameter, presumably fed from this at launch
+infra::global_memory<unsigned int, 1> d_PointCounter(8 * 2 + 1);     // 17 counters; the kernels below index their same-named parameter with 2 * octave - 1, 2 * octave + 0 and 2 * octave + 1
+infra::constant_memory<float, 1> d_ScaleDownKernel(5);               // 5 entries; the ScaleDown* kernels read entries 0..2 of their same-named parameter
+infra::constant_memory<float, 1> d_LowPassKernel(2 * LOWPASS_R + 1); // 2 * LOWPASS_R + 1 entries
+infra::constant_memory<float, 1> d_LaplaceKernel(8 * 12 * 16);       // 1536 entries
+
+///////////////////////////////////////////////////////////////////////////////
+// Lowpass filter and subsample image
+///////////////////////////////////////////////////////////////////////////////
+void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+                         sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel,
+                         float *brows)
+{
+#define BW (SCALEDOWN_W + 4) // row stride of the brows tile
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2) // output columns produced per work-group
+#define H2 (SCALEDOWN_H / 2) // output rows produced per work-group
+  // Halve an image: 5-tap filter along x using ShiftDown, then along y from brows with 2x subsampling.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx;
+  const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty;
+  const float k0 = d_ScaleDownKernel[0]; // weight of the two outer taps
+  const float k1 = d_ScaleDownKernel[1]; // weight of the two inner taps
+  const float k2 = d_ScaleDownKernel[2]; // weight of the centre tap
+  const int xl = sycl::min((int)(width - 1), sycl::max(0, (int)(xp - 2))); // source column, clamped to [0, width - 1]
+  const int yl = sycl::min((int)(height - 1), sycl::max(0, (int)(yp - 2))); // source row, clamped to [0, height - 1]
+  if (xp < (width + 4) && yp < (height + 4))
+  {
+    float v = d_Data[yl * pitch + xl]; // ShiftDown is defined elsewhere; presumably it fetches v from the work-item N lanes ahead -- TODO confirm
+    brows[BW * ty + tx] =
+        k0 * (v + ShiftDown(v, 4, item_ct1)) +
+        k1 * (ShiftDown(v, 1, item_ct1) + ShiftDown(v, 3, item_ct1)) +
+        k2 * ShiftDown(v, 2, item_ct1);
+  }
+  // All of brows must be written before any work-item reads other rows of it below.
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  const int xs = item_ct1.get_group(2) * W2 + tx;
+  const int ys = item_ct1.get_group(1) * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; // every second row and column of the filtered tile
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW];
+  }
+}
+
+void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+                    sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel,
+                    float *irows, float *brows)
+{
+#define BW (SCALEDOWN_W + 4) // row stride of the irows tile
+#define BH (SCALEDOWN_H + 4)
+#define W2 (SCALEDOWN_W / 2) // row stride of the brows tile; output columns produced per work-group
+#define H2 (SCALEDOWN_H / 2) // output rows produced per work-group
+  // Halve an image in three stages: load a clamped tile into irows, filter along x into brows, filter along y into d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx;
+  const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty;
+  const int xl = sycl::min((int)(width - 1), sycl::max(0, (int)(xp - 2))); // source column, clamped to [0, width - 1]
+  const int yl = sycl::min((int)(height - 1), sycl::max(0, (int)(yp - 2))); // source row, clamped to [0, height - 1]
+  const float k0 = d_ScaleDownKernel[0]; // weight of the two outer taps
+  const float k1 = d_ScaleDownKernel[1]; // weight of the two inner taps
+  const float k2 = d_ScaleDownKernel[2]; // weight of the centre tap
+  if (xp < (width + 4) && yp < (height + 4))
+    irows[BW * ty + tx] = d_Data[yl * pitch + xl];
+  // Stage 2 reads five neighbouring irows entries, so the whole tile must be loaded first.
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (yp < (height + 4) && tx < W2)
+  {
+    float *ptr = &irows[BW * ty + 2 * tx]; // 5-tap window starting at every second column
+    brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2];
+  }
+  // Stage 3 reads five brows rows written by other work-items.
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  const int xs = item_ct1.get_group(2) * W2 + tx;
+  const int ys = item_ct1.get_group(1) * H2 + ty;
+  if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2))
+  {
+    float *ptr = &brows[W2 * (ty * 2) + tx]; // 5-tap window starting at every second row
+    d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2];
+  }
+}
+
+void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+               sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel, float *inrow,
+               float *brow, int *yRead, int *yWrite)
+{
+  // Halve an image with a separable 5-tap filter, streaming one input row per step through inrow and brow.
+#define dx2 (SCALEDOWN_W / 2) // output columns per work-group; also the width of one brow slot
+  const int tx = item_ct1.get_local_id(2);
+  const int tx0 = tx + 0 * dx2; // tx0..tx4: this work-item's element in each of the five brow slots
+  const int tx1 = tx + 1 * dx2;
+  const int tx2 = tx + 2 * dx2;
+  const int tx3 = tx + 3 * dx2;
+  const int tx4 = tx + 4 * dx2;
+  const int xStart = item_ct1.get_group(2) * SCALEDOWN_W;
+  const int yStart = item_ct1.get_group(1) * SCALEDOWN_H;
+  const int xWrite = xStart / 2 + tx;
+  float k0 = d_ScaleDownKernel[0];
+  float k1 = d_ScaleDownKernel[1];
+  float k2 = d_ScaleDownKernel[2];
+  if (tx < SCALEDOWN_H + 4)
+  {
+    int y = yStart + tx - 2;
+    y = (y < 0 ? 0 : y);
+    y = (y >= height ? height - 1 : y);
+    yRead[tx] = y * pitch; // offset of the clamped source row for step tx
+    yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; // offset of the destination row finished at step tx
+  }
+  // yRead and yWrite are filled by the first SCALEDOWN_H + 4 work-items and read by all of them,
+  // hence the barrier before the row loop.
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  int xRead = xStart + tx - 2;
+  xRead = (xRead < 0 ? 0 : xRead);
+  xRead = (xRead >= width ? width - 1 : xRead);
+  // Number of work-items that produce an output column in this group.
+  int maxtx = sycl::min(dx2, (int)(width / 2 - xStart / 2));
+  // Each pass handles up to five rows; every step overwrites the oldest of the five brow slots.
+  #pragma unroll
+  for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5)
+  {
+    {
+      inrow[tx] = d_Data[yRead[dy + 0] + xRead];
+
+      // inrow must be complete before neighbouring entries are read
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 4 && !(dy & 1))
+          d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]);
+      }
+
+      // inrow is overwritten by the next step, so all reads must finish first
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 3))
+    {
+      inrow[tx] = d_Data[yRead[dy + 1] + xRead];
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 3 && (dy & 1))
+          d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]);
+      }
+
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 2))
+    {
+      inrow[tx] = d_Data[yRead[dy + 2] + xRead];
+
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 2 && !(dy & 1))
+          d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]);
+      }
+
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < (SCALEDOWN_H + 1))
+    {
+      inrow[tx] = d_Data[yRead[dy + 3] + xRead];
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < maxtx)
+      {
+        brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (dy >= 1 && (dy & 1))
+          d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]);
+      }
+
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+    if (dy < SCALEDOWN_H)
+    {
+      inrow[tx] = d_Data[yRead[dy + 4] + xRead];
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      if (tx < dx2 && xWrite < width / 2) // same set of work-items as tx < maxtx
+      {
+        brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2];
+        if (!(dy & 1))
+          d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]);
+      }
+
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+  }
+}
+
+void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch,
+             sycl::nd_item<3> item_ct1)
+{
+  // 2x upsampling: one source sample plus its right/lower neighbours (clamped at the border) become a 2x2 output block.
+  const int lx = item_ct1.get_local_id(2);
+  const int ly = item_ct1.get_local_id(1);
+  const int dstX = item_ct1.get_group(2) * SCALEUP_W + 2 * lx;
+  const int dstY = item_ct1.get_group(1) * SCALEUP_H + 2 * ly;
+  if (dstX >= 2 * width || dstY >= 2 * height)
+    return; // this 2x2 block starts outside the upsampled image
+  const int srcX = item_ct1.get_group(2) * (SCALEUP_W / 2) + lx;
+  const int srcY = item_ct1.get_group(1) * (SCALEUP_H / 2) + ly;
+  const int srcXNext = sycl::min((int)(srcX + 1), (int)(width - 1));
+  const int srcYNext = sycl::min((int)(srcY + 1), (int)(height - 1));
+  const float upLeft = d_Data[srcY * pitch + srcX];
+  const float upRight = d_Data[srcY * pitch + srcXNext];
+  const float downLeft = d_Data[srcYNext * pitch + srcX];
+  const float downRight = d_Data[srcYNext * pitch + srcXNext];
+  d_Result[dstY * newpitch + dstX] = upLeft;
+  d_Result[dstY * newpitch + dstX + 1] = 0.50f * (upLeft + upRight);
+  d_Result[(dstY + 1) * newpitch + dstX] = 0.50f * (upLeft + downLeft);
+  d_Result[(dstY + 1) * newpitch + dstX + 1] = 0.25f * (upLeft + upRight + downLeft + downRight);
+}
+
+// Polynomial approximation of atan2(y, x); returns radians in [-pi, pi], and 0 when x == y == 0.
+float FastAtan2(float y, float x)
+{
+  const float absx = sycl::fabs(x), absy = sycl::fabs(y);
+  const float hi = sycl::max(absx, absy); // zero only when both inputs are zero
+  float a = (hi > 0.0f ? sycl::min(absx, absy) / hi : 0.0f); // guard: 0/0 would give NaN at the origin
+  float s = a * a;
+  float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a;
+  r = (absy > absx ? 1.57079637f - r : r); // angle was measured from the y axis: reflect about pi/4
+  r = (x < 0 ? 3.14159274f - r : r);       // left half-plane
+  r = (y < 0 ? -r : r);                    // lower half-plane
+  return r;
+}
+
+void ExtractSiftDescriptorsCONSTNew(
+    // texObj is read as texObj[row * pitch + column]; gauss, buffer and sums are scratch arrays shared by the work-group
+    float *texObj, int pitch, SiftPoint *d_sift,
+    float subsampling, int octave, sycl::nd_item<3> item_ct1,
+    int d_MaxNumPoints, unsigned int *d_PointCounter, float *gauss,
+    float *buffer, float *sums)
+{
+  // Compute the 128-element descriptor of every key point in [fstPts, totPts), one point per work-group iteration.
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  // NOTE(review): 2 * octave - 1 is -1 when octave == 0 -- confirm the caller never relies on that read.
+  int fstPts =
+      sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints);
+  int totPts =
+      sycl::min(d_PointCounter[2 * octave + 1], (unsigned int)d_MaxNumPoints);
+
+#pragma unroll
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+
+    buffer[idx] = 0.0f;
+    // buffer (and, on the first pass, gauss) must be initialised before any work-item accumulates into it
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+
+    // Compute angles and gradients
+    float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+    float sina = sycl::sin(theta); // cosa -sina
+    float cosa = sycl::cos(theta); // sina  cosa
+    float scale = 12.0f / 16.0f * d_sift[bx].scale;
+    float ssina = scale * sina;
+    float scosa = scale * cosa;
+
+#pragma unroll
+    for (int y = ty; y < 16; y += 8)
+    {
+      float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+      float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+      // Difference of two samples one unit apart on either side along the rotated x axis (coordinates truncated to int).
+      int xi1 = xpos + cosa;
+      int yi1 = ypos + sina;
+
+      int xi2 = xpos - cosa;
+      int yi2 = ypos - sina;
+
+      float dx = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+      // Same along the rotated y axis.
+      xi1 = xpos - sina;
+      yi1 = ypos + cosa;
+
+      xi2 = xpos + sina;
+      yi2 = ypos - cosa;
+
+      float dy = *(texObj + yi1 * pitch + xi1) -
+                 *(texObj + yi2 * pitch + xi2);
+      float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+      float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f;
+      // NOTE(review): 3.1415f is below pi, so angf can slightly exceed 8 and angi can be 8 -- confirm buffer sizing.
+      int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+      float horf = (tx - 1.5f) / 4.0f - hori;
+      float ihorf = 1.0f - horf;
+      int veri = (y + 2) / 4 - 1;
+      float verf = (y - 1.5f) / 4.0f - veri;
+      float iverf = 1.0f - verf;
+      int angi = angf;
+      int angp = (angi < 7 ? angi + 1 : 0);
+      angf -= angi;
+      float iangf = 1.0f - angf;
+
+      int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+      int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+      int p2 = angp + hist;
+      if (tx >= 2)
+      {
+        float grad1 = ihorf * grad;
+        if (y >= 2)
+        { // Upper left
+          float grad2 = iverf * grad1;
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p1, iangf * grad2);
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p2, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower left
+          float grad2 = verf * grad1;
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p1 + 32, iangf * grad2);
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p2 + 32, angf * grad2);
+        }
+      }
+      if (tx <= 13)
+      {
+        float grad1 = horf * grad;
+        if (y >= 2)
+        { // Upper right
+          float grad2 = iverf * grad1;
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p1 + 8, iangf * grad2);
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p2 + 8, angf * grad2);
+        }
+        if (y <= 13)
+        { // Lower right
+          float grad2 = verf * grad1;
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p1 + 40, iangf * grad2);
+          infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+              buffer + p2 + 40, angf * grad2);
+        }
+      }
+    }
+    // all atomic accumulations into buffer must land before it is read back
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+
+    // Normalize twice and suppress peaks first time
+    float sum = buffer[idx] * buffer[idx];
+    // ShiftDown is defined elsewhere; presumably this loop is a 32-lane shuffle reduction -- TODO confirm
+    #pragma unroll
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+    tsum1 = sycl::min((float)(buffer[idx] * sycl::rsqrt(tsum1)), 0.2f);
+    // tsum1 now holds this work-item's normalised bin, capped at 0.2
+    sum = tsum1 * tsum1;
+
+    #pragma unroll
+    for (int i = 16; i > 0; i /= 2)
+      sum += ShiftDown(sum, i, item_ct1);
+    if ((idx & 31) == 0)
+      sums[idx / 32] = sum;
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+
+    float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+    float *desc = d_sift[bx].data;
+    desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+    if (idx == 0)
+    {
+      d_sift[bx].xpos *= subsampling;
+      d_sift[bx].ypos *= subsampling;
+      d_sift[bx].scale *= subsampling;
+    }
+    // sums and buffer are reused by the next point, so every work-item must be done with them
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+}
+
+void ExtractSiftDescriptor(rawImg_data texObj,
+                           SiftPoint *d_sift, float subsampling, int octave,
+                           int bx, sycl::nd_item<3> item_ct1, float *gauss,
+                           float *buffer, float *sums)
+{
+  // Compute the 128-element descriptor of key point d_sift[bx]; octave is not referenced in this body.
+  const int idx = item_ct1.get_local_id(2);
+  const int tx = idx & 15; // 0 -> 16
+  const int ty = idx / 16; // 0 -> 8
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina  cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+  // Each work-item handles column tx of rows ty and ty + 8 of the rotated 16x16 sampling grid.
+#pragma unroll
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    angf -= angi;
+    float iangf = 1.0f - angf;
+    // NOTE(review): 3.1415f is below pi, so angi can be 8 when atan2 returns pi -- confirm buffer sizing.
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p1, iangf * grad2);
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p1 + 32, iangf * grad2);
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p1 + 8, iangf * grad2);
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p1 + 40, iangf * grad2);
+        infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  // all atomic accumulations into buffer must land before it is read back
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  // ShiftDown is defined elsewhere; presumably this loop is a 32-lane shuffle reduction -- TODO confirm
+  #pragma unroll
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = sycl::min((float)(buffer[idx] * sycl::rsqrt(tsum1)), 0.2f);
+  // tsum1 now holds this work-item's normalised bin, capped at 0.2
+  sum = tsum1 * tsum1;
+
+  #pragma unroll
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+void RescalePositions(SiftPoint *d_sift, int numPts, float scale,
+                      sycl::nd_item<3> item_ct1)
+{
+  // One work-item per SIFT point: multiply its position and scale by the given factor.
+  const int gid = item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + item_ct1.get_local_id(2);
+  if (gid >= numPts)
+    return; // work-items past the last point have nothing to do
+  SiftPoint &pt = d_sift[gid];
+  pt.xpos *= scale;
+  pt.ypos *= scale;
+  pt.scale *= scale;
+}
+
+// With constant number of blocks: every work-group strides over the key points of one octave,
+// accumulates a LEN-bin gradient-orientation histogram around each point and stores the dominant
+// orientation in degrees; a second peak above 80% of the maximum is written out as an extra point.
+// Work-items exchange the histogram through local memory, so each smoothing phase and the end of
+// every point iteration is fenced with a work-group barrier rather than relying on lock-step lanes.
+void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave,
+                                 sycl::nd_item<3> item_ct1, int d_MaxNumPoints,
+                                 unsigned int *d_PointCounter,
+                                 sycl::accessor<float, 2, sycl::access_mode::read_write, sycl::access::target::local> img,
+                                 sycl::accessor<float, 2, sycl::access_mode::read_write, sycl::access::target::local> tmp,
+                                 float *hist, float *gaussx, float *gaussy)
+{
+#define RAD 9
+#define WID (2 * RAD + 1)
+#define LEN 32 //%%%% Note: Lowe suggests 36, not 32
+  const int tx = item_ct1.get_local_id(2);
+  int fstPts =
+      sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints);
+  int totPts =
+      sycl::min(d_PointCounter[2 * octave + 0], (unsigned int)d_MaxNumPoints);
+  #pragma unroll
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+    float sc = d_Sift[bx].scale;
+    #pragma unroll
+    for (int i = tx; i < 2 * LEN; i += item_ct1.get_local_range().get(2))
+      hist[i] = 0.0f; // lower half: smoothed histogram / peaks, upper half: raw accumulation
+    float xp = d_Sift[bx].xpos;
+    float yp = d_Sift[bx].ypos;
+    int xi = (int)xp;
+    int yi = (int)yp;
+    float xf = xp - xi;
+    float yf = yp - yi;
+    #pragma unroll
+    for (int i = tx; i < WID * WID; i += item_ct1.get_local_range().get(2))
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      int xp = sycl::max(sycl::min((int)(x - RAD + xi), (int)(w - 1)), 0);
+      int yp = sycl::max(sycl::min((int)(y - RAD + yi), (int)(h - 1)), 0);
+      img[y][x] = image[yp * p + xp]; // WID x WID patch around the point, clamped to the image
+    }
+    float fac[5];
+    fac[1] = fac[3] =
+        (sc > 0.5f ? sycl::exp(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[0] = fac[4] =
+        (sc > 0.5f ? sycl::exp(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f);
+    fac[2] = 1.0f;
+    float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0
+    if (tx < WID)
+    {
+      gaussx[tx] = sycl::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf));
+      gaussy[tx] = sycl::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf));
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    #pragma unroll
+    for (int i = tx; i < (WID - 4) * WID;
+         i += item_ct1.get_local_range().get(2))
+    {
+      int y = i / WID;
+      int x = i - y * WID;
+      y += 2;
+      tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) +
+                  fac[0] * (img[y - 2][x] + img[y + 2][x]);
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    #pragma unroll
+    for (int i = tx; i < (WID - 4) * (WID - 4);
+         i += item_ct1.get_local_range().get(2))
+    {
+      int y = i / (WID - 4);
+      int x = i - y * (WID - 4);
+      x += 2;
+      y += 2;
+      img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) +
+                  fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]);
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    #pragma unroll
+    for (int i = tx; i < (WID - 6) * (WID - 6);
+         i += item_ct1.get_local_range().get(2))
+    {
+      int y = i / (WID - 6);
+      int x = i - y * (WID - 6);
+      x += 3;
+      y += 3;
+      float dx = img[y][x + 1] - img[y][x - 1];
+      float dy = img[y + 1][x] - img[y - 1][x];
+      int bin =
+          (int)((LEN / 2) * sycl::atan2(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) %
+          LEN;
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+          &hist[LEN + bin], grad * gaussx[x] * gaussy[y]);
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1);
+    int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1);
+    int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2);
+    int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2);
+    if (tx < LEN) // smoothing pass 1: upper half -> lower half
+      hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) +
+                 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]);
+    item_ct1.barrier(sycl::access::fence_space::local_space); // pass 2 reads neighbours' pass-1 results
+    if (tx < LEN) // smoothing pass 2: lower half -> upper half
+      hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) +
+                       0.0f * (hist[x2m] + hist[x2p]);
+    item_ct1.barrier(sycl::access::fence_space::local_space); // peak test reads neighbours' pass-2 results
+    if (tx < LEN)
+    {
+      float val = hist[tx + LEN];
+      hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f); // keep local maxima only
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    if (tx == 0)
+    {
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      #pragma unroll
+      for (int i = 0; i < LEN; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[LEN + ((i1 + 1) % LEN)];
+      float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+      sycl::atomic<unsigned int>(
+          sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 1]))
+          .fetch_max(d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1 && true)
+      {
+        float val1 = hist[LEN + ((i2 + 1) % LEN)];
+        float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = infra::atomic_fetch_compare_inc(
+            &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = sc;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    // hist is cleared at the top of the next iteration; wait until work-item 0 has finished reading it.
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+#undef RAD
+#undef WID
+#undef LEN
+}
+
+// With constant number of blocks: each work-group strides over this octave's points.
+void ComputeOrientationsCONST(rawImg_data texObj,
+                              SiftPoint *d_Sift, int octave,
+                              sycl::nd_item<3> item_ct1, int d_MaxNumPoints,
+                              unsigned int *d_PointCounter, float *hist,
+                              float *gauss)
+{
+  // hist needs 64 floats and gauss 11 floats of per-group scratch (hist is accessed as local memory below).
+  const int tx = item_ct1.get_local_id(2);
+  // Points handled by this launch are [fstPts, totPts), both clamped to d_MaxNumPoints.
+  int fstPts =
+      sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints);
+  int totPts =
+      sycl::min(d_PointCounter[2 * octave + 0], (unsigned int)d_MaxNumPoints);
+  // One point per iteration; the stride is the number of work-groups in dimension 2.
+  #pragma unroll
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+    // Build 11 Gaussian weights (sigma = 1.5 * point scale) and clear the 64-slot histogram.
+    float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11;
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11) // work-items 0..120 each sample one cell of an 11x11 window
+    {
+      float dx = texObj.read(xf + 1.0, yf) - texObj.read(xf - 1.0, yf); // src_d_data[yf * pitch + xf]
+      float dy = texObj.read(xf, yf + 1.0) - texObj.read(xf, yf - 1.0);
+      int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31)
+        bin = 0;
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      infra::atomic_fetch_add<sycl::access::address_space::local_space>(
+          &hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    // Once all votes are in, smooth the 32 bins circularly with weights (1, 4, 6, 4, 1) into hist[32..63].
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    if (tx < 32)
+    {
+      float v = hist[32 + tx];
+      hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+    }
+    // hist[0..31] now keeps only the local maxima of the smoothed histogram (other bins are zero).
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    if (tx == 0)
+    {
+      float maxval1 = 0.0;
+      float maxval2 = 0.0;
+      int i1 = -1;
+      int i2 = -1;
+      // Find the largest peak (maxval1 at i1) and the second largest (maxval2 at i2).
+      #pragma unroll
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); // 11.25 = 360 / 32 per bin
+      sycl::atomic<unsigned int>(
+          sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 1]))
+          .fetch_max(d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1 && true) // strong second peak: emit a copy of the point with that orientation
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = infra::atomic_fetch_compare_inc(
+            &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          ;
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+}
+
+void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave,
+                        sycl::nd_item<3> item_ct1, int d_MaxNumPoints,
+                        unsigned int *d_PointCounter, unsigned short *points)
+{
+#define MEMWID (MINMAX_W + 2)
+  // Finds extrema across three adjacent planes of d_Data0, refines them and appends the kept points to d_Sift.
+  if (item_ct1.get_group(2) == 0 && item_ct1.get_group(1) == 0 &&
+      item_ct1.get_local_id(2) == 0) // one work-item raises both counters of this octave to the previous count
+  {
+    sycl::atomic<unsigned int>(
+        sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 0]))
+        .fetch_max(d_PointCounter[2 * octave - 1]);
+    sycl::atomic<unsigned int>(
+        sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 1]))
+        .fetch_max(d_PointCounter[2 * octave - 1]);
+  }
+  int tx = item_ct1.get_local_id(2);
+  int block = item_ct1.get_group(2) / NUM_SCALES;
+  int scale = item_ct1.get_group(2) - NUM_SCALES * block;
+  int minx = block * MINMAX_W;
+  int maxx = sycl::min((int)(minx + MINMAX_W), width);
+  int xpos = minx + tx;
+  int size = pitch * height;
+  int ptr =
+      size * scale + sycl::max(sycl::min((int)(xpos - 1), (int)(width - 1)), 0);
+  // Rows handled by this group: MINMAX_H, or fewer for the last group in dimension 1.
+  int yloops =
+      sycl::min((unsigned int)(height - MINMAX_H * item_ct1.get_group(1)),
+                (unsigned int)(MINMAX_H));
+  float maxv = 0.0f;
+  // First pass: largest |value| of the middle plane (offset 1 * size) over this work-item's rows.
+  #pragma unroll
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    int yptr1 = ptr + ypos * pitch;
+    float val = d_Data0[yptr1 + 1 * size];
+    maxv = sycl::fmax(maxv, sycl::fabs(val));
+  }
+  // Leave early when no work-item of the sub-group exceeds thresh (the lane-bit term is presumably always non-zero).
+  if (!sycl::any_of_group(
+          item_ct1.get_sub_group(),
+          (0xffffffff &
+           (0x1 << item_ct1.get_sub_group().get_local_linear_id())) &&
+              maxv > thresh))
+    return;
+  // if (tx==0) printf("XXX2\n");
+  // Bit y of ptbits marks row y of this work-item's column as a candidate extremum.
+  int ptbits = 0;
+
+  #pragma unroll
+  for (int y = 0; y < yloops; y++)
+  {
+
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    int yptr1 = ptr + ypos * pitch;
+    float d11 = d_Data0[yptr1 + 1 * size];
+    if (sycl::any_of_group(
+            item_ct1.get_sub_group(),
+            (0xffffffff &
+             (0x1 << item_ct1.get_sub_group().get_local_linear_id())) &&
+                sycl::fabs(d11) > thresh))
+    {
+      // dPR = value at plane P (0..2) and row R (0 = above, 1 = current, 2 = below) of this column.
+      int yptr0 = ptr + sycl::max(0, (int)(ypos - 1)) * pitch;
+      int yptr2 = ptr + sycl::min((int)(height - 1), (int)(ypos + 1)) * pitch;
+      float d01 = d_Data0[yptr1];
+      float d10 = d_Data0[yptr0 + 1 * size];
+      float d12 = d_Data0[yptr2 + 1 * size];
+      float d21 = d_Data0[yptr1 + 2 * size];
+
+      float d00 = d_Data0[yptr0];
+      float d02 = d_Data0[yptr2];
+      float ymin1 = sycl::fmin(sycl::fmin(d00, d01), d02);
+      float ymax1 = sycl::fmax(sycl::fmax(d00, d01), d02);
+      float d20 = d_Data0[yptr0 + 2 * size];
+      float d22 = d_Data0[yptr2 + 2 * size];
+      float ymin3 = sycl::fmin(sycl::fmin(d20, d21), d22);
+      float ymax3 = sycl::fmax(sycl::fmax(d20, d21), d22);
+      float ymin2 = sycl::fmin(
+          sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3);
+      float ymax2 = sycl::fmax(
+          sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3);
+
+      // Min/max over the neighbouring columns, obtained from other work-items via ShiftUp/ShiftDown.
+      // (ShiftUp/ShiftDown are defined elsewhere; presumably sub-group shuffles by one lane - confirm.)
+
+      float nmin2 = sycl::fmin(ShiftUp(ymin2, 1, item_ct1), ShiftDown(ymin2, 1, item_ct1));
+      float nmax2 = sycl::fmax(ShiftUp(ymax2, 1, item_ct1), ShiftDown(ymax2, 1, item_ct1));
+
+      float minv = sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3);
+      minv = sycl::fmin(sycl::fmin(minv, d10), d12);
+      float maxv = sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3);
+      maxv = sycl::fmax(sycl::fmax(maxv, d10), d12);
+
+      if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+        ptbits |= ((d11 < sycl::fmin(-thresh, minv)) |
+                   (d11 > sycl::fmax(thresh, maxv)))
+                  << y;
+    }
+  }
+  // Scan the per-work-item candidate counts; pos below is this work-item's first slot in points[].
+  unsigned int totbits = sycl::popcount(ptbits);
+  unsigned int numbits = totbits;
+
+  #pragma unroll
+  for (int d = 1; d < 32; d <<= 1)
+  {
+    unsigned int num = ShiftUp(totbits, d, item_ct1);
+    if (tx >= d)
+      totbits += num;
+  }
+  int pos = totbits - numbits;
+
+  #pragma unroll
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * item_ct1.get_group(1) + y;
+    if (ptbits & (1 << y) && pos < MEMWID)
+    {
+      points[2 * pos + 0] = xpos - 1;
+      points[2 * pos + 1] = ypos;
+      pos++;
+    }
+  }
+  // NOTE(review): points[] is read below by other work-items with no barrier after the writes above - confirm this is safe.
+  totbits = Shuffle(totbits, 31, item_ct1);
+  if (tx < totbits)
+  {
+    int xpos = points[2 * tx + 0];
+    int ypos = points[2 * tx + 1];
+    int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+    float val = d_Data0[ptr];
+    float *data1 = &d_Data0[ptr];
+    float dxx = 2.0f * val - data1[-1] - data1[1];
+    float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+    float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+    float tra = dxx + dyy;
+    float det = dxx * dyy - dxy * dxy;
+    if (tra * tra < edgeLimit * det) // keep only candidates passing the trace^2 / det edge test
+    {
+      float edge = (tra * tra) / det;
+      float dx = 0.5f * (data1[1] - data1[-1]);
+      float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+      float *data0 = d_Data0 + ptr - height * pitch;
+      float *data2 = d_Data0 + ptr + height * pitch;
+      float ds = 0.5f * (data0[0] - data2[0]);
+      float dss = 2.0f * val - data2[0] - data0[0];
+      float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+      float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+      float idxx = dyy * dss - dys * dys;
+      float idxy = dys * dxs - dxy * dss;
+      float idxs = dxy * dys - dyy * dxs;
+      float idet = 1.0f / (idxx * dxx + idxy * dxy + idxs * dxs);
+      float idyy = dxx * dss - dxs * dxs;
+      float idys = dxy * dxs - dxx * dys;
+      float idss = dxx * dyy - dxy * dxy;
+      float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+      float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+      float pds = idet * (idxs * dx + idys * dy + idss * ds);
+      if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+      { // offset larger than half a step: fall back to independent per-axis estimates
+        pdx = dx / dxx;
+        pdy = dy / dyy;
+        pds = ds / dss;
+      }
+      float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+      int maxPts = d_MaxNumPoints;
+      float sc = sycl::pow(2.0f, (float)scale / NUM_SCALES) *
+                 sycl::exp2(pds * factor);
+      if (sc >= lowestScale)
+      {
+        sycl::atomic<unsigned int>(
+            sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 0]))
+            .fetch_max(d_PointCounter[2 * octave - 1]);
+        unsigned int idx = infra::atomic_fetch_compare_inc(
+            &d_PointCounter[2 * octave + 0], (unsigned int)0x7fffffff);
+        idx = (idx >= maxPts ? maxPts - 1 : idx); // clamp: points beyond the budget overwrite the last slot
+        d_Sift[idx].xpos = xpos + pdx;
+        d_Sift[idx].ypos = ypos + pdy;
+        d_Sift[idx].scale = sc;
+        d_Sift[idx].sharpness = val + dval;
+        d_Sift[idx].edgeness = edge;
+        d_Sift[idx].subsampling = subsampling;
+      }
+    }
+  }
+}
+
+void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave, sycl::nd_item<3> item_ct1, float *d_LaplaceKernel, float *buff)
+{
+  const int tx = item_ct1.get_local_id(2);
+  const int xp = item_ct1.get_group(2) * LAPLACE_W + tx;
+  const int yp = item_ct1.get_group(1);
+  float *data = d_Image + sycl::max(sycl::min((int)(xp - LAPLACE_R), (int)(width - 1)), 0);
+  float temp[2 * LAPLACE_R + 1]; // vertical neighbourhood of the pixel, row indices clamped to the image
+  // Filters row yp at LAPLACE_S scales (symmetric, separable) and writes differences of consecutive scales to d_Result.
+  float kern[LAPLACE_S][LAPLACE_R + 1];
+  // kern: per-work-item copy of taps 0..4 of each scale's kernel (tap j is applied at offsets -j and +j).
+
+  // Pass 1 (vertical): filter column xp for every scale; row `scale` of buff receives the results.
+
+  if (xp < (width + 2 * LAPLACE_R))
+  {
+    #pragma unroll
+    for (int i = 0; i <= 2 * LAPLACE_R; i++)
+      temp[i] = data[sycl::max(0, sycl::min((int)(yp + i - LAPLACE_R),
+                                            (int)(height - 1))) *
+                     pitch];
+
+    #pragma unroll
+    for (int scale = 0; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+      float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; // stride: 16 floats per scale, 12 * 16 per octave
+      kern[scale][0] = kernel[0];
+      kern[scale][1] = kernel[1];
+      kern[scale][2] = kernel[2];
+      kern[scale][3] = kernel[3];
+      kern[scale][4] = kernel[4];
+
+      float sum = kern[scale][0] * temp[LAPLACE_R];
+
+      #pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]);
+      buf[tx] = sum;
+    }
+  }
+  // Pass 2 (horizontal): filter each scale's row of buff and store res(scale) - res(scale - 1) in plane scale - 1.
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (tx < LAPLACE_W && xp < (width + 2 * LAPLACE_R))
+  {
+    int scale = 0;
+    float oldRes = kern[scale][0] * buff[tx + LAPLACE_R];
+
+    #pragma unroll
+    for (int j = 1; j <= LAPLACE_R; j++)
+      oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]);
+
+    #pragma unroll
+    for (int scale = 1; scale < LAPLACE_S; scale++)
+    {
+      float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale;
+      float res = kern[scale][0] * buf[tx + LAPLACE_R];
+
+      #pragma unroll
+      for (int j = 1; j <= LAPLACE_R; j++)
+        res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]);
+      d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes;
+      oldRes = res;
+    }
+  }
+}
+
+void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height,
+             sycl::nd_item<3> item_ct1, float *d_LowPassKernel, float *buffer)
+{
+  // Separable symmetric 9-tap filter: vertical pass into buffer, then horizontal pass into d_Result.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  float *kernel = d_LowPassKernel; // kernel[4] is the centre tap, kernel[0] the outermost (offset 4)
+  float *data =
+      d_Image + sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0);
+  float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R);
+  int h = height - 1;
+  if (yp < height)
+    buff[tx] =
+        kernel[4] * data[sycl::min(yp, h) * pitch] +
+        kernel[3] * (data[sycl::max(0, sycl::min((int)(yp - 1), h)) * pitch] +
+                     data[sycl::min((int)(yp + 1), h) * pitch]) +
+        kernel[2] * (data[sycl::max(0, sycl::min((int)(yp - 2), h)) * pitch] +
+                     data[sycl::min((int)(yp + 2), h) * pitch]) +
+        kernel[1] * (data[sycl::max(0, sycl::min((int)(yp - 3), h)) * pitch] +
+                     data[sycl::min((int)(yp + 3), h) * pitch]) +
+        kernel[0] * (data[sycl::max(0, sycl::min((int)(yp - 4), h)) * pitch] +
+                     data[sycl::min((int)(yp + 4), h) * pitch]);
+  // buff[tx] holds the column starting at xp - 4, so output column xp is centred on buff[tx + 4].
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (tx < LOWPASS_W && xp < width && yp < height)
+    d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] +
+                                kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) +
+                                kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]);
+}
+
+void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height,
+                     sycl::nd_item<3> item_ct1, float *d_LowPassKernel,
+                     sycl::accessor<float, 2, sycl::access_mode::read_write, sycl::access::target::local> xrows)
+{
+  // Low-pass filter: horizontal pass via ShiftDown into a ring of N rows in xrows, vertical pass read back from it.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  const int N = 16;
+  float *k = d_LowPassKernel;
+  int xl = sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0);
+  // l advances in steps of 4 rows: rows are filled while l < LOWPASS_H, results are written once l >= 4.
+  #pragma unroll
+  for (int l = -8; l <= LOWPASS_H; l += 4)
+  {
+    if (l < LOWPASS_H)
+    {
+      int yl = sycl::max(sycl::min((int)(yp + l + 4), (int)(height - 1)), 0);
+      float val = d_Image[yl * pitch + xl];
+      xrows[(l + 8 + ty) % N][tx] =
+          k[4] * ShiftDown(val, 4, item_ct1) +
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    }
+    if (l >= 4)
+    {
+      int ys = yp + l - 4;
+      if (xp < width && ys < height && tx < LOWPASS_W)
+        d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] +
+                                    k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) +
+                                    k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) +
+                                    k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) +
+                                    k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]);
+    }
+    if (l >= 0)
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+}
+
+void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height,
+                  sycl::nd_item<3> item_ct1, float *d_LowPassKernel,
+                  sycl::accessor<float, 2, sycl::access_mode::read_write, sycl::access::target::local> xrows)
+{
+  // Low-pass filter: horizontal pass via ShiftDown into xrows (used as a ring of N rows), vertical pass read from xrows.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int xp = item_ct1.get_group(2) * LOWPASS_W + tx;
+  const int yp = item_ct1.get_group(1) * LOWPASS_H + ty;
+  const int N = 16;
+  float *k = d_LowPassKernel;
+  int xl = sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0);
+  // Prologue: horizontally filter the rows for l = -8, -4, 0 and store them at xrows[l + ty + 8].
+  #pragma unroll
+  for (int l = -8; l < 4; l += 4)
+  {
+    int ly = l + ty;
+    int yl = sycl::max(sycl::min((int)(yp + l + 4), (int)(height - 1)), 0);
+    float val = d_Image[yl * pitch + xl];
+    val = k[4] * ShiftDown(val, 4, item_ct1) +
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    xrows[ly + 8][tx] = val;
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  // Steady state: add the next filtered rows to the ring (index mod N) and emit the vertical result for row ys.
+  #pragma unroll
+  for (int l = 4; l < LOWPASS_H; l += 4)
+  {
+    int ly = l + ty;
+    int yl = sycl::min((int)(yp + l + 4), (int)(height - 1));
+    float val = d_Image[yl * pitch + xl];
+    val = k[4] * ShiftDown(val, 4, item_ct1) +
+          k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) +
+          k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) +
+          k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) +
+          k[0] * (ShiftDown(val, 8, item_ct1) + val);
+    xrows[(ly + 8) % N][tx] = val;
+    int ys = yp + l - 4;
+    if (xp < width && ys < height && tx < LOWPASS_W)
+      d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                  k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                  k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                  k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                  k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+  int ly = LOWPASS_H + ty; // epilogue: emit the remaining output rows from rows already in the ring
+  int ys = yp + LOWPASS_H - 4;
+  if (xp < width && ys < height && tx < LOWPASS_W)
+    d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] +
+                                k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) +
+                                k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) +
+                                k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) +
+                                k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h
new file mode 100644
index 000000000..0d38fe57e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h
@@ -0,0 +1,58 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//  
+
+#ifndef CUDASIFTD_H
+#define CUDASIFTD_H
+
+#define NUM_SCALES      5
+
+// Scale down thread block width
+#define SCALEDOWN_W    64 // 60 
+
+// Scale down thread block height
+#define SCALEDOWN_H    16 // 8
+
+// Scale up thread block width
+#define SCALEUP_W      64
+
+// Scale up thread block height
+#define SCALEUP_H       8
+
+// Find point thread block width
+#define MINMAX_W       30 //32 
+
+// Find point thread block height
+#define MINMAX_H        8 //16 
+ 
+// Laplace thread block width
+#define LAPLACE_W     128 // 56
+
+// Laplace rows per thread
+#define LAPLACE_H       4
+
+// Number of laplace scales
+#define LAPLACE_S   (NUM_SCALES+3)
+
+// Laplace filter kernel radius
+#define LAPLACE_R       4
+
+#define LOWPASS_W      24 //56
+#define LOWPASS_H      32 //16
+#define LOWPASS_R       4
+
+//====================== Number of threads ====================//
+// ScaleDown:               SCALEDOWN_W + 4
+// LaplaceMulti:            (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S
+// FindPointsMulti:         MINMAX_W + 2
+// ComputeOrientations:     128
+// ExtractSiftDescriptors:  256
+
+//====================== Number of blocks ====================//
+// ScaleDown:               (width/SCALEDOWN_W) * (height/SCALEDOWN_H)
+// LaplaceMulti:            (width+2*LAPLACE_R)/LAPLACE_W * height
+// FindPointsMulti:         (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H)
+// ComputeOrientations:     numpts
+// ExtractSiftDescriptors:  numpts
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp
new file mode 100644
index 000000000..bae0c62e4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp
@@ -0,0 +1,826 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <iostream>
+#include <algorithm>
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+#include "cudaSiftD.h"
+#include "cudaSiftH.h"
+#include "cudaSiftD.dp.cpp"
+
+#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
+
+template <> // Specialization that declares CudaImage device copyable to SYCL.
+struct sycl::is_device_copyable<CudaImage> : std::true_type
+{
+};
+
+// Prints the name, maximum work-group size and maximum clock frequency of q_ct's device; devNum is unused.
+void InitCuda(sycl::queue &q_ct, int devNum)
+{
+  std::cout << "Device Name:          " << q_ct.get_device().get_info<sycl::info::device::name>() << std::endl
+            << "Max workgroup size:   " << q_ct.get_device().get_info<sycl::info::device::max_work_group_size>() << std::endl
+            << "Max clock freq:   " << q_ct.get_device().get_info<sycl::info::device::max_clock_frequency>() << std::endl;
+}
+
+// Allocates one scratch buffer sized for all octaves (images plus Laplace planes) and returns it.
+float *AllocSiftTempMemory(int width, int height, int numOctaves, sycl::queue &q_ct, float &time, bool scaleUp)
+{
+  const int planes = NUM_SCALES + 3;
+  int curW = width * (scaleUp ? 2 : 1), curH = height * (scaleUp ? 2 : 1);
+  int rowLen = iAlignUp(curW, 128);
+  int imgElems = curH * rowLen;          // image sizes
+  int lapElems = planes * curH * rowLen; // laplace buffer sizes
+  for (int left = numOctaves; left > 0; --left)
+  {
+    curW /= 2;
+    curH /= 2;
+    rowLen = iAlignUp(curW, 128);
+    imgElems += curH * rowLen;
+    lapElems += planes * curH * rowLen;
+  }
+  const int totalElems = imgElems + lapElems;
+  size_t pitch;
+  // Request 4096 x ceil(totalElems / 4096) * sizeof(float); presumably sift_malloc allocates
+  // width * height bytes like a pitched allocation, which covers totalElems floats - confirm.
+#ifdef DEVICE_TIMER
+  auto t0 = std::chrono::steady_clock::now();
+#endif
+  float *scratch = (float *)infra::sift_malloc(pitch, (size_t)4096, (totalElems + 4095) / 4096 * sizeof(float), q_ct);
+  q_ct.wait();
+#ifdef DEVICE_TIMER
+  auto t1 = std::chrono::steady_clock::now();
+  // Only the allocation and the wait that follows it are timed (microseconds).
+  time += std::chrono::duration<float, std::micro>(t1 - t0).count();
+#endif
+  return scratch;
+}
+
+void FreeSiftTempMemory(float *memoryTmp, sycl::queue &q_ct)
+{
+  if (memoryTmp)
+    // Frees the scratch buffer only when it is non-null; the comma expression hands 0 to safeCall.
+    safeCall((sycl::free(memoryTmp, q_ct), 0));
+}
+
+void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, sycl::queue &q_ct,
+                 float &totTime, float lowestScale, bool scaleUp, float *tempMemory)
+{
+  unsigned int *d_PointCounterAddr;
+  // Extracts SIFT points from img into siftData; with DEVICE_TIMER, transfer/allocation times (us) are added to totTime.
+#ifdef DEVICE_TIMER
+  auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+  *((void **)&d_PointCounterAddr) = d_PointCounter.get_ptr();
+  q_ct.memset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int)); // clear all 17 point counters
+  q_ct.memcpy(d_MaxNumPoints.get_ptr(), &siftData.maxPts, sizeof(int));
+  q_ct.wait();
+
+#ifdef DEVICE_TIMER
+  auto stop_memcpy = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  // Scratch layout: sizeTmp floats of Laplace planes first, then the image pyramid (`size` floats before the sum).
+  const int nd = NUM_SCALES + 3;
+  int w = img.width * (scaleUp ? 2 : 1);
+  int h = img.height * (scaleUp ? 2 : 1);
+  int p = iAlignUp(w, 128);
+  int width = w, height = h;
+  int size = h * p;         // image sizes
+  int sizeTmp = nd * h * p; // laplace buffer sizes
+  for (int i = 0; i < numOctaves; i++)
+  {
+    w /= 2;
+    h /= 2;
+    int p = iAlignUp(w, 128);
+    size += h * p;
+    sizeTmp += nd * h * p;
+  }
+  float *memoryTmp = tempMemory;
+  size += sizeTmp;
+  if (!tempMemory) // no caller-provided scratch buffer: allocate one here (freed before returning)
+  {
+    size_t pitch;
+#ifdef DEVICE_TIMER
+    auto start_malloc2 = std::chrono::steady_clock::now();
+#endif
+    memoryTmp = (float *)infra::sift_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float), q_ct);
+    q_ct.wait();
+
+#ifdef DEVICE_TIMER
+    auto stop_malloc2 = std::chrono::steady_clock::now();
+    // printf("Malloc time for memoryTmp =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count());
+    totTime += std::chrono::duration<float, std::micro>(stop_malloc2 - start_malloc2).count();
+#endif
+  }
+  float *memorySub = memoryTmp + sizeTmp;
+  // The blurred base image (lowImg) is placed at the start of the pyramid area, memorySub.
+  CudaImage lowImg;
+  lowImg.Allocate(width, height, iAlignUp(width, 128), false, q_ct, totTime, memorySub);
+  if (!scaleUp)
+  {
+    float kernel[8 * 12 * 16];
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+#ifdef DEVICE_TIMER
+    auto start_memcpy1 = std::chrono::steady_clock::now();
+#endif
+    q_ct.memcpy(d_LaplaceKernel.get_ptr(), kernel, 8 * 12 * 16 * sizeof(float));
+    q_ct.wait();
+
+#ifdef DEVICE_TIMER
+    auto stop_memcpy1 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy1 - start_memcpy1).count();
+#endif
+
+    LowPass(lowImg, img, fmax(initBlur, 0.001f), q_ct, totTime);
+
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), q_ct, totTime);
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy2 = std::chrono::steady_clock::now();
+#endif
+    q_ct.memcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int)); // read back the final point count
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy2 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy2 - start_memcpy2).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts);
+  }
+  else
+  {
+    CudaImage upImg; // upscaled input; it is placed at the start of the scratch buffer (memoryTmp)
+    upImg.Allocate(width, height, iAlignUp(width, 128), false, q_ct, totTime, memoryTmp);
+    ScaleUp(upImg, img, q_ct, totTime);
+    LowPass(lowImg, upImg, fmax(initBlur, 0.001f), q_ct, totTime);
+    float kernel[8 * 12 * 16];
+    PrepareLaplaceKernels(numOctaves, 0.0f, kernel);
+#ifdef DEVICE_TIMER
+    auto start_memcpy3 = std::chrono::steady_clock::now();
+#endif
+    safeCall(
+        (q_ct.memcpy(d_LaplaceKernel.get_ptr(), kernel,
+                     8 * 12 * 16 * sizeof(float)),
+         0));
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy3 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy3 - start_memcpy3).count();
+#endif
+    ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp,
+                    memorySub + height * iAlignUp(width, 128), q_ct, totTime);
+#ifdef DEVICE_TIMER
+    auto start_memcpy4 = std::chrono::steady_clock::now();
+#endif
+    safeCall((q_ct.memcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves],
+                          sizeof(int)),
+              0));
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy4 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy4 - start_memcpy4).count();
+#endif
+    siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts);
+    RescalePositions(siftData, 0.5f, q_ct, totTime); // the image was doubled, so positions are scaled by 0.5
+  }
+  // Release the scratch buffer only if it was allocated here, then copy the points to the host buffer if there is one.
+  if (!tempMemory)
+    safeCall((sycl::free(memoryTmp, q_ct), 0));
+  if (siftData.h_data)
+  {
+#ifdef DEVICE_TIMER
+    auto start_memcpy5 = std::chrono::steady_clock::now();
+#endif
+    q_ct.memcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts);
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy5 = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy5 - start_memcpy5).count();
+    printf("Total time for sift extraction =  %.2f us\n\n", totTime);
+#endif
+    printf("Number of Points after sift extraction =  %d\n\n", siftData.numPts);
+  }
+}
+
+// Processes the coarser octaves first (recursively, on a half-size copy of img), then this octave; always returns 0.
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,
+                    float subsampling, float *memoryTmp, float *memorySub, sycl::queue &q_ct, float &totTime)
+{
+  if (numOctaves > 1)
+  {
+    CudaImage subImg; // half-size image, placed at memorySub
+    const int halfW = img.width / 2, halfH = img.height / 2;
+    const int halfPitch = iAlignUp(halfW, 128);
+    subImg.Allocate(halfW, halfH, halfPitch, false, q_ct, totTime, memorySub);
+    ScaleDown(subImg, img, 0.5f, q_ct, totTime);
+    const float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f;
+    ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale,
+                    subsampling * 2.0f, memoryTmp, memorySub + halfH * halfPitch, q_ct, totTime);
+  }
+  ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, q_ct, totTime);
+  return 0;
+}
+
+// Copies element (idx[0], idx[1]) of the float image into the x component of the same
+// element of the float4 image; width and height are not used here.
+void c1toc4(float *f_ptr, sycl::float4 *f4_ptr, int width, int height,
+            int f_pitch, int f4_pitch, sycl::id<2> idx)
+{
+  const int row = idx[0];
+  const int col = idx[1];
+  // Pitches are counted in elements of the respective buffer, since they offset typed pointers.
+  (*(f4_ptr + f4_pitch * row + col)).x() = *(f_ptr + f_pitch * row + col);
+}
+
+// Runs the per-octave stages on img in order: LaplaceMulti, FindPointsMulti, ComputeOrientations and
+// ExtractSiftDescriptors. The nd - 1 difference images are carved out of memoryTmp (w x h, pitch p each).
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale,
+                       float subsampling, float *memoryTmp, sycl::queue &q_ct, float &totTime)
+{
+  const int nd = NUM_SCALES + 3;
+  CudaImage diffImg[nd];
+  int w = img.width;
+  int h = img.height;
+  int p = iAlignUp(w, 128);
+  for (int i = 0; i < nd - 1; i++)
+    diffImg[i].Allocate(w, h, p, false, q_ct, totTime, memoryTmp + i * p * h);
+  LaplaceMulti(img, diffImg, octave, q_ct, totTime);
+  FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, q_ct, totTime);
+  ComputeOrientations(img, siftData, octave, q_ct, totTime);
+  ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, q_ct, totTime);
+}
+
+void InitSiftData(SiftData &data, sycl::queue &q_ct, float &time, int num, bool host, bool dev)
+{ // Resets data to hold up to num points and allocates the host and/or device buffer as requested.
+  data.numPts = 0;
+  data.maxPts = num;
+  const size_t sz = sizeof(SiftPoint) * (size_t)num; // size_t: an int byte count overflows once the product passes INT_MAX
+  data.h_data = NULL;
+  if (host)
+    data.h_data = (SiftPoint *)malloc(sz);
+  data.d_data = NULL;
+  if (dev)
+  {
+#ifdef DEVICE_TIMER
+    auto start_malloc = std::chrono::steady_clock::now();
+#endif
+    data.d_data = (SiftPoint *)sycl::malloc_device(sz, q_ct);
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_malloc = std::chrono::steady_clock::now();
+    time += std::chrono::duration<float, std::micro>(stop_malloc - start_malloc).count(); // device allocation time, in microseconds
+#endif
+  }
+}
+
+void FreeSiftData(SiftData &data, sycl::queue &q_ct)
+{ // Releases the device and host point buffers and resets the counters; safe to call more than once.
+  if (data.d_data != NULL)
+    sycl::free(data.d_data, q_ct.get_context());
+  data.d_data = NULL;
+  free(data.h_data);  // free(NULL) is a no-op, so no guard is required
+  data.h_data = NULL; // cleared so a repeated call cannot double-free and readers never see a dangling pointer
+  data.numPts = 0;
+  data.maxPts = 0;
+}
+
+void PrintSiftData(SiftData &data, sycl::queue &q_ct)
+{ // Prints every stored feature, then the point counts; when no host copy exists one is fetched from the device first.
+  SiftPoint *pts = data.h_data;
+  if (pts == NULL)
+  {
+    pts = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts);
+    q_ct.memcpy(pts, data.d_data, sizeof(SiftPoint) * data.numPts).wait();
+    data.h_data = pts; // the fresh host copy stays attached to data
+  }
+  for (int n = 0; n < data.numPts; n++)
+  {
+    SiftPoint &pt = pts[n];
+    printf("xpos         = %.2f\n", pt.xpos);
+    printf("ypos         = %.2f\n", pt.ypos);
+    printf("scale        = %.2f\n", pt.scale);
+    printf("sharpness    = %.2f\n", pt.sharpness);
+    printf("edgeness     = %.2f\n", pt.edgeness);
+    printf("orientation  = %.2f\n", pt.orientation);
+    printf("score        = %.2f\n", pt.score);
+    float *desc = (float *)&pt.data;
+    for (int row = 0; row < 8; row++)
+    { // one output line per row: 16 values read at a stride of 8 floats
+      printf(row == 0 ? "data = " : "       ");
+      for (int col = 0; col < 16; col++)
+      {
+        const float v = desc[row + 8 * col];
+        if (v < 0.05)
+          printf(" .   ");
+        else
+          printf("%.2f ", v);
+      }
+      printf("\n");
+    }
+  }
+  printf("Number of available points: %d\n", data.numPts);
+  printf("Number of allocated points: %d\n", data.maxPts);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Host side master functions
+///////////////////////////////////////////////////////////////////////////////
+
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: uploads a 5-tap filter when variance changes, launches the ScaleDown device overload and waits. Always returns 0.0.
+  static float oldVariance = -1.0f; // variance of the filter uploaded last; function-local static, so shared by all calls
+  if (res.d_data == NULL || src.d_data == NULL) // nothing is submitted when either image lacks a device buffer
+  {
+    printf("ScaleDown: missing data\n");
+    return 0.0;
+  }
+  if (oldVariance != variance) // rebuild and re-upload the coefficients only when the requested variance differs
+  {
+    float h_Kernel[5];
+    float kernelSum = 0.0f;
+    for (int j = 0; j < 5; j++)
+    {
+      h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); // exp(-(j-2)^2 / (2 * variance)), centred on tap 2
+      kernelSum += h_Kernel[j];
+    }
+    for (int j = 0; j < 5; j++)
+      h_Kernel[j] /= kernelSum; // normalise so the five taps sum to 1
+
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    q_ct.memcpy(d_ScaleDownKernel.get_ptr(), h_Kernel, 5 * sizeof(float)).wait(); // blocking copy of the taps to the pointer d_ScaleDownKernel exposes
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    totTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count(); // upload time, in microseconds
+#endif
+    oldVariance = variance;
+  }
+#if 0
+  dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); // disabled dim3-based launch geometry
+  dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4);
+#else
+  sycl::range<3> blocks(1, iDivUp(src.height, SCALEDOWN_H),
+                        iDivUp(src.width, SCALEDOWN_W)); // work-group counts: image rows in dim 1, columns in dim 2
+  sycl::range<3> threads(1, 1, SCALEDOWN_W + 4); // each work-group is a single row of SCALEDOWN_W + 4 work-items
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+                                     d_ScaleDownKernel.init(); // NOTE(review): called on every submit; presumably idempotent - confirm in the infra helper
+
+                                     auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr();
+
+                                     sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         inrow_acc_ct1(sycl::range<1>(68 /*SCALEDOWN_W+4*/), cgh); // local scratch; the literal sizes must track the macros named inline
+                                     sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         brow_acc_ct1(sycl::range<1>(160 /*5*(SCALEDOWN_W/2)*/), cgh);
+                                     sycl::accessor<int, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         yRead_acc_ct1(sycl::range<1>(20 /*SCALEDOWN_H+4*/), cgh);
+                                     sycl::accessor<int, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         yWrite_acc_ct1(sycl::range<1>(20 /*SCALEDOWN_H+4*/), cgh);
+
+                                     auto res_data_ct1 = res.d_data; // pointers and sizes are copied into locals so the kernel lambda captures them by value
+                                     auto src_data_ct1 = src.d_data;
+                                     auto src_width = src.width;
+                                     auto src_pitch = src.pitch;
+                                     auto src_height = src.height;
+                                     auto res_pitch = res.pitch;
+
+                                     cgh.parallel_for(
+                                         sycl::nd_range<3>(blocks * threads, threads),
+                                         [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                             [[intel::reqd_sub_group_size(32)]]
+#endif
+                                         {                                           
+                                           ScaleDown(res_data_ct1, src_data_ct1, src_width, src_pitch, src_height,
+                                                     res_pitch, item_ct1, d_ScaleDownKernel_ptr_ct1,
+                                                     inrow_acc_ct1.get_pointer(), brow_acc_ct1.get_pointer(),
+                                                     yRead_acc_ct1.get_pointer(), yWrite_acc_ct1.get_pointer());
+                                         }); })
+      .wait(); // block the host until the kernel has completed
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ScaleDown time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+#endif
+  checkMsg("ScaleDown() execution failed\n");
+  return 0.0; // the return value carries no information
+}
+
+double ScaleUp(CudaImage &res, CudaImage &src, sycl::queue &q_ct, float &totTime)
+{ // Host-side launcher for the ScaleUp device overload (src -> res); blocks until the kernel is done. Always returns 0.0.
+  const bool haveBuffers = (res.d_data != NULL) && (src.d_data != NULL);
+  if (!haveBuffers)
+  {
+    printf("ScaleUp: missing data\n");
+    return 0.0;
+  }
+  // Launch geometry: work-groups tile the result image in SCALEUP_H x SCALEUP_W blocks,
+  // and each work-group holds (SCALEUP_H / 2) x (SCALEUP_W / 2) work-items.
+  const sycl::range<3> groupCount(1, iDivUp(res.height, SCALEUP_H), iDivUp(res.width, SCALEUP_W));
+  const sycl::range<3> groupSize(1, SCALEUP_H / 2, SCALEUP_W / 2);
+
+#ifdef DEVICE_TIMER
+  const auto t_begin = std::chrono::steady_clock::now();
+#endif
+
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+                // Snapshot the pointers and sizes so the kernel lambda below captures plain values.
+                auto srcPtr = src.d_data;
+                auto dstPtr = res.d_data;
+                auto srcW = src.width;
+                auto srcPitch = src.pitch;
+                auto srcH = src.height;
+                auto dstPitch = res.pitch;
+                cgh.parallel_for(sycl::nd_range<3>(groupCount * groupSize, groupSize), [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                 [[intel::reqd_sub_group_size(32)]]
+#endif
+                                 {
+                                   ScaleUp(dstPtr, srcPtr, srcW, srcPitch, srcH, dstPitch, item_ct1);
+                                 }); })
+      .wait();
+
+#ifdef DEVICE_TIMER
+  // Elapsed submit-to-completion time is accumulated into totTime in microseconds.
+  const auto t_end = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(t_end - t_begin).count();
+#endif
+  checkMsg("ScaleUp() execution failed\n");
+  return 0.0;
+}
+
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: launches ComputeOrientationsCONSTNew on src and siftData.d_data for the given octave and waits. Always returns 0.0.
+  sycl::range<3> blocks(1, 1, 512);  // fixed launch: 512 work-groups ...
+  sycl::range<3> threads(1, 1, 256); // ... of 256 work-items each, independent of the image or point count
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+
+                auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();                
+                auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); // NOTE(review): no init() calls here, unlike FindPointsMulti/ExtractSiftDescriptors - presumably relies on an earlier call
+
+                sycl::accessor<float, 2, sycl::access_mode::read_write,
+                                sycl::access::target::local>
+                    img_acc_ct1(sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh); // local scratch; 19 is the hard-coded value the inline comments attribute to WID
+                sycl::accessor<float, 2, sycl::access_mode::read_write,
+                                sycl::access::target::local>
+                    tmp_acc_ct1(sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh);
+                sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                sycl::access::target::local>
+                    hist_acc_ct1(sycl::range<1>(64 /*2*LEN*/), cgh);
+                sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                sycl::access::target::local>
+                    gaussx_acc_ct1(sycl::range<1>(19 /*WID*/), cgh);
+                sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                sycl::access::target::local>
+                    gaussy_acc_ct1(sycl::range<1>(19 /*WID*/), cgh);
+
+                auto src_data_ct1 = src.d_data; // values copied into locals so the kernel lambda captures them by value
+                auto src_width = src.width;
+                auto src_pitch = src.pitch;
+                auto src_height = src.height;
+                auto siftData_data_ct1 = siftData.d_data;
+
+                cgh.parallel_for(
+                    sycl::nd_range<3>(blocks * threads, threads),
+                    [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                        [[intel::reqd_sub_group_size(32)]]
+#endif
+                    {
+                      ComputeOrientationsCONSTNew(
+                          src_data_ct1, src_width, src_pitch, src_height, siftData_data_ct1,
+                          octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, // the point limit is dereferenced inside the kernel
+                          img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(),
+                          gaussx_acc_ct1.get_pointer(), gaussy_acc_ct1.get_pointer());
+                    }); })
+      .wait(); // block the host until the kernel has completed
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ComputeOrientationsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+  checkMsg("ComputeOrientations() execution failed\n");
+  return 0.0; // the return value carries no information
+}
+
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: launches ExtractSiftDescriptorsCONSTNew on the image pointer texObj and siftData.d_data, then waits. Always returns 0.0.
+  sycl::range<3> blocks(1, 1, 512); // fixed launch: 512 work-groups ...
+  sycl::range<3> threads(1, 8, 16); // ... of 8 x 16 work-items each
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+                                     d_MaxNumPoints.init(); // NOTE(review): init() runs on every submit; presumably idempotent - confirm in the infra helper
+                                     d_PointCounter.init();
+
+                                     auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();
+                                     auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();
+
+                                     sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         gauss_acc_ct1(sycl::range<1>(16), cgh); // local scratch arrays handed to the kernel as raw pointers
+                                     sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         buffer_acc_ct1(sycl::range<1>(128), cgh);
+                                     sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         sums_acc_ct1(sycl::range<1>(4), cgh);
+
+                                     auto siftData_data_ct1 = siftData.d_data; // copied so the kernel lambda captures the pointer by value
+
+                                     cgh.parallel_for(
+                                         sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                          [[intel::reqd_sub_group_size(32)]]
+#endif
+                                         { 
+                                               ExtractSiftDescriptorsCONSTNew(
+                                               texObj, pitch,
+                                               siftData_data_ct1, subsampling, octave, item_ct1,
+                                               *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, // the point limit is dereferenced inside the kernel
+                                               gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(),
+                                               sums_acc_ct1.get_pointer()); }); })
+      .wait(); // block the host until the kernel has completed
+
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("ExtractSiftDescriptorsCONSTNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+  checkMsg("ExtractSiftDescriptors() execution failed\n");
+  return 0.0; // the return value carries no information
+}
+
+double RescalePositions(SiftData &siftData, float scale, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: runs the RescalePositions device overload over the numPts entries of siftData.d_data and waits. Always returns 0.0.
+  if (siftData.numPts <= 0) // nothing to rescale; also avoids submitting an nd_range whose global size would be zero
+    return 0.0;
+  sycl::range<3> blocks(1, 1, iDivUp(siftData.numPts, 64)); // enough 64-wide work-groups to cover every point
+  sycl::range<3> threads(1, 1, 64);
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+                                     auto siftData_data_ct1 = siftData.d_data; // copied so the kernel lambda captures plain values
+                                     auto siftData_numPts = siftData.numPts;
+                                     cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                                         [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                             [[intel::reqd_sub_group_size(32)]]
+#endif
+                                         {
+                                           RescalePositions(siftData_data_ct1, siftData_numPts, scale, item_ct1);
+                                         }); })
+      .wait(); // block the host until the kernel has completed
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+  checkMsg("RescalePositions() execution failed\n");
+  return 0.0;
+}
+
+double LowPass(CudaImage &res, CudaImage &src, float scale, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: refreshes the filter taps when scale changes, runs LowPassBlockOld from src into res and waits. Always returns 0.0.
+  try
+  {
+    float kernel[2 * LOWPASS_R + 1]; // only filled (and only read) when the taps have to be rebuilt
+    static float oldScale = -1.0f;   // scale of the taps uploaded last; function-local static, so shared by all calls
+    if (scale != oldScale)
+    {
+      float kernelSum = 0.0f;
+      float ivar2 = 1.0f / (2.0f * scale * scale);
+      for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+      {
+        kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); // exp(-j^2 / (2 * scale^2))
+        kernelSum += kernel[j + LOWPASS_R];
+      }
+      for (int j = -LOWPASS_R; j <= LOWPASS_R; j++)
+        kernel[j + LOWPASS_R] /= kernelSum; // normalise so the taps sum to 1
+
+#ifdef DEVICE_TIMER
+      auto start_memcpy_1 = std::chrono::steady_clock::now();
+#endif
+      q_ct.memcpy(d_LowPassKernel.get_ptr(), kernel, (2 * LOWPASS_R + 1) * sizeof(float));
+      q_ct.wait(); // the stack array must stay alive until the copy has finished
+#ifdef DEVICE_TIMER
+      auto stop_memcpy_1 = std::chrono::steady_clock::now();
+      totTime += std::chrono::duration<float, std::micro>(stop_memcpy_1 - start_memcpy_1).count(); // upload time, in microseconds
+#endif
+      oldScale = scale;
+    }
+    int width = res.width;
+    int pitch = res.pitch;
+    int height = res.height;
+    sycl::range<3> blocks(1, iDivUp(height, LOWPASS_H), iDivUp(width, LOWPASS_W)); //(1, 34, 80)
+    sycl::range<3> threads(1, 4, LOWPASS_W + 2 * LOWPASS_R);                       //(1, 4, 32)
+
+#ifdef DEVICE_TIMER
+    auto start_kernel = std::chrono::steady_clock::now();
+#endif
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                     auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr();
+
+                                     auto src_data_ct1 = src.d_data; // copied so the kernel lambda captures the pointers by value
+                                     auto res_data_ct1 = res.d_data;
+
+                                     sycl::accessor<float, 2, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         xrows_acc_ct1(sycl::range<2>(16, 32), cgh); // 16 x 32 local scratch passed to the kernel
+                                     cgh.parallel_for(
+                                         sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                          [[intel::reqd_sub_group_size(32)]]
+#endif
+                                         { LowPassBlockOld(src_data_ct1, res_data_ct1, width, pitch, height, item_ct1,
+                                                           d_LowPassKernel_ptr_ct1, xrows_acc_ct1); }); })
+        .wait();
+#ifdef DEVICE_TIMER
+    auto stop_kernel = std::chrono::steady_clock::now();
+    // printf("LowPassBlock time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+    totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+    checkMsg("LowPass() execution failed\n");
+    return 0.0;
+  }
+  catch (sycl::exception const &e)
+  {
+    std::cout << e.what() << '\n';
+    return 0.0; // flowing off the end of a function returning double is undefined behaviour, so return explicitly
+  }
+}
+
+//==================== Multi-scale functions ===================//
+
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel)
+{ // Fills the coefficient rows of octave numOctaves and, through recursion, of every lower-numbered octave down to 1.
+  if (numOctaves > 1) // lower-numbered octaves are filled first, each with the blur carried down one level
+    PrepareLaplaceKernels(numOctaves - 1, (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f, kernel);
+  // Layout: this octave's rows start at float offset numOctaves * 12 * 16, and consecutive rows are 16 floats apart.
+  float *octaveBase = kernel + numOctaves * 12 * 16;
+  float scale = pow(2.0f, -1.0f / NUM_SCALES);
+  const float diffScale = pow(2.0f, 1.0f / NUM_SCALES);
+  for (int i = 0; i < NUM_SCALES + 3; i++, scale *= diffScale)
+  {
+    float *taps = octaveBase + 16 * i;
+    const float var = scale * scale - initBlur * initBlur;
+    float kernelSum = 0.0f;
+    for (int j = 0; j <= LAPLACE_R; j++)
+    {
+      taps[j] = (float)expf(-(double)j * j / 2.0 / var);
+      kernelSum += (j == 0 ? 1 : 2) * taps[j]; // only one half is stored, so every tap except the centre counts twice
+    }
+    // Normalise the stored half so that the full symmetric kernel sums to 1.
+    for (int j = 0; j <= LAPLACE_R; j++)
+      taps[j] /= kernelSum;
+  }
+}
+
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: launches LaplaceMultiMem from baseImage into the results images for the given octave and waits. Always returns 0.0.
+  int width = results[0].width; // image geometry is taken from the first result image
+  int pitch = results[0].pitch;
+  int height = results[0].height;
+
+#if 1
+  sycl::range<3> threads(1, 1, LAPLACE_W + 2 * LAPLACE_R);    //(1, 1, 136)
+  sycl::range<3> blocks(1, height, iDivUp(width, LAPLACE_W)); //(1, 1080, 15)
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+
+  q_ct.submit([&](sycl::handler &cgh)
+              {
+        float *d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr();
+        sycl::accessor<float, 1, sycl::access_mode::read_write,
+                       sycl::access::target::local>
+            buff_acc_ct1(
+                sycl::range<1>(1088 /*(LAPLACE_W + 2*LAPLACE_R)*LAPLACE_S*/), cgh);                       
+
+        float *results_d_data_ct1 = results[0].d_data; // only the first result pointer is passed; presumably the kernel reaches the others relative to it - confirm
+        float *baseImage_data_ct1 = baseImage.d_data;
+        cgh.parallel_for(
+            sycl::nd_range<3>(blocks * threads, threads),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              LaplaceMultiMem(baseImage_data_ct1, results_d_data_ct1,
+                              width, pitch, height, octave, item_ct1,
+                              d_LaplaceKernel_ptr_ct1,
+                              buff_acc_ct1.get_pointer());
+            }); })
+      .wait(); // block the host until the kernel has completed
+
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("LaplaceMultiMem time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count());
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+#endif
+  checkMsg("LaplaceMulti() execution failed\n");
+  return 0.0; // the return value carries no information
+}
+
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, sycl::queue &q_ct, float &totTime)
+{ // Host wrapper: launches FindPointsMultiNew on the sources images, writing into siftData.d_data, and waits. Always returns 0.0.
+  if (sources->d_data == NULL) // only the first source image is checked
+  {
+    printf("FindPointsMulti: missing data\n");
+    return 0.0;
+  }
+  int w = sources->width; // image geometry is taken from the first source image
+  int p = sources->pitch;
+  int h = sources->height;
+#if 1
+  sycl::range<3> blocks(1, iDivUp(h, MINMAX_H),
+                        iDivUp(w, MINMAX_W) * NUM_SCALES); // the column tiling is repeated NUM_SCALES times along dim 2
+  sycl::range<3> threads(1, 1, MINMAX_W + 2);
+
+#ifdef DEVICE_TIMER
+  auto start_kernel = std::chrono::steady_clock::now();
+#endif
+  auto event_FindPointsMulti = q_ct.submit([&](sycl::handler &cgh)
+                                           {
+                                     d_MaxNumPoints.init(); // NOTE(review): init() runs on every submit; presumably idempotent - confirm in the infra helper
+                                     d_PointCounter.init();
+
+                                     auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();
+                                     auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();
+
+                                     sycl::accessor<unsigned short, 1, sycl::access_mode::read_write,
+                                                    sycl::access::target::local>
+                                         points_acc_ct1(sycl::range<1>(64 /*2*MEMWID*/), cgh); // local scratch; 64 is the hard-coded value the inline comment gives for 2*MEMWID
+
+                                     auto sources_d_data_ct0 = sources->d_data; // only the first source pointer is passed; presumably the kernel reaches the others relative to it - confirm
+                                     auto siftData_data_ct1 = siftData.d_data;
+
+                                     cgh.parallel_for(
+                                         sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                          [[intel::reqd_sub_group_size(32)]]
+#endif
+                                         { FindPointsMultiNew(sources_d_data_ct0, siftData_data_ct1, w, p, h,
+                                                              subsampling, lowestScale, thresh, factor,
+                                                              edgeLimit, octave, item_ct1,
+                                                              *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, // the point limit is dereferenced inside the kernel
+                                                              points_acc_ct1.get_pointer()); }); });
+  event_FindPointsMulti.wait(); // block the host until the kernel has completed
+#ifdef DEVICE_TIMER
+  auto stop_kernel = std::chrono::steady_clock::now();
+  // printf("FindPointsMultiNew time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count())
+  totTime += std::chrono::duration<float, std::micro>(stop_kernel - start_kernel).count(); // submit-to-completion time, in microseconds
+#endif
+#endif
+  checkMsg("FindPointsMulti() execution failed\n");
+  return 0.0; // the return value carries no information
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h
new file mode 100644
index 000000000..746c25a8e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h
@@ -0,0 +1,52 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTH_H
+#define CUDASIFTH_H
+
+#include <sycl/sycl.hpp>
+
+#include "infra/infra.hpp"
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, // recursive multi-octave driver; returns 0
+                    float lowestScale, float subsampling, float *memoryTmp, float *memorySub, sycl::queue &q_ct, float &totTime);
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale, float subsampling, // one octave: Laplace, point search, orientations, descriptors
+                       float *memoryTmp, sycl::queue &q_ct, float &totTime);
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, sycl::queue &q_ct, float &totTime); // host launcher; blocks until done, returns 0.0
+double ScaleUp(CudaImage &res, CudaImage &src, sycl::queue &q_ct, float &totTime); // host launcher; blocks until done, returns 0.0
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, sycl::queue &q_ct, float &totTime); // host launcher; blocks until done, returns 0.0
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, // host launcher; texObj is a plain device image pointer
+                              int octave, sycl::queue &q_ct, float &totTime);
+double RescalePositions(SiftData &siftData, float scale, sycl::queue &q_ct, float &totTime); // host launcher over siftData.numPts points; returns 0.0
+double LowPass(CudaImage &res, CudaImage &src, float scale, sycl::queue &q_ct, float &totTime); // host launcher; SYCL exceptions are caught and printed
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel); // fills the host-side coefficient table, recursing over octaves
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, sycl::queue &q_ct, float &totTime); // host launcher; geometry comes from results[0]
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, float lowestScale, // host launcher; returns 0.0
+                       float subsampling, int octave, sycl::queue &q_ct, float &totTime);
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h
new file mode 100644
index 000000000..7e1ca317f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h
@@ -0,0 +1,108 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDAUTILS_H
+#define CUDAUTILS_H
+
+#include <sycl/sycl.hpp>
+#include <cstdio>
+#include <iostream>
+#include <chrono>
+
+#ifdef WIN32
+#include <intrin.h>
+#endif
+
+#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
+#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
+
+inline void __safeCall(int err, const char *file, const int line)
+{ // No-op: the body is empty, so err, file and line are all ignored.
+}
+
+inline void __checkMsg(const char *errorMessage, const char *file, const int line)
+{
+  // No-op: no error state is queried here, so the message and the source location are ignored.
+}
+
+class TimerCPU
+{ // Stopwatch built on the x86 time-stamp counter; construction records the starting tick.
+  static const int bits = 10; // number of low-order counter bits discarded on every reading taken by this class
+
+public:
+  long long beg_clock; // scaled counter value captured by the constructor
+  float freq;          // clock frequency in MHz, exactly as supplied by the caller
+  TimerCPU(float freq_) : freq(freq_)
+  { // freq = clock frequency in MHz
+    beg_clock = getTSC(bits);
+  }
+  long long getTSC(int bits) // returns the 64-bit counter shifted right by `bits` (the parameter shadows the class constant)
+  {
+#ifdef WIN32
+    return __rdtsc() / (1LL << bits);
+#else
+    unsigned int low, high;
+    __asm__ __volatile__(".byte 0x0f, 0x31" // rdtsc opcode; volatile stops the compiler from merging or hoisting successive reads
+                         : "=a"(low), "=d"(high));
+    return ((long long)high << (32 - bits)) | ((long long)low >> bits); // same as ((high << 32) | low) >> bits
+#endif
+  }
+  float read() // elapsed time since construction; the arithmetic yields milliseconds when freq is in MHz
+  {
+    long long end_clock = getTSC(bits);
+    long long Kcycles = end_clock - beg_clock; // elapsed count in units of 2^bits cycles
+    float time = (float)(1 << bits) * Kcycles / freq / 1e3f;
+    return time;
+  }
+};
+
+template <class T>
+__inline__ T ShiftDown(T var, unsigned int delta, sycl::nd_item<3> item_ct1, int width = 32)
+{ // Sub-group shift-left of var by delta lanes; width is only used by the non-SYCL fallback.
+#if (SYCL_LANGUAGE_VERSION < 9000)
+  return __shfl_down(var, delta, width);
+#else
+  return sycl::shift_group_left(item_ct1.get_sub_group(), var, delta);
+#endif
+}
+
+template <class T>
+__inline__ T ShiftUp(T var, unsigned int delta, sycl::nd_item<3> item_ct1, int width = 32)
+{ // Sub-group shift-right of var by delta lanes; width is only used by the non-SYCL fallback.
+#if (SYCL_LANGUAGE_VERSION < 9000)
+  return __shfl_up(var, delta, width);
+#else
+  return sycl::shift_group_right(item_ct1.get_sub_group(), var, delta);
+#endif
+}
+
+template <class T>
+__inline__ T Shuffle(T var, unsigned int lane, sycl::nd_item<3> item_ct1, int width = 32)
+{ // Reads var from the given lane of the caller's sub-group; width is only used by the non-SYCL fallback.
+#if (SYCL_LANGUAGE_VERSION < 9000)
+  return __shfl(var, lane, width);
+#else
+  return sycl::select_from_group(item_ct1.get_sub_group(), var, lane);
+#endif
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp
new file mode 100644
index 000000000..c01e6e7d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+#include <cmath>
+#include <opencv2/core/core.hpp>
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{ // Refits the 9-element homography to the matches in data over numLoops passes; returns how many points end within thresh.
+#ifdef MANAGEDMEM
+  SiftPoint *mpts = data.m_data;
+#else
+  if (data.h_data==NULL)
+    return 0; // no host-side point array to work on
+  SiftPoint *mpts = data.h_data;
+#endif
+  float limit = thresh*thresh; // errors below are squared distances, hence the squared threshold
+  int numPts = data.numPts;
+  cv::Mat M(8, 8, CV_64FC1); // accumulates weighted Y*Y^T; left-hand side of the system solved each pass
+  cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1); // A: current 8 parameters, X: accumulated right-hand side
+  double Y[8]; // one equation row; filled twice per point (x equation, then y equation)
+  for (int i=0;i<8;i++) 
+    A.at<double>(i, 0) = homography[i] / homography[8]; // start from the input scaled so that element 8 equals 1
+  for (int loop=0;loop<numLoops;loop++) {
+    M = cv::Scalar(0.0);
+    X = cv::Scalar(0.0);
+    for (int i=0;i<numPts;i++) {
+      SiftPoint &pt = mpts[i];
+      if (pt.score<minScore || pt.ambiguity>maxAmbiguity)
+	continue; // point fails the score/ambiguity limits and contributes nothing to this pass
+      float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0f;
+      float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+      float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+      float err = dx*dx + dy*dy; // squared distance between the projected point and its recorded match
+      float wei = (err<limit ? 1.0f : 0.0f); //limit / (err + limit);
+      Y[0] = pt.xpos;
+      Y[1] = pt.ypos;
+      Y[2] = 1.0;
+      Y[3] = Y[4] = Y[5] = 0.0;
+      Y[6] = - pt.xpos * pt.match_xpos;
+      Y[7] = - pt.ypos * pt.match_xpos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei); // the Mat wraps Y without copying it
+      Y[0] = Y[1] = Y[2] = 0.0;
+      Y[3] = pt.xpos;
+      Y[4] = pt.ypos; 
+      Y[5] = 1.0;
+      Y[6] = - pt.xpos * pt.match_ypos;
+      Y[7] = - pt.ypos * pt.match_ypos;
+      for (int c=0;c<8;c++) 
+        for (int r=0;r<8;r++) 
+          M.at<double>(r,c) += (Y[c] * Y[r] * wei);
+      X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei);
+    }
+    cv::solve(M, X, A, cv::DECOMP_CHOLESKY); // overwrites A with the new estimate; the return value is not checked
+  }
+  int numfit = 0;
+  for (int i=0;i<numPts;i++) { // final pass visits every point; the score/ambiguity filter is not applied here
+    SiftPoint &pt = mpts[i];
+    float den = A.at<double>(6)*pt.xpos + A.at<double>(7)*pt.ypos + 1.0;
+    float dx = (A.at<double>(0)*pt.xpos + A.at<double>(1)*pt.ypos + A.at<double>(2)) / den - pt.match_xpos;
+    float dy = (A.at<double>(3)*pt.xpos + A.at<double>(4)*pt.ypos + A.at<double>(5)) / den - pt.match_ypos;
+    float err = dx*dx + dy*dy;
+    if (err<limit) 
+      numfit++;
+    pt.match_error = sqrt(err); // stored for every point, whether or not it counted as a fit
+  }
+  for (int i=0;i<8;i++) 
+    homography[i] = A.at<double>(i); // write the refined parameters back to the caller's array
+  homography[8] = 1.0f;
+  return numfit;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp
new file mode 100644
index 000000000..922c88c3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp
@@ -0,0 +1,317 @@
+//==---- atomic.hpp -------------------------------*- C++ -*----------------==//
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+//===----------------------------------------------------------------------===//
+
+#ifndef __INFRA_ATOMIC_HPP__
+#define __INFRA_ATOMIC_HPP__
+
+#include <sycl/sycl.hpp>
+
+namespace infra
+{
+
+  /// Atomic fetch-and-add for element types that sycl::atomic handles
+  /// directly (the integer overload).
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Amount added to the value at \p addr.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the addition.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_add(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_add(cell, operand, memoryOrder);
+  }
+
+  /// Atomically add the value operand to the value at the addr and assign the
+  /// result to the value at addr, Float version. Implemented as a
+  /// compare-exchange loop over the float's int bit pattern.
+  /// \param [in, out] addr The pointer to the data.
+  /// \param operand The value to add to the value at \p addr.
+  /// \returns The value at the \p addr before the call.
+  template <sycl::access::address_space addressSpace =
+                sycl::access::address_space::global_space>
+  inline float atomic_fetch_add(
+      float *addr, float operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    static_assert(sizeof(float) == sizeof(int), "Mismatched type size");
+    // The storage is accessed through an int-typed atomic of the same size.
+    sycl::atomic<int, addressSpace> obj(
+        (sycl::multi_ptr<int, addressSpace>(reinterpret_cast<int *>(addr))));
+
+    int old_value;
+    float old_float_value;
+    // Retry until no other update slipped in between the load and the exchange.
+    do
+    {
+      old_value = obj.load(memoryOrder);
+      // bit_cast converts between bit patterns without pointer type punning.
+      old_float_value = sycl::bit_cast<float>(old_value);
+      const float new_float_value = old_float_value + operand;
+      const int new_value = sycl::bit_cast<int>(new_float_value);
+      if (obj.compare_exchange_strong(old_value, new_value, memoryOrder))
+        break;
+    } while (true);
+    return old_float_value;
+  }
+
+  /// Atomically add the value operand to the value at the addr and assign the
+  /// result to the value at addr, Double version. Implemented as a
+  /// compare-exchange loop over the double's 64-bit integer bit pattern.
+  /// \param [in, out] addr The pointer to the data.
+  /// \param operand The value to add to the value at \p addr
+  /// \returns The value at the \p addr before the call.
+  template <sycl::access::address_space addressSpace =
+                sycl::access::address_space::global_space>
+  inline double atomic_fetch_add(
+      double *addr, double operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    static_assert(sizeof(double) == sizeof(unsigned long long int),
+                  "Mismatched type size");
+    // The storage is accessed through an integer-typed atomic of the same size.
+    sycl::atomic<unsigned long long int, addressSpace> obj(
+        (sycl::multi_ptr<unsigned long long int, addressSpace>(
+            reinterpret_cast<unsigned long long int *>(addr))));
+
+    unsigned long long int old_value;
+    double old_double_value;
+    // Retry until no other update slipped in between the load and the exchange.
+    do
+    {
+      old_value = obj.load(memoryOrder);
+      // bit_cast converts between bit patterns without pointer type punning.
+      old_double_value = sycl::bit_cast<double>(old_value);
+      const double new_double_value = old_double_value + operand;
+      const unsigned long long int new_value =
+          sycl::bit_cast<unsigned long long int>(new_double_value);
+      if (obj.compare_exchange_strong(old_value, new_value, memoryOrder))
+        break;
+    } while (true);
+
+    return old_double_value;
+  }
+
+  /// Atomic fetch-and-subtract: the value at \p addr is replaced by itself
+  /// minus \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Amount subtracted from the value at \p addr.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the subtraction.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_sub(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_sub(cell, operand, memoryOrder);
+  }
+
+  /// Atomic fetch-and-AND: the value at \p addr is replaced by its bitwise AND
+  /// with \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Right-hand operand of the bitwise AND.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the operation.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_and(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_and(cell, operand, memoryOrder);
+  }
+
+  /// Atomic fetch-and-OR: the value at \p addr is replaced by its bitwise OR
+  /// with \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Right-hand operand of the bitwise OR.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the operation.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_or(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_or(cell, operand, memoryOrder);
+  }
+
+  /// Atomic fetch-and-XOR: the value at \p addr is replaced by its bitwise XOR
+  /// with \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Right-hand operand of the bitwise XOR.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the operation.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_xor(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_xor(cell, operand, memoryOrder);
+  }
+
+  /// Atomic fetch-and-min: the value at \p addr is replaced by the smaller of
+  /// itself and \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Value compared against the one at \p addr.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the operation.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_min(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_min(cell, operand, memoryOrder);
+  }
+
+  /// Atomic fetch-and-max: the value at \p addr is replaced by the larger of
+  /// itself and \p operand.
+  /// \param [in, out] addr Pointer to the value that is updated in place.
+  /// \param operand Value compared against the one at \p addr.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the operation.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_fetch_max(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_fetch_max(cell, operand, memoryOrder);
+  }
+
+  /// Atomically increment the value stored in \p addr if old value stored in \p
+  /// addr is less than \p operand, else set 0 to the value stored in \p addr.
+  /// \param [in, out] addr The pointer to the data.
+  /// \param operand The threshold value.
+  /// \param memoryOrder The memory ordering used.
+  /// \returns The old value stored in \p addr.
+  template <sycl::access::address_space addressSpace =
+                sycl::access::address_space::global_space>
+  inline unsigned int atomic_fetch_compare_inc(
+      unsigned int *addr, unsigned int operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::atomic<unsigned int, addressSpace> obj(
+        (sycl::multi_ptr<unsigned int, addressSpace>(addr)));
+    // Value observed at addr; returned once the matching update has been
+    // published.
+    unsigned int old;
+    while (true)
+    {
+      old = obj.load(memoryOrder);
+      // Pick the replacement from the value just observed, then publish it
+      // with a compare-exchange so that the comparison against operand and
+      // the store form one atomic step. An unconditional fetch_add in the
+      // increment branch would let another work-item change the value
+      // between the load and the add, pushing it past operand.
+      const unsigned int desired = (old >= operand) ? 0u : old + 1u;
+      if (obj.compare_exchange_strong(old, desired, memoryOrder, memoryOrder))
+      {
+        break;
+      }
+      // The exchange failed because the value changed; reload and retry.
+    }
+    return old;
+  }
+
+  /// Atomic exchange: stores \p operand at \p addr and hands back what was there.
+  /// \param [in, out] addr Pointer to the value that is replaced.
+  /// \param operand New value written to \p addr.
+  /// \param memoryOrder Memory ordering applied to the operation.
+  /// \returns The value stored at \p addr immediately before the exchange.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  inline T atomic_exchange(
+      T *addr, T operand,
+      sycl::memory_order memoryOrder = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> target(addr);
+    sycl::atomic<T, addressSpace> cell(target);
+    return sycl::atomic_exchange(cell, operand, memoryOrder);
+  }
+
+  /// Atomically compare the value at \p addr to the value expected and exchange
+  /// with the value desired if the value at \p addr is equal to the value expected.
+  /// Returns the value at the \p addr before the call.
+  /// \param [in, out] addr Multi_ptr to the data; its address space is \p addressSpace.
+  /// \param expected The value to compare against the value at \p addr.
+  /// \param desired The value to assign to \p addr if the value at \p addr is expected.
+  /// \param success The memory ordering used when comparison succeeds.
+  /// \param fail The memory ordering used when comparison fails.
+  /// \returns The value at the \p addr before the call.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  T atomic_compare_exchange_strong(
+      sycl::multi_ptr<T, addressSpace> addr, // must match the atomic built below; was fixed to global_space
+      T expected, T desired,
+      sycl::memory_order success = sycl::memory_order::relaxed,
+      sycl::memory_order fail = sycl::memory_order::relaxed)
+  {
+    sycl::atomic<T, addressSpace> obj(addr);
+    obj.compare_exchange_strong(expected, desired, success, fail); // on failure, expected receives the current value
+    return expected;
+  }
+
+  /// Raw-pointer convenience overload of the compare-and-exchange: wraps
+  /// \p addr in a multi_ptr for \p addressSpace and forwards to the multi_ptr
+  /// overload.
+  /// \param [in] addr The pointer to the data.
+  /// \param expected The value the data at \p addr is compared with.
+  /// \param desired The value stored at \p addr when the comparison succeeds.
+  /// \param success The memory ordering used when comparison succeeds.
+  /// \param fail The memory ordering used when comparison fails.
+  /// \returns The value at the \p addr before the call.
+  template <typename T, sycl::access::address_space addressSpace =
+                            sycl::access::address_space::global_space>
+  T atomic_compare_exchange_strong(
+      T *addr, T expected, T desired,
+      sycl::memory_order success = sycl::memory_order::relaxed,
+      sycl::memory_order fail = sycl::memory_order::relaxed)
+  {
+    sycl::multi_ptr<T, addressSpace> wrapped(addr);
+    return atomic_compare_exchange_strong(wrapped, expected, desired, success,
+                                          fail);
+  }
+
+} // namespace infra
+#endif // __INFRA_ATOMIC_HPP__
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp
new file mode 100644
index 000000000..4a859e20f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp
@@ -0,0 +1,534 @@
+//==---- device.hpp -------------------------------*- C++ -*----------------==//
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+//===----------------------------------------------------------------------===//
+
+#ifndef __INFRA_DEVICE_HPP__
+#define __INFRA_DEVICE_HPP__
+
+#include <sycl/sycl.hpp>
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <mutex>
+#include <set>
+#include <sstream>
+#include <map>
+#include <vector>
+#include <thread>
+#if defined(__linux__)
+#include <unistd.h>
+#include <sys/syscall.h>
+#endif
+#if defined(_WIN64)
+#define NOMINMAX
+#include <windows.h>
+#endif
+
+namespace infra
+{
+
+  /// Default asynchronous handler: prints every sycl::exception in the list to stderr.
+  auto exception_handler = [](sycl::exception_list exceptions)
+  {
+    for (std::exception_ptr const &pending : exceptions)
+    {
+      try
+      {
+        std::rethrow_exception(pending);
+      }
+      catch (sycl::exception const &err)
+      {
+        std::cerr << "Caught asynchronous SYCL exception:" << std::endl
+                  << err.what() << std::endl
+                  << "Exception caught at file:" << __FILE__
+                  << ", line:" << __LINE__ << std::endl;
+      }
+    }
+  };
+
+  class device_info // Value holder for the properties filled in by device_ext::get_device_info().
+  {
+  public:
+    // get interface
+    char *get_name() { return _name; }
+    sycl::id<3> get_max_work_item_sizes() { return _max_work_item_sizes; }
+    bool get_host_unified_memory() { return _host_unified_memory; }
+    int get_major_version() { return _major; }
+    int get_minor_version() { return _minor; }
+    int get_integrated() { return _integrated; }
+    int get_max_clock_frequency() { return _frequency; }
+    int get_max_compute_units() { return _max_compute_units; }
+    int get_max_work_group_size() { return _max_work_group_size; }
+    int get_max_sub_group_size() { return _max_sub_group_size; }
+    int get_max_work_items_per_compute_unit()
+    {
+      return _max_work_items_per_compute_unit;
+    }
+    size_t *get_max_nd_range_size() { return _max_nd_range_size; } // pointer to the internal 3-element array
+    size_t get_global_mem_size() { return _global_mem_size; }
+    size_t get_local_mem_size() { return _local_mem_size; }
+    // set interface
+    // strncpy() leaves the destination unterminated on truncation, so copy at most 255 characters and terminate explicitly.
+    void set_name(const char *name) { std::strncpy(_name, name, sizeof(_name) - 1); _name[sizeof(_name) - 1] = '\0'; }
+    void set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes)
+    {
+      _max_work_item_sizes = max_work_item_sizes;
+    }
+    void set_host_unified_memory(bool host_unified_memory)
+    {
+      _host_unified_memory = host_unified_memory;
+    }
+    void set_major_version(int major) { _major = major; }
+    void set_minor_version(int minor) { _minor = minor; }
+    void set_integrated(int integrated) { _integrated = integrated; }
+    void set_max_clock_frequency(int frequency) { _frequency = frequency; }
+    void set_max_compute_units(int max_compute_units)
+    {
+      _max_compute_units = max_compute_units;
+    }
+    void set_global_mem_size(size_t global_mem_size)
+    {
+      _global_mem_size = global_mem_size;
+    }
+    void set_local_mem_size(size_t local_mem_size)
+    {
+      _local_mem_size = local_mem_size;
+    }
+    void set_max_work_group_size(int max_work_group_size)
+    {
+      _max_work_group_size = max_work_group_size;
+    }
+    void set_max_sub_group_size(int max_sub_group_size)
+    {
+      _max_sub_group_size = max_sub_group_size;
+    }
+    void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit)
+    {
+      _max_work_items_per_compute_unit = max_work_items_per_compute_unit;
+    }
+    void set_max_nd_range_size(int max_nd_range_size[]) // reads exactly three elements
+    {
+      for (int i = 0; i < 3; i++)
+        _max_nd_range_size[i] = max_nd_range_size[i];
+    }
+
+  private: // every member has an initializer so a fresh object can be read through the getters
+    char _name[256] = {};
+    sycl::id<3> _max_work_item_sizes;
+    bool _host_unified_memory = false;
+    int _major = 0;
+    int _minor = 0;
+    int _integrated = 0;
+    int _frequency = 0;
+    int _max_compute_units = 0;
+    int _max_work_group_size = 0;
+    int _max_sub_group_size = 0;
+    int _max_work_items_per_compute_unit = 0;
+    size_t _global_mem_size = 0;
+    size_t _local_mem_size = 0;
+    size_t _max_nd_range_size[3] = {0, 0, 0};
+  };
+
+  /// infra device extension: a sycl::device bundled with a context and the queues created on it
+  class device_ext : public sycl::device
+  {
+  public:
+    device_ext() : sycl::device(), _ctx(*this) {} // creates no queue; the queue pointers stay null
+    ~device_ext()
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      for (auto &task : _tasks) // wait for every registered helper thread before tearing down
+      {
+        if (task.joinable())
+          task.join();
+      }
+      _tasks.clear();
+      _queues.clear();
+    }
+    device_ext(const sycl::device &base)
+        : sycl::device(base), _ctx(*this)
+    {
+#ifdef INFRA_USM_LEVEL_NONE
+      _queues.push_back(
+          std::make_shared<sycl::queue>(_ctx, base, exception_handler));
+#else
+      _queues.push_back(std::make_shared<sycl::queue>(
+          _ctx, base, exception_handler, sycl::property::queue::in_order()));
+#endif
+      _saved_queue = _default_queue = _queues[0].get();
+    }
+
+    int is_native_atomic_supported() { return 0; } // always reports 0
+    int get_major_version()
+    {
+      int major, minor;
+      get_version(major, minor);
+      return major;
+    }
+
+    int get_minor_version()
+    {
+      int major, minor;
+      get_version(major, minor);
+      return minor;
+    }
+
+    int get_max_compute_units()
+    {
+      return get_device_info().get_max_compute_units();
+    }
+
+    int get_max_clock_frequency()
+    {
+      return get_device_info().get_max_clock_frequency();
+    }
+
+    int get_integrated() { return get_device_info().get_integrated(); }
+
+    void get_device_info(device_info &out) // queries the device and overwrites out with the result
+    {
+      device_info prop;
+      prop.set_name(get_info<sycl::info::device::name>().c_str());
+
+      int major, minor;
+      get_version(major, minor);
+      prop.set_major_version(major);
+      prop.set_minor_version(minor);
+
+      prop.set_max_work_item_sizes(
+          get_info<sycl::info::device::max_work_item_sizes<3>>());
+      prop.set_host_unified_memory(
+          get_info<sycl::info::device::host_unified_memory>());
+
+      // max_clock_frequency parameter is not supported on host device
+      if (is_host())
+      {
+        // This code may need to be updated. Currently max_clock_frequency for
+        // host device is initialized with 1, in assumption that if other devices
+        // exist and they are being selected based on this parameter, other
+        // devices would have higher priority.
+        prop.set_max_clock_frequency(1);
+      }
+      else
+      {
+        prop.set_max_clock_frequency(
+            get_info<sycl::info::device::max_clock_frequency>());
+      }
+
+      prop.set_max_compute_units(
+          get_info<sycl::info::device::max_compute_units>());
+      prop.set_max_work_group_size(
+          get_info<sycl::info::device::max_work_group_size>());
+      prop.set_global_mem_size(
+          get_info<sycl::info::device::global_mem_size>());
+      prop.set_local_mem_size(get_info<sycl::info::device::local_mem_size>());
+
+      size_t max_sub_group_size = 1; // lower bound used when the device reports no sizes
+      std::vector<size_t> sub_group_sizes =
+          get_info<sycl::info::device::sub_group_sizes>();
+
+      for (const auto &sub_group_size : sub_group_sizes)
+      {
+        if (max_sub_group_size < sub_group_size)
+          max_sub_group_size = sub_group_size;
+      }
+
+      prop.set_max_sub_group_size(max_sub_group_size);
+
+      prop.set_max_work_items_per_compute_unit( // filled with the work-group size limit
+          get_info<sycl::info::device::max_work_group_size>());
+      int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; // fixed values, not queried
+      prop.set_max_nd_range_size(max_nd_range_size);
+
+      out = prop;
+    }
+
+    device_info get_device_info()
+    {
+      device_info prop;
+      get_device_info(prop);
+      return prop;
+    }
+
+    void reset()
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      // The queues are shared_ptrs and the ref counts of the shared_ptrs increase
+      // only in wait_and_throw(). If there is no other thread calling
+      // wait_and_throw(), the queues will be destructed. The destructor waits for
+      // all commands executing on the queue to complete. It isn't possible to
+      // destroy a queue immediately. This is a synchronization point in SYCL.
+      _queues.clear();
+      // create new default queue.
+#ifdef INFRA_USM_LEVEL_NONE
+      _queues.push_back(
+          std::make_shared<sycl::queue>(_ctx, *this, exception_handler));
+#else
+      _queues.push_back(std::make_shared<sycl::queue>(
+          _ctx, *this, exception_handler, sycl::property::queue::in_order()));
+#endif
+      _saved_queue = _default_queue = _queues.front().get();
+    }
+
+    sycl::queue &default_queue() { return *_default_queue; }
+
+    void queues_wait_and_throw()
+    {
+      std::unique_lock<std::mutex> lock(m_mutex);
+      std::vector<std::shared_ptr<sycl::queue>> current_queues(
+          _queues);
+      lock.unlock(); // wait on the snapshot without holding the mutex
+      for (const auto &q : current_queues)
+      {
+        q->wait_and_throw();
+      }
+      // Guard the destruct of current_queues to make sure the ref count is safe.
+      lock.lock();
+    }
+    sycl::queue *create_queue(bool enable_exception_handler = false)
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      sycl::async_handler eh = {};
+      if (enable_exception_handler)
+      {
+        eh = exception_handler;
+      }
+#ifdef INFRA_USM_LEVEL_NONE
+      _queues.push_back(std::make_shared<sycl::queue>(
+          _ctx, *this, eh));
+#else
+      _queues.push_back(std::make_shared<sycl::queue>(
+          _ctx, *this, eh,
+          sycl::property::queue::in_order()));
+#endif
+      return _queues.back().get(); // the queue stays owned by _queues
+    }
+    void destroy_queue(sycl::queue *&queue)
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      _queues.erase(std::remove_if(_queues.begin(), _queues.end(),
+                                   [=](const std::shared_ptr<sycl::queue> &q) -> bool
+                                   {
+                                     return q.get() == queue;
+                                   }),
+                    _queues.end());
+      queue = nullptr; // the caller's pointer is cleared as well
+    }
+    void set_saved_queue(sycl::queue *q)
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      _saved_queue = q;
+    }
+    sycl::queue *get_saved_queue()
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      return _saved_queue;
+    }
+    sycl::context get_context() { return _ctx; }
+
+  private:
+    void get_version(int &major, int &minor)
+    {
+      // Version string has the following format:
+      // a. OpenCL<space><major.minor><space><vendor-specific-information>
+      // b. <major.minor>
+      const std::string ver = get_info<sycl::info::device::version>();
+      std::string::size_type i = 0;
+      // Skip to the first digit; the cast keeps isdigit() defined for
+      // negative char values.
+      while (i < ver.size() && !isdigit(static_cast<unsigned char>(ver[i])))
+        i++;
+      major = std::stoi(&(ver[i]));
+      while (i < ver.size() && ver[i] != '.')
+        i++;
+      if (i >= ver.size())
+      {
+        // No '.' in the string: report minor 0 instead of reading past the
+        // end of the buffer.
+        minor = 0;
+        return;
+      }
+      i++;
+      minor = std::stoi(&(ver[i]));
+    }
+    void add_task(std::thread &&task)
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      _tasks.push_back(std::move(task));
+    }
+    friend void async_infra_free(std::vector<void *>,
+                                 std::vector<sycl::event>,
+                                 sycl::queue &);
+    sycl::queue *_default_queue = nullptr; // non-owning; points into _queues
+    sycl::queue *_saved_queue = nullptr;   // non-owning; set via set_saved_queue()
+    sycl::context _ctx;
+    std::vector<std::shared_ptr<sycl::queue>> _queues;
+    mutable std::mutex m_mutex;
+    std::vector<std::thread> _tasks;
+  };
+
+  static inline unsigned int get_tid() // OS-level identifier of the calling thread
+  {
+#if defined(__linux__)
+    return static_cast<unsigned int>(syscall(SYS_gettid));
+#elif defined(_WIN64)
+    return static_cast<unsigned int>(GetCurrentThreadId());
+#else
+#error "Only support Windows and Linux."
+#endif
+  }
+
+  /// Singleton registry of device_ext objects plus the per-thread device choice.
+  class dev_mgr
+  {
+  public:
+    device_ext &current_device()
+    {
+      const unsigned int selected = current_device_id(); // takes the lock itself
+      check_id(selected);
+      return *_devs[selected];
+    }
+    device_ext &cpu_device() const
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      // _cpu_device keeps its initial -1 when the constructor saw no CPU device.
+      if (_cpu_device == -1)
+      {
+        throw std::runtime_error("no valid cpu device");
+      }
+      // The guard above throws, so from here on the index was assigned by
+      // the constructor.
+      return *_devs[_cpu_device];
+    }
+    device_ext &get_device(unsigned int id) const
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      check_id(id); // throws for an out-of-range id
+      return *_devs[id];
+    }
+    unsigned int current_device_id() const
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      const auto entry = _thread2dev_map.find(get_tid());
+      // Threads without a select_device() entry fall back to the default id.
+      const bool found = entry != _thread2dev_map.end();
+      return found ? entry->second : DEFAULT_DEVICE_ID;
+    }
+    void select_device(unsigned int id)
+    {
+      std::lock_guard<std::mutex> lock(m_mutex);
+      check_id(id); // reject the id before recording it
+      _thread2dev_map[get_tid()] = id;
+    }
+    unsigned int device_count() { return static_cast<unsigned int>(_devs.size()); }
+
+    /// Gives access to the one dev_mgr object, created on first use.
+    static dev_mgr &instance()
+    {
+      static dev_mgr singleton;
+      return singleton;
+    }
+    dev_mgr(const dev_mgr &) = delete;
+    dev_mgr &operator=(const dev_mgr &) = delete;
+    dev_mgr(dev_mgr &&) = delete;
+    dev_mgr &operator=(dev_mgr &&) = delete;
+
+  private:
+    mutable std::mutex m_mutex;
+    dev_mgr()
+    {
+      // Slot 0 always holds the device chosen by the default selector.
+      sycl::device default_device =
+          sycl::device(sycl::default_selector{});
+      _devs.push_back(std::make_shared<device_ext>(default_device));
+      if (default_device.is_cpu())
+        _cpu_device = 0;
+
+      // Append every other device the runtime reports (all device types),
+      // remembering the index of the first CPU device seen.
+      const std::vector<sycl::device> discovered =
+          sycl::device::get_devices(sycl::info::device_type::all);
+      for (const sycl::device &candidate : discovered)
+      {
+        const bool is_default = (candidate == default_device);
+        if (!is_default)
+        {
+          _devs.push_back(std::make_shared<device_ext>(candidate));
+          const bool first_cpu = (_cpu_device == -1) && candidate.is_cpu();
+          if (first_cpu)
+            _cpu_device = _devs.size() - 1;
+        }
+      }
+    }
+    void check_id(unsigned int id) const
+    {
+      // Valid ids are exactly the indices of _devs.
+      const bool in_range = id < _devs.size();
+      if (!in_range)
+        throw std::runtime_error("invalid device id");
+    }
+    std::vector<std::shared_ptr<device_ext>> _devs;
+    /// DEFAULT_DEVICE_ID is returned by current_device_id() when the calling
+    /// thread has no entry in _thread2dev_map, i.e. the default device is
+    /// used for that thread.
+    const unsigned int DEFAULT_DEVICE_ID = 0;
+    /// Maps a thread id to the device id selected by that thread.
+    std::map<unsigned int, unsigned int> _thread2dev_map;
+    int _cpu_device = -1;
+  };
+
+  /// Util function to get the default queue of the current device in infra device manager.
+  static inline sycl::queue &get_default_queue()
+  {
+    device_ext &dev = dev_mgr::instance().current_device();
+    return dev.default_queue();
+  }
+
+  /// Util function to get the device that dev_mgr reports as current for the calling thread.
+  static inline device_ext &get_current_device()
+  {
+    return dev_mgr::instance().current_device();
+  }
+
+  /// Util function to get a device by id; dev_mgr throws std::runtime_error for an out-of-range \p id.
+  static inline device_ext &get_device(unsigned int id)
+  {
+    return dev_mgr::instance().get_device(id);
+  }
+
+  /// Util function to get the context held by the current device in infra device manager.
+  static inline sycl::context get_default_context()
+  {
+    device_ext &dev = infra::get_current_device();
+    return dev.get_context();
+  }
+
+  /// Util function to get the CPU device recorded by dev_mgr; throws std::runtime_error when none was found.
+  static inline device_ext &cpu_device()
+  {
+    return dev_mgr::instance().cpu_device();
+  }
+
+} // namespace infra
+
+#endif // __INFRA_DEVICE_HPP__
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp
new file mode 100644
index 000000000..498512888
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+//===----------------------------------------------------------------------===//
+
+#ifndef __INFRA_HPP__
+#define __INFRA_HPP__
+
+#include <sycl/sycl.hpp>
+#include <iostream>
+#include <limits.h>
+
+#include "atomic.hpp"
+#include "device.hpp"
+#include "memory.hpp"
+
+#endif // __INFRA_HPP__
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp
new file mode 100644
index 000000000..444d193ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp
@@ -0,0 +1,1292 @@
+//==---- memory.hpp -------------------------------*- C++ -*----------------==//
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+//===----------------------------------------------------------------------===//
+
+#ifndef __INFRA_MEMORY_HPP__
+#define __INFRA_MEMORY_HPP__
+
+#include "device.hpp"
+#include <sycl/sycl.hpp>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <mutex>
+#include <unordered_map>
+#include <map>
+#include <utility>
+#include <thread>
+#include <type_traits>
+
+#if defined(__linux__)
+#include <sys/mman.h>
+#elif defined(_WIN64)
+#define NOMINMAX
+#include <windows.h>
+#else
+#error "Only support Windows and Linux."
+#endif
+
+namespace infra
+{
+
+  enum memcpy_direction // copy direction accepted by the sift_memcpy overloads
+  {
+    host_to_host,
+    host_to_device,
+    device_to_host,
+    device_to_device,
+    automatic // direction is not given by the caller; it is worked out from the pointers
+  };
+  enum memory_region // selects address space, target and mode in detail::memory_traits
+  {
+    global = 0, // device global memory
+    constant,   // device constant memory
+    local,      // device local memory
+    shared,     // memory which can be accessed by host and device
+  };
+
+  typedef uint8_t byte_t;
+
+  /// Buffer type to be used in Memory Management runtime.
+  typedef sycl::buffer<byte_t> buffer_t;
+
+  /// Pitched 2D/3D memory data: a base pointer, the row pitch and the x/y extents.
+  class pitched_data
+  {
+  public:
+    pitched_data() : pitched_data(nullptr, 0, 0, 0) {}
+    pitched_data(void *data, size_t pitch, size_t x, size_t y)
+        : _data(data), _pitch(pitch), _x(x), _y(y) {}
+    // Getters are const so they can also be called through a const object/reference.
+    void *get_data_ptr() const { return _data; }
+    void set_data_ptr(void *data) { _data = data; }
+    /// Pitch: distance in bytes between the starts of two consecutive rows.
+    size_t get_pitch() const { return _pitch; }
+    void set_pitch(size_t pitch) { _pitch = pitch; }
+    /// Extent in dim x.
+    size_t get_x() const { return _x; }
+    void set_x(size_t x) { _x = x; }
+    /// Extent in dim y.
+    size_t get_y() const { return _y; }
+    void set_y(size_t y) { _y = y; }
+
+  private:
+    void *_data;
+    size_t _pitch, _x, _y;
+  };
+
+  namespace detail
+  {
+    class mem_mgr // singleton mapping "virtual" device pointers to sycl buffers
+    {
+      mem_mgr()
+      {
+        // Reserve address space only; no real memory allocation happens here.
+#if defined(__linux__)
+        void *base = mmap(nullptr, mapped_region_size, PROT_NONE,
+                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (base == MAP_FAILED) // mmap signals failure with MAP_FAILED, not NULL
+          base = nullptr;
+#elif defined(_WIN64)
+        void *base = VirtualAlloc(NULL, mapped_region_size, MEM_RESERVE,
+                                  PAGE_NOACCESS); // reserve only, no access
+#else
+#error "Only support Windows and Linux."
+#endif
+        if (!base)
+          throw std::runtime_error("mem_mgr: failed to reserve virtual address space");
+        mapped_address_space = next_free = static_cast<byte_t *>(base);
+      };
+
+    public:
+      using buffer_id_t = int;
+
+      struct allocation
+      {
+        buffer_t buffer;   // buffer that actually holds the data
+        byte_t *alloc_ptr; // first virtual address of the allocation
+        size_t size;       // requested size in bytes
+      };
+
+      ~mem_mgr()
+      {
+#if defined(__linux__)
+        munmap(mapped_address_space, mapped_region_size);
+#elif defined(_WIN64)
+        VirtualFree(mapped_address_space, 0, MEM_RELEASE);
+#else
+#error "Only support Windows and Linux."
+#endif
+      };
+
+      mem_mgr(const mem_mgr &) = delete;
+      mem_mgr &operator=(const mem_mgr &) = delete;
+      mem_mgr(mem_mgr &&) = delete;
+      mem_mgr &operator=(mem_mgr &&) = delete;
+
+      /// Allocate \p size bytes of virtual address space backed by a new buffer (nullptr for size 0).
+      void *mem_alloc(size_t size)
+      {
+        if (!size)
+          return nullptr;
+        std::lock_guard<std::mutex> lock(m_mutex);
+        // Compare sizes instead of pointers so a huge request cannot wrap around.
+        const size_t used = static_cast<size_t>(next_free - mapped_address_space);
+        if (used > mapped_region_size || size > mapped_region_size - used)
+          throw std::runtime_error("sift_malloc: out of memory for virtual memory pool");
+        // Allocation
+        sycl::range<1> r(size);
+        buffer_t buf(r);
+        allocation A{buf, next_free, size};
+        // Map allocation to device pointer
+        void *result = next_free;
+        m_map.emplace(next_free + size, A); // keyed by end address, see get_map_iterator
+        // Update pointer to the next free space.
+        next_free += (size + extra_padding + alignment - 1) & ~(alignment - 1);
+
+        return result;
+      }
+
+      /// Deallocate: erases the map entry of the allocation that contains \p ptr.
+      void mem_free(const void *ptr)
+      {
+        if (!ptr)
+          return;
+        std::lock_guard<std::mutex> lock(m_mutex);
+        auto it = get_map_iterator(ptr);
+        m_map.erase(it);
+      }
+
+      /// map: device pointer -> allocation(buffer, alloc_ptr, size)
+      allocation translate_ptr(const void *ptr)
+      {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        auto it = get_map_iterator(ptr);
+        return it->second;
+      }
+
+      /// Check if the pointer represents device pointer or not.
+      bool is_device_ptr(const void *ptr) const
+      {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        return (mapped_address_space <= ptr) &&
+               (ptr < mapped_address_space + mapped_region_size);
+      }
+
+      /// Returns the instance of memory manager singleton.
+      static mem_mgr &instance()
+      {
+        static mem_mgr m;
+        return m;
+      }
+
+    private:
+      std::map<byte_t *, allocation> m_map;
+      mutable std::mutex m_mutex;
+      byte_t *mapped_address_space;
+      byte_t *next_free;
+      const size_t mapped_region_size = 128ull * 1024 * 1024 * 1024;
+      const size_t alignment = 256;
+      /// This padding may be defined to some positive value to debug
+      /// out of bound accesses.
+      const size_t extra_padding = 0;
+
+      std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
+      {
+        // Keys are end addresses, so the first key greater than ptr is the candidate owner.
+        auto it = m_map.upper_bound((byte_t *)ptr);
+        if (it == m_map.end())
+        {
+          // Not a virtual pointer.
+          throw std::runtime_error("can not get buffer from non-virtual pointer");
+        }
+        const allocation &alloc = it->second;
+        if (ptr < alloc.alloc_ptr)
+        {
+          // Out of bound.
+          // This may happen if there's a gap between allocations due to alignment
+          // or extra padding and pointer points to this gap.
+          throw std::runtime_error("invalid virtual pointer");
+        }
+        return it;
+      }
+    };
+
+    template <class T, memory_region Memory, size_t Dimension>
+    class accessor; // forward declaration
+    template <memory_region Memory, class T = byte_t>
+    class memory_traits // compile-time SYCL properties for a memory_region / element type pair
+    {
+    public:
+      static constexpr sycl::access::address_space asp =
+          (Memory == local)
+              ? sycl::access::address_space::local_space
+              : ((Memory == constant)
+                     ? sycl::access::address_space::constant_space
+                     : sycl::access::address_space::global_space);
+      static constexpr sycl::access::target target =
+          (Memory == local)
+              ? sycl::access::target::local
+              : ((Memory == constant) ? sycl::access::target::constant_buffer
+                                      : sycl::access::target::global_buffer);
+      static constexpr sycl::access_mode mode =
+          (Memory == constant) ? sycl::access_mode::read
+                               : sycl::access_mode::read_write;
+      static constexpr size_t type_size = sizeof(T);
+      using element_t =
+          typename std::conditional<Memory == constant, const T, T>::type; // const T for constant memory
+      using value_t = typename std::remove_cv<T>::type;
+      template <size_t Dimension = 1>
+      using accessor_t = sycl::accessor<T, Dimension, mode, target>;
+      using pointer_t = T *;
+    };
+
+    static inline void *sift_malloc(size_t size, sycl::queue &q) // allocate size bytes
+    {
+#ifdef INFRA_USM_LEVEL_NONE
+      return mem_mgr::instance().mem_alloc(size * sizeof(byte_t)); // virtual pointer handed out by mem_mgr
+#else
+      return sycl::malloc_device(size, q.get_device(), q.get_context()); // USM device allocation
+#endif // INFRA_USM_LEVEL_NONE
+    }
+
+#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
+    static inline void *sift_malloc(size_t &pitch, size_t x, size_t y, size_t z, // pitch is an output
+                                    sycl::queue &q)
+    {
+      pitch = PITCH_DEFAULT_ALIGN(x); // round x up to the next multiple of 32
+      return sift_malloc(pitch * y * z, q); // y * z rows of pitch bytes each
+    }
+
+    /// Set \p value to the first \p size bytes starting from \p dev_ptr in \p q.
+    /// With INFRA_USM_LEVEL_NONE the fill goes through the backing buffer; otherwise q.memset is used.
+    /// \param q The queue in which the operation is done.
+    /// \param dev_ptr Pointer to the device memory address.
+    /// \param value Value to be set.
+    /// \param size Number of bytes to be set to the value.
+    /// \returns An event representing the memset operation.
+    static inline sycl::event sift_memset(sycl::queue &q, void *dev_ptr,
+                                          int value, size_t size)
+    {
+#ifdef INFRA_USM_LEVEL_NONE
+      auto &mm = mem_mgr::instance();
+      assert(mm.is_device_ptr(dev_ptr));
+      auto alloc = mm.translate_ptr(dev_ptr);
+      size_t offset = (byte_t *)dev_ptr - alloc.alloc_ptr;
+      // Fill size bytes starting at offset inside the backing buffer.
+      return q.submit([&](sycl::handler &cgh)
+                      {
+    auto r = sycl::range<1>(size);
+    auto o = sycl::id<1>(offset);
+    sycl::accessor<byte_t, 1, sycl::access_mode::write,
+                       sycl::access::target::global_buffer>
+        acc(alloc.buffer, cgh, r, o);
+    cgh.fill(acc, (byte_t)value); });
+#else
+      return q.memset(dev_ptr, value, size);
+#endif // INFRA_USM_LEVEL_NONE
+    }
+
+    /// Fill a pitched 3D region with \p value, one 1D memset per row. Rows are
+    /// data.get_pitch() bytes apart, planes are get_pitch() * get_y() bytes apart.
+    ///
+    /// \param q The queue in which the operation is done.
+    /// \param data Pitched description of the device memory region.
+    /// \param value Value to be set.
+    /// \param size Region to set: bytes per row, rows per plane, planes.
+    /// \returns An event list representing the memset operations (one per row).
+    static inline std::vector<sycl::event>
+    sift_memset(sycl::queue &q, pitched_data data, int value,
+                sycl::range<3> size)
+    {
+      const size_t row_pitch = data.get_pitch();
+      const size_t plane_pitch = row_pitch * data.get_y();
+      auto *const base = static_cast<unsigned char *>(data.get_data_ptr());
+      std::vector<sycl::event> events;
+      // Address every row directly from the base pointer.
+      for (size_t plane = 0; plane < size.get(2); ++plane)
+      {
+        for (size_t row = 0; row < size.get(1); ++row)
+        {
+          unsigned char *row_ptr = base + plane * plane_pitch + row * row_pitch;
+          events.push_back(sift_memset(q, row_ptr, value, size.get(0)));
+        }
+      }
+      return events;
+    }
+
+    /// memset 2D matrix with pitch: sets x bytes in each of y rows that are pitch bytes apart.
+    static inline std::vector<sycl::event>
+    sift_memset(sycl::queue &q, void *ptr, size_t pitch, int val, size_t x,
+                size_t y)
+    {
+      return sift_memset(q, pitched_data(ptr, pitch, x, 1), val,
+                         sycl::range<3>(x, y, 1)); // single plane (z == 1)
+    }
+
+    static sycl::event sift_memcpy(sycl::queue &q, void *to_ptr, // copies size bytes from from_ptr to to_ptr
+                                   const void *from_ptr, size_t size,
+                                   memcpy_direction direction)
+    {
+      if (!size)
+        return sycl::event{}; // nothing to copy
+#ifdef INFRA_USM_LEVEL_NONE
+      auto &mm = mem_mgr::instance();
+      memcpy_direction real_direction = direction; // replaced below when direction is automatic
+      switch (direction)
+      {
+      case host_to_host:
+        assert(!mm.is_device_ptr(from_ptr) && !mm.is_device_ptr(to_ptr));
+        break;
+      case host_to_device:
+        assert(!mm.is_device_ptr(from_ptr) && mm.is_device_ptr(to_ptr));
+        break;
+      case device_to_host:
+        assert(mm.is_device_ptr(from_ptr) && !mm.is_device_ptr(to_ptr));
+        break;
+      case device_to_device:
+        assert(mm.is_device_ptr(from_ptr) && mm.is_device_ptr(to_ptr));
+        break;
+      case automatic: // classify both pointers with mem_mgr to pick the real direction
+        bool from_device = mm.is_device_ptr(from_ptr);
+        bool to_device = mm.is_device_ptr(to_ptr);
+        if (from_device)
+        {
+          if (to_device)
+          {
+            real_direction = device_to_device;
+          }
+          else
+          {
+            real_direction = device_to_host;
+          }
+        }
+        else
+        {
+          if (to_device)
+          {
+            real_direction = host_to_device;
+          }
+          else
+          {
+            real_direction = host_to_host;
+          }
+        }
+        break;
+      }
+      bool is_cpu = q.get_device().is_cpu(); // CPU queues use explicit copy kernels below, others use handler::copy
+      // Perform the copy for the resolved direction.
+      switch (real_direction)
+      {
+      case host_to_host:
+        std::memcpy(to_ptr, from_ptr, size);
+        return sycl::event();
+      case host_to_device:
+      {
+        auto alloc = mm.translate_ptr(to_ptr);
+        size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
+        if (is_cpu)
+        {
+          buffer_t from_buffer((byte_t *)from_ptr, sycl::range<1>(size), {sycl::property::buffer::use_host_ptr()});
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto o = sycl::id<1>(offset);
+        auto from_acc = from_buffer.get_access<sycl::access_mode::read>(cgh);
+        sycl::accessor<byte_t, 1, sycl::access_mode::write,
+                           sycl::access::target::global_buffer>
+            acc(alloc.buffer, cgh, r, o);
+        cgh.parallel_for<class memcopyh2d>(r, [=](sycl::id<1> idx) {
+          acc[idx] = from_acc[idx];
+          }); });
+        }
+        else
+        {
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto o = sycl::id<1>(offset);
+         sycl::accessor<byte_t, 1, sycl::access_mode::write,
+                           sycl::access::target::global_buffer>
+            acc(alloc.buffer, cgh, r, o);
+        cgh.copy(from_ptr, acc); });
+        }
+      }
+      case device_to_host:
+      {
+        auto alloc = mm.translate_ptr(from_ptr);
+        size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
+        if (is_cpu)
+        {
+          buffer_t to_buffer((byte_t *)to_ptr, sycl::range<1>(size), {sycl::property::buffer::use_host_ptr()});
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto o = sycl::id<1>(offset);
+        auto to_acc = to_buffer.get_access<sycl::access_mode::write>(cgh);
+        sycl::accessor<byte_t, 1, sycl::access_mode::read,
+                           sycl::access::target::global_buffer>
+            acc(alloc.buffer, cgh, r, o);
+        cgh.parallel_for<class memcopyd2h>(r, [=](sycl::id<1> idx) {
+          to_acc[idx] = acc[idx];
+          }); });
+        }
+        else
+        {
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto o = sycl::id<1>(offset);
+        sycl::accessor<byte_t, 1, sycl::access_mode::read,
+                           sycl::access::target::global_buffer>
+            acc(alloc.buffer, cgh, r, o);
+        cgh.copy(acc, to_ptr); });
+        }
+      }
+      case device_to_device:
+      {
+        auto to_alloc = mm.translate_ptr(to_ptr);
+        auto from_alloc = mm.translate_ptr(from_ptr);
+        size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
+        size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
+        if (is_cpu)
+        {
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto to_o = sycl::id<1>(to_offset);
+        auto from_o = sycl::id<1>(from_offset);
+        sycl::accessor<byte_t, 1, sycl::access_mode::write,
+                           sycl::access::target::global_buffer>
+            to_acc(to_alloc.buffer, cgh, r, to_o);
+        sycl::accessor<byte_t, 1, sycl::access_mode::read,
+                           sycl::access::target::global_buffer>
+            from_acc(from_alloc.buffer, cgh, r, from_o);
+        cgh.parallel_for<class memcopyd2d>(r, [=](sycl::id<1> idx) {
+          to_acc[idx] = from_acc[idx];
+          }); });
+        }
+        else
+        {
+          return q.submit([&](sycl::handler &cgh)
+                          {
+        auto r = sycl::range<1>(size);
+        auto to_o = sycl::id<1>(to_offset);
+        auto from_o = sycl::id<1>(from_offset);
+        sycl::accessor<byte_t, 1, sycl::access_mode::write,
+                           sycl::access::target::global_buffer>
+            to_acc(to_alloc.buffer, cgh, r, to_o);
+        sycl::accessor<byte_t, 1, sycl::access_mode::read,
+                           sycl::access::target::global_buffer>
+            from_acc(from_alloc.buffer, cgh, r, from_o);
+        cgh.copy(from_acc, to_acc); });
+        }
+      }
+      default: // only reachable when direction is not one of the enumerators
+        throw std::runtime_error("sift_memcpy: invalid direction value");
+      }
+#else
+      return q.memcpy(to_ptr, from_ptr, size); // USM path: direction is not used
+#endif // INFRA_USM_LEVEL_NONE
+    }
+
+    /// Copy the 3D box \p size from the matrix (\p from_ptr, \p from_range), starting at
+    /// \p from_id, into the matrix (\p to_ptr, \p to_range), starting at \p to_id.
+    static inline std::vector<sycl::event>
+    sift_memcpy(sycl::queue &q, void *to_ptr, const void *from_ptr,
+                sycl::range<3> to_range, sycl::range<3> from_range,
+                sycl::id<3> to_id, sycl::id<3> from_id,
+                sycl::range<3> size, memcpy_direction direction)
+    {
+      // Row and plane strides (in bytes) of both matrices.
+      const size_t to_row = to_range.get(0), from_row = from_range.get(0);
+      const size_t to_plane = to_range.get(1) * to_row;
+      const size_t from_plane = from_range.get(1) * from_row;
+      // First byte to write / read, after applying the start offsets.
+      unsigned char *dst_plane = static_cast<unsigned char *>(to_ptr) +
+                                 to_id.get(2) * to_plane +
+                                 to_id.get(1) * to_row + to_id.get(0);
+      const unsigned char *src_plane =
+          static_cast<const unsigned char *>(from_ptr) +
+          from_id.get(2) * from_plane + from_id.get(1) * from_row + from_id.get(0);
+      // Whole region is contiguous on both sides: one flat copy is enough.
+      if (to_plane == from_plane && to_plane == size.get(1) * size.get(0))
+      {
+        return {sift_memcpy(q, dst_plane, src_plane, to_plane * size.get(2),
+                            direction)};
+      }
+      // Rows are contiguous inside a plane when both pitches equal the width.
+      const bool rows_packed = to_row == from_row && to_row == size.get(0);
+      std::vector<sycl::event> events;
+      for (size_t plane = 0; plane < size.get(2);
+           ++plane, dst_plane += to_plane, src_plane += from_plane)
+      {
+        if (rows_packed)
+        {
+          events.push_back(sift_memcpy(q, dst_plane, src_plane,
+                                       size.get(0) * size.get(1), direction));
+          continue;
+        }
+        unsigned char *dst_row = dst_plane;
+        const unsigned char *src_row = src_plane;
+        for (size_t row = 0; row < size.get(1); ++row)
+        {
+          events.push_back(
+              sift_memcpy(q, dst_row, src_row, size.get(0), direction));
+          dst_row += to_row;
+          src_row += from_row;
+        }
+      }
+      return events;
+    }
+
+    /// memcpy 2D/3D matrix specified by pitched_data; each side's (pitch, y) gives its row and slice strides.
+    static inline std::vector<sycl::event>
+    sift_memcpy(sycl::queue &q, pitched_data to, sycl::id<3> to_id,
+                pitched_data from, sycl::id<3> from_id, sycl::range<3> size,
+                memcpy_direction direction = automatic)
+    {
+      return sift_memcpy(q, to.get_data_ptr(), from.get_data_ptr(),
+                         sycl::range<3>(to.get_pitch(), to.get_y(), 1),
+                         sycl::range<3>(from.get_pitch(), from.get_y(), 1), to_id, from_id,
+                         size, direction);
+    }
+
+    /// memcpy 2D matrix with pitch: copies y rows of x bytes; rows are to_pitch / from_pitch bytes apart.
+    static inline std::vector<sycl::event>
+    sift_memcpy(sycl::queue &q, void *to_ptr, const void *from_ptr,
+                size_t to_pitch, size_t from_pitch, size_t x, size_t y,
+                memcpy_direction direction = automatic)
+    {
+      return sift_memcpy(q, to_ptr, from_ptr, sycl::range<3>(to_pitch, y, 1),
+                         sycl::range<3>(from_pitch, y, 1),
+                         sycl::id<3>(0, 0, 0), sycl::id<3>(0, 0, 0), // copy starts at the origin of both matrices
+                         sycl::range<3>(x, y, 1), direction);
+    }
+  } // namespace detail
+
+#ifdef INFRA_USM_LEVEL_NONE
+  /// Check if the pointer \p ptr represents device pointer or not.
+  /// Only compiled when INFRA_USM_LEVEL_NONE is defined.
+  /// \param ptr The pointer to be checked.
+  /// \returns true if \p ptr is a device pointer.
+  template <class T>
+  static inline bool is_device_ptr(T ptr)
+  {
+    if constexpr (std::is_pointer<T>::value)
+    {
+      return detail::mem_mgr::instance().is_device_ptr(ptr); // inside mem_mgr's reserved address range?
+    }
+    return false; // T is not a pointer type
+  }
+#endif
+
+  /// Resolve \p ptr to the buffer that backs it and to its byte offset in that buffer.
+  ///
+  /// \param ptr Pointer to a piece of memory.
+  /// If NULL is passed as an argument, an exception will be thrown.
+  /// \returns a pair containing both the buffer and the offset.
+  static std::pair<buffer_t, size_t> get_buffer_and_offset(const void *ptr)
+  {
+    // Reject NULL up front; it can never name an allocation.
+    if (!ptr)
+    {
+      throw std::runtime_error(
+          "NULL pointer argument in get_buffer_and_offset function is invalid");
+    }
+    // Look up the allocation that contains ptr and measure how far ptr
+    // lies from the allocation's base address.
+    const auto entry = detail::mem_mgr::instance().translate_ptr(ptr);
+    const auto *target = static_cast<const byte_t *>(ptr);
+    return std::make_pair(entry.buffer, static_cast<size_t>(target - entry.alloc_ptr));
+  }
+
+  /// Get the buffer of the allocation containing \p ptr as a 1D buffer reinterpreted as type T.
+  template <typename T>
+  static sycl::buffer<T> get_buffer(const void *ptr)
+  {
+    auto alloc = detail::mem_mgr::instance().translate_ptr(ptr);
+    return alloc.buffer.reinterpret<T>(
+        sycl::range<1>(alloc.size / sizeof(T))); // element count of the whole allocation, rounded down
+  }
+
+  /// Get the buffer of a piece of memory pointed to by \p ptr.
+  /// The lookup throws std::runtime_error when \p ptr is not inside a tracked allocation.
+  /// \param ptr Pointer to a piece of memory.
+  /// \returns the buffer.
+  static buffer_t get_buffer(const void *ptr)
+  {
+    return detail::mem_mgr::instance().translate_ptr(ptr).buffer;
+  }
+
+  /// A wrapper class that holds an accessor over the buffer backing \p ptr and ptr's byte offset in it.
+  template <typename dataT,
+            sycl::access_mode accessMode = sycl::access_mode::read_write>
+  class access_wrapper
+  {
+    sycl::accessor<byte_t, 1, accessMode> accessor; // accessor obtained from the backing buffer
+    size_t offset;                                  // byte offset of ptr from the allocation base
+
+  public:
+    /// Construct the accessor wrapper for memory pointed by \p ptr.
+    /// The allocation is looked up in mem_mgr twice: once for the buffer, once for the offset.
+    /// \param ptr Pointer to memory.
+    /// \param cgh The command group handler.
+    access_wrapper(const void *ptr, sycl::handler &cgh)
+        : accessor(get_buffer(ptr).get_access<accessMode>(cgh)), offset(0)
+    {
+      auto alloc = detail::mem_mgr::instance().translate_ptr(ptr);
+      offset = (byte_t *)ptr - alloc.alloc_ptr;
+    }
+
+    /// Get the device pointer.
+    ///
+    /// \returns a device pointer with offset.
+    dataT get_raw_pointer() const { return (dataT)(&accessor[0] + offset); }
+  };
+
+  /// Build an accessor on the buffer that backs the memory pointed to by \p ptr.
+  ///
+  /// \param ptr Pointer to memory.
+  /// If NULL is passed as an argument, an exception will be thrown.
+  /// \param cgh The command group handler.
+  /// \returns an accessor.
+  template <sycl::access_mode accessMode = sycl::access_mode::read_write>
+  static sycl::accessor<byte_t, 1, accessMode>
+  get_access(const void *ptr, sycl::handler &cgh)
+  {
+    // Guard clause: NULL can never refer to a tracked allocation.
+    if (!ptr)
+    {
+      throw std::runtime_error(
+          "NULL pointer argument in get_access function is invalid");
+    }
+    // The accessor is taken on the buffer of the allocation that contains
+    // ptr; any offset of ptr inside that allocation is not applied here.
+    auto owner = detail::mem_mgr::instance().translate_ptr(ptr);
+    return owner.buffer.get_access<accessMode>(cgh);
+  }
+
+  /// Allocate memory block on the device.
+  /// \param num_bytes Number of bytes to allocate.
+  /// \param q Queue to execute the allocate task.
+  /// \returns A pointer to the newly allocated memory.
+  template <typename T>
+  static inline void *sift_malloc(T num_bytes,
+                                  sycl::queue &q = get_default_queue())
+  {
+    return detail::sift_malloc(static_cast<size_t>(num_bytes), q); // num_bytes is converted to size_t first
+  }
+
+  /// Get the host pointer from a buffer that is mapped to virtual pointer ptr.
+  /// \param ptr Virtual Pointer mapped to device buffer
+  /// \returns A host pointer
+  template <typename T>
+  static inline T *get_host_ptr(const void *ptr)
+  {
+    auto BufferOffset = get_buffer_and_offset(ptr);
+    auto host_ptr = // NOTE(review): the accessor below is a temporary; confirm the pointer stays usable after it is gone
+        BufferOffset.first.get_access<sycl::access_mode::read_write>()
+            .get_pointer();
+    return (T *)(host_ptr + BufferOffset.second); // apply ptr's offset inside the buffer
+  }
+
+  /// Allocate memory block for 3D array on the device.
+  /// \param size Extents of the memory block; the extent in dim 0 is in bytes.
+  /// \param q Queue to execute the allocate task.
+  /// \returns A pitched_data object which stores the memory info.
+  static inline pitched_data
+  sift_malloc(sycl::range<3> size, sycl::queue &q = get_default_queue())
+  {
+    pitched_data pitch(nullptr, 0, size.get(0), size.get(1));
+    size_t pitch_size; // filled in by detail::sift_malloc
+    pitch.set_data_ptr(detail::sift_malloc(pitch_size, size.get(0), size.get(1),
+                                           size.get(2), q));
+    pitch.set_pitch(pitch_size);
+    return pitch;
+  }
+
+  /// Allocate memory block for 2D array on the device.
+  /// \param [out] pitch Aligned size of x in bytes.
+  /// \param x Range in dim x.
+  /// \param y Range in dim y.
+  /// \param q Queue to execute the allocate task.
+  /// \returns A pointer to the newly allocated memory.
+  static inline void *sift_malloc(size_t &pitch, size_t x, size_t y,
+                                  sycl::queue &q = get_default_queue())
+  {
+    return detail::sift_malloc(pitch, x, y, 1, q); // 2D is the 3D case with z == 1
+  }
+
+  /// Release memory obtained from sift_malloc; a NULL \p ptr is ignored.
+  /// \param ptr Point to free.
+  /// \param q Queue to execute the free task.
+  /// \returns no return value.
+  static inline void infra_free(void *ptr,
+                                sycl::queue &q = get_default_queue())
+  {
+    // Nothing to release for a NULL pointer.
+    if (!ptr)
+      return;
+#ifdef INFRA_USM_LEVEL_NONE
+    detail::mem_mgr::instance().mem_free(ptr);
+#else
+    sycl::free(ptr, q.get_context());
+#endif // INFRA_USM_LEVEL_NONE
+  }
+
+#ifndef INFRA_USM_LEVEL_NONE
+  /// Free the device memory pointed by a batch of pointers in \p pointers which
+  /// are related to \p q after \p events completed.
+  ///
+  /// \param pointers The pointers point to the device memory requested to be freed.
+  /// \param events The events to be waited.
+  /// \param q The sycl::queue the memory relates to.
+  inline void async_infra_free(std::vector<void *> pointers,
+                               std::vector<sycl::event> events,
+                               sycl::queue &q = get_default_queue())
+  {
+    std::thread t( // helper thread, so the caller does not wait for the events itself
+        [](std::vector<void *> pointers, std::vector<sycl::event> events,
+           sycl::context ctxt)
+        {
+          sycl::event::wait(events); // blocks the helper thread until all events completed
+          for (auto p : pointers)
+            sycl::free(p, ctxt);
+        },
+        std::move(pointers), std::move(events), q.get_context());
+    get_current_device().add_task(std::move(t)); // the thread object is handed to the current device
+  }
+#endif
+
+  /// Synchronously copies \p size bytes from the address specified by \p from_ptr
+  /// to the address specified by \p to_ptr. The value of \p direction is used to
+  /// set the copy direction, it can be \a host_to_host, \a host_to_device,
+  /// \a device_to_host, \a device_to_device or \a automatic. The function will
+  /// return after the copy is completed.
+  ///
+  /// \param to_ptr Pointer to destination memory address.
+  /// \param from_ptr Pointer to source memory address.
+  /// \param size Number of bytes to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static void sift_memcpy(void *to_ptr, const void *from_ptr, size_t size,
+                          memcpy_direction direction = automatic,
+                          sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memcpy(q, to_ptr, from_ptr, size, direction).wait(); // block until the copy event completes
+  }
+
+  /// Asynchronously copies \p size bytes from the address specified by \p
+  /// from_ptr to the address specified by \p to_ptr. The value of \p direction is
+  /// used to set the copy direction, it can be \a host_to_host, \a
+  /// host_to_device, \a device_to_host, \a device_to_device or \a automatic. The
+  /// return of the function does NOT guarantee the copy is completed.
+  ///
+  /// \param to_ptr Pointer to destination memory address.
+  /// \param from_ptr Pointer to source memory address.
+  /// \param size Number of bytes to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static void async_sift_memcpy(void *to_ptr, const void *from_ptr, size_t size,
+                                memcpy_direction direction = automatic,
+                                sycl::queue &q = infra::get_default_queue())
+  {
+    detail::sift_memcpy(q, to_ptr, from_ptr, size, direction); // the returned event is not waited on
+  }
+
+  /// Synchronously copies 2D matrix specified by \p x and \p y from the address
+  /// specified by \p from_ptr to the address specified by \p to_ptr, while \p
+  /// from_pitch and \p to_pitch are the range of dim x in bytes of the matrix
+  /// specified by \p from_ptr and \p to_ptr. The value of \p direction is used to
+  /// set the copy direction, it can be \a host_to_host, \a host_to_device, \a
+  /// device_to_host, \a device_to_device or \a automatic. The function will
+  /// return after the copy is completed.
+  ///
+  /// \param to_ptr Pointer to destination memory address.
+  /// \param to_pitch Range of dim x in bytes of destination matrix.
+  /// \param from_ptr Pointer to source memory address.
+  /// \param from_pitch Range of dim x in bytes of source matrix.
+  /// \param x Range of dim x of matrix to be copied.
+  /// \param y Range of dim y of matrix to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static inline void sift_memcpy(void *to_ptr, size_t to_pitch,
+                                 const void *from_ptr, size_t from_pitch,
+                                 size_t x, size_t y,
+                                 memcpy_direction direction = automatic,
+                                 sycl::queue &q = infra::get_default_queue())
+  {
+    sycl::event::wait(detail::sift_memcpy(q, to_ptr, from_ptr, to_pitch,
+                                          from_pitch, x, y, direction)); // waits for every event of the copy
+  }
+
+  /// Asynchronously copies the 2D matrix specified by \p x and \p y from the
+  /// address specified by \p from_ptr to the address specified by \p to_ptr. \p
+  /// from_pitch and \p to_pitch are the range of dim x in bytes of the matrices
+  /// specified by \p from_ptr and \p to_ptr. \p direction selects the copy
+  /// direction; it can be \a host_to_host, \a host_to_device, \a
+  /// device_to_host, \a device_to_device or \a automatic. The function may
+  /// return before the copy has completed.
+  ///
+  /// \param to_ptr Pointer to destination memory address.
+  /// \param to_pitch Range of dim x in bytes of destination matrix.
+  /// \param from_ptr Pointer to source memory address.
+  /// \param from_pitch Range of dim x in bytes of source matrix.
+  /// \param x Range of dim x of matrix to be copied.
+  /// \param y Range of dim y of matrix to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static inline void
+  async_sift_memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
+                    size_t from_pitch, size_t x, size_t y,
+                    memcpy_direction direction = automatic,
+                    sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memcpy(q, to_ptr, from_ptr, to_pitch, from_pitch, x, y, // detail overload: both pointers first, then both pitches
+                        direction);
+  }
+
+  /// Synchronously copies a subset of the 3D matrix specified by \p from to the
+  /// 3D matrix specified by \p to. The source and destination positions are
+  /// given by \p from_pos and \p to_pos. The copied matrix size is specified by
+  /// \p size. \p direction selects the copy direction; it can be \a
+  /// host_to_host, \a host_to_device, \a device_to_host, \a device_to_device or
+  /// \a automatic. The function returns only after the copy has completed.
+  ///
+  /// \param to Destination matrix info.
+  /// \param to_pos Position of destination.
+  /// \param from Source matrix info.
+  /// \param from_pos Position of source.
+  /// \param size Range of the submatrix to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static inline void sift_memcpy(pitched_data to, sycl::id<3> to_pos,
+                                 pitched_data from, sycl::id<3> from_pos,
+                                 sycl::range<3> size,
+                                 memcpy_direction direction = automatic,
+                                 sycl::queue &q = infra::get_default_queue())
+  {
+    sycl::event::wait( // block on everything the detail call returns
+        detail::sift_memcpy(q, to, to_pos, from, from_pos, size, direction));
+  }
+
+  /// Asynchronously copies a subset of the 3D matrix specified by \p from to
+  /// the 3D matrix specified by \p to. The source and destination positions are
+  /// given by \p from_pos and \p to_pos. The copied matrix size is specified by
+  /// \p size. \p direction selects the copy direction; it can be \a
+  /// host_to_host, \a host_to_device, \a device_to_host, \a device_to_device or
+  /// \a automatic. The function may return before the copy has
+  /// completed.
+  ///
+  /// \param to Destination matrix info.
+  /// \param to_pos Position of destination.
+  /// \param from Source matrix info.
+  /// \param from_pos Position of source.
+  /// \param size Range of the submatrix to be copied.
+  /// \param direction Direction of the copy.
+  /// \param q Queue to execute the copy task.
+  /// \returns no return value.
+  static inline void
+  async_sift_memcpy(pitched_data to, sycl::id<3> to_pos, pitched_data from,
+                    sycl::id<3> from_pos, sycl::range<3> size,
+                    memcpy_direction direction = automatic,
+                    sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memcpy(q, to, to_pos, from, from_pos, size, direction); // result is discarded: no wait here
+  }
+
+  /// Synchronously sets \p value to the first \p size bytes starting from \p
+  /// dev_ptr. The function returns only after the memset has completed.
+  ///
+  /// \param dev_ptr Pointer to the device memory address.
+  /// \param value Value to be set.
+  /// \param size Number of bytes to be set to the value.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static void sift_memset(void *dev_ptr, int value, size_t size,
+                          sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memset(q, dev_ptr, value, size).wait(); // block until the memset has finished
+  }
+
+  /// Asynchronously sets \p value to the first \p size bytes starting from \p
+  /// dev_ptr. The function may return before the memset has completed.
+  ///
+  /// \param dev_ptr Pointer to the device memory address.
+  /// \param value Value to be set.
+  /// \param size Number of bytes to be set to the value.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static void async_sift_memset(void *dev_ptr, int value, size_t size,
+                                sycl::queue &q = infra::get_default_queue())
+  {
+    detail::sift_memset(q, dev_ptr, value, size); // result is discarded: no wait here
+  }
+
+  /// Sets \p val to the 2D memory region pointed to by \p ptr in \p q. \p x and
+  /// \p y specify the size of the 2D region to set. \p pitch is the bytes in
+  /// linear dimension, including padding bytes. The function returns only after
+  /// the memset operation has completed.
+  ///
+  /// \param ptr Pointer to the device memory region.
+  /// \param pitch Bytes in linear dimension, including padding bytes.
+  /// \param val Value to be set.
+  /// \param x Size of the region to set in linear dimension.
+  /// \param y Size of the region to set in second dimension.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static inline void sift_memset(void *ptr, size_t pitch, int val, size_t x,
+                                 size_t y,
+                                 sycl::queue &q = get_default_queue())
+  {
+    sycl::event::wait(detail::sift_memset(q, ptr, pitch, val, x, y)); // block on everything the detail call returns
+  }
+
+  /// Sets \p val to the 2D memory region pointed to by \p ptr in \p q. \p x and
+  /// \p y specify the size of the 2D region to set. \p pitch is the bytes in
+  /// linear dimension, including padding bytes. The function may return before
+  /// the memset operation has completed.
+  ///
+  /// \param ptr Pointer to the device memory region.
+  /// \param pitch Bytes in linear dimension, including padding bytes.
+  /// \param val Value to be set.
+  /// \param x Size of the region to set in linear dimension.
+  /// \param y Size of the region to set in second dimension.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static inline void async_sift_memset(void *ptr, size_t pitch, int val, size_t x,
+                                       size_t y,
+                                       sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memset(q, ptr, pitch, val, x, y); // result is discarded: no wait here
+  }
+
+  /// Sets \p val to the 3D memory region specified by \p pitch in \p q. \p size
+  /// specifies the size of the 3D region to set. The function returns only after
+  /// the memset operation has completed.
+  ///
+  /// \param pitch Specify the 3D memory region.
+  /// \param val Value to be set.
+  /// \param size Size of the 3D region to set.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static inline void sift_memset(pitched_data pitch, int val,
+                                 sycl::range<3> size,
+                                 sycl::queue &q = get_default_queue())
+  {
+    sycl::event::wait(detail::sift_memset(q, pitch, val, size)); // block on everything the detail call returns
+  }
+
+  /// Sets \p val to the 3D memory region specified by \p pitch in \p q. \p size
+  /// specifies the size of the 3D region to set. The function may return before
+  /// the memset operation has completed.
+  ///
+  /// \param pitch Specify the 3D memory region.
+  /// \param val Value to be set.
+  /// \param size Size of the 3D region to set.
+  /// \param q The queue in which the operation is done.
+  /// \returns no return value.
+  static inline void async_sift_memset(pitched_data pitch, int val,
+                                       sycl::range<3> size,
+                                       sycl::queue &q = get_default_queue())
+  {
+    detail::sift_memset(q, pitch, val, size); // result is discarded: no wait here
+  }
+
+  /// infra accessor used as device function parameter.
+  template <class T, memory_region Memory, size_t Dimension>
+  class accessor; // primary template is only declared here; see the specializations
+  template <class T, memory_region Memory>
+  class accessor<T, Memory, 3> // 3-D view: a pointer plus a sycl::range<3> extent
+  {
+  public:
+    using memory_t = detail::memory_traits<Memory, T>;
+    using element_t = typename memory_t::element_t;
+    using pointer_t = typename memory_t::pointer_t;
+    using accessor_t = typename memory_t::template accessor_t<3>;
+    accessor(pointer_t data, const sycl::range<3> &in_range) // wrap a raw pointer with an explicit extent
+        : _data(data), _range(in_range) {}
+    template <memory_region M = Memory>
+    accessor(typename std::enable_if<M != local, const accessor_t>::type &acc) // only when Memory != local
+        : accessor(acc, acc.get_range()) {}
+    accessor(const accessor_t &acc, const sycl::range<3> &in_range) // pointer from acc, extent from the caller
+        : accessor(acc.get_pointer(), in_range) {}
+    accessor<T, Memory, 2> operator[](size_t index) const // 2-D view of slice `index` along dimension 0
+    {
+      sycl::range<2> sub(_range.get(1), _range.get(2)); // extent of one slice: (dim 1, dim 2)
+      return accessor<T, Memory, 2>(_data + index * sub.size(), sub); // skip `index` whole slices
+    }
+
+  private:
+    pointer_t _data;
+    sycl::range<3> _range;
+  };
+  template <class T, memory_region Memory>
+  class accessor<T, Memory, 2> // 2-D view: a pointer plus a sycl::range<2> extent
+  {
+  public:
+    using memory_t = detail::memory_traits<Memory, T>;
+    using element_t = typename memory_t::element_t;
+    using pointer_t = typename memory_t::pointer_t;
+    using accessor_t = typename memory_t::template accessor_t<2>;
+    accessor(pointer_t data, const sycl::range<2> &in_range) // wrap a raw pointer with an explicit extent
+        : _data(data), _range(in_range) {}
+    template <memory_region M = Memory>
+    accessor(typename std::enable_if<M != local, const accessor_t>::type &acc) // only when Memory != local
+        : accessor(acc, acc.get_range()) {}
+    accessor(const accessor_t &acc, const sycl::range<2> &in_range) // pointer from acc, extent from the caller
+        : accessor(acc.get_pointer(), in_range) {}
+
+    pointer_t operator[](size_t index) const // pointer to the first element of row `index`
+    {
+      return _data + _range.get(1) * index; // rows are _range.get(1) elements apart
+    }
+
+  private:
+    pointer_t _data;
+    sycl::range<2> _range;
+  };
+
+  namespace detail
+  {
+    /// Device variable with address space of shared, global or constant.
+    template <class T, memory_region Memory, size_t Dimension>
+    class device_memory
+    {
+    public:
+      using accessor_t =
+          typename detail::memory_traits<Memory, T>::template accessor_t<Dimension>;
+      using value_t = typename detail::memory_traits<Memory, T>::value_t;
+      using infra_accessor_t = infra::accessor<T, Memory, Dimension>;
+
+      device_memory() : device_memory(sycl::range<Dimension>(1)) {} // default: a range constructed from 1
+
+      /// Constructor of 1-D array with initializer list
+      template <size_t D = Dimension>
+      device_memory(
+          const typename std::enable_if<D == 1, sycl::range<1>>::type &in_range,
+          std::initializer_list<value_t> &&init_list)
+          : device_memory(in_range)
+      {
+        assert(init_list.size() <= in_range.size());
+        _host_ptr = (value_t *)std::malloc(_size); // host staging copy, uploaded by init()
+        std::memset(_host_ptr, 0, _size);          // elements beyond the list stay zero
+        std::memcpy(_host_ptr, init_list.begin(), init_list.size() * sizeof(T));
+      }
+
+      /// Constructor of 2-D array with initializer list
+      template <size_t D = Dimension>
+      device_memory(
+          const typename std::enable_if<D == 2, sycl::range<2>>::type &in_range,
+          std::initializer_list<std::initializer_list<value_t>> &&init_list)
+          : device_memory(in_range)
+      {
+        assert(init_list.size() <= in_range[0]);
+        _host_ptr = (value_t *)std::malloc(_size); // host staging copy, uploaded by init()
+        std::memset(_host_ptr, 0, _size);          // elements beyond the lists stay zero
+        auto tmp_data = _host_ptr;
+        for (auto sub_list : init_list)
+        {
+          assert(sub_list.size() <= in_range[1]);
+          std::memcpy(tmp_data, sub_list.begin(), sub_list.size() * sizeof(T));
+          tmp_data += in_range[1]; // next row starts in_range[1] elements later
+        }
+      }
+
+      /// Constructor with range; no host or device memory is allocated yet.
+      device_memory(const sycl::range<Dimension> &range_in)
+          : _size(range_in.size() * sizeof(T)), _range(range_in), _reference(false),
+            _host_ptr(nullptr), _device_ptr(nullptr)
+      {
+        static_assert(
+            (Memory == global) || (Memory == constant) || (Memory == shared),
+            "device memory region should be global, constant or shared");
+        // Make sure that singleton class mem_mgr and dev_mgr will destruct later
+        // than this.
+        detail::mem_mgr::instance();
+        dev_mgr::instance();
+      }
+
+      /// Constructor with range given as individual extents
+      template <class... Args>
+      device_memory(Args... Arguments)
+          : device_memory(sycl::range<Dimension>(Arguments...)) {}
+
+      device_memory(const device_memory &) = delete;
+      device_memory &operator=(const device_memory &) = delete;
+      ~device_memory()
+      {
+        if (_device_ptr && !_reference) // memory bound through assign() is not freed here
+        {
+          try
+          {
+            infra_free(_device_ptr);
+          }
+          catch (std::exception const &e) // report instead of letting it escape the destructor
+          {
+            std::cerr << e.what() << '\n';
+          }
+        }
+        if (_host_ptr)
+          std::free(_host_ptr);
+      }
+
+      /// Allocate memory with default queue, and init memory if has initial value.
+      void init()
+      {
+        init(infra::get_default_queue());
+      }
+      /// Allocate memory with specified queue, and init memory if has initial value.
+      void init(sycl::queue &q)
+      {
+        if (_device_ptr) // already allocated (or bound through assign())
+          return;
+        if (!_size) // nothing to allocate
+          return;
+        allocate_device(q);
+        if (_host_ptr) // upload the initializer data; the copy is not waited on here
+          detail::sift_memcpy(q, _device_ptr, _host_ptr, _size, host_to_device);
+      }
+
+      /// Rebind this variable to the device pointer \p src of \p size bytes as a non-owning reference.
+      void assign(value_t *src, size_t size)
+      {
+        this->~device_memory();              // release whatever this object currently holds
+        new (this) device_memory(src, size); // re-create in place through the private reference constructor
+      }
+
+      /// Get memory pointer of the memory object, which is virtual pointer when
+      /// usm is not used, and device pointer when usm is used.
+      value_t *get_ptr()
+      {
+        return get_ptr(get_default_queue());
+      }
+      /// Get memory pointer of the memory object, which is virtual pointer when
+      /// usm is not used, and device pointer when usm is used.
+      value_t *get_ptr(sycl::queue &q)
+      {
+        init(q); // allocate on first use
+        return _device_ptr;
+      }
+
+      /// Get the device memory object size in bytes.
+      size_t get_size() { return _size; }
+
+      template <size_t D = Dimension>
+      typename std::enable_if<D == 1, T>::type &operator[](size_t index)
+      {
+        init(); // allocate on first use, with the default queue
+#ifdef INFRA_USM_LEVEL_NONE
+        return infra::get_buffer<typename std::enable_if<D == 1, T>::type>(
+                   _device_ptr)
+            .template get_access<sycl::access_mode::read_write>()[index];
+#else
+        return _device_ptr[index];
+#endif // INFRA_USM_LEVEL_NONE
+      }
+
+#ifdef INFRA_USM_LEVEL_NONE
+      /// Get sycl::accessor for the device memory object when usm is not used.
+      accessor_t get_access(sycl::handler &cgh)
+      {
+        return get_buffer(_device_ptr)
+            .template reinterpret<T, Dimension>(_range)
+            .template get_access<detail::memory_traits<Memory, T>::mode,
+                                 detail::memory_traits<Memory, T>::target>(cgh);
+      }
+#else
+      /// Get infra::accessor with dimension info for the device memory object
+      /// when usm is used and dimension is greater than 1.
+      template <size_t D = Dimension>
+      typename std::enable_if<D != 1, infra_accessor_t>::type
+      get_access(sycl::handler &cgh)
+      {
+        return infra_accessor_t((T *)_device_ptr, _range);
+      }
+#endif // INFRA_USM_LEVEL_NONE
+
+    private:
+      device_memory(value_t *memory_ptr, size_t size) // used by assign(); _host_ptr must be null because the destructor tests it
+          : _size(size), _range(size / sizeof(T)), _reference(true),
+            _host_ptr(nullptr), _device_ptr(memory_ptr) {}
+
+      void allocate_device(sycl::queue &q)
+      {
+#ifndef INFRA_USM_LEVEL_NONE
+        if (Memory == shared) // with USM, the shared region uses sycl::malloc_shared
+        {
+          _device_ptr = (value_t *)sycl::malloc_shared(
+              _size, q.get_device(), q.get_context());
+          return;
+        }
+#endif
+        _device_ptr = (value_t *)detail::sift_malloc(_size, q);
+      }
+
+      size_t _size;                  // size in bytes
+      sycl::range<Dimension> _range; // extent in elements
+      bool _reference;               // true when _device_ptr came from assign(); not freed by the destructor
+      value_t *_host_ptr;            // malloc'ed initializer data, or nullptr
+      value_t *_device_ptr;          // nullptr until init() or assign()
+    };
+    template <class T, memory_region Memory>
+    class device_memory<T, Memory, 0> : public device_memory<T, Memory, 1> // dimension 0 reuses the 1-D implementation
+    {
+    public:
+      using base = device_memory<T, Memory, 1>;
+      using value_t = typename base::value_t;
+      using accessor_t =
+          typename detail::memory_traits<Memory, T>::template accessor_t<0>;
+
+      /// Constructor with initial value: a one-element 1-D base initialized with \p val.
+      device_memory(const value_t &val) : base(sycl::range<1>(1), {val}) {}
+
+      /// Default constructor: a one-element 1-D base without initial value.
+      device_memory() : base(1) {}
+
+#ifdef INFRA_USM_LEVEL_NONE
+      /// Get sycl::accessor for the device memory object when usm is not used.
+      accessor_t get_access(sycl::handler &cgh)
+      {
+        auto buf = get_buffer(base::get_ptr()) // base::get_ptr() allocates on first use
+                       .template reinterpret<T, 1>(sycl::range<1>(1));
+        return accessor_t(buf, cgh);
+      }
+#endif // INFRA_USM_LEVEL_NONE
+    };
+  }
+
+  template <class T, size_t Dimension>
+  using global_memory = detail::device_memory<T, global, Dimension>; // device_memory in the global region
+  template <class T, size_t Dimension>
+  using constant_memory = detail::device_memory<T, constant, Dimension>; // device_memory in the constant region
+  template <class T, size_t Dimension>
+  using shared_memory = detail::device_memory<T, shared, Dimension>; // device_memory in the shared region
+} // namespace infra
+
+#endif // __INFRA_MEMORY_HPP__
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp
new file mode 100644
index 000000000..12d1b943a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp
@@ -0,0 +1,313 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Björkman aka Celebrandil //
+//              celle @ csc.kth.se                       //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <sycl/sycl.hpp>
+#include <iostream>
+#include <cmath>
+#include <iomanip>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "cudaImage.h"
+#include "cudaSift.h"
+#include "infra/infra.hpp"
+#include "Utility.h"
+
+#ifndef KERNEL_USE_PROFILE
+#define KERNEL_USE_PROFILE 0
+#endif
+
+void copyData(void *host, void *dev, size_t size);
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh);
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img);
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography);
+
+double ScaleUp(CudaImage &res, CudaImage &src);
+
+///////////////////////////////////////////////////////////////////////////////
+// Main program
+///////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) // argv[1]: device number, argv[2]: non-zero selects the left/righ .pgm pair
+{
+  auto totalProgTimer_start = std::chrono::steady_clock::now();
+  int devNum = 0, imgSet = 0;
+  if (argc > 1)
+    devNum = std::atoi(argv[1]); // NOTE(review): parsed but not referenced again in main
+  if (argc > 2)
+    imgSet = std::atoi(argv[2]);
+
+  float totTime = 0.0; // all accumulated times below are in microseconds
+  float imageInitTime = 0.0;
+  float extractSiftTime = 0.0;
+  float matchingTime = 0.0;
+
+  sycl::device dev = sycl::device(sycl::gpu_selector());
+  sycl::property_list q_prop{sycl::property::queue::in_order()};
+
+#ifdef DEVICE_TIMER
+  auto q_time_start = std::chrono::steady_clock::now();
+#endif
+  sycl::queue q_ct(dev, q_prop);
+#ifdef DEVICE_TIMER
+  auto q_time_stop = std::chrono::steady_clock::now();
+  // std::cout << "Queue creation Time is " << std::chrono::duration<float, std::micro>(q_time_stop - q_time_start).count() << " us" << std::endl;
+  imageInitTime += std::chrono::duration<float, std::micro>(q_time_stop - q_time_start).count();
+#endif
+
+  // Read images using OpenCV
+  cv::Mat limg, rimg;
+  auto ioRead_start = std::chrono::steady_clock::now();
+  if (imgSet)
+  {
+    cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
+  }
+  else
+  {
+    cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
+    cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
+  }
+  auto ioRead_stop = std::chrono::steady_clock::now();
+  float ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
+  unsigned int w = limg.cols; // both images are allocated with the left image's size
+  unsigned int h = limg.rows;
+  std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
+
+  // Initialize the images and download them to the device
+  std::cout << "Initializing data..." << std::endl;
+  CudaImage img1, img2;
+
+  img1.Allocate(w, h, iAlignUp(w, 128), false, q_ct, imageInitTime, NULL, (float *)limg.data);
+  img2.Allocate(w, h, iAlignUp(w, 128), false, q_ct, imageInitTime, NULL, (float *)rimg.data);
+  // std::cout << "Img Allocate time " << totTime << std::endl;
+  try
+  {
+    img1.Download(q_ct, imageInitTime);
+    img2.Download(q_ct, imageInitTime);
+  }
+  catch (sycl::exception const &e) // reported only; execution continues
+  {
+    std::cerr << e.what() << '\n';
+  }
+  // std::cout << "Img Download time " << totTime << std::endl;
+
+  // Extract Sift features from images
+  SiftData siftData1, siftData2;
+  float initBlur = 1.0f;
+  float thresh = (imgSet ? 4.5f : 2.0f);
+  InitSiftData(siftData1, q_ct, imageInitTime, 32768, true, true);
+  InitSiftData(siftData2, q_ct, imageInitTime, 32768, true, true);
+
+  // A bit of benchmarking
+  // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
+  float *memoryTmp = AllocSiftTempMemory(w, h, 5, q_ct, imageInitTime, false);
+  for (int i = 0; i < 50; i++) // 50 extraction rounds; the same temp memory is reused each round
+  {
+    float time = 0.0;
+    try
+    {
+      ExtractSift(siftData1, img1, 5, initBlur, thresh, q_ct, time, 0.0f, false, memoryTmp);
+      extractSiftTime += time;
+      time = 0.0;
+      ExtractSift(siftData2, img2, 5, initBlur, thresh, q_ct, time, 0.0f, false, memoryTmp);
+    }
+    catch (std::exception const &e) // reported only; execution continues
+    {
+      std::cerr << e.what() << '\n';
+    }
+    extractSiftTime += time; // adds the second call's time (or what was accumulated before an exception)
+  }
+  FreeSiftTempMemory(memoryTmp, q_ct);
+
+  // Match Sift features and find a homography
+  for (int i = 0; i < 1; i++)
+    MatchSiftData(siftData1, siftData2, q_ct, matchingTime);
+  float homography[9] = {}; // zero-filled so the reads below are defined even if FindHomography throws
+  int numMatches = 0;       // stays 0 if FindHomography throws before writing it
+  try
+  {
+    FindHomography(siftData1, homography, &numMatches, q_ct, matchingTime, 10000, 0.0f, 0.80f, 5.0);
+  }
+  catch (std::exception const &e) // reported only; homography and numMatches keep their initial values
+  {
+    std::cerr << e.what() << '\n';
+  }
+  int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
+  float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
+
+  std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
+  std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
+            << std::endl;
+
+#ifdef DEVICE_TIMER
+  totTime = imageInitTime + extractSiftTime + matchingTime;
+  std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
+  std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
+  std::cout << "Matching time = " << matchingTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  std::cout << "Total Device Time = " << totTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+#endif
+  // data validation
+  auto dataVerficationTimer_start = std::chrono::steady_clock::now();
+  int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
+  auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
+  float dataVerificationTime =
+      std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
+  // Print out and store summary data
+  // PrintMatchData(siftData1, siftData2, img1);
+  // cv::imwrite("../../data/limg_pts.pgm", limg);
+
+  // MatchAll(siftData1, siftData2, homography);
+
+  // Free Sift data from device
+  FreeSiftData(siftData1, q_ct);
+  FreeSiftData(siftData2, q_ct);
+
+  auto totalProgTimer_end = std::chrono::steady_clock::now();
+  float totalProgramTime = std::chrono::duration<float, std::micro>(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime; // wall time minus image reading and verification
+  std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
+            << "\n"
+            << std::endl;
+  return data_verification_flag; // the verification result is the process exit code
+}
+
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography) // debug dump: for each point of set 1, prints the set-2 points it agrees with
+{
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  int numPts1 = siftData1.numPts;
+  int numPts2 = siftData2.numPts;
+  int numFound = 0; // number of set-1 points with at least one close set-2 point
+#if 1
+  homography[0] = homography[4] = -1.0f; // NOTE(review): entries 0-7 of the caller's array are overwritten with fixed values
+  homography[1] = homography[3] = homography[6] = homography[7] = 0.0f;
+  homography[2] = 1279.0f;
+  homography[5] = 959.0f; // entry 8 is left as passed in
+#endif
+  for (int i = 0; i < numPts1; i++)
+  {
+    float *data1 = sift1[i].data;
+    std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl;
+    bool found = false;
+    for (int j = 0; j < numPts2; j++)
+    {
+      float *data2 = sift2[j].data;
+      float sum = 0.0f;
+      for (int k = 0; k < 128; k++) // dot product over 128 entries of both data arrays
+        sum += data1[k] * data2[k];
+      float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8]; // point i mapped through the homography, minus point j
+      float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos;
+      float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos;
+      float err = dx * dx + dy * dy; // squared distance
+      if (err < 100.0f) // 100.0
+        found = true;
+      if (err < 100.0f || j == sift1[i].match)
+      { // 100.0
+        if (j == sift1[i].match && err < 100.0f) // "*": recorded match and close
+          std::cout << " *";
+        else if (j == sift1[i].match) // "-": recorded match only
+          std::cout << " -";
+        else if (err < 100.0f) // "+": close only
+          std::cout << " +";
+        else // not reachable given the enclosing condition
+          std::cout << "  ";
+        std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl;
+      }
+    }
+    std::cout << std::endl;
+    if (found)
+      numFound++;
+  }
+  std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl;
+  std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%%
+  std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%%
+  std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%%
+}
+
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img) // draws match lines and point markers into img.h_data
+{
+  int numPts = siftData1.numPts;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = siftData1.m_data;
+  SiftPoint *sift2 = siftData2.m_data;
+#else
+  SiftPoint *sift1 = siftData1.h_data;
+  SiftPoint *sift2 = siftData2.h_data;
+#endif
+  float *h_img = img.h_data;
+  int w = img.width; // also used as the row stride when indexing h_img
+  int h = img.height;
+  std::cout << std::setprecision(3); // restored to 6 before returning
+  for (int j = 0; j < numPts; j++)
+  {
+    int k = sift1[j].match; // index into sift2; only dereferenced when match_error < 5
+    if (sift1[j].match_error < 5)
+    {
+      float dx = sift2[k].xpos - sift1[j].xpos;
+      float dy = sift2[k].ypos - sift1[j].ypos;
+#if 0
+      if (false && sift1[j].xpos>550 && sift1[j].xpos<600) {
+	std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") ";
+	std::cout << j << ": " << "score=" << sift1[j].score << "  ambiguity=" << sift1[j].ambiguity << "  match=" << k << "  ";
+	std::cout << "scale=" << sift1[j].scale << "  ";
+	std::cout << "error=" << (int)sift1[j].match_error << "  ";
+	std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << "  ";
+	std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl;
+      }
+#endif
+#if 1
+      int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy)); // larger of |dx| and |dy|, truncated
+      for (int l = 0; l < len; l++) // write 255 at len samples from point j toward its match
+      {
+        int x = (int)(sift1[j].xpos + dx * l / len);
+        int y = (int)(sift1[j].ypos + dy * l / len);
+        h_img[y * w + x] = 255.0f;
+      }
+#endif
+    }
+    int x = (int)(sift1[j].xpos + 0.5); // position of point j, +0.5 then truncated
+    int y = (int)(sift1[j].ypos + 0.5);
+    int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale))))); // arm length: limited by the borders and by 1.41 * scale
+    int p = y * w + x;
+    p += (w + 1); // one row down and one column right
+    for (int k = 0; k < s; k++) // cross of 0 values around the offset position
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f;
+    p -= (w + 1); // back to the point itself
+    for (int k = 0; k < s; k++) // cross of 255 values around the point
+      h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f;
+  }
+  std::cout << std::setprecision(6);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp
new file mode 100644
index 000000000..a5c4f10db
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp
@@ -0,0 +1,1944 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <chrono>
+#include <sycl/sycl.hpp>
+#include <random>
+#include "infra/infra.hpp"
+#include "cudaSift.h"
+#include "cudautils.h"
+
+//================= Device matching functions =====================//
+
+void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width)
+{ // Row-by-row copy of numPts rows, `width` bytes each, from src to dst.
+  char *d_src = (char *)src; // byte pointers: the pitches and width below are applied in bytes
+  char *d_dst = (char *)dst;
+  // No work-item index is used here, so a caller executes the entire copy by itself.
+#pragma unroll
+  for (int i = 0; i < numPts; ++i)
+  {
+#pragma unroll
+    for (int j = 0; j < width; ++j) // j is int, width is size_t: the comparison is carried out unsigned
+    {
+      d_dst[j] = d_src[j];
+    }
+    d_src = d_src + src_pitch; // each side advances by its own row pitch
+    d_dst = d_dst + dst_pitch;
+  }
+}
+
+void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2,
+                     sycl::nd_item<3> item_ct1, float *siftPoint, float *sums)
+{
+  // Dot products of sift1[p1] (one p1 per group along dim 2) with the sift2 rows of this group; output in corrData.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1 = item_ct1.get_group(2); // sift1 index; never compared against numPts1 in this function
+  const int p2 = item_ct1.get_group(1) * 16 + ty; // sift2 index handled by row ty
+  const float *ptr1 = sift1[p1].data;
+  const float *ptr2 = sift2[p2].data; // not dereferenced unless p2 < numPts2 (see below)
+  const int i = 16 * ty + tx; // indexing assumes 16 work-items along dim 2 - TODO confirm at the launch site
+  if (ty < 8)
+    siftPoint[i] = ptr1[i]; // rows 0..7 stage elements 16 * ty + tx of the sift1 descriptor
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  float sum = 0.0f;
+  if (p2 < numPts2)
+    // The if above governs the for loop below; rows with p2 >= numPts2 keep sum == 0.
+#pragma unroll
+    for (int j = 0; j < 8; j++)
+      sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; // partial dot product over elements tx, tx + 16, ...
+  sums[i] = sum;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (tx < 8)
+    sums[i] += sums[i + 8]; // fold the upper 8 partials of each row onto the lower 8
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (tx < 4)
+    sums[i] += sums[i + 4]; // fold 8 partials down to 4
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  if (ty == 0)
+  {
+    sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3]; // row tx: add its last four partials
+    corrData[p1 * item_ct1.get_group_range(1) * 16 +
+             item_ct1.get_group(1) * 16 + tx] = sum; // row p1, column get_group(1) * 16 + tx
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2,
+                      sycl::nd_item<3> item_ct1, float *siftPoints1,
+                      float *siftPoints2)
+{
+  // Fills a 16x16 tile of corrData with dot products between 16 sift1 and 16 sift2 descriptors.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const float *ptr1 =
+      sift1[sycl::min((unsigned int)(numPts1 - 1),
+                      (unsigned int)(item_ct1.get_group(2) * 16 + ty))] // index clamped to the last sift1 point
+          .data;
+  const float *ptr2 =
+      sift2[sycl::min((unsigned int)(numPts2 - 1),
+                      (unsigned int)(item_ct1.get_group(1) * 16 + ty))] // index clamped to the last sift2 point
+          .data;
+  // NOTE(review): with numPts == 0 the unsigned cast of (numPts - 1) wraps, so the clamp has no effect.
+#pragma unroll
+  for (int i = 0; i < 8; i++)
+  {
+    siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx]; // row ty of each scratch array holds one descriptor
+    siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx];
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  const int p1 = item_ct1.get_group(2) * 16 + ty; // unclamped indices, used only for the output position/guards
+  const int p2 = item_ct1.get_group(1) * 16 + tx;
+  const float *pt1 = &siftPoints1[ty * 128];
+  const float *pt2 = &siftPoints2[tx * 128];
+  float sum = 0.0f;
+  // Each work-item starts its 128-element walk at offset tx and wraps around with & 127.
+#pragma unroll
+  for (int i = 0; i < 128; i++)
+  {
+    int itx = (i + tx) & 127; // avoid bank conflicts
+    sum += pt1[itx] * pt2[itx];
+  }
+  if (p1 < numPts1)
+    corrData[p1 * item_ct1.get_group_range(1) * 16 + p2] =
+        (p2 < numPts2 ? sum : -1.0f); // columns past numPts2 are filled with -1
+}
+
+void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize,
+                 sycl::nd_item<3> item_ct1, float *maxScore, float *maxScor2,
+                 int *maxIndex)
+{
+  // Per corrData row p1: find the best and second-best value and record the match in sift1[p1]. siftSize is unused.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * 16 + tx; // slot of this work-item in the scratch arrays (16 slots per row ty)
+  int p1 = item_ct1.get_group(2) * 16 + item_ct1.get_local_id(1);
+  p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); // rows past the end redo the last point instead of idling
+  maxScore[idx] = -1.0f;
+  maxScor2[idx] = -1.0f;
+  maxIndex[idx] = -1; // NOTE(review): stays -1 if no value in the row exceeds -1; sift2[-1] is then read below
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  float *corrs = &corrData[p1 * corrWidth];
+  // Pass 1: work-item tx scans columns tx, tx + 16, ... and keeps its own best / second-best.
+#pragma unroll
+  for (int i = tx; i < corrWidth; i += 16)
+  {
+    float val = corrs[i];
+    if (val > maxScore[idx])
+    {
+      maxScor2[idx] = maxScore[idx];
+      maxScore[idx] = val;
+      maxIndex[idx] = i;
+    }
+    else if (val > maxScor2[idx])
+      maxScor2[idx] = val;
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  // Pass 2: tree reduction of the 16 slots of each row down to slot ty * 16.
+#pragma unroll
+  for (int len = 8; len > 0; len /= 2)
+  {
+    if (tx < len) // was `tx < 8`: writers len..7 raced with the readers of slots idx + len and merged candidates twice
+    {
+      float val = maxScore[idx + len];
+      int i = maxIndex[idx + len];
+      if (val > maxScore[idx])
+      {
+        maxScor2[idx] = maxScore[idx];
+        maxScore[idx] = val;
+        maxIndex[idx] = i;
+      }
+      else if (val > maxScor2[idx])
+        maxScor2[idx] = val;
+      float va2 = maxScor2[idx + len]; // the partner's runner-up may still beat ours
+      if (va2 > maxScor2[idx])
+        maxScor2[idx] = va2;
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  if (tx == 0)
+  {
+    sift1[p1].score = maxScore[ty * 16];
+    sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); // second-best / best; epsilon avoids 0 / 0
+    sift1[p1].match = maxIndex[ty * 16];
+    sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos;
+  }
+}
+
+void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, int *maxIndex)
+{ // For each sift1 point p1: exhaustive best / second-best dot product over all numPts2 sift2 points.
+  int block_dim = item_ct1.get_local_range().get(2); // blockDim.x == 16
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1 = item_ct1.get_group(2) * block_dim + ty;
+  const int idx = ty * 16 + tx;
+  const bool active = p1 < numPts1; // rows past numPts1 do no work but must still reach every barrier
+  maxIndex[idx] = 0;
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+
+  float *corrs = NULL;
+  if (active)
+  {
+    corrs = &corrData[p1 * block_dim * 2]; // per-row scratch in corrData: [0..15] best per tx, [16..31] second-best
+    corrs[tx] = 0.0f;
+    corrs[tx + 16] = 0.0f;
+    const float *pt1 = sift1[p1].data;
+    for (int p2 = tx; p2 < numPts2; p2 += 16) // work-item tx visits sift2 points tx, tx + 16, ...
+    {
+      float *pt2 = sift2[p2].data;
+      float sum = 0.0f;
+      for (int i = 0; i < 128; i++)
+        sum += pt1[i] * pt2[i];
+      if (sum > corrs[tx])
+      {
+        corrs[tx + 16] = corrs[tx];
+        corrs[tx] = sum;
+        maxIndex[idx] = p2;
+      }
+      else if (sum > corrs[tx + 16])
+        corrs[tx + 16] = sum;
+    }
+  }
+  item_ct1.barrier(sycl::access::fence_space::global_and_local); // corrs lives in global memory, so fence it too
+  ;
+  // Tree reduction per row. The barrier is executed by every work-item (it used to sit inside `if (p1 < numPts1)`).
+  {
+    for (int len = 8; len > 0; len /= 2)
+    {
+      if (active && tx < len)
+      {
+        float val = corrs[tx + len];
+        int i = maxIndex[idx + len];
+        if (val > corrs[tx])
+        {
+          corrs[tx + 16] = corrs[tx];
+          corrs[tx] = val;
+          maxIndex[idx] = i;
+        }
+        else if (val > corrs[tx + 16])
+          corrs[tx + 16] = val;
+        float va2 = corrs[tx + 16 + len]; // the partner's runner-up may still beat ours
+        if (va2 > corrs[tx + 16])
+          corrs[tx + 16] = va2;
+      }
+      item_ct1.barrier(sycl::access::fence_space::global_and_local);
+      ;
+    }
+    if (active && tx == 0)
+    {
+      sift1[p1].score = corrs[0];
+      sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6); // second-best / best
+      sift1[p1].match = maxIndex[ty << 4];
+      sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos;
+    }
+  }
+}
+
+#define FMC2W 16 // stride of the per-work-item dot-product loops in FindMaxCorr2/FindMaxCorr4 (j += FMC2W)
+#define FMC2H 4 // number of ty rows the FindMaxCorr2/FindMaxCorr4 indexing is built around
+
+void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, float *siftPoint, float *maxScore,
+                  float *maxScor2, int *maxIndex)
+{
+  // One work-group per sift1 point p1: best / second-best dot product over all sift2 points.
+  const int p1 = item_ct1.get_group(2);
+  if (p1 >= numPts1) // p1 depends only on the group id, so the whole group leaves together
+    return;
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC2W + tx;
+  if (idx < FMC2H) // one running best / second-best / index slot per row ty
+  {
+    maxScore[idx] = -1.0f;
+    maxScor2[idx] = -1.0f;
+    maxIndex[idx] = 0;
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  const float *pt1 = sift1[p1].data;
+  for (int i = idx; i < 128; i += FMC2W * FMC2H) // stage the 128-float descriptor of p1 in siftPoint
+    siftPoint[i] = pt1[i];
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  for (int p2 = ty; p2 < numPts2; p2 += FMC2H) // row ty visits sift2 points ty, ty + FMC2H, ...
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[j] * pt2[j];
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j, item_ct1); // ShiftDown is defined elsewhere; presumably a shuffle-down - TODO confirm
+    if (tx == 0) // only the tx == 0 work-item of each row touches slot ty
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  for (int len = FMC2H / 2; len > 0; len /= 2) // merge the FMC2H row slots down to slot 0
+  {
+    if (ty == 0 && tx < len)
+    {
+      float val = maxScore[tx + len];
+      int p2 = maxIndex[tx + len];
+      if (val > maxScore[tx])
+      {
+        maxScor2[tx] = maxScore[tx];
+        maxScore[tx] = val;
+        maxIndex[tx] = p2;
+      }
+      else if (val > maxScor2[tx])
+        maxScor2[tx] = val;
+      float va2 = maxScor2[tx + len]; // the partner's runner-up may still beat ours
+      if (va2 > maxScor2[tx])
+        maxScor2[tx] = va2;
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  if (ty == 0 && tx == 0)
+  {
+    sift1[p1].score = maxScore[0];
+    sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); // second-best / best
+    sift1[p1].match = maxIndex[0];
+    sift1[p1].match_xpos = sift2[maxIndex[0]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[0]].ypos;
+  }
+}
+
+void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, float *siftPoint, float *maxScore,
+                  float *maxScor2, int *maxIndex)
+{
+  // Each row ty owns one sift1 point and scans every sift2 point for its best / second-best dot product.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  if (tx == 0)
+  {
+    maxScore[ty] = -1.0f;
+    maxScor2[ty] = -1.0f;
+    maxIndex[ty] = 0;
+  }
+  const int p1 = item_ct1.get_group(2) * FMC2H + ty; // NOTE(review): never checked against numPts1 (numPts1 is unused) - confirm the caller pads
+  const float *pt1 = sift1[p1].data;
+  for (int j = tx; j < 128; j += FMC2W)
+    siftPoint[128 * ty + j] = pt1[j]; // row ty of siftPoint holds the descriptor of p1
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  for (int p2 = 0; p2 < numPts2; p2++)
+  {
+    const float *pt2 = sift2[p2].data;
+    float sum = 0.0f;
+    for (int j = tx; j < 128; j += FMC2W)
+      sum += siftPoint[128 * ty + j] * pt2[j];
+    for (int j = FMC2W / 2; j > 0; j /= 2)
+      sum += ShiftDown(sum, j, item_ct1); // ShiftDown is defined elsewhere; presumably a shuffle-down - TODO confirm
+    if (tx == 0) // only the tx == 0 work-item of each row touches slot ty
+    {
+      if (sum > maxScore[ty])
+      {
+        maxScor2[ty] = maxScore[ty];
+        maxScore[ty] = sum;
+        maxIndex[ty] = p2;
+      }
+      else if (sum > maxScor2[ty])
+        maxScor2[ty] = sum;
+    }
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  if (tx == 0)
+  {
+    sift1[p1].score = maxScore[ty];
+    sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); // second-best / best
+    sift1[p1].match = maxIndex[ty];
+    sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos;
+    sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos;
+  }
+}
+
+void CleanMatches(SiftPoint *sift1, int numPts1, sycl::nd_item<3> item_ct1)
+{ // Resets the score of every sift1 point to 0; the index arithmetic uses 64 work-items per group along dim 2.
+  const int p1 = sycl::min(
+      (unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2)),
+      (unsigned int)(numPts1 - 1)); // surplus work-items are clamped onto the last point
+  sift1[p1].score = 0.0f;
+}
+
+#define M7W 32 // FindMaxCorr10: sift1 points per work-group (bp1 = M7W * group id)
+#define M7H 32 // FindMaxCorr10: sift2 points per tile (bp2 advances by M7H)
+#define M7R 4 // FindMaxCorr10: consecutive sift2 rows scored by one work-item (M7R * iy + dy)
+#define NRX 2 // FindMaxCorr10: sift1 points scored by one work-item
+#define NDIM 128 // descriptor length in floats (read as NDIM / 4 float4 values)
+
+void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                   sycl::nd_item<3> item_ct1, sycl::float4 *buffer1,
+                   sycl::float4 *buffer2)
+{
+  // Best / second-best dot-product match in sift2 for the M7W sift1 points owned by this work-group.
+  int tx = item_ct1.get_local_id(2);
+  int ty = item_ct1.get_local_id(1);
+  int bp1 = M7W * item_ct1.get_group(2); // first sift1 point of this group
+  // Stage M7W sift1 descriptors in buffer1; row j is rotated by j float4 slots, see (d + j) % (NDIM / 4).
+#pragma unroll
+  for (int j = ty; j < M7W; j += M7H / M7R)
+  {
+    int p1 = sycl::min((int)(bp1 + j), (int)(numPts1 - 1)); // clamp reads to the last sift1 point
+
+#pragma unroll
+    for (int d = tx; d < NDIM / 4; d += M7W)
+    {
+      buffer1[(j * NDIM / 4 + (d + j) % (NDIM / 4))] = ((sycl::float4 *)&sift1[p1].data)[d];
+    }
+  }
+  // Per-work-item running results for its NRX sift1 points.
+  float max_score[NRX];
+  float sec_score[NRX];
+  int index[NRX];
+
+#pragma unroll
+  for (int i = 0; i < NRX; i++)
+  {
+    max_score[i] = 0.0f;
+    sec_score[i] = 0.0f;
+    index[i] = -1; // stays -1 unless some score exceeds 0
+  }
+  int idx = ty * M7W + tx;
+  int ix = idx % (M7W / NRX); // sift1 column handled by this work-item (plus (M7W / NRX) * i)
+  int iy = idx / (M7W / NRX); // selects the group of M7R sift2 rows handled by this work-item
+  // Only whole tiles of M7H sift2 points are visited: the loop ends at numPts2 - M7H + 1.
+#pragma unroll
+  for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H)
+  {
+#pragma unroll
+    for (int j = ty; j < M7H; j += M7H / M7R)
+    {
+      int p2 = sycl::min((int)(bp2 + j), (int)(numPts2 - 1));
+#pragma unroll
+      for (int d = tx; d < NDIM / 4; d += M7W)
+        buffer2[j * NDIM / 4 + d] = ((sycl::float4 *)&sift2[p2].data)[d]; // sift2 tile is stored unrotated
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+
+    if (idx < M7W * M7H / M7R / NRX) // only this many work-items compute scores
+    {
+      float score[M7R][NRX];
+
+#pragma unroll
+      for (int dy = 0; dy < M7R; dy++)
+#pragma unroll
+        for (int i = 0; i < NRX; i++)
+          score[dy][i] = 0.0f;
+
+#pragma unroll
+      for (int d = 0; d < NDIM / 4; d++)
+      {
+        sycl::float4 v1[NRX];
+#pragma unroll
+        for (int i = 0; i < NRX; i++)
+          v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)]; // same rotation as the store above
+
+#pragma unroll
+        for (int dy = 0; dy < M7R; dy++)
+        {
+          sycl::float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d];
+#pragma unroll
+          for (int i = 0; i < NRX; i++)
+          {
+            score[dy][i] += v1[i].x() * v2.x();
+            score[dy][i] += v1[i].y() * v2.y();
+            score[dy][i] += v1[i].z() * v2.z();
+            score[dy][i] += v1[i].w() * v2.w();
+          }
+        }
+      }
+
+#pragma unroll
+      for (int dy = 0; dy < M7R; dy++)
+      {
+#pragma unroll
+        for (int i = 0; i < NRX; i++)
+        {
+          if (score[dy][i] > max_score[i])
+          {
+            sec_score[i] = max_score[i];
+            max_score[i] = score[dy][i];
+            index[i] =
+                sycl::min((int)(bp2 + M7R * iy + dy), (int)(numPts2 - 1));
+          }
+          else if (score[dy][i] > sec_score[i])
+            sec_score[i] = score[dy][i];
+        }
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space); // buffer2 is overwritten by the next tile
+  }
+  // From here on buffer1 is reused as scratch: scores1, then scores2, then indices, M7W * M7H / M7R entries each.
+  float *scores1 = (float *)buffer1;
+  float *scores2 = &scores1[M7W * M7H / M7R];
+  int *indices = (int *)&scores2[M7W * M7H / M7R];
+  if (idx < M7W * M7H / M7R / NRX)
+  {
+#pragma unroll
+    for (int i = 0; i < NRX; i++)
+    {
+      scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i];
+      scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i];
+      indices[iy * M7W + (M7W / NRX) * i + ix] = index[i];
+    }
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  if (ty == 0) // work-item tx merges the M7H / M7R partial results of sift1 point bp1 + tx
+  {
+    float max_score = scores1[tx]; // these three scalars shadow the arrays of the same name declared above
+    float sec_score = scores2[tx];
+    int index = indices[tx];
+
+#pragma unroll
+    for (int y = 0; y < M7H / M7R; y++)
+      if (index != indices[y * M7W + tx])
+      {
+        if (scores1[y * M7W + tx] > max_score)
+        {
+          sec_score = sycl::max(max_score, sec_score);
+          max_score = scores1[y * M7W + tx];
+          index = indices[y * M7W + tx];
+        }
+        else if (scores1[y * M7W + tx] > sec_score)
+          sec_score = scores1[y * M7W + tx];
+      }
+    sift1[bp1 + tx].score = max_score; // NOTE(review): bp1 + tx is not clamped to numPts1 here - confirm sift1 is padded
+    sift1[bp1 + tx].match = index;
+    sift1[bp1 + tx].match_xpos = sift2[index].xpos; // NOTE(review): index may still be -1 (see its initialisation) - confirm
+    sift1[bp1 + tx].match_ypos = sift2[index].ypos;
+    sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f);
+  }
+}
+
+#define FMC_GH 512 // sift2 points covered per group along dim 1 (base index get_group(1) * FMC_GH)
+#define FMC_BW 32 // sift1 points per work-group (base index get_group(2) * FMC_BW)
+#define FMC_BH 32 // sift2 points per tile (k advances by FMC_BH)
+#define FMC_BD 16 // float4 rows of the staging arrays in FindMaxCorr8/FindMaxCorr9
+#define FMC_TW 1 // per-work-item tile width (sift1 direction)
+#define FMC_TH 4 // per-work-item tile height (sift2 direction)
+#define FMC_NW (FMC_BW / FMC_TW) //  32
+#define FMC_NH (FMC_BH / FMC_TH) //   8
+#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps
+
+infra::global_memory<volatile int, 0> lock(0); // file-scope lock word; the kernels below receive a pointer parameter also named `lock`
+
+void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, volatile int *lock,
+                  sycl::float4 *siftParts1, sycl::float4 *siftParts2)
+{
+  // siftParts1/siftParts2: float4 scratch arrays supplied by the caller (original size notes: 4*32*8 = 1024 each).
+  // Both are indexed below as [row * FMC_BW (or FMC_BH) + point] with rows 0 .. FMC_BD - 1, used as two halves.
+  // blksums (FMC_BW * FMC_BH floats, 32*32 = 1024) is not a separate array here: it aliases siftParts1 further down.
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC_NW + tx;
+  sycl::float4 *pts1 = 0, *pts2 = 0; // only work-items with idx < FMC_BW / FMC_BH get non-null pointers
+  if (idx < FMC_BW)
+  {
+    const int p1l =
+        sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                  (unsigned int)(numPts1 - 1));
+    pts1 = (sycl::float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < sycl::min(FMC_GH, (int)(numPts2 - FMC_BH + 1)); // NOTE(review): bound ignores the get_group(1) * FMC_GH offset - confirm
+       k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l =
+          sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx),
+                    (unsigned int)(numPts2 - 1));
+      pts2 = (sycl::float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH]; // this work-item's FMC_TW x FMC_TH tile of dot products
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    // Preload the first FMC_BD / 2 float4 of every descriptor into rows 0 .. FMC_BD / 2 - 1.
+    if (idx < FMC_BW)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i];
+    if (idx < FMC_BH)
+      for (int i = 0; i < FMC_BD / 2; i++)
+        siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i];
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+
+    int b = FMC_BD / 2; // row offset of the half being filled; toggled with ^= below
+    for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2)
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i]; // load the next chunk into half b ...
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD / 2; i++)
+          siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i];
+
+      b ^= FMC_BD / 2; // ... then switch to the other half, which holds the previously loaded chunk
+      for (int i = 0; i < FMC_BD / 2; i++)
+      {
+        sycl::float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+            sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+            sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+            sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+          }
+        }
+      }
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      ;
+    }
+
+    b ^= FMC_BD / 2; // switch once more: consume the chunk loaded in the last loop iteration
+    for (int i = 0; i < FMC_BD / 2; i++)
+    {
+      sycl::float4 v1[FMC_TW];
+      for (int ix = 0; ix < FMC_TW; ix++)
+        v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+      for (int iy = 0; iy < FMC_TH; iy++)
+      {
+        sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+        for (int ix = 0; ix < FMC_TW; ix++)
+        {
+          sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+          sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+          sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+          sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+        }
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space); // all reads of siftParts1 finish before it is reused
+    ;
+
+    float *blksums = (float *)siftParts1; // siftParts1 now holds the tile of finished sums
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    if (idx < FMC_BW) // work-item idx scans column idx: its sift1 point against the FMC_BH sift2 points of the tile
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex =
+              sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j),
+                        (unsigned int)(numPts2 - 1));
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                           (unsigned int)(numPts1 - 1));
+  if (idx == 0) // one work-item spins on the lock word until the exchange of 0 -> 1 succeeds
+    while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0)
+      ;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  if (idx < FMC_BW) // merge this group's result with what is already stored in sift1[p1]
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best from the ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space); // NOTE(review): local-only fence before releasing a lock that guards global sift1 - confirm ordering
+  ;
+  if (idx == 0)
+    infra::atomic_exchange((int *)lock, 0); // release the lock
+}
+
+void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, volatile int *lock,
+                  sycl::float4 *siftParts1, sycl::float4 *siftParts2,
+                  float *blksums)
+{
+  // siftParts1/siftParts2: float4 scratch arrays supplied by the caller (original size notes: 4*32*8 = 1024 each),
+  // indexed below as [row * FMC_BW (or FMC_BH) + point] with rows 0 .. FMC_BD - 1.
+  // blksums: separate float scratch array, indexed up to FMC_BH * FMC_BW entries (32*32 = 1024).
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx = ty * FMC_NW + tx;
+  sycl::float4 *pts1 = 0, *pts2 = 0; // only work-items with idx < FMC_BW / FMC_BH get non-null pointers
+  if (idx < FMC_BW)
+  {
+    const int p1l =
+        sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                  (unsigned int)(numPts1 - 1));
+    pts1 = (sycl::float4 *)sift1[p1l].data;
+  }
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < sycl::min(FMC_GH, (int)(numPts2 - FMC_BH + 1)); // NOTE(review): bound ignores the get_group(1) * FMC_GH offset - confirm
+       k += FMC_BH)
+  {
+    if (idx < FMC_BH)
+    {
+      const int p2l =
+          sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx),
+                    (unsigned int)(numPts2 - 1));
+      pts2 = (sycl::float4 *)sift2[p2l].data;
+    }
+    float sums[FMC_TW * FMC_TH]; // this work-item's FMC_TW x FMC_TH tile of dot products
+    for (int i = 0; i < FMC_TW * FMC_TH; i++)
+      sums[i] = 0.0f;
+    for (int d = 0; d < 32; d += FMC_BD) // the 32 float4 of a descriptor are processed FMC_BD at a time
+    {
+      if (idx < FMC_BW)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts1[i * FMC_BW + idx] = pts1[d + i];
+      if (idx < FMC_BH)
+        for (int i = 0; i < FMC_BD; i++)
+          siftParts2[i * FMC_BH + idx] = pts2[d + i];
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      ;
+
+      for (int i = 0; i < FMC_BD; i++)
+      {
+        sycl::float4 v1[FMC_TW];
+        for (int ix = 0; ix < FMC_TW; ix++)
+          v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)];
+        for (int iy = 0; iy < FMC_TH; iy++)
+        {
+          sycl::float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)];
+          for (int ix = 0; ix < FMC_TW; ix++)
+          {
+            sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x();
+            sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y();
+            sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z();
+            sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w();
+          }
+        }
+      }
+
+      item_ct1.barrier(sycl::access::fence_space::local_space); // staging arrays are overwritten by the next chunk
+      ;
+    }
+    // Unlike a variant that aliases siftParts1, this kernel writes the sums to its own blksums array.
+    for (int iy = 0; iy < FMC_TH; iy++)
+      for (int ix = 0; ix < FMC_TW; ix++)
+        blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    if (idx < FMC_BW) // work-item idx scans column idx: its sift1 point against the FMC_BH sift2 points of the tile
+    {
+      for (int j = 0; j < FMC_BH; j++)
+      {
+        float sum = blksums[j * FMC_BW + idx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex =
+              sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j),
+                        (unsigned int)(numPts2 - 1));
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx),
+                           (unsigned int)(numPts1 - 1));
+  if (idx == 0) // one work-item spins on the lock word until the exchange of 0 -> 1 succeeds
+    while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0)
+      ;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  if (idx < FMC_BW) // merge this group's result with what is already stored in sift1[p1]
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best from the ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space); // NOTE(review): local-only fence before releasing a lock that guards global sift1 - confirm ordering
+  ;
+  if (idx == 0)
+    infra::atomic_exchange((int *)lock, 0); // release the lock
+}
+
+void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, volatile int *lock,
+                  float *siftParts1, float *siftParts2)
+{
+  // siftParts1 is viewed as float4 and indexed with stride 17 (original note: features in columns,
+  // one extra to avoid shared conflicts); siftParts2 is viewed as float4 with stride 16.
+  sycl::float4 *pts1 = (sycl::float4 *)siftParts1;
+  sycl::float4 *pts2 = (sycl::float4 *)siftParts2;
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1l = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty),
+                            (unsigned int)(numPts1 - 1)); // sift1 point loaded by row ty (clamped)
+  const sycl::float4 *p1l4 = (sycl::float4 *)sift1[p1l].data;
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512 / 16; k++) // 512 sift2 points per group along dim 1, 16 per iteration
+  {
+    const int p2l =
+        sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty),
+                  (unsigned int)(numPts2 - 1));
+    const sycl::float4 *p2l4 = (sycl::float4 *)sift2[p2l].data;
+#define NUM 4 // sift2 rows accumulated per computing work-item; only rows ty < 16 / NUM compute
+    float sum[NUM];
+    if (ty < (16 / NUM)) // sum[] is initialised and used only under this same condition
+      for (int l = 0; l < NUM; l++)
+        sum[l] = 0.0f;
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    for (int i = 0; i < 2; i++) // two passes of 16 float4 each cover the 32 float4 of a descriptor
+    {
+      pts1[17 * tx + ty] = p1l4[i * 16 + tx]; // stored transposed: part tx of point ty
+      pts2[16 * ty + tx] = p2l4[i * 16 + tx];
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      ;
+      if (ty < (16 / NUM))
+      {
+#pragma unroll
+        for (int j = 0; j < 16; j++)
+        {
+          sycl::float4 p1v = pts1[17 * j + tx]; // part j of sift1 point tx
+#pragma unroll
+          for (int l = 0; l < NUM; l++)
+          {
+            sycl::float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; // part j of sift2 row ty + l * (16 / NUM)
+            sum[l] += p1v.x() * p2v.x();
+            sum[l] += p1v.y() * p2v.y();
+            sum[l] += p1v.z() * p2v.z();
+            sum[l] += p1v.w() * p2v.w();
+          }
+        }
+      }
+
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+      ;
+    }
+    float *sums = siftParts1; // siftParts1 is reused to hold the 16x16 finished sums
+    if (ty < (16 / NUM))
+      for (int l = 0; l < NUM; l++)
+        sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l];
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    if (ty == 0) // work-item tx scans the 16 sift2 rows for sift1 point tx
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx]; // this scalar shadows the sum[] array declared above
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex = sycl::min(
+              (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j),
+              (unsigned int)(numPts2 - 1));
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx),
+                           (unsigned int)(numPts1 - 1)); // output index uses tx, unlike the load index p1l (ty)
+  if (tx == 0 && ty == 0) // one work-item spins on the lock word until the exchange of 0 -> 1 succeeds
+    while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0)
+      ;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  if (ty == 0) // merge this group's result with what is already stored in sift1[p1]
+  {
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best from the ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+
+  item_ct1.barrier(sycl::access::fence_space::local_space); // NOTE(review): local-only fence before releasing a lock that guards global sift1 - confirm ordering
+  ;
+  if (tx == 0 && ty == 0)
+    infra::atomic_exchange((int *)lock, 0); // release the lock
+}
+
+void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, volatile int *lock,
+                  float *siftParts2, float *sums)
+{
+  // The sift1 descriptor is not staged (the original siftParts1[128*16] array is gone): each work-item keeps one float4 of it.
+  // siftParts2 is viewed as float4 with 32 entries per sift2 row; sums holds a 16x16 table of dot products.
+
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int p1l = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty),
+                            (unsigned int)(numPts1 - 1)); // sift1 point owned by row ty (clamped)
+  float *pt1l = sift1[p1l].data;
+  sycl::float4 part1 = reinterpret_cast<sycl::float4 *>(pt1l)[tx]; // float4 number tx of that descriptor
+  float maxScore = -1.0f;
+  float maxScor2 = -1.0f;
+  int maxIndex = 0;
+  for (int k = 0; k < 512; k += 16) // 512 sift2 points per group along dim 1, 16 per iteration
+  {
+    const int p2l =
+        sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k + ty),
+                  (unsigned int)(numPts2 - 1));
+    float *pt2l = sift2[p2l].data;
+    reinterpret_cast<sycl::float4 *>(siftParts2)[32 * ty + tx] =
+        reinterpret_cast<sycl::float4 *>(pt2l)[tx]; // row ty of siftParts2 = descriptor of sift2 point p2l
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    for (int i = 0; i < 16; i++) // row ty's sift1 point against each of the 16 staged sift2 points
+    {
+      sycl::float4 part2 =
+          reinterpret_cast<sycl::float4 *>(siftParts2)[32 * i + tx];
+      float sum = part1.x() * part2.x() + part1.y() * part2.y() +
+                  part1.z() * part2.z() + part1.w() * part2.w();
+      sum += ShiftDown(sum, 16, item_ct1); // ShiftDown is defined elsewhere; presumably a shuffle-down reduction over 32 lanes - TODO confirm
+      sum += ShiftDown(sum, 8, item_ct1);
+      sum += ShiftDown(sum, 4, item_ct1);
+      sum += ShiftDown(sum, 2, item_ct1);
+      sum += ShiftDown(sum, 1, item_ct1);
+      if (tx == 0)
+        sums[16 * i + ty] = sum; // entry (sift2 row i, sift1 row ty)
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+    if (ty == 0 && tx < 16) // work-item tx scans the 16 sift2 rows for sift1 point tx
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        float sum = sums[16 * j + tx];
+        if (sum > maxScore)
+        {
+          maxScor2 = maxScore;
+          maxScore = sum;
+          maxIndex =
+              sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k + j),
+                        (unsigned int)(numPts2 - 1));
+        }
+        else if (sum > maxScor2)
+          maxScor2 = sum;
+      }
+    }
+
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+    ;
+  }
+  if (tx == 0 && ty == 0) // one work-item spins on the lock word until the exchange of 0 -> 1 succeeds
+    while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0)
+      ;
+
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  ;
+  if (ty == 0 && tx < 16) // merge this group's result with what is already stored in sift1[p1]
+  {
+    const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx),
+                             (unsigned int)(numPts1 - 1));
+    float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); // recover the stored second-best from the ratio
+    if (maxScore > sift1[p1].score)
+    {
+      maxScor2 = sycl::max(sift1[p1].score, maxScor2);
+      sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+      sift1[p1].score = maxScore;
+      sift1[p1].match = maxIndex;
+      sift1[p1].match_xpos = sift2[maxIndex].xpos;
+      sift1[p1].match_ypos = sift2[maxIndex].ypos;
+    }
+    else if (maxScore > maxScor2Old)
+      sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space); // NOTE(review): local-only fence before releasing a lock that guards global sift1 - confirm ordering
+  ;
+  if (tx == 0 && ty == 0)
+    infra::atomic_exchange((int *)lock, 0); // release the lock
+}
+
+// Scores 16 sift1 descriptors against a 512-point slice of sift2 using two
+// transposed scratch tiles, then merges the best / second-best dot products
+// into sift1 while holding a global lock.
+//
+// Indexing used below: group(2) selects the 16 sift1 points and group(1) the
+// 512 sift2 points; tx / ty are local ids 2 / 1.
+//   siftParts1, siftParts2 - work-group scratch tiles stored with a row stride
+//                            of 17 floats (one extra, per the original note, to
+//                            avoid shared conflicts); siftParts1 is reused to
+//                            hold the 16x16 score tile
+//   lock                   - global flag, 0 = free, 1 = held
+void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2,
+                  sycl::nd_item<3> item_ct1, volatile int *lock,
+                  float *siftParts1, float *siftParts2)
+{
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+
+  const int srcRow = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty),
+                               (unsigned int)(numPts1 - 1));
+  const float *srcDesc = sift1[srcRow].data;
+
+  float best = -1.0f;
+  float runnerUp = -1.0f;
+  int bestIdx = 0;
+  float *scores = siftParts1; // score tile aliases the first scratch buffer
+
+  // Walk the 512-point slice 16 points at a time.
+  for (int base = 0; base < 512; base += 16)
+  {
+    const int candRow =
+        sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + base + ty),
+                  (unsigned int)(numPts2 - 1));
+    const float *candDesc = sift2[candRow].data;
+
+    // Accumulate dot(sift1 point tx, sift2 point ty) over 8 chunks of 16 floats.
+    float acc = 0.0f;
+    for (int chunk = 0; chunk < 8; chunk++)
+    {
+      // Load both chunks transposed: element index major, point index minor.
+      siftParts1[17 * tx + ty] = srcDesc[chunk * 16 + tx];
+      siftParts2[17 * tx + ty] = candDesc[chunk * 16 + tx];
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+
+      for (int e = 0; e < 16; e++)
+        acc += siftParts1[17 * e + tx] * siftParts2[17 * e + ty];
+      item_ct1.barrier(sycl::access::fence_space::local_space);
+    }
+
+    scores[16 * ty + tx] = acc;
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+
+    // Row ty == 0 keeps a running best / second best, one sift1 point per tx.
+    if (ty == 0)
+    {
+      for (int j = 0; j < 16; j++)
+      {
+        const float score = scores[16 * j + tx];
+        if (score > best)
+        {
+          runnerUp = best;
+          best = score;
+          bestIdx = sycl::min(
+              (unsigned int)(item_ct1.get_group(1) * 512 + base + j),
+              (unsigned int)(numPts2 - 1));
+        }
+        else if (score > runnerUp)
+        {
+          runnerUp = score;
+        }
+      }
+    }
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+
+  const int dst = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx),
+                            (unsigned int)(numPts1 - 1));
+
+  // One work-item per group spins for the global lock; the rest wait at the
+  // barrier until it has been taken.
+  if (tx == 0 && ty == 0)
+  {
+    while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0)
+    {
+    }
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // Merge this slice's result with whatever other groups already stored.
+  if (ty == 0)
+  {
+    const float storedSecond =
+        sift1[dst].ambiguity * (sift1[dst].score + 1e-6f);
+    if (best > sift1[dst].score)
+    {
+      // New overall best: the previous best becomes a second-best candidate.
+      runnerUp = sycl::max(sift1[dst].score, runnerUp);
+      sift1[dst].ambiguity = runnerUp / (best + 1e-6f);
+      sift1[dst].score = best;
+      sift1[dst].match = bestIdx;
+      sift1[dst].match_xpos = sift2[bestIdx].xpos;
+      sift1[dst].match_ypos = sift2[bestIdx].ypos;
+    }
+    else if (best > storedSecond)
+    {
+      // Stored best stands, but this slice supplies a closer second best.
+      sift1[dst].ambiguity = best / (sift1[dst].score + 1e-6f);
+    }
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // Release the lock once every work-item in the group has finished writing.
+  if (tx == 0 && ty == 0)
+    infra::atomic_exchange((int *)lock, 0);
+}
+
+// Inverts a size x size matrix by LU decomposition with scaled row pivoting,
+// followed by one forward/back substitution per column of the inverse.
+//
+//   elem - input matrix; overwritten in place with its row-permuted LU
+//          factors (multipliers below the diagonal, U on and above it)
+//   res  - receives the inverse, written column by column
+//
+// Singular input is not reported: an all-zero row gets a scale of 1e16 and a
+// zero pivot is replaced by 1e-16, so the result is not meaningful in that
+// case. All constants are single precision so that no double-precision
+// arithmetic is introduced into callers that run on a device.
+template <int size>
+void InvertMatrix(float elem[size][size], float res[size][size])
+{
+  int indx[size]; // indx[j] = row swapped into position j at step j
+  float b[size];  // right-hand side / solution of the current column
+  float vv[size]; // per-row scale: 1 / (largest magnitude in the row)
+  for (int i = 0; i < size; i++)
+    indx[i] = 0;
+  int imax = 0;
+
+  // Implicit scaling: record the reciprocal of each row's largest magnitude.
+  for (int i = 0; i < size; i++)
+  {
+    float big = 0.0f;
+    for (int j = 0; j < size; j++)
+    {
+      float temp = sycl::fabs(elem[i][j]);
+      if (temp > big)
+        big = temp;
+    }
+    if (big > 0.0f)
+      vv[i] = 1.0f / big;
+    else
+      vv[i] = 1e16f;
+  }
+
+  // Decompose one column at a time.
+  for (int j = 0; j < size; j++)
+  {
+    // Entries above the diagonal (i < j): final values of U.
+    for (int i = 0; i < j; i++)
+    {
+      float sum = elem[i][j];
+      for (int k = 0; k < i; k++)
+        sum -= elem[i][k] * elem[k][j];
+      elem[i][j] = sum;
+    }
+
+    // Diagonal and below (i >= j): reduce, and pick the pivot row with the
+    // largest scaled magnitude. '>=' means the last of equal candidates wins.
+    float big = 0.0f;
+    for (int i = j; i < size; i++)
+    {
+      float sum = elem[i][j];
+      for (int k = 0; k < j; k++)
+        sum -= elem[i][k] * elem[k][j];
+      elem[i][j] = sum;
+      float dum = vv[i] * sycl::fabs(sum);
+      if (dum >= big)
+      {
+        big = dum;
+        imax = i;
+      }
+    }
+
+    // Swap the pivot row into place (imax > j whenever they differ).
+    if (j != imax)
+    {
+      for (int k = 0; k < size; k++)
+      {
+        float dum = elem[imax][k];
+        elem[imax][k] = elem[j][k];
+        elem[j][k] = dum;
+      }
+      vv[imax] = vv[j]; // the scale factor moves with its row
+    }
+    indx[j] = imax;
+
+    // Avoid dividing by an exactly-zero pivot.
+    if (elem[j][j] == 0.0f)
+      elem[j][j] = 1e-16f;
+
+    // Turn the sub-diagonal entries of this column into multipliers.
+    if (j != (size - 1))
+    {
+      float dum = 1.0f / elem[j][j];
+      for (int i = j + 1; i < size; i++)
+        elem[i][j] *= dum;
+    }
+  }
+
+  // Solve A * x = e_j for every unit vector e_j; x is column j of the inverse.
+  for (int j = 0; j < size; j++)
+  {
+    for (int k = 0; k < size; k++)
+      b[k] = 0.0f;
+    b[j] = 1.0f;
+
+    // Forward substitution, undoing the row permutation as it goes.
+    // ii is the first index with a non-zero entry; earlier terms are skipped.
+    int ii = -1;
+    for (int i = 0; i < size; i++)
+    {
+      int ip = indx[i];
+      float sum = b[ip];
+      b[ip] = b[i];
+      if (ii != -1)
+        for (int k = ii; k < i; k++)
+          sum -= elem[i][k] * b[k]; // k < i: multipliers below the diagonal
+      else if (sum != 0.0f)
+        ii = i;
+      b[i] = sum;
+    }
+
+    // Back substitution against U.
+    for (int i = size - 1; i >= 0; i--)
+    {
+      float sum = b[i];
+      for (int k = i + 1; k < size; k++)
+        sum -= elem[i][k] * b[k]; // k > i: entries above the diagonal
+      b[i] = sum / elem[i][i];
+    }
+
+    for (int i = 0; i < size; i++)
+      res[i][j] = b[i];
+  }
+}
+
+// Solves, in each work-item, the 8x8 linear system that yields one homography
+// hypothesis from four sampled point correspondences.
+//
+//   coord   - four planes of numPts floats each, read as x1, y1, x2, y2
+//   randPts - four planes of sample indices, one column per work-item
+//   homo    - output, eight planes; plane j receives coefficient j
+//   numPts  - plane stride of coord
+// The plane stride of randPts and homo is the total number of work-items
+// along dimension 2.
+void ComputeHomographies(float *coord, int *randPts, float *homo,
+                         int numPts, sycl::nd_item<3> item_ct1)
+{
+  const int bx = item_ct1.get_group(2);
+  const int tx = item_ct1.get_local_id(2);
+  const int idx = item_ct1.get_local_range().get(2) * bx + tx;
+  const int numLoops =
+      item_ct1.get_local_range().get(2) * item_ct1.get_group_range(2);
+
+  float a[8][8], ia[8][8];
+  float b[8];
+
+  // Each correspondence contributes two rows: one for x2 and one for y2.
+#pragma unroll
+  for (int s = 0; s < 4; s++)
+  {
+    const int pt = randPts[s * numLoops + idx];
+    const float x1 = coord[pt + 0 * numPts];
+    const float y1 = coord[pt + 1 * numPts];
+    const float x2 = coord[pt + 2 * numPts];
+    const float y2 = coord[pt + 3 * numPts];
+    const int rx = 2 * s + 0;
+    const int ry = 2 * s + 1;
+
+    a[rx][0] = x1;
+    a[rx][1] = y1;
+    a[rx][2] = 1.0;
+    a[rx][5] = 0.0;
+    a[rx][4] = 0.0;
+    a[rx][3] = 0.0;
+    a[rx][6] = -x2 * x1;
+    a[rx][7] = -x2 * y1;
+
+    a[ry][2] = 0.0;
+    a[ry][1] = 0.0;
+    a[ry][0] = 0.0;
+    a[ry][3] = x1;
+    a[ry][4] = y1;
+    a[ry][5] = 1.0;
+    a[ry][6] = -y2 * x1;
+    a[ry][7] = -y2 * y1;
+
+    b[rx] = x2;
+    b[ry] = y2;
+  }
+
+  InvertMatrix<8>(a, ia);
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // homo = ia * b, written out one coefficient plane at a time.
+#pragma unroll
+  for (int r = 0; r < 8; r++)
+  {
+    float acc = 0.0f;
+    for (int c = 0; c < 8; c++)
+      acc += ia[r][c] * b[c];
+    homo[r * numLoops + idx] = acc;
+  }
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+#define TESTHOMO_TESTS 16 // number of tests per block,  alt. 32, 32
+#define TESTHOMO_LOOPS 16 // number of loops per block,  alt.  8, 16
+
+// Counts, for each homography hypothesis, how many point pairs it maps to
+// within the squared error threshold.
+//
+//   d_coord  - four planes of numPts floats each, read as x1, y1, x2, y2
+//   d_homo   - eight coefficient planes, one column per hypothesis
+//   d_counts - output: number of accepted points per hypothesis
+//   thresh2  - squared error threshold, compared after scaling by deno^2
+//   homo     - work-group scratch, 8 floats per hypothesis
+//   cnts     - work-group scratch, TESTHOMO_TESTS partial counts per hypothesis
+// ty (local id 1) selects the hypothesis inside the group; tx (local id 2)
+// selects the slice of points that the work-item tests.
+void TestHomographies(float *d_coord, float *d_homo,
+                      int *d_counts, int numPts, float thresh2, sycl::nd_item<3> item_ct1,
+                      float *homo, int *cnts)
+{
+  const int tx = item_ct1.get_local_id(2);
+  const int ty = item_ct1.get_local_id(1);
+  const int idx =
+      item_ct1.get_group(1) * item_ct1.get_local_range().get(1) + tx;
+  const int numLoops =
+      item_ct1.get_local_range().get(1) * item_ct1.get_group_range(1);
+
+  // Stage the group's hypotheses in scratch, 8 contiguous floats each.
+  if (ty < 8 && tx < TESTHOMO_LOOPS)
+    homo[tx * 8 + ty] = d_homo[idx + ty * numLoops];
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+
+  // Private copy of the hypothesis this work-item evaluates.
+  float h[8];
+#pragma unroll
+  for (int c = 0; c < 8; c++)
+    h[c] = homo[ty * 8 + c];
+
+  // Test every TESTHOMO_TESTS-th point, starting at tx.
+  int accepted = 0;
+#pragma unroll
+  for (int p = tx; p < numPts; p += TESTHOMO_TESTS)
+  {
+    const float x1 = d_coord[p + 0 * numPts];
+    const float y1 = d_coord[p + 1 * numPts];
+    const float x2 = d_coord[p + 2 * numPts];
+    const float y2 = d_coord[p + 3 * numPts];
+    const float nomx = h[0] * x1 + h[1] * y1 + h[2];
+    const float nomy = h[3] * x1 + h[4] * y1 + h[5];
+    const float deno = h[6] * x1 + h[7] * y1 + 1.0f;
+    // Compare in the scaled domain so no division by deno is needed.
+    const float errx = x2 * deno - nomx;
+    const float erry = y2 * deno - nomy;
+    const float err2 = errx * errx + erry * erry;
+    if (err2 < thresh2 * deno * deno)
+      accepted++;
+  }
+
+  // Sum the partial counts of each hypothesis by repeated halving.
+  const int rowBase = TESTHOMO_TESTS * ty;
+  cnts[rowBase + tx] = accepted;
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+  for (int half = TESTHOMO_TESTS / 2; half > 0; half /= 2)
+  {
+    if (tx < half)
+      cnts[rowBase + tx] += cnts[rowBase + tx + half];
+    item_ct1.barrier(sycl::access::fence_space::local_space);
+  }
+
+  // Row 0 publishes the totals, one hypothesis per tx.
+  if (tx < TESTHOMO_LOOPS && ty == 0)
+    d_counts[idx] = cnts[TESTHOMO_TESTS * tx];
+  item_ct1.barrier(sycl::access::fence_space::local_space);
+}
+
+//================= Host matching functions =====================//
+
+// Estimates a homography from the matches stored in `data` by random sampling:
+// draws numLoops four-point samples from the acceptable matches, computes one
+// hypothesis per sample on the device, counts the points each hypothesis
+// accepts, and copies the hypothesis with the highest count to `homography`.
+//
+//   data         - SIFT points with match results; only data.d_data (device
+//                  copy) and data.numPts are read
+//   homography   - output, 9 floats; preset to the identity, element 8 is
+//                  never overwritten
+//   numMatches   - output, highest accepted-point count (0 on early exit)
+//   q_ct         - queue used for all allocations, copies and kernels
+//   matchTime    - accumulator; device-side durations in microseconds are
+//                  added only when DEVICE_TIMER is defined
+//   numLoops     - number of hypotheses, rounded up to a multiple of 16
+//   minScore     - a match qualifies only if its score is above this value
+//   maxAmbiguity - a match qualifies only if its ambiguity is below this value
+//   thresh       - error threshold; its square is passed to TestHomographies
+//
+// Returns matchTime, or 0 when there is no device data or fewer than 8 points.
+// Every host and device buffer allocated here is released before returning.
+double FindHomography(SiftData &data, float *homography, int *numMatches, sycl::queue &q_ct, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+  *numMatches = 0;
+  // Start from the identity so every early exit leaves a usable matrix.
+  homography[0] = homography[4] = homography[8] = 1.0f;
+  homography[1] = homography[2] = homography[3] = 0.0f;
+  homography[5] = homography[6] = homography[7] = 0.0f;
+  if (data.d_data == NULL)
+    return 0.0f;
+  SiftPoint *d_sift = data.d_data;
+  numLoops = iDivUp(numLoops, 16) * 16;
+  int numPts = data.numPts;
+  if (numPts < 8)
+    return 0.0f;
+  int numPtsUp = iDivUp(numPts, 16) * 16;
+  float *d_coord, *d_homo;
+  int *d_randPts, *h_randPts;
+  int randSize = 4 * sizeof(int) * numLoops;
+  int szFl = sizeof(float);
+  int szPt = sizeof(SiftPoint);
+
+#ifdef DEVICE_TIMER
+  auto start_malloc_1 = std::chrono::steady_clock::now();
+#endif
+  d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp, q_ct);
+  d_randPts = (int *)sycl::malloc_device(randSize, q_ct);
+  d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops, q_ct);
+
+#ifdef DEVICE_TIMER
+  auto stop_malloc_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_malloc_1 - start_malloc_1).count();
+#endif
+  h_randPts = (int *)malloc(randSize);
+  float *h_scores = (float *)malloc(sizeof(float) * numPtsUp);
+  float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp);
+  // Host staging buffers: raw SiftPoint-strided bytes starting at each field.
+  float *temp1 = (float *)malloc(szPt * numPtsUp);
+  float *temp2 = (float *)malloc(szPt * numPtsUp);
+
+#ifdef DEVICE_TIMER
+  auto start_memcpy_1 = std::chrono::steady_clock::now();
+#endif
+
+  // NOTE(review): each copy starts at a field offset inside the first point
+  // and spans szPt * numPts bytes, so it ends past the numPts-th point -
+  // confirm the device buffer holds at least one more SiftPoint.
+  infra::sift_memcpy(temp1, &d_sift[0].score, szPt * numPts, infra::device_to_host, q_ct);
+  infra::sift_memcpy(temp2, &d_sift[0].ambiguity, szPt * numPts, infra::device_to_host, q_ct);
+  q_ct.wait();
+
+#ifdef DEVICE_TIMER
+  auto stop_memcpy_1 = std::chrono::steady_clock::now();
+  matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_1 - start_memcpy_1).count();
+#endif
+  char *src_score = (char *)temp1;
+  char *src_ambiguity = (char *)temp2;
+  char *dst_score = (char *)h_scores;
+  char *dst_ambiguity = (char *)h_ambiguities;
+
+  // Compact the strided fields into dense float arrays.
+  for (int i = 0; i < numPts; ++i)
+  {
+    memcpy(dst_score, src_score, szFl);
+    memcpy(dst_ambiguity, src_ambiguity, szFl);
+
+    src_score += szPt;
+    src_ambiguity += szPt;
+    dst_score += szFl;
+    dst_ambiguity += szFl;
+  }
+
+  // The staging buffers are not needed once the fields are compacted.
+  free(temp1);
+  free(temp2);
+
+  // Keep only the matches that pass both the score and ambiguity filters.
+  int *validPts = (int *)malloc(sizeof(int) * numPts);
+  int numValid = 0;
+
+  for (int i = 0; i < numPts; i++)
+  {
+    if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity)
+      validPts[numValid++] = i;
+  }
+
+  free(h_scores);
+  free(h_ambiguities);
+
+  if (numValid >= 8)
+  {
+    // Draw four distinct valid points for every hypothesis.
+    std::random_device rd;
+    uint32_t seed = rd();
+    std::mt19937 rnd(seed); // mersenne_twister_engine
+    std::uniform_int_distribution<uint32_t> dis(0, UINT32_MAX);
+    for (int i = 0; i < numLoops; i++)
+    {
+      int p1 = dis(rnd) % numValid;
+      int p2 = dis(rnd) % numValid;
+      int p3 = dis(rnd) % numValid;
+      int p4 = dis(rnd) % numValid;
+      while (p2 == p1)
+        p2 = dis(rnd) % numValid;
+      while (p3 == p1 || p3 == p2)
+        p3 = dis(rnd) % numValid;
+      while (p4 == p1 || p4 == p2 || p4 == p3)
+        p4 = dis(rnd) % numValid;
+      h_randPts[i + 0 * numLoops] = validPts[p1];
+      h_randPts[i + 1 * numLoops] = validPts[p2];
+      h_randPts[i + 2 * numLoops] = validPts[p3];
+      h_randPts[i + 3 * numLoops] = validPts[p4];
+    }
+#ifdef DEVICE_TIMER
+    auto start_malloc_2 = std::chrono::steady_clock::now();
+#endif
+    // Device staging buffers for the four strided coordinate fields.
+    float *temp3 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct);
+    float *temp4 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct);
+    float *temp5 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct);
+    float *temp6 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct);
+#ifdef DEVICE_TIMER
+    auto stop_malloc_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_malloc_2 - start_malloc_2).count();
+#endif
+#ifdef DEVICE_TIMER
+    auto start_memcpy_2 = std::chrono::steady_clock::now();
+#endif
+
+    q_ct.memcpy(d_randPts, h_randPts, randSize).wait();
+    infra::sift_memcpy(temp3, &d_sift[0].xpos, szPt * numPts, infra::device_to_device, q_ct);
+    infra::sift_memcpy(temp4, &d_sift[0].ypos, szPt * numPts, infra::device_to_device, q_ct);
+    infra::sift_memcpy(temp5, &d_sift[0].match_xpos, szPt * numPts, infra::device_to_device, q_ct);
+    infra::sift_memcpy(temp6, &d_sift[0].match_ypos, szPt * numPts, infra::device_to_device, q_ct);
+    q_ct.wait();
+
+    // Single-work-item kernels move each staged field into its own plane of
+    // d_coord. They replace 2D memcopies, as 2D copying is slower on SYCL.
+    // NOTE(review): memcopyKernel's argument order is taken as
+    // (src, dst, src pitch, dst pitch, rows, row width) - confirm.
+    q_ct.parallel_for(
+            sycl::nd_range<3>(sycl::range<3>(1, 1, 1) *
+                                  sycl::range<3>(1, 1, 1),
+                              sycl::range<3>(1, 1, 1)),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              memcopyKernel(temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl);
+            })
+        .wait();
+
+    q_ct.parallel_for(
+            sycl::nd_range<3>(sycl::range<3>(1, 1, 1) *
+                                  sycl::range<3>(1, 1, 1),
+                              sycl::range<3>(1, 1, 1)),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              memcopyKernel(temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl);
+            })
+        .wait();
+
+    q_ct.parallel_for(
+            sycl::nd_range<3>(sycl::range<3>(1, 1, 1) *
+                                  sycl::range<3>(1, 1, 1),
+                              sycl::range<3>(1, 1, 1)),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              memcopyKernel(temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl);
+            })
+        .wait();
+
+    q_ct.parallel_for(
+            sycl::nd_range<3>(sycl::range<3>(1, 1, 1) *
+                                  sycl::range<3>(1, 1, 1),
+                              sycl::range<3>(1, 1, 1)),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              memcopyKernel(temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl);
+            })
+        .wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_2 - start_memcpy_2).count();
+#endif
+
+    // All four copy kernels have completed (each was waited on), so the
+    // device staging buffers can be released here, outside the timed region.
+    safeCall((sycl::free(temp3, q_ct), 0));
+    safeCall((sycl::free(temp4, q_ct), 0));
+    safeCall((sycl::free(temp5, q_ct), 0));
+    safeCall((sycl::free(temp6, q_ct), 0));
+
+#ifdef DEVICE_TIMER
+    auto start_kernel_1 = std::chrono::steady_clock::now();
+#endif
+    // One work-item per hypothesis, 16 per work-group.
+    q_ct.parallel_for(
+            sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) *
+                                  sycl::range<3>(1, 1, 16),
+                              sycl::range<3>(1, 1, 16)),
+            [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                [[intel::reqd_sub_group_size(32)]]
+#endif
+            {
+              ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1);
+            })
+        .wait();
+
+#ifdef DEVICE_TIMER
+    auto stop_kernel_1 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count();
+    // printf("ComputeHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_1 - start_kernel_1).count());
+#endif
+    checkMsg("ComputeHomographies() execution failed\n");
+    sycl::range<3> blocks(1, numLoops / TESTHOMO_LOOPS, 1);
+    sycl::range<3> threads(1, TESTHOMO_LOOPS, TESTHOMO_TESTS);
+#ifdef DEVICE_TIMER
+    auto start_kernel_2 = std::chrono::steady_clock::now();
+#endif
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                  sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                 sycl::access::target::local>
+                      homo_acc_ct1(sycl::range<1>(128 /*8*TESTHOMO_LOOPS*/), cgh);
+                  sycl::accessor<int, 1, sycl::access_mode::read_write,
+                                 sycl::access::target::local>
+                      cnts_acc_ct1(sycl::range<1>(256 /*TESTHOMO_TESTS*TESTHOMO_LOOPS*/),
+                                   cgh);
+
+                  cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads),
+                                   [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                       [[intel::reqd_sub_group_size(32)]]
+#endif
+                                   {
+                                     // d_randPts is reused as the per-hypothesis
+                                     // count output; the sample indices are no
+                                     // longer needed at this point.
+                                     TestHomographies(d_coord, d_homo, d_randPts, numPtsUp,
+                                                      thresh * thresh, item_ct1,
+                                                      homo_acc_ct1.get_pointer(),
+                                                      cnts_acc_ct1.get_pointer());
+                                   }); })
+        .wait();
+#ifdef DEVICE_TIMER
+    auto stop_kernel_2 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count();
+    // printf("TestHomographies time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel_2 - start_kernel_2).count());
+#endif
+    checkMsg("TestHomographies() execution failed\n");
+#ifdef DEVICE_TIMER
+    auto start_memcpy_3 = std::chrono::steady_clock::now();
+#endif
+    // h_randPts now receives the counts written by TestHomographies.
+    q_ct.memcpy(h_randPts, d_randPts, sizeof(int) * numLoops).wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_3 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_3 - start_memcpy_3).count();
+#endif
+    int maxIndex = -1, maxCount = -1;
+
+    // Pick the first hypothesis with the highest count.
+    for (int i = 0; i < numLoops; i++)
+      if (h_randPts[i] > maxCount)
+      {
+        maxCount = h_randPts[i];
+        maxIndex = i;
+      }
+
+    *numMatches = maxCount;
+#ifdef DEVICE_TIMER
+    auto start_memcpy_4 = std::chrono::steady_clock::now();
+#endif
+    // Gather the winner's 8 coefficients; consecutive coefficients lie
+    // numLoops floats apart in d_homo.
+    safeCall((infra::sift_memcpy(homography, szFl, &d_homo[maxIndex],
+                                 sizeof(float) * numLoops, szFl, 8,
+                                 infra::device_to_host, q_ct),
+              0));
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy_4 = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy_4 - start_memcpy_4).count();
+#endif
+  }
+  free(validPts);
+  free(h_randPts);
+  safeCall((sycl::free(d_homo, q_ct), 0));
+  safeCall((sycl::free(d_randPts, q_ct), 0));
+  safeCall((sycl::free(d_coord, q_ct), 0));
+  return matchTime;
+}
+
+double MatchSiftData(SiftData &data1, SiftData &data2, sycl::queue &q_ct, float &matchTime)
+{
+  float matchSiftDataTime = 0.0;
+
+  int numPts1 = data1.numPts;
+  int numPts2 = data2.numPts;
+
+  if (!numPts1 || !numPts2)
+    return 0.0;
+#ifdef MANAGEDMEM
+  SiftPoint *sift1 = data1.m_data;
+  SiftPoint *sift2 = data2.m_data;
+#else
+  if (data1.d_data == NULL || data2.d_data == NULL)
+    return 0.0f;
+  SiftPoint *sift1 = data1.d_data;
+  SiftPoint *sift2 = data2.d_data;
+#endif
+// Original version with correlation and maximization in two different kernels
+// Global memory reguirement: O(N^2)
+#if 0
+  float *d_corrData; 
+  int corrWidth = iDivUp(numPts2, 16)*16;
+  int corrSize = sizeof(float)*numPts1*corrWidth;
+  safeCall(cudaMalloc((void **)&d_corrData, corrSize));
+#if 0 // K40c 10.9ms, 1080 Ti 3.8ms
+  dim3 blocks1(numPts1, iDivUp(numPts2, 16));
+  dim3 threads1(16, 16); // each block: 1 points x 16 points
+  MatchSiftPoints<<<blocks1, threads1>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#else // K40c 7.6ms, 1080 Ti 1.4ms
+  dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16));
+  dim3 threads(16, 16); // each block: 16 points x 16 points
+  MatchSiftPoints2<<<blocks, threads>>>(sift1, sift2, d_corrData, numPts1, numPts2);
+#endif
+  safeCall(cudaDeviceSynchronize());
+  dim3 blocksMax(iDivUp(numPts1, 16));
+  dim3 threadsMax(16, 16);
+  FindMaxCorr<<<blocksMax, threadsMax>>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint));
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+
+// Version suggested by Nicholas Lin with combined correlation and maximization
+// Global memory reguirement: O(N)
+#if 0
+  int block_dim = 16;
+  float *d_corrData;
+  int corrSize = numPts1 * block_dim * 2;
+  safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize));
+  dim3 blocks(iDivUp(numPts1, block_dim));
+  dim3 threads(block_dim, block_dim); 
+  FindMaxCorr3<<<blocks, threads >>>(d_corrData, sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr3() execution failed\n");
+  safeCall(cudaFree(d_corrData));
+#endif
+
+// Combined version with no global memory requirement using one 1 point per block
+#if 0
+  dim3 blocksMax(numPts1);
+  dim3 threadsMax(FMC2W, FMC2H);
+  FindMaxCorr2<<<blocksMax, threadsMax>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr2() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using one FMC2H points per block
+#if 0
+  dim3 blocksMax2(iDivUp(numPts1, FMC2H));
+  dim3 threadsMax2(FMC2W, FMC2H);
+  FindMaxCorr4<<<blocksMax2, threadsMax2>>>(sift1, sift2, numPts1, numPts2);
+  safeCall(cudaDeviceSynchronize());
+  checkMsg("FindMaxCorr4() execution failed\n");
+#endif
+
+// Combined version with no global memory requirement using global locks
+#if 1
+  sycl::range<3> blocksMax3(1, iDivUp(numPts2, 512), iDivUp(numPts1, 16));
+  sycl::range<3> threadsMax3(1, 16, 16);
+#ifdef DEVICE_TIMER
+  auto start_kernel1 = std::chrono::steady_clock::now();
+#endif
+
+  q_ct.parallel_for(
+          sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) *
+                                sycl::range<3>(1, 1, 64),
+                            sycl::range<3>(1, 1, 64)),
+          [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+              [[intel::reqd_sub_group_size(32)]]
+#endif
+          {
+            CleanMatches(sift1, numPts1, item_ct1);
+          })
+      .wait();
+
+#ifdef DEVICE_TIMER
+  auto stop_kernel1 = std::chrono::steady_clock::now();
+  // printf("CleanMatches time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count());
+
+  matchTime += std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+  matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel1 - start_kernel1).count();
+#endif
+
+  int mode = 10;
+  if (mode == 5)
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                       lock.init();
+
+                                       auto lock_ptr_ct1 = lock.get_ptr();
+
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts1_acc_ct1(sycl::range<1>(272 /*17*16*/), cgh);
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts2_acc_ct1(sycl::range<1>(272 /*17*16*/), cgh);
+
+                                       cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                                        [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                                            [[intel::reqd_sub_group_size(32)]]
+#endif
+                                                        {
+                                                          FindMaxCorr5(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                                       lock_ptr_ct1,
+                                                                       siftParts1_acc_ct1.get_pointer(),
+                                                                       siftParts2_acc_ct1.get_pointer());
+                                                        }); });
+  else if (mode == 6)
+  {
+    threadsMax3 = sycl::range<3>(1, 16, 32);
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                       lock.init();
+
+                                       auto lock_ptr_ct1 = lock.get_ptr();
+
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts2_acc_ct1(sycl::range<1>(2048 /*128*16*/), cgh);
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           sums_acc_ct1(sycl::range<1>(256 /*16*16*/), cgh);
+
+                                       cgh.parallel_for(
+                                           sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                           [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                               [[intel::reqd_sub_group_size(32)]]
+#endif
+                                           {
+                                             FindMaxCorr6(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                          lock_ptr_ct1, siftParts2_acc_ct1.get_pointer(),
+                                                          sums_acc_ct1.get_pointer());
+                                           }); });
+  }
+  else if (mode == 7)
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                       lock.init();
+
+                                       auto lock_ptr_ct1 = lock.get_ptr();
+
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts1_acc_ct1(sycl::range<1>(1088 /*17*64*/), cgh);
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts2_acc_ct1(sycl::range<1>(1024 /*16*64*/), cgh);
+
+                                       cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                                        [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                                            [[intel::reqd_sub_group_size(32)]]
+#endif
+                                                        {
+                                                          FindMaxCorr7(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                                       lock_ptr_ct1,
+                                                                       siftParts1_acc_ct1.get_pointer(),
+                                                                       siftParts2_acc_ct1.get_pointer());
+                                                        }); });
+  else if (mode == 8)
+  {
+    blocksMax3 =
+        sycl::range<3>(1, iDivUp(numPts2, FMC_GH), iDivUp(numPts1, FMC_BW));
+    threadsMax3 = sycl::range<3>(1, FMC_NH, FMC_NW);
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                       lock.init();
+
+                                       auto lock_ptr_ct1 = lock.get_ptr();
+
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts1_acc_ct1(sycl::range<1>(512 /*FMC_BW*FMC_BD*/), cgh);
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts2_acc_ct1(sycl::range<1>(512 /*FMC_BH*FMC_BD*/), cgh);
+                                       sycl::accessor<float, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           blksums_acc_ct1(sycl::range<1>(1024 /*FMC_BW*FMC_BH*/), cgh);
+
+                                       cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                                        [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                                            [[intel::reqd_sub_group_size(32)]]
+#endif
+                                                        {
+                                                          FindMaxCorr8(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                                       lock_ptr_ct1,
+                                                                       siftParts1_acc_ct1.get_pointer(),
+                                                                       siftParts2_acc_ct1.get_pointer(),
+                                                                       blksums_acc_ct1.get_pointer());
+                                                        }); });
+  }
+  else if (mode == 9)
+  {
+    blocksMax3 =
+        sycl::range<3>(1, iDivUp(numPts2, FMC_GH), iDivUp(numPts1, FMC_BW));
+    threadsMax3 = sycl::range<3>(1, FMC_NH, FMC_NW);
+    q_ct.submit([&](sycl::handler &cgh)
+                {
+                                       lock.init();
+
+                                       auto lock_ptr_ct1 = lock.get_ptr();
+
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts1_acc_ct1(sycl::range<1>(512 /*FMC_BW*FMC_BD*/), cgh);
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           siftParts2_acc_ct1(sycl::range<1>(512 /*FMC_BH*FMC_BD*/), cgh);
+
+                                       cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                                        [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                                            [[intel::reqd_sub_group_size(32)]]
+#endif
+                                                        {
+                                                          FindMaxCorr9(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                                       lock_ptr_ct1,
+                                                                       siftParts1_acc_ct1.get_pointer(),
+                                                                       siftParts2_acc_ct1.get_pointer());
+                                                        }); });
+  }
+  else if (mode == 10)
+  {
+    try
+    {
+
+      blocksMax3 = sycl::range<3>(1, 1, iDivUp(numPts1, M7W));
+      threadsMax3 = sycl::range<3>(1, (M7H / M7R), M7W); //(1 , 8 , 32)
+
+#ifdef DEVICE_TIMER
+      auto start_kernel2 = std::chrono::steady_clock::now();
+#endif
+      q_ct.submit([&](sycl::handler &cgh)
+                  {
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                           buffer1_acc_ct1(sycl::range<1>(1024 /*M7W*NDIM/4*/), cgh);                                          
+                                       sycl::accessor<sycl::float4, 1, sycl::access_mode::read_write,
+                                                      sycl::access::target::local>
+                                            buffer2_acc_ct1(sycl::range<1>(1024 /*M7H*NDIM/4*/), cgh);
+                                       cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3),
+                                                        [=](sycl::nd_item<3> item_ct1)
+#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND)
+                                                            [[intel::reqd_sub_group_size(32)]]
+#endif
+                                                        {
+                                                          FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1,
+                                                                        buffer1_acc_ct1.get_pointer(),
+                                                                        buffer2_acc_ct1.get_pointer());
+                                                        }); })
+          .wait();
+#ifdef DEVICE_TIMER
+      auto stop_kernel2 = std::chrono::steady_clock::now();
+      // printf("FindMaxCorr10 time =          %.2f us\n", std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count());
+      matchTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+      matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_kernel2 - start_kernel2).count();
+#endif
+    }
+    catch (sycl::exception const &e)
+    {
+      std::cerr << e.what() << '\n';
+    }
+  }
+  checkMsg("FindMaxCorr5() execution failed\n");
+#endif
+
+  if (data1.h_data != NULL)
+  {
+    float *h_ptr = &data1.h_data[0].score;
+    float *d_ptr = &data1.d_data[0].score;
+#ifdef DEVICE_TIMER
+    auto start_memcpy = std::chrono::steady_clock::now();
+#endif
+    infra::sift_memcpy(h_ptr, d_ptr, sizeof(SiftPoint) * data1.numPts, infra::device_to_host, q_ct);
+    q_ct.wait();
+#ifdef DEVICE_TIMER
+    auto stop_memcpy = std::chrono::steady_clock::now();
+    matchTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+    matchSiftDataTime += std::chrono::duration<float, std::micro>(stop_memcpy - start_memcpy).count();
+#endif
+  }
+  return matchTime;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp
new file mode 100644
index 000000000..6c230dd44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp
@@ -0,0 +1,83 @@
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include <iostream>
+
+#include "Utility.h"
+
+using namespace Utility;
+
+// Verifies that the measured match percentage lies strictly inside the
+// acceptance window associated with the given threshold level.
+//
+// Acceptance windows (both bounds exclusive):
+//   threshold 1 -> (20, 30)
+//   threshold 2 -> (26, 38)
+//   threshold 3 -> (35, 45)
+//   threshold 4 -> (40, 50)
+//
+// A progress banner is printed first, then the verdict.
+// Returns 0 when the percentage is inside the window; returns -1 when it is
+// outside the window or when threshold is not one of 1..4.
+int Utility::RunDataVerification(const int threshold, const float matchPercentage)
+{
+    printf("Performing data verification \n");
+
+    float lowerBound = 0.0f;
+    float upperBound = 0.0f;
+    if (threshold == 1)
+    {
+        lowerBound = 20.0f;
+        upperBound = 30.0f;
+    }
+    else if (threshold == 2)
+    {
+        lowerBound = 26.0f;
+        upperBound = 38.0f;
+    }
+    else if (threshold == 3)
+    {
+        lowerBound = 35.0f;
+        upperBound = 45.0f;
+    }
+    else if (threshold == 4)
+    {
+        lowerBound = 40.0f;
+        upperBound = 50.0f;
+    }
+    else
+    {
+        // Unknown level: report it and bail out before any range check.
+        printf("Threshold values should be in the range [1, 4]. \n\n");
+        return -1;
+    }
+
+    // Both bounds are exclusive, exactly as in the per-level comparisons.
+    const bool insideWindow = (lowerBound < matchPercentage) && (matchPercentage < upperBound);
+    if (!insideWindow)
+    {
+        printf("Data verification FAILED. \n\n");
+        return -1;
+    }
+    printf("Data verification is SUCCESSFUL. \n\n");
+    return 0;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h
new file mode 100644
index 000000000..da09d2d78
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h
@@ -0,0 +1,31 @@
+// Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#ifndef UTILITY_H
+#define UTILITY_H
+
+namespace Utility
+{
+    // Returns 0 if matchPercentage is inside the acceptance window for threshold (1-4), -1 otherwise.
+    int RunDataVerification(const int threshold, const float matchPercentage);
+}
+#endif // UTILITY_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o
new file mode 100644
index 000000000..220855fcd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md b/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md
new file mode 100755
index 000000000..59912f69c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md
@@ -0,0 +1,385 @@
+# SYCLomatic Tool: Migrate cudaSift APP
+## Use the command line to migrate large code base.
+The SYCLomatic project (the open source version of Intel® DPC++ Compatibility Tool) can migrate projects that contain multiple source and header files.
+| Optimized for         | Description
+|:---                   |:---
+| OS                    | Linux* Ubuntu* 22.04
+| Software              | Intel® DPC++ Compatibility Tool
+| What you will learn   | Simple invocation of dpct to migrate CUDA code
+| Time to complete      | 15 minutes
+
+
+# Purpose
+The SYCLomatic tool can migrate projects composed of multiple source and header files.
+Use the dpct **--in-root** option to set the root location of the application you want to migrate. Only the files under this root are considered for migration. Files located outside the **--in-root** directory are treated as system or library files and are not migrated.
+
+The dpct **--out-root** option specifies the directory into which the SYCL*-compliant code produced by the dpct tool is written. The relative paths and file names are kept, except that the extensions of migrated CUDA source files are changed to **.dp.cpp**.
+
+
+# Key Implementation Details
+Besides the --in-root and --out-root options, additional options can help migrate the code more smoothly; see the [Command Line Options Reference](https://software.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/command-line-options-reference.html).
+
+
+
+## Migrating the CUDA Sample to Data Parallel C++ with the Intel® DPC++ Compatibility Tool
+
+Building and running the CUDA sample is not required to migrate this project
+to a SYCL*-compliant project.
+
+> **Note**: Certain CUDA header files, referenced by the CUDA application
+> source files to be migrated, need to be accessible for the migration step.
+> See *Before you Begin* in [Get Started with the Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/develop/documentation/get-started-with-intel-dpcpp-compatibility-tool/top.html#top_BEFORE_YOU_BEGIN).
+
+> **Note**: If you have not already done so, set up your CLI
+> environment by sourcing  the `setvars` script located in
+> the root of your oneAPI installation.
+>
+> Linux*:
+> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `$ bash -c 'source <install-dir>/setvars.sh ; exec csh'`
+>
+> Windows*:
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
+>
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or MacOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
+
+
+### Command-Line on a Linux* System
+
+1. This sample project contains a simple CUDA program with 12 files:
+
+```
+CUDA
+├── CMakeLists.txt
+├── cudaImage.cu
+├── cudaImage.h
+├── cudaSift.h
+├── cudaSiftD.cu
+├── cudaSiftD.h
+├── cudaSiftH.cu
+├── cudaSiftH.h
+├── cudautils.h
+├── geomFuncs.cpp
+├── mainSift.cpp
+└── matching.cu
+```
+2. Make sure `OpenCV*` is installed on the machine: `$ sudo apt-get install libopencv-dev`.
+
+   Then make a `build` directory and use the **cmake** command-line tool to generate the corresponding build files (Makefile) directly.
+
+```sh
+$ cd CUDA && mkdir build
+$ cd build && cmake ..
+```
+3. Use the **intercept-build** tool to intercept the build step and generate the compilation database file `compile_commands.json` in the same folder.
+``` sh
+$ intercept-build make
+$ ls .
+CMakeCache.txt  CMakeFiles  Makefile  bitcracker  cmake_install.cmake  compile_commands.json
+```
+4. Use the tool's `--in-root` option and provide input files to specify where
+   to locate the CUDA files that need migration; use the tool’s `--out-root`
+   option to designate where to generate the resulting files (default is `dpct_output`); use the tool's `-p` option to specify the compilation database used to migrate the whole project:
+
+```sh
+# From the CUDA directory as root directory:
+$ cd ..
+$ dpct --in-root=. -p=./build/compile_commands.json --out-root=out --gen-build-script --cuda-include-path=/usr/local/cuda/include
+```
+
+> If an `--in-root` option is not specified, the directory of the first input
+> source file is implied. If `--out-root` is not specified, `./dpct_output`
+> is implied.
+
+You should see the migrated files in the `out` folder that was specified
+by the `--out-root` option:
+
+```
+out/
+├── MainSourceFiles.yaml
+├── cudaImage.dp.cpp
+├── cudaImage.h
+├── cudaSift.h
+├── cudaSift.h.yaml
+├── cudaSiftD.dp.cpp
+├── cudaSiftD.h
+├── cudaSiftH.dp.cpp
+├── cudaSiftH.h
+├── cudautils.h
+├── cudautils.h.yaml
+├── geomFuncs.cpp
+├── mainSift.cpp.dp.cpp
+└── matching.dp.cpp
+
+```
+
+5. Inspect the migrated source code, address any `DPCT` warnings generated
+   by the Intel® DPC++ Compatibility Tool, and verify the new program correctness.
+
+Warnings are printed to the console and added as comments in the migrated
+source. See *Diagnostic Reference* in the [Intel® DPC++ Compatibility Tool Developer Guide and Reference](https://www.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/diagnostics-reference.html) for more information on what each warning means.
+
+
+This sample should generate the following warnings:
+```
+warning: #DPCT2001:228: You can link with more library by add them here.
+LIB :=  
+```
+
+
+See below **Addressing Warnings in the Migrated Code** to understand how to resolve the warning.
+
+
+6. Build the migrated code with the generated Makefile.dpct
+```
+$ cd out
+$ make -f Makefile.dpct
+# Make sure the oneAPI package is installed before building the application, so that the oneAPI DPC++ compiler is available.
+```
+
+# Addressing Warnings in Migrated Code
+
+Migration generated one warning for code that `dpct` could not migrate:
+```
+warning: #DPCT2001:228: You can link with more library by add them here.
+LIB :=  
+```
+This message appears in Makefile.dpct. The **cudaSift** application needs to link against the **OpenCV** libraries at link time; modifying Makefile.dpct as follows fixes the linker error.
+```
+LIB :=  -lopencv_core -lopencv_imgcodecs
+```
+
+
+## Rebuild the migrated code
+After manually addressing the warning, rebuild the application.
+```
+$ make -f Makefile.dpct clean
+$ make -f Makefile.dpct 
+```
+# Example Output
+
+When you run the migrated application, you should see the following console
+output:
+
+```
+$ ./cudasift 
+Image size = (1920,1080)
+Initializing data...
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of Points after sift extraction =  3681
+
+Number of Points after sift extraction =  3933
+
+Number of original features: 3681 3933
+Number of matching features: 1220 1258 33.1432% 1 2
+
+Performing data verification 
+Data verification is SUCCESSFUL. 
+
+Total workload time = 2206.28 ms
+```
+**Note:** The test result above was obtained on an Intel(R) Core(TM) i7-13700K using the CPU backend with the oneAPI 2023.2 release packages.
+
+If an error occurs, troubleshoot the problem using the Diagnostics Utility for Intel® oneAPI Toolkits.
+[Learn more](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
+
+## License
+See
+[LICENSE.md](https://github.com/oneapi-src/Velocity-Bench/blob/main/cudaSift/LICENSE.md) for details.
diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png b/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png
new file mode 100755
index 000000000..efd56fde5
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png b/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png
new file mode 100755
index 000000000..28020ea2f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png differ
diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm b/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm
new file mode 100644
index 000000000..0005f1967
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm
@@ -0,0 +1,2616 @@
+P5
+1280 960
+255
+�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½��������������ľ������������þ�������þ������������º¾��������ľ�����¾�����¿������������������������������zhSGdbbdedeeda`_ZWXWXZ[WXXWPVWUSNMLLKNLLKJJOOOPQPTSVVTXYY[\\^}��wdjuy{vyspmed]P}��p@47;=@DFIJJLNNONNPOLLLMKKLKKKLLMOPPOLLLLLLJPOKOPMMOPQQOPHI`defe`bb_aLNUVZ[^fhbhihcdggf`aecca_muc99:;=LUeq�kXiVHTlsi��`aS.MlHCC@:5%Mr�aPOQSFbB3?@^`:2*2|�����ePpb@>g�;..MeIBc�^HPXkM9B_A9B_iJOkUe9'<E=:9653/Deh\Y]^^9Zi������1\FAQyI8@Io������T6m^M`o^G=KpbcXWek\Tns��|{z6-G>769<Sv\JKLTjP`Z@;99<:0UTEA@@B>=YZY]`af_VRGPhvmJ?��������������������������������������������������������������������������������������������������������������}}�y���}tx���|yrqpp]IQU{�y~�]:RTXZYSJ<P������j,JA53227<@UfmljkQBHA5=lG*+F@8^i]\`>XjD42574;BN_���������n������r��f\Va~�����B3}p16IOU>*/UmdI2(.j�jCVieHK/7nwyzxf.1YveC>EGMQRRTQWy�����hB]�����J6m�����D>Q^bdcLFB7+*,5GRZ~�����QFDEC?@?AIIRQE=9=P^fea]QMUVTRNMKLKHGZojdindl_STX`^ZXMSbgjiifhqwqkljlpolomlmprrqpppnh8%(&<<471567546665452.+*%'+ #&)**)+))#&#""%#$$%-142+%#"$$#"
+ "!
+	
+
+������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿¾���ž�����½��þ���Ľ���¾���¾��ļ���¼��Ž������������Ļ��þ�������������½��½��¾���·������������������������xiQG^W[_^]]]]][ZWTUUUVTQRSTUVSQQQRPOPNNKKGIIGIIHJKPTVVWZ[ZXY^z�������}~~��~~wr{��t?6;=BEGILNNMPOOQRTTTTUTSTQNPQOLMQOOPNLJKMLLPPOPONMNMQROPMOdghgfccb_\MOVZ\Z\fg`gggaeggfddgccbdnwc<:79?LQis�q`o]FUoug��dcQ-MoGCMB;2'Qv�cPRRQFg>1D@\^83)2}�����cUma@@e�=//NeKDdnVGRXjH7Bb@;CekHO~�y4&:D=86564/Dgi^]^_^7\h�����2aG>JqI9>Jt������P9o]QadXF<Jned\]di]Wqu��~}y8.OA999=OtZILNWiLd`A::;;92[WEBB@A<<ZWQURT^\XRJQhndKB�������������������������������������������������������������������������������������������������������������z|��}~~|���tqy����{tqqp]FRX}�u��_8NQXYYUI>L������l,LB5224:=CWkkijhNDIA69N6)*NKN`e_]c:j~B610137>N`���������n������n��dZXc������A8~i26>BD7(-J\WB2'2j�j>OXUJK/7kw||h00YseD?EJSXXZXLR{�����c:_�����M4n�����E9R^cebH>=5./29HPS������UA4-.*3<>@7.-./15P`acilG9NXUNJLONLHHRYVXok[YYQSX]YURRU_[_`^XRaomefjmpoproopqqrsonpod8),& !""%*,.03/.,+(+)		"&((*+*))"&%#"""$#$'())'%%"#"#$
+
+	��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ���ý��������������¿���ÿ��������������ýþ���½�������Ƚ��������¿��û�������ľ���������������������������������zjODa[YX]^^_]_`^\\^^]`a`a^`ba^\X[[XWVRQRONMKHFFHGHMPRTWWXZZ\Zp�����������������xvsmC9?DGJMQUUSWXX[^Z^_a`acda[YWXWVWXWSRQPPRRSRPTVTUTSTVXZ\]PJhfhggbba``LPWYZ[\fhbfggceffffffcabenxaB:8:?KSfl�n\lYHUmrh��c`O.PjKNjU;1*Su�^NQRXEg@3BC[\<3)5~�����aUob@Aj�?20NiLD]rhHR]oF55<8<?fiK@GP^6%;F=665760Ehg_Z_]^5Vi�����|5aG=@S?7=Ds������L:rTPdaUH<KqcbX[dk\Qov�{|z7/RB8:8>Tu_FNOXeKciA;;<><2ZSFC@?A::XUPSWQZ[VQEQWZRE@������������������������������������������������������������������������������������������������������������~�|{���~�uqx������}utto^EU\��}�Y9LU^`_YK>L������m/MB7335:;CUjkhihPCJ@7EzK,(LMT]`]^a<r�J81/014:F\���������k������t��bZ[b}��~��=;j/<O]K5),6:<80&4f�d?UdaLH-6ky|~}g,3ZvfB:DJUZZ[WIPy�����e@]~����L5t�����C=R]aedD;:3.287BIT������U>0***09>>7,('),4H`ZYjtC:Lc`PFEFFC@76AKJRSLHHBN[dbZSB?HN[]USRUaecegilmmproopqqpoopm`6(*&"!$)%	%'&)****)#$%##"$&%$$#$%(&&%%"""	
+���������������������������������������������������������������������������������������������������������������������������������������������������������������������¿����������������������º�������Ŀ�����������������Ž�������»������������ú���ü��ľ���Ż��������������ſ���¾�������¼�����Ŀ�����������������������������������zgQNjggfgghjjlljjjljjkmjllntrpihgdeccba][YWTRPNLILOTY^`dhhiio}������������������zlfJ=AEIMNSXZ[]`eddcdgijikjjgb^`_^`_^YUPSSSSUWWWXWYXW[_`fniXLhhihfdec_`FRVZ[]]eidgiiaegefffedcbamx`<<::?IRdk�lOYTJXmri��]_R0JmIVrX80)Rv�_NPSYBg=5@F]]92)5~�����^Wra?=m�F32QiIA[woIR\mH4456<EegHU~qg3$;E;685530Gghc]``[9Wj�����~6`G<9<68@Jq������L9rWKek]I;Mp`bV[gi[Vot�{{~9/RD;<;=XsdJPS[eKdj@;=:<<4[SGC@?@;=_ZZ\^^_[XSIZ`_XP@�����������������������������������������������������������������������������������������������������������~~�}z��}urprx������|usrn`HZ^�����Z@T]|��hK?L������h.RA7544:;BPifdhkPBKA8BoF,*NJR\^`ad5^vD741/.38C\���������q������u��dZ[c~��}��;B�k.>M_K3&+5443.%5h�\?^oeQF.5mz���p.2\wbC9DIT[\\VHTw�����c9V�����E2t�����@:T]aeb@:83/8=<AHU������P;-*)).68:2)&)*-4Jaa]ntG9SplTC60//43,4C>0*(3ACLhrqngE7?CJNBFFJX]_ceimpopqnoqprqoqqoe6))%!
+$'()+)++)#%#"#$&(*)*&'()&%""##!
+	����������������������������������������������������������������������������������������������������������������������������������������������������������������������¼�������������������������������������������������þ������������ü�������ſ���ž��Ŀ���Ž������������þ��ž���»���þ�������Ž��Ŀ�������������������������������zgOKyqqssrrssrsrrropqqrponqrronnljjkjklkjffc_]YWTV[]afimpppt{��������������������zhUECFHLMQTX[]^acfegggijkkikhfhgggdb`\YZ[\X[\_]^^`bedfgmniZDfhfhedeb^_MSXY[\]fidghjfgigfedeedden|`?=;:?KQlk�jey`FTlwk��^^N.OkHWxZ71-Su�_RTTVFc=8BBXW<4+4~�����YVl^<;Tc830OdF@e�uKSZlL5355:EdhJKdpn6"=F<78763/FfiinkfY7_j�����z2bF;864:AJs������H>qXL`dQF;NnabY[gg]Xou��|{z7-SFGF?:VubJQV^kKfgB;::;94`SGCB@A=?Y]`bbdd]YTFe��wcA���������������������������������������������������������������������������������������������������������|~����{���zronmw������}tssoaD[a����ZG]]z��~P@N������j1VC7566:>CQh][bjNCLC8;V<*(BDQ\_^cj7[C6610/26>V���������r������s��aVZd���{��:D�h.@R\O6&.AC<4,#0d�]B]mgQG.9ly���p-2]zaC7EKVX[ZUBP{�����g;O�����F7t�����9;T^bfc>772.8A<AHU������M;1,**/3681)&*.06J_VMjtF9SmkSB1**+--#+;6-)*/;?Lmz}��S1530+-1>DO]]adglnopnonqrsrpqpmd4')&!
+$'))+),-+#$""##(/276.*),'$$%#" 
+
+ "
+
+	����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¾��¾����������������������ÿ�������¿�������½�ſ��ÿ���ſ������������ÿ�������ļ���½���¼��Ǿ��ſ���ż��������������������������zhQGzuuwxwwtttutrpprrprqpoopqnmmiijkkiinojkmge^\]_cbdglmqqs|���y������������}|����\THEGKNOSWY]]bcdffhfiklljjjjjlnjhebbcbcdcfghhjlnnomnopok^Jdghkhdda``JQXZ]]_fichgjhhjgfedgdddgo|b==<;?JSjn�g^xcJVmtd��d`Q,VjDW}Y72-Vy�\PTSTFg94A?C@;2)6������XZm^>>b�<1/OeJ@d�ZGS]kH6345<EdfHTfa[9&@H<898630Fgjr��m\8^h�����s6`H<8669AKs������HAvVPee]H8NpbdY]gj\Zou��~{0,PGk�nPWx`KQV_kKecA<;<<94aWFBBBA<=^\[^[``\XSAi���h7������������������������������������������������������������������������������������������������������������������{sonpkv������ssuqbD`[~����]Eaah}}uO?R������h1TB76;:<?ETgebehPGRC:>O;*':DU_c`fp;h�D7511005?V���������r������r��aU]h~�����9Cf/ANMH6'0UjQ:-%1g�_D]phPE/4o|���k+1aza>9DIUXYZUEMz�����k>R�����D7s�����;<S`gldA662-4<8@EO������J:/,+,/2671)&(-/5Hf\[jtNAUnkYA3.-++' *74+(*0;=Gnz���V.0/*(*-;@JRXaegjpopqpopqrpnoomd3)+% 	$'*,--01/& &####(;KNL?1/10,'&#"!
+	03+ 
+
+		���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿�����������������¾�������ÿ�������ÿ���ľ���Ľ�������½�������ÿ�ſ�������ÿ�¿������������������ü������������ȿ���½������������������������������zjPMxrtutvussrstrqspppppqolmljljgdhgghgjlijmlhdbcffedbilnpx���fTp�����������xvp`b��w\MDHLNQRTX\]bbeeghghijkjhhjjkmigeeddcddfhkmmnprsrrsrppjYLchfhgddbaaJNVZ\[`gibghiiggfggdggecfq{_<?<=ALTgg�eZfWDZqvi��ebT-OhCZ�Y741Wy�^UXUTAh95A>=<91(2������[_p]==l�@10MfHFh�bFR\jF6555=DfhJb��n4(BG;877540Feko{i\6\i�����s7aK<7668@Mt������G;oXS^\_I8Orce[_hlZVqx��}~~-.QEk���`w[HTW^kKhc@:;;<84`TDA@BA;>\XPTTWYZVTEj��yd?�������������������������������������������������������������������������������������������������������~�����~{pronmppw������~pnor`Ia_|����YD_ez��uP@Q������e/UB8:=<<=DTiigggJGRA9CkG*(:Gdmlgkq8Yq>5200016?Y���������r������u��c\]h������8E�d1Ngh]<%/ZnV;-%2f�]C_qdMH)8r}���k.-ay_?:EHQVXYSDMx�����h<W�����C:s�����:;Sdqsd=564.257@BM������E:0,+*/2360((*,/5Jb^dsyGBUkgUB6561+%)51*,-09:Fk{���Q..-)'*-8;GRY`dgjmnoqpoonqqpnond3),&!
+	$')-13277* %#$$$'IdjpS87@@90*&# >K?(
+
+	���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ����������������������������¿����������Ļ���¿��¿���ľ������������Ŀ����������������ÿ���ǿ��ÿ�ÿ�¿��¾������������½����������������������������|hMNnprrsrssqppppopnnnnmomkkhhjgbacbbdghjijkjjigfghhffjlmn}��pOHMv����������wn[GLh��cVHEJKNPRVY\`bcffhffjiiighjkijghfdedcegehlnpoprsqqrqqngZLdgdcadeb`_KMUZ]^ahi\gkkgfggffehhfdhr{_A@=?CMTkl�gh�eF[pvq��dcR1UlC\]:3,Uy�\`k\XAd87D<=;:2(3������Z_p^=@h�>30QiG=_eDGR\mK7547<DffHVsvc2%?H=87563,Efjm�~g]9]h�����s3fH94568>Kr������E<rZQ^`ZJ;QrbcZ[go_Urz��~}z.0OFp��zWv\NV[alGhaA9;9;93dSGB@AA;@]UQQRSUZTS@lvpaB������������������������������������������������������������������������������������������������������������~}zuprzupooqu������|ma[j\D`]�ni�SFbg���~PCU������c4TE:;<==>EVjiihhJGSC9CrF)):Jy���ns5Sf<5100249B[���������w������u��gdbn������8F{`0KOLM:'0IG74-'2i�dF]ofOH+5u~���q/-cya?:DFMQRTNEP|�����g;b�����;6v�����?>Tj�dA675.-16<BN������D70,*,/3393*('+.6K^[ZmyIDSa]O@59C7+"'62-02377Ek{��S-//))).46BQYafiknnnqpooorrqppnd1')%!%(),5DELI/!&#"$%(AVdfP=HRWU>-($!
+&'&!!!
+#DVL.	���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½������¿����ý�������¿���ýþ������������ÿ��������½�����������ǿ������������Ǿ������������½���ü�������»����������������������������|gMMkklmlkmnnmmlkjjjikmlkjhefdb_]\]]]a`bghhjiihjhhhjhklmnv���]FHGU���yxy��zfIAEKz�|[NEGILNPSUZ\]_beeeggihhhijhiigddeeefegilnoqqqqsrrqqqmhYOdffbbdcb`aFOTY\^agiUhjjgegfffehhdcgqz^?>=>DLSgn�dfbJ\otl��g`K*RjARrX94-X|�\lx]VBa39A?<;;3)5������X\m\?Cm�>30TiE8HB=HT]iJ4347;BdfH_�d=3%?J=77562*Gfkp��l[9\f�����r7eD86769AMs������@?sTQahYG8Poee`bhp^[px�~||z//JEi�~fOt_NY^dmHg`?:;:<94cTFBCCA;=_YTQRUVXRPHdcSUY@��������������������������������������������������������������������������������������������������������~����vps�~�uonu�~����}m\Vk]G`]���}�TGbc���zQ@R~�����f5SE;;<;<?DUkiihhKGR?7>N9')7Hh���pt4RlA520138<@Y���������{������v��lk^g������4J|^+;:74/&+7865-(2g�bD^qdJH*6s{���k*1dz`=<FADDDIHBM}�����g=c�����:7y����B?Ql��c?685/-06>ET������E60,+,124<4+)(,,6Lf^YmwE@RWSL>59C6*!(51046774Hl{��I-//+*(.45AQ[agikponppoopstpopo\1'($!%(+.<cdgf3!$"#$&,Ld^YL5TbppI/($"
+$272'&..-&
+%=J=*
+
+	������������������������������������������������������������������������������������������������������������������������������������������������������������¼���������������������ÿ����������������������»��½���¾���ý�������ý������������ļ�������ý�������¾������������¿�¿¾�����¾��������½��ƻ����������������������������}fJMefedcedeggfgfedeeefebfca`\\[YZ[YZ[[_bcdefdcedccdegkmn}��tMDGJSx�xbX]_^e��lRDCEb��_UEDGJKLQTVY[\^`bdhhggghhjjhfecdcdcbdfhklnprqrrtssqpmhULfffcdceba_INUZ]^biibhcffhhefgehgcdir{Z<><>BMVlj�aZeXF]osl��eaK,UeHSqZ74.X|�ZktYUDe6<A=<=;4)7������T]l\=@u�=3/UiE;?:?HQ]iG4556:CeeGi��Z6"AJ>99653,Fikr~}i\9^k�����n7cE9777:ANs�����}A?sUPckZH=SohjgfmoWWpx�~~}y13QFv���ds]JY`fkKi`B<<;=;8dUHEED@:;^ZYZ\^^XSRH`pbaWB��������������������������������������������������������������������������������������������������������~���}rw�����woot~�����|iYYl[H]V�qu�VG]f���mQBS������]5WD:;=;==DUljjifJHS?6>nJ),9Ifz�|pr3Qc=23138>BI\���������x������w��ml`n������1O}`-7974.%+9@@<-)3e�_CZe[KK):v|���e*4eya@>F<88:=@AR������d;a�����98|�����>>Tm��h=5:61./5?DS�����~C60+*,035:3*)),-5KfgksxEATWSL=18C6*!$2207:;77Fnz~�I*/0,*(-24AT[`filnoppnmonrsopqsb3&(% #)-/-7IP\c1#&$#%%-AVUUK79KZ`D-&##	!5GTG2% +@DA5&
+ 3KC/ 
+
+	�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿�������ÿ���ü�������½�������½��������¿�������ľ������������ǻ���ÿ�������ľ��ÿ��������ý��ü���ü���»½�����Ŀ�����ƻ����������������������������{gMHjegfdbacccdcb^^a^[\a\\]YXVVWRVUXVUWXZZ[\\[\\]^_^_begh���_DFHLa~cZixyF9h��^IDELw�m\E>ADEFIMOPQTWZZ\`ba`adefeeccbcbaa`ceegilmnopnppppplcRLffcdeeebabINUV\^_hhcfbcdgiggecgedehr}Z>=>=BMUlk�`kvXGXprj��e]N+V`HW�]63,Wy�Zft\X@f4;A<<;;3(8������U^k_;=el<50SgE><:=LRZgG4565;GefBRcn\2#BH>:8652+Ijlr�|jW7be�����n6cF96779@Nt�����};AtXSfh[I;RpekjiklUZqw�}}v/3TC����fqZNYaglKh^C@C?><6cUGDBBA;?_]``adg[RRAVhedVF��������������������������������������������������������������������������������������������������������~~��}s{���~ynpos������zeUZlXCb\��cd�UD_k���hPAQ������]3UC9:;8:<DVjjihfKJS@88@2'"9>Oy��}pr7MY;2127:BCPk���������y������v��qlin������4V{`09>A;3%/FZTD0)3f�`?Z`[ND":x|���j*2e{`@=G<7769>AT���u��eAa�����:;������4>Ul��e;5<61027>DQ�����|A6/+),/27:2**+,-6OinryuDBUWSJ<15?5)!%3118:887Buyrx�D+./,)'-03?OY_ejlklnnpmnoqrppqpd0()& #6;.(,((&&%#(37<8C\W]Z-"'%"#%-:O]s\DP]bV?,&%#
+#KZTR>((DS[N,
+ 1MQ7)"
+
+���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¾�������������������¾ſ�����������������¾�������Ž������������ƿ����������������ž���ľ���û��ſ���ƽ���þĿĺ��������������������������{hLO~oommlllkjjjhgdcbcba`WYZYUZWVXVYUTXZ[Z\\^\[\\\[ZZ\]^f���WGGIRr�ms�~~m?uwx�mUEBG`�y^J;<>@BCFHHLNPNUWYYYZ]_[]\__^__`][Z\_`cdghjklkklkjhg`NNihfdefdb`aGPUZ]^`hjefccdgiiib`cdbchr~\?<<?BOWik�bhxWIVotk��g`O(YdCZ}T73.Y{�Zi}]WBe1<B?<<:2*9������T_k^>>OK85-ThG=;;>JRXhF5447<HdgFYlw\1!@F>98652*Ihmq�jX7cj�����m4cD87769?Ot�����~;DrVSdhZH<Sq`gaclkVYov�~|{01XDu���]u\QZbhlIgaBHME=<9dVGBBBB<>`[Z\]]aZSQFKT[UGI�����������������������������������������������������������������������������������������������������~�~~}}�}w|��urlt|v|�����|aY[o]Eeb��qv�QF^u�lwO=S������a6VE:::9:>DUgjigbGGP@865/)'SCOu���rr7T\=315:=@BRs���������|������t��qlkl������/`�^/?SYN7%-;MM@1*2g�dDVdZLC*>z}���f*2e{a@=J=665:?BU������aBf�����=:~�����5CRl�~`<5;5/039?CO�����x@70,),/2671+++,.;Pggkuw@CYZTK;38?5*"&51178677Crqfo�H-/0+(*/01@OZ_ejklllnpomnqrpppne0++&+U`D8Q7=67@6%/@DYHD^hqX-$%#$$'-?SbqLBZZW_@0(&"
+%LUCG<+"   *LXUN/$>TL;44)�������������������������������������������������������������������������������������������������������������������������������������������������������������������¿���������������������¿��������¾������������¾���¿�����������Ŀ���þ���¿����������������������������»�������þ���ý�������ƽ���Ľ��ü��������������������������{hMO~srssttusssqonomjjkjie`aca_]\^_a^__bceefhhiffeccadefn���PEHNZ��k��z^=^۷Z|xZE@EO��bQ;;>?CEGILNOQMSWUWVVOTUTUXVWRXWXVUTWZ[]ababcccca`__XKPhhgfggec`bGRYZ]^ahieddcefhggdbbccchr}Y=<<@DSVfl�`UcWGZmrl��daO(YdAZ{Q63-Z{�]s�TZDa1:A>=;92(<������Ubo];Cl|?40SeG=;<?KS[kG5237>FgeFVxmX0#AH>88652,Iilo�~iY8ai�����g4bB:6768>Lx�����~;DtSPhmaH=Vqdf\dnkXYoy��~{y+2XBm���Xv]TafhjOhaBKLD=9;eRFABCC<<`VPUVRZZTPGKSgd@I����������������������������������������������������������������������������������������������������}|~~���}~}|w�{urq{�z������|cWXs]D_\��sy�QG`r�x�qM>U������[5ZC:;<;<=FXkjihiMLOA8=V<*$SJS|��~mo.biB359=@AEOp���������{������w�~plko������+^�\-GPEL6$)28;90*0i�_CLNMIC&=u}���g)3ezb?DK;545:=AV������]Ah�����8B��v���;BSk�a:7;4159:AEV�����uA92,*,/2481-.-..7MaSVnw?BZZVL=49=5+!'72/67457Hxvos�E-01+(*.02@UZ_cikmnnnmooqsrpopni0+*%5\0,[=YG?PG0/DTbTJ\^fQ-%($$$%+KgmkD9UXU`A/&$"	!Gf`L8-)'('/BA3.$'@KICGN9
�������������������������������������������������������������������������������������������������������������������������������������������������������������������¿������������þ�������ÿ���¾���þ��¿���Ǿ��������������������������ž���½�������ľ�����������������¿�������ý��ſ�������������������¿��������������������������{hKL~sstuuuuuvusrqrponnpqmjghhdeedefeeejikklnomnpomlmmop����JFFQb��|��p*%q��aew[IBHO�iU;=BDHIHLPRTVY[\]^^]YYYXXYYYWZXWSRQVXYZYVW\]YXXVVWYTLQighkmfgdb`GRY[^_bhidcedffihgebcdedfqzY;:;AFUUjl�^^y`D^psj��a]M)\bH]rS:5/\|�]n|UU?a2:@<;:81):������Xfp\=Cv�952RcH<:<=KQ\jD6335>GhdFAFDD0%AF<78652,Iiks�kY;ah�����f8d@97778>Mx������=GuTQklYH<UpcdYdnjV[pz��|w-0UH����du[XjkjmJjaCNMB<9;kRFCEDC<>^WSTTW[[UPHO]l^@I����������������������������������������������������������������������������������������������������vy}~��~|||y}��vy{xrv������{gTWo\B__�~���QI^h���fO<T������Z6YD:8;;=@GWjiihfGJP?9EnA(/;V���tjn0gr?34:=>=>@b���������|������x�~ojel������*\�^+?E?@3"/68<<2)3l�_BJHJKD(?z~���k'3b|dADI;554:=BT���y��XFm�����1F��z���:BSi}y`;9<328?=AKW�����tC;2,)+/34823:3/.9QbS^m{ADZ]XN@7?@7*!'83068658Hxzx}�C+02.))/01=SZ^ekklnqqomoprsqnpmd.++#,_!$d?W=<T=)-IVSONfgi]*$'%#$%,TaebD<[sqh:,&%!
+CY]T55587228;82',HOKFCC<%�����������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ��������������¾�������ÿ�����������������ſ�����������������½��Ŀ��������ÿ����������������ƿ��������þ��ſ�������Ǿ���þ���Ŀ���¾��½����������������������������{gPIrmnosssuussqpoonnonqppmljihhiggfgjikmkkmnoppmoopoqru����FFHPg�����e$l��j]u^HCFQy�mW>>CHJJKNPSUX]^`bdeecb`abc`bbbabb^[^]^]__`cd^^````aZRPggjljffea_IS_^]^chhddddefigfdabdfegr}Y99:@GTVkl�[SgUBYptm��^`N+\c?TkS94-Yz�X^bWTE_1<A<=;;1(;������Scp[?@x�741ScD<8:>JO`hD4545;DfeFDPS@/$@E<79664-Hikr��gW;ag�����h;c=:::89AKy�����<FvVSjlYI?Vr`aZdplWXry��|t*3TE����cu[UhojlIj^DJKBA=<jTEBCDE=>aZ\^__c^UPIMafZ@L����������������������������������������������������������������������������������������������������uz}~}|��}{{vw��}|�rms}}����{j[\jZDaa�wv�NHai���jQ=S������X8[A7667;@GWhijihJLS=6?O9'):Z���zko/gr>3347:8:=b���������{������y�}lj`k������*S{\)>KB;/"2=EIB1*1k�dFLJLK@'<|{���g*8i}d@BL;867;?CW�����YGm�����1G������5>Tj||b79;43<A=BLX�����q?91,*+135849J90-9SjaenrAGZd]OA7<@7*!+93168787J|lw�?-.0,,,/01AR[_ekkloqponopsrrppn_-/-& -i#)b(O:=K*"/EMTQSehlZ*#%""$'1SSF[NFcqoh@-'$!	"OKFTCAKUXC>OQTI0#<BBA>;6&
������������������������������������������������������������������������������������������������������������������������������������������������������������¾���ü������������¾���������������������ſ���ÿ�������¾����������ÿ��Ŀ������������Ŀ�������������������������������ÿ��ƿ���ž���ƿ�������ü����������������������������}gJHg^diyurpmkjkkihijkjmnojhgggghggfikikmkjloqoppnorrros~���RFFNf���҂Y-l��lfzZECEMt�oX=?CCHIKMNRVY]``adeedgfcffhfgfghgdeeeehiilmmlillljkfVUdginjfeca^KU]]^^aghbefgcgigfcabcefgs~Z:99@ERTkk�`_jXFWosk��]_J*\dDVoU930_|�YYaWWC^3>B>?<<0&:������NcrY;Av�430SdG=::=KQ\iD5446<FgaD_ns]2%DE=9:773+Lkku��iV9`g�����f;aA>DG=:@Lx�����=ErOTin`K>Uqda[eqiW\pv�}}x)7RD{���[sZS]eglJh]DKQCB<:hRDADEE?=a[]__`f_RRIPZ_SAJ����������������������������������������������������������������������������������������������������w|}}|~~}|zty~~~��xpjr�����}lb`lYD^X��sw�LI^l���ePDT���x��W9WA7425:?GXghefhEHL<6FmG(+9\�o|rm1n�?645@A;:?g���������y�����}x�~lhXi������-P{\*>cfJ/#3R]ZB1,:l�cGKKKJD(;z|���i+5d}b>FM=778<@E^������[Fl�{u��/H��~���:@Uizw_39;43>B=BJ[�����m?81.+,244:5;J80.:UbfiquCHWYXPA47?5* *740788:;O~�ku�A010-/1102BS\`djkmnpqqqoptusqqm],!0-'!*d)]%Y9@J+(0U]e[QKIe],$&#$&&*NeflR;TimcA-&$#!FQTZCGYfpSKZZ][2 
+<G<<DE;&�����������������������������������������������������������������������������������������������������������������������������������������������������������������»��ÿ���½���¼�������¿���º���ÿ��Ŀ���ž�������Ľ���Ľ��Ŀ���ÿ���Ŀ�������Ŀ���������������������ſ�������¿�ÿ�������ý������������ļ����������������������������}hLGb\al�zqiecabab__cecddddba_``aa`dfddddccdfehjkjlllkilw�ɧUBGMb����Q%Kz�~hquT?BFNx~mS9=ABDGIKNPVX[__`efefiheggfffgggfheffijkkprponoppooiVVffiqmgeb_\IV]\]]`ei`giiffigfcbdcdcgr~W99;?DPTjj�^bu]EYmql��c_J)\cDZvS83,[}�ZhrZWAg3;@>=;<1(=������Rhv[>Cw�;30SbG;::?JQ[hD5345=KjbI`w|s/#AE=:9972)Jiim||kV9bg�����d9_B<^m?:@Mw�����:GqOUfh_I<Sjbb^dnhY[mw�||w(6SG~��t\tYR]dgmGk^CPTBC@=hPDDDEE?=_UWWXY_^POJRfxbBI����������������������������������������������������������������������������������������������������w||}|{~}~zsy|�}~�zolq{}����}jhfpXDb[��kp�KI]m���gPAU������W:VA5225;>EXjf^bgFJN=5=U:(-:Y���ymj.v�E956OP=;Ej���������|�����~w�~mgZj������*P�]/<_aD0" 2;=D:1+5h�_CJJNJA)?|~���e&7fz_>HP=878=BJa������YFm�to��,H��v���4@Tl�_37<418=:?GX�����k>90+,*256:4<F70,;ZfW`px?GYVUQA58@6*+741787:<O{�v}�?330.14312@R]_dilmoprqooqsutqrk[+0.& $>#M E1=J;0)AIOPDC<HF(&&$$%&.InyqSFgfnb@+%#!BdjdADVTcH=RQRM-
+)40;W]J&�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ��������������������������ľ�������ſ���þ��ſ���ž���ž�������ľ���ý��ſ������������ſ���ÿ��ž������������������������Ľ����������������������������~iJDnggpzzrkhiigdefddeb]ba`_[\WT[YX[]_\\WUWX[ZXXZ^`cabbar���dFHJU�����w[t�xmkscK@CIRzkJ37<@BEFFKNRSX[\]bcegghfhhddfgefffcefgjmmnppponorrpjVVgfioifeba]HT]_`]`eh_hklfeihhedgfedgp{U89:>EPUik�S`mY?Zlojzrd_J(Y`D_wQ83-`}�Yr�^VCf4=A?<;8.+A������KkxY>Av�<45XdF<<;?JS[dE5344<KfdEFTrW0$DE>;9872+Jhkv�|iV2ai�����d7b@>LT>:?Pw������5FvXZik^I;UjcaYdmjW^nx�~|u&3RHy���cvWRZcimEn\DNN?D?AgQFDDCD>>[STVWU\\POIRciTAN����������������������������������������������������������������������������������������������������y{{{{z|~}~|tx��~{umq|~����zb^boVCa_��d|�FM[gtq|qL@X������W<Y@5235:=EXkd`fbDJP;6FeB(,;Y��slg*s{E:66KO=:Lu���������}�����{v�zohZi������'Q�Z-=L@;0" 17;<8/)5m�^DHJMI@(={}���`*:fzb@HJ:789=@K`������TFn�zx��,J������3DTp��_96=53?A;=EX�����jA8.)+,158;2=H80-;Wm^dru?K^SSSC58A6)*83/4778<M�����>140-12312>Q[_chmmopppppqstrrrm\+//)!"'$(33&4WaZI;?;;9%%&"$%&/`ntlLKh^e^>+$% !R[\^E@[abQBTc`J/	
+!6>6;FQA&�������������������������������������������������������������������������������������������������������������������������������������������������������������������þ������������ÿ���¾���������������������ÿ�����ÿ���þ���¿��Ŀ������������ý���Ŀ�������������������������ȿ��������¼������������ľ��ü��������������������������}gLR}qnswvttrrsspoqponnmlihhbac`_`_\^___^\]_^]]ZZYX]]]^ao���PGIMi�����zvpnrtgQDBCL]te@037:=>A@CGJNQSUXZ[_abca``_]`ba`a`beeegjkkklmllmooniWUhijnjeebb\EU__`_afe`hjldfighefhgfeir|X;:;=DOUgk�Xgo\C\oqheda\L)^aC[}P72-_}�[r�YSAb0AB?==;0(@������IruW=?kq752ZdD;::?LS]kB4547<K`aGEAC=.%GD>;9773(Kllu�}eX9bj�����b:aBD[qE:AOx�����|3EuRWil_H:Ulga[gljW_nv�~~{q(7PI}���\tZTZagjEjYDOK?B=CgPEDDCB=B^VTWWW]ZPOIQfzYAN����������������������������������������������������������������������������������������������������wy}}}||||}|y~���}~�vjr}|��~�zeXarVC`]��jy�AM[e���zN;X������S:ZB7457<>EXojijgEKQ97I];&)<Z���xoj-lsD958HJ9:Ht��������}~�����yy�}ld[i������)I�X.>RVM6#2;C?80)4m�[CFHKHB,A{~���\):e|_DGI=676=@H_������UCn�����.M������1EUn��]58<82OV;=G]���w�e>7.**+237:13<3/.<TfRXuw>KbWURB6:@5*.;3/5767;K�zp~�<140.13312BT\achlnoooqqpsuussroZ'".10$9LCMNG[[SD'%&$'&(,[kmjBMjlrg<)%#!	EIFJCDVX[N:G^YL2	
++JYHKUZD#��������������������������������������������������������������������������������������������������������������������������������������������½����������������¿���¿�������½���¿���þ��¾�����������������ľ���½��������������������������ÿ�������������������������������������Ž�����ý����������ž��ü��������������������������}eIT�wvuvvuuutwwutssssqspqponlnnkihhgffgiijjkihecbccdegio����eFJIOz�����zwzxgUEBDGQo|q`:359=@BDCEHKMMLLSQQRXXZWVXVVWX[\ZX[^]_`abc``bclpmlkhYUgihmidcca]LU^]]^_geahkkghjffdegfecfrW;89=CMPjf�VYZRHYrrieda\L,ZdB[vG64.a}�ZoWUHg1<A?=<:0(D������JxuY=@ae:5/ZeC;;:@LT_mG6457=KdeEYafU+$FG=98774(Nklr|wfW9bm�����`=]ADa�E:BMx�����y3HsPR_d\I;Vnhc[gphP]my��~}r$9TB����^qXWejhkGlVDNK??9AhOEEEEB:@[TWZ[\_^ONIOdlOCQ����������������������������������������������������������������������������������������������������uy||}|{{z{yt}��yy{wnin{}����x`U`sT@aZ�����DP^dsx�xQ9\������S;ZB7237?>C\kijkgEJO;6H`;&,?[���ioi,lzG:57:74:Fu��������}~�����xw�xme`j������'G|T,<Si`9#4J\L90*4o�[BIKKIA&A|~���[(9f{_FJL=777;>G`������TEp��}��/L������3CTm��^29;75JP<=G^�����f?:/+*+2467002/-.<XcSWsy>Ha^[TA5:C2)/=404899>P�yw�8330/24333BU_cfhloopqrqqrtttrqm]("06=.""(*' !"&#%&#!"
3NU\RHhvqW)$&%%&)2\jgaAKddj`@*%# 1RVJ1?HDJC:Kce`6-?GIHNG8"������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½��������Ŀ�������Ž���¾������������¾������������ľ������������ÿ�������¿�������ý���½¿ƾ���ź�����������������������������������������|dKQ|uuuvyusstvvrrsqprqpoqpnnppponmljkjiklloqpljjjlkknpqu�����TIGGOn�����xodSHCCGK^ythO25:?BFKJLMOQQSTVVSWY[[][ZXXZXZXXWVVVSVTVY[ZWXcyied]PUfggmkedba^ET\\_^_geakmngfifecdjfebgt}S999;BMRhf�Ugp[FZqqifeb]J.YaCK]H83-b}�Xn}TVPo3=A>=:8-)?������K�xV;?y�=4/YdD<;<?IT\iF4457;IfdCl��]/%FH>:9972(Llmu�xdT;al�����^<^CBbz;:BOy�����v4ItQUej\K<Vjee[cokW\ku��~t!8RB���u\uUVjojhGlWCMI=>:ChPECEDA:A`]qnac]PMGOgkZ?P����������������������������������������������������������������������������������������������������uy|zz{|{}~yt{}vtojjio���ze`csW@aZ��vk�EP[^a]daM:[������R>X=4334??E\lijidCKP<7Op>'-?\���toh*grG;7;GE7;Ey��������z}�����stwrke[k������)N}Q-;CPK6"#2YlQ8.+8q�\EIKKKE&Dz}���V%7h{_EKJ<98:<>F`������MHn�yl��0Q������0EWq�[3::87NQ>>H[����hA90++,357924?1..=Ueaerw@Lel`UB49G2( 3>2/347;AT�|w��943//35423BT_eghjoppprrsstturrm_)#10iQ#"9HIU)<KB*?>4*..*%%#4VklLHakcS&#$%%%(5IU]`9FijeZ@,%$!(7;3'2DILGDV]XM1		,MYQDY]@$
���������������������������������������������������������������������������������������������������������������������������������������������������������������������¾���¿������������ü����������������������������������������������������Ŀ������������Ŀ�������ľ�������ƿ�������ƾ���ǻ���Ľ�������������������������������������}eKNqonnrtqpoqqsrqqnnnolklmlkmmkomjjiiihillnoonlnoppqstuv~����KFFFKVdjhgYQMECDDLTptiZ>29?BGJOOQTWWXX\`^bccccaa`ba_aded`````__`_``Y[j�xg\]YPXhhjnlfddb^FSZ]`_^fe^hnmgfigfdfjeedgs|U:9;=BNVgj�YfhYIZpokhecaL+`aGSoQ62,`~�Xp~YWUw3@B?>:9.'E������L�~U==gy530\cC=<=@JP_iE6456=HibC\u�^0&GH><<;83,Mkkv�xfS=`n�����^=aDA`q;;CNy�����u7HuRS^dYJ;YnebZdkfT`iw��t#6RE���zasTWnogiFkYDNL;=:AeMEBCDA:A^^z�sceZQLEKefY=L����������������������������������������������������������������������������������������������������uw||{|}|~xqy~�wpigjlrzz��~�{qmovW=`^�����AR\`kdd[M<Z������QAW=4224;@CYjklhcBKM<7EU9)/?Yst�yrf*jwF;:@RT=:Iy��������z}�����rvvqnf[j������'S}S)=LQF2  /WnQ8-)9s�[EIKKLF'Gz|���U(:e{[BIG<989?@D`������JNmrl��*N������3EWr�~Y1:?><VXB@H]��y�f@81+,-348:4<O2/0@Yd`ero7LgvfWC47>2(-:2,./4:BS~~|��;340.144/4CU[djkknqqqqssrtwurqq`+"3+uY "[C!eEVKF)]UI-PMD0:7'8UgkTUiY_L$$$#$#&:ef`b@Ielj]>)$$!
+1853-3DQ]RBT__U3"
+,X^RM\bD$���������������������������������������������������������������������������������������������������������������������������������������������������������������������ľ���ý������������Ļ���¼�������Ľ���Ŀ�������»�������¾��������ÿ����������������������������������������������ļ���ƽ������������ü��ķ������������������������|gJOqkikmnmkhjknmljlkjgigegghfhggfegfefefikljklmmnoqrsttts�����uPHGFEGEILFEFDDDJVhoi^G75;ACGKORRVZ^]^`cbdfgffgigghgijjikjliijknigihfjonjghfWXhghlidcca^GT[^a`afgahmmhgigffhlfefit|V=:;=CSVkj�YaeWH]spigdb_J*_dAWuN66/a��Yw�YYYw1<B=<:8.'F������H�{V:?]j540W^B=<;@LO]hC6446;GgcFUjsZ,)GF@CE?831Njiehh`S;]o�����\:]BBds@;CO{�����r4IrQRXTSH=XmfaZeolN[kx���u7UH����]wU[{uffJi\CMO<>;DfMFCCD@7D^Ur�^[^WPLALlcRBJ����������������������������������������������������������������������������������������������������vx{}zz}}}~wlw��{ujfgls{~����|uuuvS=e]�����;P\p�xgN:Z�����M@ZA5226:>E\lljhbBNN<8Lq?&1A]���|me+fr?86=RQ;9Hz��������w������suvqoe]j������!TwP,=HGC0  /FVI7/)7t�WELKLMH*D|~���T&9d~^DKI<889?@>b������NLr�rp��,N������/FWowX/>JQXddSHK_�����f@91,--13883EM70-?Yg_dtr8KfqfXC16?3'.72,*,5<CS�{y��7440-132-3CS^cilloprtqqrrtuutrqW&#2+wO#+z1MQIfZ*UK:$WGN8J>-1M`aTXhcgN($%#$%(6[OGI=F_fi^;)$$"DZXP86=QcU@Kcc_7"	1WbTOXS9#
���������������������������������������������������������������������������������������������������������������������������������������������������º�����������������������������������ž���¾�������ÿ���Ž�������ļ���þ��þ���þ���ľ�������Ŀ���Ŀ�������½�����������������������¼�ýĿ������������ž��·������������������������|iISyollpomjgighifadeda___`adca\^]a_aaa``adeeffhiijlmopprtp���۱wRMECEFGFDDEDGKVfkc^O825=BEHMPRUY\]]bcdefghhhjkigjklmonnlllmnlonkjjklmnorrq_Wgijmicdb`_GV\]a`cghahmnfghffehldfegq{V<99;APRjl�WfwYDZnpjgda]I,\`Bg�O882^|�Wnu[\Wt/?C;<=;.+C������C�uV;Chz95/YbC<:;?MN[eD5657;HhbC\�}P/+FF@]xK83+KhkjrobR=am�����X8]>>fx?:ANx�����r7IqX]leYH;Vkd_\gniO_ny��|~s&5S@����ZuR[}vheJjXAJI=>8HhKEDBCA7F_Wv�[Y\VMMGMcfRBQ����������������������������������������������������������������������������������������������������twz{yz|zz|xry|�{rgdhq|}����}ttsuQ>e]�y~��4O\x���kO8[���~��L?W>63459>C_mkjhe<KM<6CP3(/?Tlq�wnd-`k>559NL89Dw��������x������nuvpmb\i������QzN2@IHK2 "/4:;5/(8r�ZFLLKOD,Fy~���M(:e|[BNK?<;=@AAb���v��MKt�qn��*P��w���-GVhwt[5Aa�����RJ\�����d?:1--.24990BM9/.@Znptzs:NdshXB/5A3&/61+*-4<@V�{v��0440.343.5DT^cjkmppqsppsrrttttpV%#..vH%+z6GV!6j<V=.$VPF*QK/ 9PS^QSVVmQ)#$##%(5aZRL<Mcdka:)%$!6\gkJ9G\hHAUb_Y40O`PQUS>&
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ���¿���������������������»���þ�����¾ľ���þ��ƿ���ž���ž�������¾�������ǿ���¾�������ǿ���Ż��Ŀ������������¼����~���������������������{fHTwxxvvusqqppnmkfdghgfbeffdd`a`b`ba_^____aeb_aa]]edgkllkd�����~cPHIGDCHGIMS^ie[UM:0/6>BFIMPSWZ\Y[cdcedfhhiiijjijlknlmkiklmnqnmjikkklortq[Xdimrleec`\GW_\_^bfgaikkeggfffhlhfdiq|V<99=BNSjm�QQbTJYtokgdb]H*cbGk�O963a}�Xt�Y]Lt/CB:;=:/'G������G�tT=Dv�43.]`D=<<@MO_fD3456=HjaCEW^N/%CGAa�H63,Nglx��gQ:`l�����W;[>?g�B:APz�����w6IqWZhg[G<UkcabhngSbjy�}�s#9RA~��|]uS^ztjdKhU@KIA>7HgKEBAB@6C]Z��b`bXNNJQitYAT����������������������������������������������������������������������������������������������������vxzyyz{z|{yu{�����ngipy{����|qjeoP@d`��~��5QZh}�z^N6\������H=U@6216;?G^mjigh?NL<7Jj=&0?Wcclhmd&lsC55:HI:;Ky����w���x~�����pvvrlc\l������#K{O.FW\[2!/6863,'7r�XDJKLL>*Jz}���L(:i{[AIIACKLCAEb������JLs�su��,T��{���-IV`hmZ7@MUWej_DH_�����`?<2,,.349:0AG3.0BZowz}q9OgshYC004.%170-,.5;@Z�|}��2230.254.4FU_dhknprrssrstssutroT"#0/rP?5`P(IE2FC]8+.PBJ$0K>$4EHZJLLAQB$&%$#$(9`grkDK]WYY6)$$!NibX;7VcaD>UYXU3/IVKMLJA'
��������������������������������������������������������������������������������������������������������������������������������������������������������������¿������������������������¿�������������������ľ������������¼���û�������ž���½����������������������������ÿ�������ǻ���ƽ������������ž��ĺ��������������������������ygFRxvxyyvvstsusqtrpnmnqpoqommjkkmlkjjgeeggfffb`a_YWa^ad`aa_kx�����zdVKBCNQV_gdYRQI9-+/8>BGHKNRXZ\WY`abcacdfhhghigghhhghfegiijlkkjhhiijmnrkVYfjnqkeea_ZIZa_``beechjjfgiffdgjfcchs{U=:;?ESTlg�VMURG[rmidec_H(`_C]rF870_��Xs~W]Ks)CB<<<9.&L������H�xT=Ap}520]_D<;<@JP`gC6547<Ij`FSZ\R-)FG@[�F84,Qinq�ygR7`n�����U;Z@?g|99APz�����y3IrRVljUI<YhghflmjNdm}�}�u#<PJ����bsSYvskcIjT@TTC>9JeLEDCA@7E\_��^_aRLLKTipV?S����������������������������������������������������������������������������������������������������uxzzzyzx{{yt{����zogipz|����|pf\oRAb]��w��1R[c��}^M7[������HAVB6217=>E]mijhe>MK:3<J3&1>ewqsgm`)pyE64?XJ:<Hv�������v}�����quwrk_[l���q�|!IzM-CJD@*!5DKC5,'7s�RCINLL=)Lz}���J)9kzZDKMEKidE>Fb������LKt�y|��*S������3IW_cjZ/<;02AE;<F^�����]?<2.--35::1:=20.@Ynwz{r<PgtfWB101,%06/-125:=Z�~��/241.354.2HV^djmlprsttstttuvsspS%'11]\U,&UA<&=CH4PE=5O/P4,2=$6QaaGI^OF<$%&$#$(;RGWdJCnpoT5)$$MZZV>DTYUJ;XfhV3 2KQLLZ`F%
��������������������������������������������������������������������������������������������������������������������������������������������������������������¾���ü���ľ�����������������������������������ſ�����������������½���¿��Ľ������������þ��������������������������ĿƼ�����ľ����������ƾ��»��������������������������zgJStpsrrqoonoppqqmoomnonnnmlljjlmlkkjgffgikmnlmlh`ajlkllmkjifm������zoeemoni]SLME7.*,06<@CGIKPRTUWWZ\\ZYX]^^aacb``[\^[[a__`bbecddeeefjjjkgXWeijokee``\HX`^``cfeahmlhgjffcfiedchr}V=:=?ESQle�W_fWE\mngeec^G-a\AT_B962c��PdiUWGk-EB;;<8,(F������F�rT>An�932\_E;:<>KOah?4438@Oi]Gl|oW,(HD@]�H82)Qkmq�zhU:am�����W?Y?>KS6:AO{�����|0KrQ\rlZL:YhgfdjmjPbm~�~|�x%8MJ����drUUwskdFkRCSWA?;KdKFCCCA:F^]yyWX[QLLLWjjR=S����������������������������������������������������������������������������������������������������vwxyzyyyzzvot~��zpkhipuw����{lacsQAf`��dz�5QWm���hO5]������JDU?7647@AF[ihikk?ML;443/%2=q���rna+Z_844AO>6:F{��������w������lsurma^j��vi�~IvH4HYWF-"?ZfZ:.&7s�SCKNKL=&Pz���J)<kyYEMRMQaJ=>Ce������KGq�|�~,R��v���1JT\bl[/:;65A@9>J`�����Y>;2.-.45:94=@20/C]qwz{o8NhtgW?268/&2813@558BT�|n��-220.134.2EW_ejnlprsttuttuuvssnT$'/-(%#'(%39:+6(;4=82!&)8H@?_jeG'%$#%%'8[WU[ALeiiW5*%#D\hiINUFDJ9CUWF.
3KSPHV_B#������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�������ÿ������������¿�������ÿ����������������������¿���������������������ſ���½�����������ƿ���ǿ���ĺ�������Ź������������º����������������������������zfMLe^Z_dcceaeddddfeecfeddeeddaadeccfdcabcehikkllmhimnoqsrpoke_du����zurlh^UQPJ@5.++16;@DGJMNNPSXZ\YX[[\YYYX^ceedcabcbbeca_`Z]`][`_\_cceeZMXfjinhdea_[GW^^``dgf`inlfgiffdgiedeis|T=:::BSOhf�Xrx_L\poeeba]G.b^Fe{D65/e��ZnnZUCe1?@=;;8-)J������@�qQ=@o}340b_I=<<>MQ^fB5249HVk]@i��],(IGCc�H61*Rhmx�|fT:_n�����O>V??_i<;CQz�����.OqTVeeZK:WgeaTbljLbi�~|�s9IF����`qUVielcKlRBHH?B:HeLCBBDA8G\[|~YXXKNLJVbcOAY����������������������������������������������������������������������������������������������������twzyxy{||zurz��~}shgjorz��}�yj[arN@f\�����7QYu�tz]N7\������KBWFB@?<CBGYjeehcCOK749B6$0Ao���nn]&FG64<AGC7;E~��������w~�����istniaYh��rb�~Is@6SVLR/ !6@EG80(:s�SBKMNL?)K|}���K):k|[?N[WReXFAEg������FNu����~*P��n��~1IU\bn[.;:55PK<?I_��zc�Y?=512135983B>///B]pwzzm5NjufWA5=A2(2913<449CU�{r�},11//.13.0HXbgkmnpsuttusrruutpmW$$/+%!
  (+"#&&06PjmpK$'%#%%&?diciAU``h`2)%$F[c^CH`dRA;IB20%0NVMIP[A!���������������������������������������������������������������������������������������������������������������������������������������������������������������������¿���½�������ÿ���þ���ÿ�������½���½��¿���¾������������������������ÿĿ����������ÿ�����������Ľ���ȿ���ü���¿��ż���ž�������Ƽ��·������������������������yeGJeacfddefc`\^___\]]_ZZYZ[ZYT[WUVYYYWXXX[[[\bbfffhijklnnklhc\Z_elpvupkhdaXQJB<81+,17<CIKNRWZ^`dccdegefgfcegfijmimmnnmmmjihhillhghehgfb`a[QZfkkmjfda][IW]^`]bfdbgllfghdddjmhdchrxQ<879APPmg�\rw\Q\plgebaZF+c\Ac}F73.b��Wnv[VHe1>?<;;9--L������<�tR<Bbc530`]I?=<?NP^fA544:L[hZ@mzQA,(HFEeD61(Qkku�ygQ7_m�����O>Z@=`yJ=DOz�����0NmR[kfYJ6\ieaW`lkMbl}�~|s <L?a��o[sSU`_ieLhW>BA=H;KdMDBCD@4I\[w�ZYW?LKMQT]S@T���������������������������������������������������������������������������������������������������rvzyvy|{{yvpy~~|ujhhs�����yfU\oPAd`�x}{�8SY{�~�jM2[������HB[Xbj\\QCE\i`_ef=NL:6No=&0Aa��zdlZ*KK5:GIRN79D��������p������muwql`Yj��um�zKtB5QWNO/ /4574.(9s�SEKMPL>%Ny}���J,:n|\CPc`Tm\E=Gd������FPu�{~�{)S��v��~3JV\alZ.:936OJ<?H^��|t�S<>?JK?85:70DA011B\nv{{n6NivhYC3??3'4:127258BS����z+00/-.23.3EXagjmmoqsrstsrsuuuqmX#%.,&"$(*+/VdXL<#&%"%&%9^U[d>Qc_dS3(%" OZV\?>[d^H620+% -S_GCQF0	
���������������������������������������������������������������������������������������������������������������������������������������������������������������������¿���¾��Ŀ���Ŀ���ý�����������������º��ÿ���»���ſ�������ÿ���ľ�������þ������������Ŀ�������ƿ�����ƿ���Ŀ�������ż���ſ�������ż��ö������������������������ziKVurtuvtrrppmljijihhigfebba_ac`\[ZXXRTUROOSRVTS[W__`a``baa[[SQSTTVTYYZUPKCA<752,).39?DKLNSW[a`cdeehihihgghhihjmmqnmnnnmnlkmopqqponprupooaSZhkjigfeb_[LU\^b_bghbhmkffgcddilgfdgqzT<:9<BNPie�UmtYI[mlifca]F+c\DdrD82.c��WnuXVDl1DA<:;:.*K������;�rT=E|{550_^F><<@JO]dA546:DNiZHy�gB-(GEAb�B72)Pijv�whQ5ap�����PAV>?JYB=CN{�����{/OpR_lg\J;]hcaXcnlHbq}�|z�p$7ODN|�m^sNUgegcHeVADDBI<NbKECBCB4I[]�][Y>JIKNRj[CR����������������������������������������������������������������������������������������������������muzxxz|y{yumu}�}~ngj������xaUfsM?fc�����6QZsz��eM3[������JC^b|�ot^EE^idcgh;QK;6@L4$.Bg���loV,XW28DB8858@~�������s������kuxrma]k��pj�{ Qs?4FUVC0#.6994.&8v�TGQSQJC%O~~}�I(:mzUDP`]ZeRA=Fb������GR{�rv�}'W�����y2GX`fiV09803PK;>I_��xi�WAHc��kG;;908;100B`qw||l7MhrfWD3@@1&7:00D768>Q|���s*21/-254/5FXbhjkmonrstsssttwtroT$'0+%!$(++/[ffjE &$#$$(1^kl]8CipjL1'$" M__bB<QefU>>>:/&9M`K2*"
+
+��������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ���þ�������Ŀ��������¾��¼���º������������ÿ�������ľ���Ŀ�������ƾ���������������������ȿ���ÿ�������þ���»Ŀ�����ʽ�����ľ������~���������������������{iIXyuvtutprpqpqopqnmnnlllmonkkjkiifebb`_ba^]\\\XXT[Z\\YWXVYXVRIMOTQMIIH@>>=:751,+-06:?DFIMMPSUVYZ\`cecefefgfdddhkkhillmlnmlknpnopopsutrsoeTWiiggghgb_ZGSZ]a]`gh^hmlfffceeikgffiqzU>9:<AOSjf�TlkUDVqjifda]G,c\@fzC820c��XlpYV@r3DB><=8-)K������?�tR:G��243\\D=;<@KN`eA4436>GjYLc}}S0+GEAm�A62*Pnoz�{jS:cp�����M@YAA^nE:CQ~�����{-RnOZkfYI:Ygc`[hpiLdo~�~|�n#=RIv�~q^tMPzukaJjUDQOEH=LbNFEBCA4I\`{yac`=IKIHJONAT����������������������������������������������������������������������������������������������������rvzyyzzwzzwoy|}��yow��������x_RcsL@d`�����5QW_^bbXI5^������HF_muoq[BEZjjhkf=OJ97AV;%1?o���qqZ&qr88G@0348B~��������s������itwrl`Yo��wk�xTs<3DSO?/#6OWG9/(9x�NH\mZM=&P~}~}~K)>p}SCQWNWhQB?If������AOy�pv�z+Z�����z1IYbhnV-:706QM;>Ic��o�UA[����m>>83DC200D^puz}m6OlueVB5;=/'7;23:448;Mlswxi+42/056405IXbikmoppsuttrsuuxuqpS!)/*&%)+-/WJ>A7'%###&5UrqU9XdmqT0($# "CKY_A<S\_Q=LZWD+6DRE)	�������������������������������������������������������������������������������������������������������������������������������������������¿���������������������������ÿ��������������ſ�����þ���������������������������������¿�������������������������������¾��Ⱦ��������¿��ž���º�������ɽ�����������������������������������|gH[|ursrrolnpplmljkkkhefkikihhghihikjhgihkijilkhgigiijhccdedb`\YPSQNJFDB@>;:87448:<>@CHJKNKLNJKPPQRTXZ\\ZYZ]\[Y[\]\`cdfeeecfeghgfjnnonknj`KRjigjhgfd_[HVZ_``bfd]fjhcfgccdiljgehqzU<89;<ORkk�JghRBRnkhec`]F.bZCdtI80-c��\uwVVBj,A@=<<6.'O������D�vP<G��130`Z@;:;ALL]jD4568<GhXHZdlE(+GC?`qC8/*Pmoy�}iN=br�����I>Y>@fF:?O~�����z/TpN[mlYL=[gfb]ingN`k}�}�m
<QL����asLNyocLlNCMKBK;LaKDCBB>5I\]ef^db8LKHHFEHBV����������������������������������������������������������������������������������������������������rvwxzzzwy{wqy{�~un���������waTdwLAeb�����6QWYZWTSK5`������HFZ[Z\]ZVDF_ijike@RJ;6BQ8"2>x�{nrV(�=<LJ<547E�¸������t������jttqmb\p��ye�s VuB4LQK?/$;`n[;/(;x�KJa�aO;%R}|��C(<ozWCMKB\kYD@Ef������AR{�pp�z+T�����{/LY`cpW1:727ON<AGc��|hxTDp�����B>:5NB/.1E^pwy}k4SmsfUA58:.'3:35;425;FagjiU*2100673.4JYagknpppppqsttsuvvrnP %1+'"#)*,.@:0/+&$"$$+@]XYV<SLQiU/(&#!CJ[`BARETN9E_ge1	2DF?)	�����������������������������������������������������������������������������������������������������������������������������������������������������þ������������ü���¼���������������������Ļ���þ�������ƽ�������ƿ���Ľ������������¼��������������������������žǾ������������ž���ľ¾����������ÿ��Ļ�������������������������|iK[|wttuwrqqrutrqppnqqmnommkjhhfecbeffcebghiijlijkklkmkkklnmiggbZXVSNLGFDCA?CBCGKQUQUVYY[\[[ZVNTQNNSVXVQQQQSQMNRQRORTWXPXZZZXY[\[_[`daa^]VJVhjjolgeb^\CX_`bacff\ghfcdfcdbgjigdirzR<:9;@OUhi�KlrRAPmlhgda_G,a[Dc~F63/c��XtyVXDh,D?>=<9-)Q������H�qO>Apm21,`]F>;;@NM_nD2457=KhXJh�|P'*IED\sE80*NjlpuohO?`u�����LBZ@@lr?;AV|�����|-TlP`os\G>YiggelpiPco~�~}�j"<OM���y_qGO{zobMmNDPI@J9L]KEDDC=6I[VZ[Z\Z;KHGHFCGAZ����������������������������������������������������������������������������������������������������qtvwyzywxywoz�wpo{����������ybNatNBd_�����6NWVVSSOK7_������FG^WUSVVVGI^iiikd=PJ75Le>$5F���rqS(��@?UZS<6:I�¸��x���u������mtuqk__o���j�qZpA1=675+"7FG?5.':|�QGe{gK@%P�|��}C*;p{WDLICWaMB@Gj������EQz�tq�y(W�����z0NX^bpX0:723QK?AEc���x�XCu�����E>;6N:-02F^pwy|j7SkubV@6;?2'692;A416;D_ddbO)21//463-3JW`glnorrqrsqsttuvtspO%1*%"
+%(*+-/11-( '&$%%(DhMEW=KNJXM.'%#G^bV<8@M_N6[_UM, 	4`_H)	������������������������������������������������������������������������������������������������������������������������������¿�����������������������������������þ��¿��������½���ü�������ļ���ý�������ƾ�������ƿ���ſ���¿�������ź���¼�������Ŀ¿�¿��ȿ���ž���¿��˽���½�������Ž������������ľ����������������������������}gJ[voooooollnnnlnkklmnllklljjjmoljffehggghikjjjiegijhgghhihehdc`ZWWWTQJDHHHHJKOSRX[ZZ_bbdeegihcghefba`ba`a___^\[\YSUV[ZYYUYWUVVTTUVTSPTSURNVfkkpkgec_[LW`ab^bdh[gigdegddddeefdis{R>;:<CNWig�LpsUBOmifedaaH+aXG^oC82/d��YovXVHj,>=<;<9*'T������D�jO=Epj441`XF=::?OPamD3446?GiXEIJGH++HEEf�C90/QihhmjfS=]v�����CEW>?lo9;CS|�����x.SiS_ieUI;XfjlooniM`n��}�h"<PI���s]tHPzsk]JkMFXI=C9J]NGCCC>6J\Uad]ZU>LDDGEDF=[����������������������������������������������������������������������������������������������������quvwx|zuyzuuyvjwz����������w_OaqNAba�l���2QOMMJHJE5^������EE^VLORRTGH_iiijb?QI77N_8"5Cju��hnQ#xz9>U_R=67C�÷������r������kvwql]_n��se�t[s=/6364*#15634/';|�RIj~iN<,K�~���B(>nyUAJE@YiOC?Dh������@Syz~�u'Z�����x5KZ^cpY1;835JH=BJd�����S>t�����C><.G9/02E^qwz|j7TjrdW?3;C4(57.8:117;GafbdK(01/0353-5KZaglnnpqqrrrrsstusqoK(/+'  %+,-.2221)!&%$#%)BqlZS7G[]fQ2'#" Ka^Z?7RXV84DEOR3!&(*O]H'	
�������������������������������������������������������������������������������������������������������������������������������������������������������������������¾�����������������ý�������������������������������¾��Ŀ������������ľ���þ�������Ǿ���Ŀ�������ƿ���ÿ��Ⱦ���Ž�������Ǿ������������Ż��ĺ������������������������}gL^{mnnnnmlknmljihjiiiiigfegdehffhhgghjiiknmnmommqomljjkklkkonjdggcb_YUUMPPQMQTSRY[[\]]]`cacdcdhkjjfgglhgljgggfgigefijilhhfecbbdbbedbbdfe^RXdjjojggb`XITY^b_afhagiidfiehcddehdit{V>99=CNUii�OrtYDOslhefc`H+c[B@C=900d��\jlWWGh,C@>=<8.(V������C�iQ?E��963^[F::<@MNakF5569<IhWEAHD9+*GECj�C92*Rhku�vgPAar�����?EWCEro5;CSy�����x1SoH]ogXJ<^fijkmniJdo���}�i>OL���o^nHO{mg`OlRGXG<E<L^PGB@D@6H[Zek_[XGKGCEDEF>W���������������������������������������������������������������������������������������������������pvwwwyxvuxwqpsz�������������xcWdqIAd`�X���0QPKdjYFC7_������HH\SIJLMPFGckgijaCRE76EU:$0<TZa\^lN#{y67M^F727B�¸������r������gvyrl__p��le�sVs<366;9+#/79771*@~�SDn~^N9+R����C)?syUELG@[kVFAHi������>X~���l,W�����r3LZ]boV.:94269=@Ib��~}�SCw�����F>:1I>020E_rx{}k8TlqdV?147-' 5805=226;DZ]^^F+11..341-7KZ`fknnoopssuussttrqoM(.*&!$>'-0&4&&+.$#" !
'-0,/9>82(!'&%&&)?dde[88D_pK+'$#!MU_kO) -1-4Kah9 *00HK< ��������������������������������������������������������������������������������������������������������������������������������¾������������ý������������½�������������������������������ÿ�������ļ���¾�������¼�������������������Ŀ���Ŀ�������ľ�������ƽ���ſ¿�������������������ƾ���¼�������Ƽ��Ĺ������������������������|hI_wnopnpuqpprrnmpnkknmliiihfffefaddedfhgihgiggimklkkihjigkmmkgffe`db`]^]^]Y]\^\\Z^__aa__ada``beeeedcfdfgghhhhhiiijilkllkihkikknlmolmqpqldKUgkkmigdb^[DW^`b_`hh`hkjefhbfdfhgfdht{N:68=ANVjh�LgtYAMtkfgeb^H,dZ=;;980/e��]npZWFk1H@?==9.%U������G�iP<F�z444bZC<;<@OOamK6468>KlZCBLF8**MFCq�C81*RjigokbK>as�����@EVAM�t:=AVy�����}*UnK`plUJ:^ifd`gmiKem��}�d?NS���oapGMrif[MiRFVH?C:N_NHBAEA5J]aqoce`HLJEDBDG@U���������������������������������������������������������������������������������������������������zmtussvwwvxudj{��������������xh]apEA]e�����4KP`h\iLA8]������DIYTMNIIOCEbkjjicAOF:4Rn;"36BKSFXjK(��@7BRF839B���������q������gxzpm]^r��rj�nVo;4=FOB+#8VaJ;2+:}�JGl~_O8&T|~���A*@oyTALGD^jNDAJi������<W|����n.Z�����s5NZ[`nS/<93236<BHd��qn�QEw�����B>:2JA03/F_qyy~e9WmrdR@1-,)%26/5?536:ASWVX?,21.*+0/,6KYafjmpqoptuuttsrrromL(/)##v9Ac*c1>KK4'C04)'-./2F]B3)"''$%&%I[Uli=D]if=(&$$L9@QC),D[j[6 .1	
��������������������������������������������������������������������������������������������������������������������������������¿��������������������������ü���½��¿��������¹������������¿���ſ��ƻ���û���¿��ž��������������������������½��������������ÿ�����ƾ��������ÿ��ǿ���¿��������������ľ��Ĺ�����������������������zgIZqmlkkmnolnqpllkkkkkjnklmllikjlghgijjllkjjlkkkmkkljheihghkifceffc`a]\^][^\]]]^^]^abgeedefdabgiggghgfcefegiggigdhffhhijihgjejijjjllmkikg^NYijknifeb^ZHY^]`^cgg_hijdfgbefhjgfeirzQ;8:?CMWij�RgnUCLukgeb`\E+aU;;::81/g��^{^SCi2B@>=>8,%Z������M�mO=D~u121c[E=;;@MPbtL6659@Lk]NOIBB,,FFAr�C80-QmkflgcL:`z�����AFT?Gvu?=CQx�����{(UlY`jcRH7^fea_bkkKdl����e!=RI���g_rFMwkg]OmSCSJ@D9M]NFDBEB7J\g|{c`]FMIHFDFF@\���������������������������������������������������������������������������������������������������nrttuvywxywsw��}����������vlaetNC`g�����3NKWmmkF@:_������BI]YTUQUWEG`jjjjd@TI:8IL1"36ThgaZlF,�|>7@cX@4;E���������s������eruolY^o��gj�lcu=3BXaH.!<`kN=3(>�MHfyfN;*V����E(@ozTBHED]oND?Ff������BX}����p*[�����o2N[]coT*;943:9;@Gg��rs�LGz�����D?;4G@335Gctw{d8VppdS=/++*$!67023226:BNTSR=.01.**./*4LZbgkoqqoptvttttsuspnI)1+%yU_Y#`4[1+9(c7S?"	(0474GaE5)"'&$&&&;F,CF32255/&%%#BDFV<#.QXaV4>:!
+������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ��¿������������ƾ���½������������¾�����������������������������������ƿ�����������������þ��ȼ�������Ǿ���ȼ�������ż��������������������������yeM^tjkmllmmmlmkiihiiigilighhjeihhfgfgjikkkmlmlmllllkhfijiiikmoomihhgefffc`a`]^]^^]abdgcabbcdacfkhgkiiffgffdhikjjjjiiiifiilkllkkkkjlnmkiki_PVdikmgffd`ZLW^]`adfg_gijedebcdghefdirxQ<7<AFOXeg�OqrQDNnjiecd^D-_V=:99831f��Zx}WTJf,D@==>;-)T������N�tK>C}v543c^D<=<AMMbsL4558@Mi[\�|tX(/EF?cuA:10Vkjx�veN;\y�����?CS?@kv<<CR{�����x,YiZ`llXI8ahd`YajfKfj����a >OM���l^nCQyif\GiOETJ>C;N]MFECDA9L\ezpXXWILHGFFGF=a����������������������������������������������������������������������������������������������������ttvxwwyw{�}���������������yrqssI@]b��s��3ROTt}jCA9^������CH[XURRYWCGbkjijh?MC85Z{;#37[]\d`oA,�{G7<RH22<D�ķ��~���n������hswsiV_k��`f�k ss=4DEED/$7FIL>4*@}�LH[pkO:(U�~���B(BpxQALE@GMCC>Jg������=U����l%]�����l7R_afoR(;946HF:@Gf��ld�JD{�����B>:4JE123Iasxz~c2VoseT=/,*)$"57089216<FZ`^]<,10.++.-)3NZagmoqpprsttstssttrpH)/*%o+?]!_2l)/a-aE_=(2:I?PaG6*"(%%%$'-1)+/,(((%$$%%#8BVdC%2`]ac8 E= ') ������������������������������������������������������������������������������������������������������������������������������¾������������¿������������ÿ�������ľ���þ���������������������������������þ»����������¿�������������������������������������������������ÿ���þ�������Ļ�������ɽ�������Ļ�������������������������{fK`ypqqqspqsqoqmkmnmkkmljffffcdeggfeefeggikihijhhggd_bffghhikjihfffeggdfhedbbbbbffgfjldddfefeghilkjijhhgfdcfhlmkkkkjhifgdhhhjkklkikijiijh^MWghiihgec_XLX[]b`cge`gkkgge`dahmcfdkryS;8<@EMVhl�IwqTDMsnhgdb\A,`U><::820c��YvXTIf/D==>>:.'\������N�rI;E~z231a]D>;;APOctL4569AOk[V��P)+FCA^j@9/,Wkiz�zdL@_x�����=DQ?@ku=<ER~�����w-^fP\jgXI=bhc\XbjhHim����_
BSN���}^oN\}jg]KfLDUI?E;Q^LHEDD@4Mai�v_\XFNHFHHFH:`����������������������������������������������������������������������������������������������������yuuwwyy{��������������������{ibprIA_b�x���6RLejqnOE8`������;K]VQGJRUCHblkjkc=SB87NW5!26Tem[Xq>-~|D55BC23=F�Ʒ������m������huxroX`n��gj�l!lr<8P[[K.$2;FK>3+B��IIbpgL;+V�����C,@qxSEOEAEOFC>Fm������<Z}���c+a�����n8O\^fmO(9946OG9BKe��uv�KAy�����==93J?/1.Idtxzc7UnrcR:.+*($ 57.8C326=IiggrJ)10,**.-*4O^chjprrrrstsrtttutqlF+0*% ~&9^$a'i.,_,X(K8	(2@OESeB6("'%$$#$)')''(*),'$"$#"5OWD3!*AB@T4
+'H<!")76'����������������������������������������������������������������������������������������������������������������������������������¿��������ÿ������������¾�������ž���ü������������������������������Ľ���Ļ�����������������ÿ��ſ���ž��������Ŀ��ÿ������������ƾ�������ǿ����������ǿ������������Ŀ��Ļ��������������������������~bM[nnmkloommlkljikllijoljkklmmifhkhhhhimnlmkilnljifdhjliijkjihiecec``cecaab_a``_`_bdigddeedfeehiljiijhgfggijjjkikmllkljlmljillmmlljjkihjf\HXffffffeb`ZHRX[``cgecgkjffeac_fgdfdipxQ:8=@CLUej�HvWAPtlggca]C*aW>;97720g��^u{XUMa/B??=<:.'V������M�pI>Bzt142^VD>9<BPPbuI555;AKiXKw{qA,,IDEh{B80.Yjkw�vgM@_|�����9JR;?bb<<BU|�����s-]jZ`lhYG5_fc[ZekgKfl�����`GPK���q_sP[zmh\MdL=DA?D;N_MGECEB4O_f|n[\ZJNKIJHII:_���������������������������������������������������������������������������������������������������zpttvvvx�����������������~��|m_rqG?^d�{y}�4PMXfg[FC9a��s���?JYVMDGKPGJbjjlna>RF71K\;"14^^mb^l?)yr=25DN43:K�·������n������gvvslU[l��jw�i#bs?:V_dV3%06AF@0,B��QDl{aM;(X����@+@rwSDHCARoTC@Kp������>X|����i*Y�����j:N\_dlO)8736UJ;CMi��yx�IDx�ps��==94L;/14Jfrxzc8UmpcS;.+*)$26.,-.07:OehimI*13.**-/+7R^dhkoqqqruutttttrurmC/1,$$s&GT%a RD7W'W$N5 
+'3AS>IUA6) %##&&$&)*&&+340)$$%$#!50$  L\ZI*.A4%#!+67+	����������������������������������������������������������������������������������������������������������������������������������������������¿���������������½��ÿ������������¼���¾�������¾�������Ż��������ý����������������ľ���ľ���¿���½��Ŀ������������ž���¾��ǿ���Ŀ�������û������������������������������������������|gJ`l`b^dggfceebecacccacacdebbab`_ddffefijhklkmpmkggjjmpnlmmnpnljfgegjmkjjgfeheb`_abdd_`dddddddceffhhifffefeedceijjlkhjjjjkjjlnoomnnmoollfZLYehikggfd^WGW[^aaeed`gggefg`ecffffdiqxN;8;>AMWjn�MopNBQnhgfcb^C,bV>888910g��Xq{ZYHb2H>??=;.&Y������W�tJ>F�u640`YA=:<AMM`m?445:BIlZ_�Y|N+-HE@p|;8.,Zlq�xdJCa}�����8IT?:=8:=@Qy�����v,]iZbnn[I:cec][ckgLck�����_GMF���YdoN\wnj]NgPBDB@E8L[NGEDFC6O^^lcUZXKNLIKLIH<b���������������������������������������������������������������������������������������������������{ouwvwvx��������������������yqosqEG^e��t��3VPVTOE>B<^������?IVTQMNQS?Ealkkoj?RG75Mh: 28^hi__n<.}w816OK439G���������j������bwzunT[k�����h$ov<5CDKP1'225=<1+E��NKq[L;)Y�����G*Cu}QAKE?L^LBAJo������<V�����f+\��{��e8P^dipN-9848VK;AKg��ls�KD}ckx}>?94M;.12Hduy|}c4WoscQ:1--.( 56,)+-/3;PcedaG,12-*,-/+6R]ejmoqsrtuutttrprurjB+2+"8",9%G "ECT.T'O5(2DXGCF@4) %"%%&&&))%'2HH8,%&'$""*0&
+%2=/",-*1=5(	����������������������������������������������������������������������������������������������������������������������������������������������½���½����������������¾�����������������Ŀ������������þý����������������������ÿ�������ſ����������������������Ŀ��ÿ���¿��ȿ���ý���ÿ��Ľ���ſ�������ž��¶��~���������������������~dMg}mljjlljijifhffhdcee^_a`Z\\\\ZZ[\X_[Z_[a`aa_ce`dgfbcbc`^deeeecaaagffd``b`abb_`abeeeffffdfiffgihgghhhhggeegggghhikijljhiiiijmmllmnonkjfZLXejgidffd_XKW[_b^cfg`fiiefiadafgedcirvK:7:>AMUhj�EQQLHUnfhfda\E,_W?:::9//c��Vpp\[Jd2H@>><8.*Z������V�jI=D�v040cX@=>==JNah>523<EIhZ\�z�J&/JE@sxC8-(Vqp~�ydGA_������@IP@:879=DT}�����v,_iV`feVI7djf\XajbIdh�����XFLU���i_lNY�oh]MaJAIFBG7TZLFDDFA5L``nlaaXJLLJLMIH>]���������������������������������������������������������������������������������������������������|ksuwuwy������x���~����������yliltCB`d��bx�2QO_fIXFB6a������=KYWQSUVVGIcjkkkc=OH94420#15NWRFYj:-u{B04QB239H�ö������j������fx|wnQ[m�����b%uv=3ARRE2-823651*F��JJq~^N;)V�����E(?svM?KEEbnKB?Fn������9Y����e.]��d��e9O^biqM/:649XI8BKj�����ID|�srx~==9/H6030Jgsz|^4TosaQ:68;:-#65-)*,/2;Iab^c@-/3/*+-/+9P^gjmprsqprsttssqrtsj>-2-""$'".(
+)2C]JNT>5*!&!#%&%'+)'*7ZT:,%$%#!"".:@/$1724:5&
+������������������������������������������������������������������������������������������������������������������¾��������������������������Ľ���þ�������Ŀ���¼��ľ���ſ���ý������������������������������ý�������������������������������ž����������������������������ɿ�����������������Ǿ�������Ƽ��ù������������������������~fMk|spqqpoqnopqpmlmmlmlkkilhghggfdfeghhihhgdedefhfggihgbaebb][_]_`\[[Z[\ZUQQUSXXUX]Y^_\\[Z\``__]````ab`_bcdbccdfijhggijjjkllklnqpptsupoqrdQXhkhhgged_ZLWZ^a_dge`gihffgddbffdcchq{M;9;?BNUgn�HMLLHVoghddbbD+cT@<;;804h��UZ]Z[Qc.CA>==9..]������`�gJ>E�v341d\C@DCDNOcc?754;EPg[QtzjA)-GEFjw>90+Vqky�uhIB_�����<LS;8668=ES{�����t(\iXdjlYH7dhf][dmbLbj�����YERX���qdkOUjgh_QgGDMKAN4SYLHEDF@4LZ`md_]YJKKLNNKI;^���������������������������������������������������������������������������������������������������vistuqtx�����}x���x����������yj[dsBAaf�����1TMgqeoN@8`������7MYSKPQQVGJdkkki`APE;5211"/5RIJ?Wm8,vx:13D>23<F�÷������m������gw{wlL]m��q}�g#ts<5IncH40967<;3*M��MIryhL:'^�����G&BtwLCJDDcdHB?Il������9W�����b.\�~p��`6N^`anM/;539OH<AHh�����KF��vk{~:=81<5010Hhtzz�_3WoqaP7;RRW<"51,++,15;G`c^_<*03,)*-.)9O]gknqrsqosuutrsqstpi;,2*$	 *2>cKQP=4+!%$#$$$&++'';_R;,%"$$#
+'-)!$7EH1$5>4032%
+��������������������������������������������������������������������������������������������������������������������������������¿������������¼���¾������������¼��Ŀ��������ý����������������ǿ���ƿ���Ŀ��¾����������������������ÿ�������ý���¾��ƾ������������ǿ��������ÿ������Ŀ���������������ɿ����������������������������bE`mggijikjjnnnmkkjkkkkkkkjjjjmkjihhjkjklmnnnoopsrqutrrrsrqppmmkmmlgjmfddbb]^\^]\^^aba^^_\Z_Z]YVYV]\YXVSWYVT[Z[\ZY]^^bacecccchffimlhkeefi[IWgjjlkiee_[IV[_a`dgf_fffgggcdcfdbcchryJ<;;>BMShn�FLMLKYmgfcdcaD,aS@<99803e��Yaa\TOe2J@>><80*_������[�hL<B_J325`YGF`^HMObfB665:DLi[b}WwO#+GBFk�>71)Vokw�sfL@`������7KR>7798=EUz�����u.\gXZhjTI8cjf\YfpgLdj�����WEKI���cblNRX^eZPhHCQMAH8RZKGCBED6NX`spUWWMMJMNMLI=_���������������������������������������������������������������������������������������������������snsuusrw�����ts���w�������~��wd\em@Dag��m�4PPWYcgKB5b�����9KYSGJLKNCKelkkka<PE@=:99#37mbVY[m3({r>24C?24;B���������j������ex~uhO^o��ry�\ru<7JjZC3 0<HUYE3)J��NIl|cO:*\�����C'FuxKAJCAcaF@<Jn������=Z~����[/_�~y��Z;R`bfoN-::69HC>BHl�����FG��tn�{<=95I9104Ggvyz�\7XttbP98RQ:-#55./..148BNZUS4(/0,*)/.*;O_hknqqqopuvtsrrqqspi=00(!1)#-%(!#%&""#$$!	*2C_DTS:3)#%%$##%'.-++=gW;,$#%%#!5I;)
+&9JK3&7=3+++"	����������������������������������������������������������������������������������������������������������������������������������½��������������������¾��������������¾�����ý���������������������������������������������ÿ�����������ȿ���Ǿ���ÿ������������ÿ����������������ƽ���¼����������������Ž���������������������������dH^rfgabdfhhjjihgdedeeffegeiifhffggfgfghjklmnoopqqosstqrsuqspqqqoorpqqqprrmnnkokkkjkjkilpkikjhjjjgghffgfdfeefegfecaehhfdaedggffgffefg]ac^TKWhjiljggf_XJV```_dgh]ghiffhadcfgdedjsxO?:;?CMThm�GNKLLVpihfdb]@+cS@=:98./f��]rt]YRc3C??>=:/,l������Z�gK=Ak\722]UFInlKMPdhB877<FMiXc~N{Y(,GEEr�?71*Yrq|�qcH@^�����5KP>866:>CT{�����v*]fXVZUOH<ejf[ZendLel�����X HML���^fiLNSZe[QgJDUI=C9R[NGCCGC6M[czmVUVMOKNNNLM>i���������������������������������������������������������������������������������������������������|vututsv~���}nv���u}���������u_]grCD]a��w�/MNLCUQ@@8`���{��=LWVLMLKNFLdlkkjd<OECOnen325cbde]j0(la8227604;G�õ������m������g��tfR\p��gt�b'pr89KRC9/1D]xnI4)L��HIn�\O9*^�����F'EuwLBHEB\fEA?Mq������7[�����[0a����Q=PbbgpM.;:::?>>CHo�����DE��wsw;<9:V;106Ffszz|V6XqtbN>@LC>-"6419;2357?IQOG0*/0,*,00-=T`jmmqpoqtuurstsqsusj:-/)#jN4e)].'99S65074,!	 *3;ZFXd>1("%%$#$$(.197@[W3*&%%%!
+  7NB))@Z]9);B413/$
+������������������������������������������������������������������������������������������������������¾��������������������������½��������þ�������½�������¾���¾���ú������������þ����������������þ������������º����������¿�����ÿƿ���Ŀ����������������������������¿����������Ľ���¾������ǿ���ǽ��������������������������}eNf�mmommpsnnpmjjffehfcdcbaccb_]__`aa^^ba_dbcffghhgfhiijkfchhiffghiiikieiiljkhiiiiikklkjhjkkkmmnmnmllkmmlllkkklllnmopooqpopqstuqoqrpolknj_PYjkjkjhec^YHW_`__dei]hklhgi_cdhigdchq{I;9:=ANSfq�HMLKKXpefhdb^C-eR@;:98-5h��[uy[WMf5CA?>>:-(i������U�dI>I�p212`VFHmjFNNchA757<CKiWIu�uB*,FDBw�@5/,\lj~�ygLG`������.MQ=968;?FV~�����u(_fV]f_SH;dig]\eqfIdk�����VINZ���ZdkMPRXe]RdIFVG>D:Q[KECCFB6L[`}cTTSLNKNMMLK?k��������������������������������������������������������������������}�������������������������������xtrttst~��{pjz���qt������u\SftCD`f����/PFOizlF?6`������;JYXTURRVCJakkjkd=RCDz���J 56bNfZXj1'XR7422017<D���������o������g��pfQZm��bu�['us:7G_UF/2CeyaA3&N��LPq�[P9,]����E)FuuNAGE?SgIC@Nr�����>X�����W+a�|z��R9Q`bhjI.:CKO\MBBJn�����EA��qc{x;=8;Q9/16Ifuyz|Y5Vqp`M;H]aoB $82.<>4148?JPME1*02.*.00.=Uaimopppprtvtssutttrj:.,(%f\Gxl1(D:rRN<S^C#
+ *3FX>X_;2'"'&%%%%'.9YD>DE3*%$#$"	""=UA'.L_lC"(9>7672$����������������������������������������������������������������������������������������������������������������ý�������¿�������¾��������Ŀ�������¿���½��Ŀ��������þ�������¾���ÿ������������¿��þ������������ĺ���ž���¾�������ý��������½��ÿ����������������þ�������ƿ��������¿����������¿Ǿ����������������������������|fKbzpqpoqttruzvrrstqoomoiilnljidfhfedgghhefeefeghgfcdfeeheffb__^`\bb_Z]`bYWX`_^`^aab`dbacdbeeggihkjiklljljjkkjmmklmnmlmnppqqrtrpppqprppppdNZjolkhgdc^\HW]_`_cfgakjjggf]ddhjfdciq|J:::>DNSfn�GKLJJYnhdfeb_C-aP?;:;802j��ZxyY\Oa4FA><<8-)h������R~cH=G�o.25aVDDb`DNKbbA745;CLhXQ}�m>*,GB?Y\98.+Wtk|�ubID_������.OS=8768=DT�����s,^g\_moWF=bjg^\emgEek�����X#JLU���`hlRSU[dZTdKCVH>E7U\JDCEFA7JXe~aWVRLMIMMMKI@h����������������������������������������������������������������������������������������������������vtstttv~�vrkdx}sssust������ub[fmAA`h��o��1QJKLQJA?<`������:MYWWTVXVFMgokknf=SFEx�s�?49mda^_j1'[S<61-,2;>H�Ŷ������m������k��qjK\j��Wp�X%qu:<MjfQ50>HJA;4(K��EMfsVN4+`�����B+BsuODIDDWfEB@Lq�����y<\�����X2b�~w��P:R_bkoN0<CIN^PBBKq�����FC}re�q9A:8J:/26Kgvyz~Y7Uuq_M6?RCjE$531@:1169@LQNF.+23-+,//)=Ucimnpqqqssruutttsspl<-,($hH9pk.(I3BXd(7N+ 	 *3LM9CX=4(!'%%$&&(0@cNAOC5)&%$%"
+#&'(GY=)	4YfnG#%6<5782%
+�����������������������������������������������������������������������������������������������������������������������þ�������������������������������������¿���¿�������������������������������������������������½��������ſ�������Ž���Ľ���½�����������Ⱦ�������Ŀ�������ǿ��������ǽ���ÿ�������ĸ������~���������������������~dCXmiijiilmnoqqpppoonolnnmoopommlllklmmmmmlllnqlloopqnopromonnnkkmnkljkihdfa_a^]\\[_``\_`][^Y[^a`bbdddc__aa_`ddcdccgffefghjljliiikllmmnni^KZionpjgeb]XHZ]^]_cfi^ilkffk]fcikfbbhs{I9::=BMRgq�LLKKLZmifeda]B.cQ>:89900i��Xxz][Pc/DA?>=9*(k�����NygH;@eU213aUDFsuFPMdf?756:BLgWBn�r>+2FCANG=:.+]qgprg`EDa������0LP=9678>DS�����s,[d[`jcQD;dhhedikeJfk�����W!IGI���_cjSVW]dXS`JDVF@@9TYKEEHID7NWZiZSXTMLHKKJKI;e���������������������������������������������������������������������������������������������������{tuuttuw}ypkkwtkm�{z������xidioDB_j��t��1QJMNPE??>c������7L\daa`]TEKilkloa;PCAexr6$2=\^gb`i/*fb=2../2=<E���������k������j��olYZn��Zw�W'pq9;DIM>0.=LO=;2'F��>MWYQL1'b�����G)CzzLDIEE]aF@@Jr�����y;]�����S/a�x��L;S^ejkK2::3:ULECMp�����?I~ysr}l8>:4H9027Jhuzz~V5Qgh[I75XY\3&430=6/17:BNUSL-+22-++/-*<Tejknrqpqrtuuttssvupj?**'"n5)nj,*J75/g#3T&	 )2K^CBJ=3("&##%'&(/BdUIXI6)#$$$"
+%-=/?M:(		"7WbgD $3=45=4%
+������������������������������������������������������������������������������������������������������������������¾���ÿ�������½�����������������¾��ü��������»��¾���¼���¾�������ƾ���������������������¿������������¼���Ľ�������ƾ���ž��������������ǿ�����������������ý�������Ƽ���ľ�������ĺ����������������������������~fDWlcab_\dfgdefffggifhgihikknkjjijlnmljlkllnloqmoqqrursrsqpqrqqopqqorusrrnpnmmmmkmkhjlllmjhhgijkjjhihbca^^]ZWZ`^aZZ]_Z][\_``aa^^_bcbb`]]_TLYjmknifdb]WLXY_a_cei]hkjfhk`edhhecdiqwK89:<BOWhr�FNMNLYikgedc^G.dR?;887/2l��Xtw]ZWb1GA?><6++m������Q~hK=I�n126cUEGiqGPOdg?777;AIhUP��o?)0HDGnmC9--^niine`DC`������4MS>7878>DW}�����t'_gWbplTD:gkllnoncNdi�����WKPZ���bfiPX^ahXT`LGWE>C6TXKFFHIB4MYWd\WYVMMJLLKIL<g���������������������������������������������������������������������������������������������������xpstsstu{~xtpszpil����������veZgo@E_o�v���2YLTdeXB@?_������8Qc{z{�lRDNgmmlnb=QQQx���7#1<HI_J^h/-mb=1/167C@N���������o������l��miQ\n��gu�W'sr78DITC-.Gb_G=3(G��;LY`TO2)d�����D+HyxJBGEJoiDBALt�����y;]�����S2e��z��M8S]dknN2?A=D`PCCJo�����AI�}ki�h8<8173/06Mowy{~S4O]^VI97IM=(&840>:446:BPYWW1)32,*-0,+=Tckjnrrpoqtuvwuvvwupe</*'#l8-pf<5>48$N#3V#
+ +4DXAX^B4'#'$&&'&(1DdURYG6+%#$#"	%7\;<77(	
+$6O\gE %3?63:1$����������������������������������������������������������������������������������������������������������������������������������������������¼���þ��ÿ���Ľ���ü��þ���ý������������ǽ�������ſ������������þ��¿��������Ż���ƻ�������ƾ���ż����������������Ŀ���ſ����������������¿������ľ����ſ�ƽ����������������������������|bG`tgfhpolkifgghfhhiddcebbbad`bcfbdfecbfgfeghiiehikmllnmjijmnlmmlnnpqpqqonpnooppqqmnnnmnooonnpopqpnomikllhggfeegiiijggfeeecccecdcdc`bb\[ZQS[ijgkhfca]SJUY]b_beh\eeefhh[cceddddjqxO;:<>BMVis�CNMLGVmjegcaYD,dO=<::6,3k��[||\YZa6HA@@@9-*p������P�hK=B~n513\VFJmgCPOdkH765;CIgRFcrf>(/HCDtt?9-+[pm{tbEAd������1MQ:8768=BT|�����u&`g]ekcOE:egknqrmdHej����SKK\���ZgjSYakmWUdKFVF@F6[ZLGEFGA3LWZe\XYVLLKMLHHJ:k���������������������������������������������������������������������������������������������������ripssqqtxysru{|lek�����~}��sbZhrAG_i�����0VKhe^hHB>b������4Q`ttz~\JAJjnnlm_?Ujo^��k&"28JU[9_e.*m`;-/7>7FGO���������k������l��khQ^m��kx�S'tt4:T^gJ+.H^UC=1(J��;L\jXL3.c�����D+JvxIBFDJlgBA@Ou�����|<\��~��S3h�yw��L9R^binG/F]S@TH@BKl�����DG��kah9=7,*,.15Kluy{Q8K^]WJ59QTH,%75/CB4249BOVTV1+11+*..+,=Scklprqqpquvuvvuvwuqe:0,'$J.(U9H@&64!F#1I# +4@G?Ma@3%"'$%%%')1GgMFaI6+%#%$!&5\>B@6(	!/DVmA $7?2*2/$	����������������������������������������������������������������������������������������������������������������¿������������¾�������������������¾������������Ŀ�������¾������������ǿ�������������������������ü��������ž���ƿ���ÿ��ſ���ƽ���¾�������ƾ�������ǽ���ſ�������ǽ��ſ��������Ⱦ�����ľľ��������������������������}aGbuiir�{~xqrtppqrpnoqokklkjjjkhiihhebcddecbegd_`deddgfgdecegedcddafgfkifeciiklmlpkkjjjklnnmpoponnoomkkjlmlkkklopmnoolnpnnnllmommmlmmnhhh`W[jniiggea]WFSY^a_cdhaecehhh_bdecdechqzL:;<=CLQhq�CNPLIWpgbgdb]C+dQ=;;;8/4l��Zxv^\W]3CABEB<.-t������Y�gJ>J�i-03^UGN|lCOOjwJ465;CKiYINFGA(/IFHwu<910Ymo��wcCCc������0MO>9967=BV~�����u*ag^dmaPD<gkkgfjqeJeh�����R 
JH^���fjjQ[fpkYTeF>@>@H9ZZLGEFE?7OX]reXVSKLIKLJJL;v�����������������������������������������������������������������������������������}��������������tktstsstwvsrx�wibi|���~���}��r^]gp=I`l�~{��/PDdolcAB>c������7NVRLRMDFDKlonmoeDWop=e}h-"2=`goR^f,.gX2+-782=DS�¶������r������q��liV\q��d��R1wp57JRE@0+CXK?80)H��ANaiVM/)f�����@)J{wHAGCFlgDB@Pu�����~=_�yo~�O7g�x��N=S`aktI7T�pCOB?BLt�����DK��wp�j<=7,(*.17Lmz|{~K7JY\YK4?ZPk6*63-CC3136>IOLG*+01,,-1..?Vdjlostuqqtwuvxuvvvsd61+'$ % ',"*4  *3>Q@O]=3&"%#%&'&*2JcJMeH6*&%'&#$7O9JK4'	%7Kj<%4;1,00$	�����������������������������������������������������������������������������������������������������������¾���¾��������¾��û��¿��������������������������ÿ���¿���ÿ������������������������������þ���ǿ����������¿��¾�����¾����������ǿ������������Ǿ�������ž���ƾ�������Ⱦ���¹�������ƿ�������Ƽ��������������������������{cB]ogelyxxrlnonnnpnmllllnoonnomllkmmjkkkjmommnpmnmmklmmooonnplkiieeefggcdaZeacdc`^`caefgffhhd`bdfggfifefhihijihjkkklloooopppnlmllmmmnonnnfVZkledfgca^UFUY]_^ede`dddfgi_bacbdfcgoyM::<=ANUep�GNONIVmfcgeb_B,bP><;9703g��Unn_\W]6IDLWP:.-q��t���b�nF<F�o,14^TEI�pCOLhxH7557ALfXe|usM$-KFDvr?9.-\rp{~ocBGe������.MR>9::9=AU|�����t,\f]bbcQC;ficZYfobKfl�����K$PJR���]jfN^jmhUWgC;;;@C;VWKEFGE@4NWawaUURKLHIIHIK>u���������������������������������������������������������������������������������������������������|sqprtqqrnnlt�pjfjy�������||�ta\gp>I]k�z���3WGbd[bBB>b������5MM?255;CAJlooorb=Ttv=lx�7"2=jYbb`d+-`C/++7508>Z�µ������p������s��kfV[u��h{�L7yp65<<4K;*>aaE8/*C��=MgmRL0)a�����A,MvrIAHCKofCD?Qw�����y9^�xn�K5h��y��P>S]`hpG7Q�n7GICCGs����}AK��}s�g<<5.))+05Kjwyz~O4MWZYJ6?WJe4)96-@@5357?IMJC+*/0*)+0,-?Wbimoqstsrtuuwxvvvurf3/,'# */4?@STA3'$$#"&(&)-JZI@YK7*&%&$!#=]DMR6'
+#-=^<%5</174&
+�����������������������������������������������������������������������������������������������������������Ŀ�������¾����¿����������������¾��ÿ���ƿ�������ƿ���¿���ý���¿��¼���������������������Ž���ƽ���þ����������������¾��������½���þ��������������������������������Ǿ���Ļ����������������Ŀ�������������������������zcBdtghioppokjillllllfhhijkmlmkihfhhikilklnnommmnmlmnppruqqrrromoonoqpspqqpnnlnmklijjkjhjjhhhghhicfecdfeb_cdffe`afcdfgkllkimlkijgghhikkkmldNZllfeffcc`VKV[[_^fefbeddejj_cbddeecgq{K8:<?ARVgt�EOPQLXmgcge`\D2fO??=<8.2j��_||_`YW7GJWe[<..u��~���W��L=K�w.28aUEK�pDPNjmA7559BLhTa��{F$/JD@ru>901_tq��weBIc������,NR=999:=>[|�����r,ag\bbdSA:efc[`jpfEek�����M JNe���bfeNbjgfZWgE<;<?B6YXJFEGG?4NYbt\XXULOFIIIJJ>r�������������������������������������������������������������������������������������������������~vqprspqohgkt~me]hr}������~�racmq?L_k�wm�{0VGkfkmIC;e������3KI:233<DBJmpops_=XyuU}t|5%48bqvY\b**[@/((376;@U���������t������m��lhU^t��ey�J7{q07FGC]<);KO>70)E��>QlsYO1.g�����=-MuyGAHCEQK@AAPy�����xB`�{k�M6j�}~��Q?V`cjnC9O{W:UQABLs����{CO�����f9<4,(),.6Njvz|~M3MY[WI75PZG&*851?=5356@LNKA-)//**,-*+@Taimnqststuwxwvvwyyuk11,(!
+!+0./9SQ=3&$%#%&&').C^N9KE5*'%$$"!BeGF_9(!,:Z:$7@22:6%
+
+������������������������������������������������������������������������������������������������������������������������������������������������þ������������Ŀ�������������¿��ľ�������ǿ������������ý���ʼ������������ÿ�������ý��������þ���¼�������¾��ľ���ſ������������ſ�����ÿ�����ÿ���������������~���������������������dFavommmqvstrrrrsrpspmnmmnnmqnkhhefighfgefjgghhjkjiljhkkjhjkjhghgfgmjjikjmmlkomnomnnnljmmilmlmnnknmlkjljklikihfggdeefhhijillifebadfddcdgf`LXjjdeeedb^VJWZX`^dfg_gdeehibdcddefbfozJ;:=?DNPdt�GQPRKQo^`fca]?0aPBLM@8.0h��Vv{W^TT8EF`fV;.-u������Q�R=J�s/05`SEKte@RLhqE5569@LdRY|zl>)1DCFys;8.+]pj~�rb?Ga������*PQ<9878<?Xz�����q,ag]gqjUC<fge\_nk`Gem�����MGFX���bgiP^bdfZQfE=<=?A6]\IFEEG@4Q]co]YZUKMEKJNNL;m����������������������������������������������������������������������������~�������~�������}�����}tqrrsqrnefhnggd`mqqv�����}��ukkqp>J_j�tk�t/TGank]@A?d������4MJ<103;DCMkppptg<Z|y���C$2>gpm_^b'+^B,(+3636AQ�¶������q������l��mjNau��`|�H:xn45AIA@-(;EF<71)I��@Tgk[L..d�����;+JurF=IC?<=?>@Ow�����y?]�{k~�L4j�y{��OATadkoD5FX9;J>>AHq����z?R�����j<=5,')//6Rmyz|�P6N]^VG78GH8&)642653368CNPMC)*00,*+-*+CUbjnorsuttuwxxutvxxte0/-'""*/-.7QQ=3&%%#%$()+.;`PH\B7+&%%%!#=VBGQ9*
+!,=Y9$04,/97$		��������������������������������������������������������������������������������������¿�����������������½��¿���ý���½��������������������������¾��¿���ÿ���¾�����������������������ƿ�����Ŀ�����þ¿��������þ������������ĿĿ�������������������ÿ�����Ŀ���ɾ������������ſ������������þ�������¼��÷������������������������~cFXjjjggkpnlkkmkkllnllpjklnlllmmokmqnkklnmqnlnnmpopnmnlkkijkkkkmjjilgfhhikjhfiddehicefdgccihiijkkllikkllljhjkkmlkmkjkklllmmnmkifhhhhpqlkkdRZimfedecaZQIUZ\b^def[feeehhcdcddfgcho}N<<?@ERTgq�MOORLTrcchdd`C5bPKhkF8/2f��Xrw`][R:G@apP<,,v��z���]�iF<Da2/5cSDD^VCPQmpE545:>OeROt�WA%-CBGx=8-+`vo��qaCIb������+RP=9989:AY}�����o)`f[_edSD<cig`cql^Kjo�����L"LEB]`[PkgO[]`cVVeC<;:?D9\YIFEFE>3Ta__]`fYLMELN_iS<u������������������������������������������������������������������������������������������������zoptrqqroihhhgypjousv}���}��zsstp>H`q�u��x/ZERNOB@@Be������1OI;336;BCQjnqprd>]|rz}�;%2<ezl_ab%,^@-.1=902?K���������q������p��ojS^r��]x�G>{r438:41)'=Z]G9.)H��>QabSO1+f�����<)KuiC?HB==???@Q|�����t8a�|m~�J5n�y���L>SacipE6@A8<M=>CKs����v;P�����i:>7-)*..7NoyxzI5RkjWF6>X\T*(85/99416:DNRNA)+10,)*.,)DWcimorstttswyywvwxxwc0-+'!#+...1GJ80$&&$%&(',.5JABfK7*'%%&"#=S?7E8+!
+&1@S6#('#.85$
+���������������������������������������������������������������������������������������������������������¼������������ù���¼����������������ý���¼�������Ž���ü���¿��þ���ÿ�������¾�������ƿ������������ǿ���»������������Ƽ���������������������Ŀ���ľ�����ǿ�������ÿ���ÿÿ����������»�����������ķ������������������������|cH^iffddgghghhhhijjjigffhefebgihkhgggeegghjkkllkmnnmlmnnomnnnpommlnmjkmkllkmlkkkjjkjkihikilmjlklotnkmmljlgejjhjjihijhghklnnmkhfefhip��~uuhSZjlfddfcb[SHVY]``dgg^hfdfggadccbceafovG:;?>ERQcr�ENPPNRqdcgca[B1fQIkjK:.4h��Wmua[ZQ;FEfoS<./z������\�fF=BjP0.8cTFIufGSQjkA867;@ObTY{�b@&0CBD�|;8-/]ql�sb@Ic������/MJ>9999:;Zz�����l&bf\fnhQB=hihfkol\Lil�����H KE<=:>JmfKXY]cXY_C;;;AI6[XLGDDE>1Scnk_clZMMKMS`cQ<t����������������������������������������������������������������������~���������������������������zonronosolhdei�voosux~���~{�yjnuo@JZp�m|�s.WGG@LG@AAg������.MH7235;CBNnqqprbB\{om��+$39McaD`]'1aE-/49813?N���������q������n��lfP\|��|��E9ys344223((;EF<7,(H��@PllRO0+d�����>.LobC>GE=>>?CAP~�����s<`�yt�I4q�|���KAV_ahsD5=>4>U?;BLo����vBS�����d9=4*)*,/8Pnwxy�F3UtrXI6BG<h4)521HC416;GSTNB*,00,*+0.-EYbhkoqstrtuvwxxxwyyva0-*&"
+
+$,/00013/-#'($%('(*.,*-DbL5*&&%'#&6O</=6) 
+)6DT5!(21!
+���������������������������������������������������������������������������������������������������������¼�����������������½���¿��»�������½���¼�������Ŀ���Ľ���¿��ž���ƿ���¿��Ŀ���ľ��ɾ������������ƿ���ý���ľ�������ž������������ſ������������ž���Ŀ��ż���ǿ�������ȿ����������ſ�������½¿�������������������������}cLjwmklkmmplnlnpqpopsqpooqomjljhjjggifdddddffihgfeffhhhhjedghdcdfefgfcdefihkheghgjlhlnjkkjmolkjmptpqqppspnoqnnknropnjjkmooopkieeddffw~tnlcUZklfefgdb]VKVY]aadffcidegjkacbbcbdbhowN<:;<DPSew�LQRSMLqdegdc\>2eNKstJ<-8e��\wudaZV8FJikT<0.{������[�eI>J�a-/7bTGN�iBSRhlF987=?PaQ^gQhE'1FC@�z68-0^po��tc?Gf������/ON>889:<<[|�����m*eeWaf_PA;eefehnn]Ign�����D"GF:;;<NicMWX[aTYeE;<<CK6][LHEEF@4Sbom\`m\LLMPSb^O=}��������������������������������������������������������������������������������}|���~����������|spqppossujcak�mdowuy~���yy��xaevq?H\t����x1VDWiuhGC=d������.NI:324<CDQkqqqu^A\yp����:$2:C@I6^]%1nS1022348@V�¶������r������s��mhS^|��m��E={r447863)*>QN;6-(J��?RnmVN3.i�����;*KrcB?HF?@@BEDR������qBc�}z��J5p�����I?XbahsB4983>Q?:@Iu����rBR�����b8<3-++.18Orwzz|H4Wrp[G8AB5M,)74/B=435<HXXQG()01-+-0//CXcljnsssstuwwxxwwzxwd02-&"
+
+(4875100/."&'%&(((-/)&+GXH6*&&&%#	%6]=AO<* 	!/AKY6$-/
+����������������������������������������������������������������������������������������������������������������½��¿������������þ����������������������¿ÿ�����þ������������ÿ�����Ŀ�������ſ������������ſ������������¿���ž¾���������������������������������ǻ���ƿ�������ƽ���¾�������ɿ���¼ſý����~���������������������~aG]jdccceefeghikjhihlkikjikmllkkkkkmnljijiijjkkkljijikmkihijjgghghjgghgghiklg_a_\]\`\^```_c_b`a^`beddehihgiieggijhklllkijlnqomkkkmkinkhhfcT\lkecdecb\TLWY^c`bdgahceehi`da_acdcgnyN<:;<DRWgw�NRPRLOpcbgfc]?2dPJtwI:/4g��Z~s``[V7IEX]B:,,{������_~cH>I�j/07bRHO�gAORie?787<?QcLlwA{L%2HED~q27-,]sqqb>Df������1QN<;:9:=>\{�����m+dcTQOPMB=hidZ^im_Dfo�����D$GE<;<<NjcP[b`bRYcD::<BE8]ZJFEFG@5OXVSZV\WKKKOTbgP;{����������������������������������������������������������������������������������~���������������ysppqqqs{�ncds�kbktty����|z~�vkltp<K_r����|2[BKXWJ?CAc�����+QI9434:CDQmppqv\@]}qx���0%25GM^K_^"1}`8554358?T�¶������s������p��nhP\w��Q�AH�q28CQ?2)(D`ZA6+&J��=RijVL11f�����8.Mue@?FE@BBCCBN�����o?`�|��I8r�����J>WbbiqB1765>T>9>Eq����pCS�����]9<3,**038PnwzyzH6XrnYF9BLG=&):5/C:426;IYXRH'*/.+,-1.-BXcklpsrsrtvxwwwxwxxud-5-'!
+
+,IMNE71/0-#$&'()*)-0('/LbK4*&%(%##*=/7V?+#6DLZ7!!'$	������������������������������������������������������������������������������������¿��������������������������Ŀ�������������������������������¾�������ý�������ľ���ÿ�������Ǿ�����¿�����¾���¾����������������¼������������Ⱦ��������¿����������������������������Ŀ���¾��ɽ���ü����������������þ��������������������������|fDWea]]^^^^`d`a^_aa_bddfdefgegdccgddcbbcggefhffgffgfegghghhkkhghefhfegegklkkkiihkkkkijihhhkjkilfdgigedejjghgaa^_cbcgfhhfdfhikighjkjknljlkaO[knfdcdbc]RJWZ_e]degahecdgibdb`_cdcfmyN;;;:CUWizMeTTNSoc`jhd`@5eTIrsF9-7h��Znc`_W;IN}�Y<./�������r|eH=J�d316cOEH\JCMPhb=778;BRfKXz{s=(1FDIvf47.4\po}~ocBGd������3RJ@?@;>B?^������n,caRONNLB?gjcZ\ik^Hhm�����D%KB::;?OlcOeqgcSW_C;;=@D>[WKEEFG=3MY\Z_\`YNNLPTadR;y����������������������������������������������������������������������|�����������~�������������spopqrqqz�jbftzedhnrw����|{�uosqm?J^r�u�~1XFD@NFB@Dc���s��(PH:435;DDTmqoqv_Da�uO|�o$%1:^n{Q^\!0�c<755458=J�ö������s������o|~mfX`y��b��BJ�p/;LbG5(+=FA:4+$J��=ThiUR00d�����5-NufBAJEABFFA=S������p=c�����B;q�����G?Xbfmo@/653;K:7>Gl����o?Q�����\:<4+**027Vmwz{F8YypXD6@blY*,841G>426;P]ZVL(+/0*+,0//E\ffkostustvxuwwwxxwwj.6.(!	1_fpj=4461$&'&'())-/*(+O]F3((''&$#!!%5W;*
++?KTd:$,+%	�������������������������������������������������������������������������������������������������������¾�������¾������������û������������þ������������Ľ�������ž���ľ�������ƿ���ÿ�������þ���ý����������������º���¾�������ž������������Ǿ�����������������ƽ�������ż���Ƚſ���Ŀ¿������������Ǿ����������������������������|cDYgffdbcccggeecdd`]gjhggiiiiedeegfefeddgebce_deaac^^`ababbcffgdfcaceeeegfdb^`fiefjiklkklnlnnlllpnprmoquupnpokjkhjkjhikkijhghgdcdd_`dgghdTJ[jljkfebb[SMX^ad_egjdhhffhibcceeddbfmxN<9:;CUTiz^�nWNRobd�tb^@3fLHfoL;.7g��\xqc_bU;HKlwS;..~������t�gF;>J:307cMCHpZCQPga=778;BQdOYoj_=&1CDK|n88-3_xt}wl_?H_~�����0PQY_]]]U@]~�����l+fbRWWTL@<eha[_jn[Ihn�����B!A=:98?Nk`MjxjdP[aC<:<@@8]WGFDEE=3Qbpy}~]MOMOTbmM8y���������������������������������������������������������������~�������}���|���}���~|���~�~�����qqpqprppu�icgme_dbkqv����yy~�le`gl=H_o�{n�x2ZCYlyhC@Fc���t��+OH9445>DCTjpqqu_@^������4'2>ZJLDd[#6hC776787<S���������q������lwzleW_y��`��B@�o27>HD=.+59653,&J�<LWYOO.-g�����8.KocBAIFBDIIC>V������l?a�����<=u�����F@Vdhoo>05438A;9?Hk����i7P�����];?3*()/27Vlwyz�?9YxqYF78Td<&-730G?636;S][UK)+/0+*-0./EXeiloqststtvvyywwxxsc,:/'
+.WFNb<?;C:&&'&&&)*.0+)*FK</'&'''$	"!6P<*2FO\i86FH9)# 	
+��������������������������������������������������������������������������������������½���ü��������������������������������������������������������������������ÿÿ�����������������ÿ������������������������������ļ���ž���½�����������������¿�������þ�������ǽ��Ǿ�����������������ž���ľ��Ⱦ���ʽ����������������������������|dFawmopnmnoonkjjklmkllfdbegiecdddedcegeggefgiiliggjjfhjhggfgilmhgffhjhlkiikkhiihghigkjijkmlmlkjjijkjggggjjjklhimkkijhgijhkkkkkmkkllmmklmgXOWhjjkiged]UMY_`cadfkaimmghg`cdhidebfn{R<98;DURh}{d��TLUp]f��a[?4gNGdnF9-8f��\zde`T<MDbkB:*2�������z}eF:77420:fPDJ�dBPTfb=668;BOeUi���E&2CAG�p<8-2^vq�m_?K]j~���s-T]v����o:`������m,efYiphO@@kje]ako\Jgp�����B$G?;:9>Pl`MftidM^^B;;<@A6bWGEDEF>5O`hmpv{ZIPNRQ\dM:v��������������������������������������������}������������������������������~~������������~���~�����qpqqppqtw�lfjlb`dclqv����{w�ha^el;K_t��z�w2[L^\_VGCAe������-OH6156=CBSkprrx_E`������;)5It\eX_U"6�fB865778=\�¶������r������nw|ofWaz��`��B:zn+5:CFH3+7ED63+&K�~>GMLMN22i�����:.KncA?IHFHLLEBU������k?c����?8x�����E=Yekrp?1633>L><AIi����k>V�����];<2+()/08Woxy{�>7WuoXE;EVbK(-613D?739>U^XTL()-0-,/1+/HYfkmoqsutstwyxxxvxzud+7.)!
+)RnmY?SIpY(%'&&')-4750.492,('&&%"	 8V:+5JTcn;QdbM;:7'	
+�������������������������������������������������������������������������������������������»���������������������¿��������������������������¿�������ý���¾���������������������ÿ���Ž���þ�����������������������¼������������������������������������ÿ������������½��ʽ��������þ��ſ���ļ��˿���ɻ��³������������������������|iI]wonmmmnmmlkjkkkjjjjhgefghdgfeeecbdeecddfffdacfeggfifdbaeffffdcbbad^`dc_bdbfhfggghkmkkjmmmllmlkomkjigikklikgeijjeedb^_adggfdecbeefhjhjiYMYllhhefec^VLX^_baeef]ipmehh`bdiigeaemzT>:9<DSVf}vg��WISq`j��_^<6bLNulA9.8g��ZxbbbO:JK��Q;,7�������{zgG94342.;hMFN|_AORge@757<CR_MV�rT9&.EA>XN87+1_rn}vha>I]iy���p.S_|����t3_}�����m*baYfkdM@;hje]dmpYLir�����>&RB:;;;Pm_NdrieN\\B:8:?B8cWJECDG?6OY[]]`eVKORWU^dN:r������������������������������������������������������������~���}|���}��~}��~~���������~�����mmqqplorw�jgkkdfgbhqv����zy~�d__fj=L]u�le�t1[b��{xJ@d���z��.PJ:345<BASlpqpt^Bb���~�i()6H]dr_`S7�b<743447=R���������r������ox|neWa~��U��><zn-5=EJI/,HRQ?3+(K��?GMMMN0/j�����7,Op_?AKLMNOTKCV������h?f����A:{�����A>Xekqq@/874<OB>AEg����g7S�����Z:=3+(*01=Wnww|~@6XunXD87434"+643=7416=T[YXR&),/+,-/+-H[elmorrttttvxxyxwyytf- 9.(!	.UmoL:Ucp])%)''&)5DFF@3...*''')("	 9J5' "=RZj|? GJSNNW_7
+������������������������������������������������������������������������������������������������¾�������ÿ�������þ���ż�������Ŀ���¼������������ÿ�������ſ��������¾��ľ������������Ƽ���ý���þ�������ý������������Ŀ��Ŀ��������¿��ǿ���ǿ����������������������������ʾ���Ŀ������������Ļ�������ǻ����������������������������|hFYuonkkjhhijjighheghijhdeffdeccfedefdgggiggjghfhhiihiijiffiigfeeggiecbddabbbdgdcdeijllijkjkkllgdgedgiihijihlihnnmkjjghgfhhfhhee`babffbccRO\mngkhhfd^VKX]_cbdch]fjighiaachfec_emvQ?;;=ESTf~{_�YLTsci��^`96gIRx{J:/=g��Vqma]^P>KIu�H;*2�������~gF96533.9cMELucHQTirE668<CObKk�~H8$.HA<=<86-0_vs��rc=I]ix���o0Sa{����t6`|�����m)dbXjqdP@<hjjhlpqZHjq�����?'NA:<<?On^Pk�kdP[\B<;=@A7dUIEBCF?1NXSYY[^UKOPXXinO8x������������������������������������������������������������������}���z���z~�}�{��~�����rprqqoppz�dglcdqwelru����uy�mhgkl=M`w���x3]l�����NDd���x��*QJ8346=C@Qopqpw_Bd�����p**<CDX[HcX8�^8434336>Q���������t������oxzogT\���j��=@{k/69AGE/,DQP;3*(M�?MNNNJ01i�����5+RpaECLQVWWZTGY������f?i�|}��;>z�����:BUckqq>0953C[B@?Cj����j>U�����Y:?4+))00;Wlvx}}?;ZwkWC4--+'.:44=3116<MQRVR(*..++-/,/J[fknpsstsruvwxyyy{ysd*!?3(#
+2XMUW:[gZR-&(&'(+@okmc>451.+)(('"	2C,".L]^r�> AjdJRae=
+���������������������������������������������������������������������������������������������������������¿������������Ŀ�����þ�����¿������������¿�����������������¿��Ľ���¾�������Ž���Ľ���ý��ȿ���ù�������ž���ƽ���¾����Ŀ�¾��ƾ���Ƚ������������½�������Ƚ���ý�������Ŀ���ü�����ľ�������Ľý��������������������������}iG\zpmmmjgdilmmkjjihhjkiihggdddcgfggeghghghggeeegeecehihgdcdefecdfljcdcdd_bbdfgddfiijkjhjkkmlkkmpnjhghjgdfccgfaighea`aefffgeegfigigghgfgdTL]lniokhec\SIY__dcedf_ihigii`cdeffdaelwS@>?CHQUe|{[��ONRp]f��`\:6dJR�E9.9e��MYYfc_RAJMy�O;.5�������}gH87633-9cNBNvhGPQjuE899<EPbJ^|f="3GA==;98./_ur��q`;IYgspv�o0T^}����r3a}�����l)ibUjpdQB?jkjgill^Iho�����?'L@:::>Lm^Ql�jdMYZB<=?@@5_SHDBBE?2RZX]]^_VIORWYlgO8~���������������������������������~�������~���~����������������~���������}���|���{~}}�zy~y~��tqqpqporz�fgpffz~movw����ty~�vssrl@M]w�yq�u2[i�����O8g���q��/RF6335<DCRmprqt^Dd������:.EbvwslgX;�\7355239@X�¸������w������nyzmgT]���k��;A|m*5:@FF..?JC52+(L�:IJLLJ-2l�����7,StdC@MTY]]^UFW������f?h��~��8;z�����:CXcjpq>3843?R@?>Dm����f>V�����V:>3+))0/8Xlxz{y?9[vnTD3,**&+811E72.4:CHMOM'+.-*+.0,0J\flnqsttstuuwxyyx{xsf)#<2'!	/VJS_=RTW]+''&&')>e[^l@:ABB7-)'&!"!!" 
"" ;cjk{w;@c\<R[X8����������������������������������������������������������������������������������������������������������������½������������þ��������������������������������������������������������ſ���Ž���������ƿ�ſ��ľ���Ž���ľ���������ƽ�����Ǿ�����������������ƾ�������ɿ���ĺ���½��ƾ���Ľ��ǿ��������¾��ɿ��������������������������jDRvokllhhhgijkjfgjhgijgiffgeddgggihgjhgjhhigjkjihiihjjhfggfefddefhheefgfbegffggfghekkifghikgfdfgbggihijjiiijljmkkjjifhhhfdffefhhgeffefg_RM]jolmiedb[RNZb_bafdfcilkghk`ddimhfaemwXCAJMMTUg~{Xo�UKNp`f��^]<8dJM��C:.9g��UYZgc\TBKP��V;.8���������fH96653/<gPDPubDQPknA879<CSeK`pcdB#5DA>=;86/3`zn��oc;I\ipjp�o-TZ|����s/d�����k'icWcjaM?>kkhbfom[Gir�����>#NB:9;?NoXPpcbPXYE;;<>=7^THECDD>2P\bgffg[IPRVWokO>y�������������������������������������������������������������}���}|��~~�~�}~���}��~���~|~�}{���wronoqos{}`hrhk�tkotx���ux��vvvvl@L`z�r[�r3[m�����J:i������,SI:436;BDTloqrw^Ac���}�v.1Hw����kT!?�V857644:@]���������w������s|~keW[���g��9C}o*5<?C@,.FTM;3**Q�|:GIKMQ.1p�����80TtdBBNU\][`WHS������g>j�����8<|�����:AUcjoo91922:I=<?Ip����f=U�����U:<2))*0/;Zouw|{@<ctoRA2*))&0;14G=4359AFJLE'*/.+*./,1J]gloqttvttvwxyxwy{yti+">2'"
+/TjrW?ipsm+)'&'()9ffeb8IPZjI0*'&"	!,.00(" @doxym7JJVGAGS;�������������������������������������������������������������������������������������������������������������������������¿�������þ���þ������������ƿ���ſ��ƿ��������Ŀ������������ſ���������������������Ǿ�����ľ�����ƺ��ļ���ž���ž���ž���ſ�������ĿĿ���������ľ�Ŀ�������ǻ���þĿƿ��������þ������������������������������}gJR�ysqqpppnoosqoonnnmmlnjjlggiiiihefhffifddbadfheffecdffffggfdffgedgdddfgefghmihkkimljkkikmkjiijfgfjhfhhgedhihljkiihhgfgfdfegjjjigihiikfYO^jokmgedc[RLYdac^cdeciomggj_dejmefbelwVBDNPMUUg}{Rt�ULSr_d��Y];9hLK�|C;+8g��WZ[icZPCMT��S;-8������}��gA75641.=cUEFd[ERPktB9:;;@OeKg���C%7F>;<:6605b{n��pc:K[irdj�h*WY~����q/`}�����j*gc[ehcO@<ijf^dnnXFiu�����=$MA9:;>PpULo�bbP\ZD::;??9_THEBDD=0Q\bhigmZHOSTZbXO:{�������������}���������������~�������������������������������������~{�}||}}|}���~x}��}}~���yrnompqsyyaglikrhgkpu{���tw�zvwvp>M^y�xx�s4Ykzsr|�I=i���}��-RJ:416<@?Rjnoor\Dc�����Y&3Jy���iTA�_8775459>\�¸������t������r��idW\������;Kn'49=?<*.Ic]>3)*S�v:IIKMQ/0o~����6-Sna?BNXgncbWDQ������d;j�����7<~�����:DYbhop906329C:<?Ip����e?Q�����S:;3+**/09Tlknw}AAftmUA1,+*&.815D:3258CHKTA')00-*,/,0I^floortussuwwxyxyzxui+$<1("	.I[jY=HGUa% ('&()*/\|zW5UYhnP2*(&!&JNPR7&#"" "@dovne5ECRNTXe@
+����������������������������������������������������������������������������������������������������������������¿�����þ���������þ���¿���������������������Ŀ���Ľ���ƾ������������ž������������¾�������ȿ���Ž�������ǽ���½��Ŀ���ſ���Ļ���û����������������ľ��ɿ���ƿ����������ÿĽ������������������������������������������~gJJ�{roomnpqpquponmopnmlollmikmlnqomlkklnmllkijjhefhhgkiifceffecdfecfaeccfedcegbghhihigfedeggeiigilkimnnlkjjjhgnkklhhddfefbeefgcegffffeeaSM_lnkmffee_UK[abf]cdddkomfil^cfjjffadlxWBERQLTUg|yd�hSLSpah��ca;8gNImpB:/;g��VZZhfZM>JS��S;,8������z��gA855320AfkNS�nDPPmsE;PI>AOdLN�}l="4J@;;:7707bvk��pc8K_kpfl�m0Z[�����n3`������h+gd[a``N??hjdZ`lqZIhv�����?&OA;<9>OpQDl�eaQ]YB;;<??8aTIEACD>1S[_bddg[GMSWY\XU<|����������������������������������{�������������������~���~������������}�����|��{���zw~~|u}~���zspponnt{y_hjjpmedhirv���yy��{vuwr>N_y�|t�p4Yfirv}~I?k������-RH8315;AASmnnpt^D_������51Gx����hVB�Z?87678;B]�¸������v������u��idX`������9O�h(59?@=+-;C?:6+*R�p<KMMOR12n�����8.SmaBEOX}�o_TCU������cBj�����6A������3DXcipp:2862DX><AKs����d=H�����I6;2,*,01;[fR\u}=EatnTA3-,+',512@53269BIJL?&(.0,*-/+0L]gnnpsvustvwwwwxxxyub)$:/'!	0=RkaOsklc'*(&()+FhkbV4Nkh]M2+*(!(fedsE3/.1'!#AhogYY4:fdICHZ?
	����������������������������������������������������������������������������������������������������¾��������¿�������ú���½��½���������������������������������ÿ���ſ��������������������������¿ſ�������������������ɾ���¾������������Ľ��������������������������½��Ⱦ���¾���ÿ��Ļ���Ž�������ƽ��÷������������������������}eK?zoca`acbefghddbdeefeefeeeeegggihgihgijmmmlkkjjikkjnqnpnllljiknjjihhkkllkjkkljjklmkljjiigghefieefheehfgghffeffiijgfcdghfeefjljjffeccbc]QN`nphkhged^UKX[`c`cecammjhkl`bdhhgfaekuU@GORPVTf}uj�TVOQn`g��_a74bJIfdD908i��WYZmi]GALMs�H:+9������x��f?:6753.<fui^�eBMNib?>fT;@OaM^|gw?!4D@;9::906f{q��m_9G]kriq�j1T[�����o5a������i*jfWfhcN=;hjb[_klVHfv�����=*MA;<:=MnUC_e_fR]XB;<;?>9^VIEACE=4UYY[^[bTJMMSYYVN<x�����������������������������������~������������~���������~����������}�����}��z~�}|{�����zrnoonns�ekikujbeehqx���}z��zwvws;K^z�nr�n7]fjtw�CAj������2QF8325:?@RnonntZ=_������+3Fu����jSA�V@7767:<C_���������w������v��jbW`������5M�d(9>@B>,-;JG:6+,P�r@KMMNJ03r�����2.Xn_BCOY�o^UDX������^Ak�����4D������4BYcjpn84950G\?<AKt����a9=X���d85:1+),13<XeZXw|8FevlT@1,**%,61+,/3137?GGI;((-/,,-0,4L^hmmpsvuttwyywwxwwys\)#:1'!
+8RgkIGXNnc$!+&&(**F\BNb9F_Z^U3+)) 	"TNHaA>=CU8% De_F@P02M]PU]\= 
+�������������������������������������������������������������������������������������������¿��¾���Ľ���¾���½�������û�������ý���¾����������������ſ���ž��������������������������ż������������ɿ�������û���Ž���������������������ƿ�����������������������������������������������ľ���ż�������Ƽ��Ź�������������������������dI5nmc`\VYT[\Y^]__]`^affffea^]`_ab`^`_\`cdgfedaacegfgikifighhdcdffgfedeghkhiijkkjjjllllkkonmommrpmnnnmmokihihggfihgegeb`edcabachfccaa_^\UEJbrqijhggd]SKY`adacdhajkkhhj`cdhige`ditVAHRTTZUe}qi�]VQRoad��^c:=bLPqmG;09i��XZ\kb`FEKLz�L9,5������t��fA955630=fw�oxcDLQdV?AnT:?O`Ni�y�B%3C@;<==9/7evs��p`:J]irhq�g0SW�����m2d������g,ogWihcN?>jjb[`kjTJfv�����<*ND;<;>QqQHe~haP^XC<<<?>7_WIDBAD<4WURUXX_THMPUUa_U?z������������������������������~�����������������������������������~���{|�~}~�{{}�y}�����yqllnolp{{lidefbjgb\nx���yy}�zwvuq=M^}�u~�k7^ivhgv�KDh�����1OG83139>@SkolmrV=b������2:Kx����eN;�e>8888:>Ic���������z������t��ibY_������5O�d)?BAA@1*<EFA7+,R�o>JKLNO0.w�����11Wn]DCNYy|m\VC\������^Dl�����5B������4@Xcjol85:61BW=:@Ky����^484AL?726:0**-22>Yledxx8FhxiRA1+)*%.50+*-3148;DGH<&).0-+-/,4M\honotvuuuvzyxwxyxxs^'&80(!
+:PTPAA_:WS"!+'(*),;jipd<Zukr`2**(!	 Mswc/HI_jL)DhX@LX2)KhSMT]<"	����������������������������������������������������������������������������������������������������¾���¾���½�������ý��ľ���¾�����������������¿��ÿ���ž���ÿ�������Ž������������Ǽ������������ȼ�������ü���ƾ��������ÿ��Ŀ�������ȿ�����������������ž�����������ÿ���ſ������������¿����������ǿ����������������������������eK2u�|sjggechiljhheffeknnllgbaeaab^]`\W]\Z]^Z\YZ[[^__[[[XVTWVWUUTQVX[Y[\[[\]__`a_]^abfiegjgegfehkillkilqoompqnmlnlfhligfghjhegfiieffgfec[CGaooijfhfd\QO\_adaccf_kokhgj^beihgfceivZFKQTVWPfpX�yWPLn`g�xed9:_KFPM?>0:j��XZ\g[aG=IFTM;8.8������m�aB:78720?gfqmxcCLScN>?mN:AQ`JI\ZW8#4F?:==;8-:fus��t^8M\lwx~�c-WZ����m0g�����g.mjV_fYK=<hiaZ_lkREfv�����=*KC::9;SoRJm�h_T]WC=<<@>7`THDABD;3VUSUXY]SFPQUXrgS8������������������������~~�����}}��~������~~������}����������������~~��~}}~}y{�xx~�~z~��vpmmnnlowwkga`hgkld_p{���vx~�yvvtm?O[|�yy�m<Yhn]csF>i��v��.RF8321:=AVpnlmpSAa���}m*,=Ix���gP>�[::::89<Dc���������v������v��ecX\������1T|b&NKCA>*+DVY]A,+R�k>KLLMP1/q�����2,Zn\EDO[x{mcUAZ������`Fj�����3G������5FWckno54:61EW;9AL|����]561-+-00690*).20<^iacvs7HjwiQ?1,**%.80+*/2269?DEH<!&-.,+-/+3P]ionptvvvuuvwywwxxxr_#%7.'!	3PeY@HpikS  (()))+=`rw`8HNSfT1,*'"	(Jnh[,FVaWM,Cfk`vs6=W[>JHR5
+	����������������������������������������������������������������������������������������������������������������¿�����¾��¿���¾��������������������������ÿ�����¾�¿���������þ�����Ƚ������������Ǿ�������¿���ʽ���¼�������ž�������ȿ�������������������������������»�������ƾ���ž��Ǿ���ǿ��ȿ�������������������������������}iI2l��wnoornqpprqqttqruuvruqljfgfiikihehhggfhjhefhffd\``ZZYYXWXZWV]]VTNMROTUXUTTPSUSUVZWZ_^_]]^`b^a_``_`edgkmigfeecdefedgikgfigggegghjmobIH^pmhnihed\SP^abdbccdbjnkiii]adiiigbffyYFLSUTYOg�uRj�XOLna`kkf^88aMCB@?A2:l��WY\dTcI@L[�{M<+7������ku�c?98871->fimu�eENVfQ<@hL;>PdG@<<<4#3DA<=;::.7gzm��nb8J]nww{�e-WZ�����m7e������h-jfPPSNK?@jjc[^mjPGhv�����9+LB;9:?RiOKn�i_P][A<=<?=7^UIEAAF=3WWVZ]]bWHPTWXnfTA~�������������������������������������~���~���~��}{z�~}��~}~}}~~}��~���~{~��|z��tonnnpoptrmfb]stmifbv}���ut}�wwvwp<N`~�|v�m4\q�ijo�FFl������.VF9434;??UnnnmoSCd��~��]&2JO}���}iL=�]89:857;D`���������z������w��hcTa������2Q{a)LFCA=*/R]hwH*+V�k>HKLLJ00r�����50Zq^BBP^~}l`SAX������XHn�����2H������.FZckpn42942AO;;@N�����\761,--106:0**.21=^iXb}u<KlsjS@1,+(#,6/,,.1158@EEE6$'..,+/0,4O_jmnquvuuvuvxyxywxxs^#(;.("	:\pv]=Kmrf& ))&()+9F]uo>`ijmP1++("
++YOF^@?PQUP& Bcr|�{7DJE@S\U4		������������������������������������������������������������������������������������������������������½��������þ������������¼�������ſ��������þ��Ľ���ǿ�������Ŀ������������Ľ�����ý����������ľ���ü����¿��������¾ľ�����������������ƿ����������������������������ľ�������ɾ���ż�������ȿ�������ȿ���������������������������hG*Hdf]^]VXWXX^cdceghhhmnjnjecaabeikknkngginnkjjkhjjkilmopompomlijkfhhkjnnhahfeabYYVV\[VRWY\ZZZ\_UYSIPQMMPPPNLPMWTSVZZYWXZVUVYXVWY^b]_`bVFG\roijgjfb\NI[``eacaccjlkijg_achihgbgcwWGOWTTUPe|tZ_cXPRqcfffe^<;fLCA??@/9i��W[YdTbHEJ[��L;-7������bh�`?76563-?iqhk�gCNThR=BjR:@P`D>>;86#5H@;=:89/:gsn��n_4KZkqnw�c0UX����i1d������e/khV[`TM>@kke]alkODgu�����5+TB::;<QpNEao``P_ZB<=<?>;bXJFBCD<3VWXXZ\aVGNUZ_xsO<}�������~��}���~����~~��������������~�������~~�|~~}|~~~||�|{|z{y{zzz~}z|�}{}~~{{~��|nmlmlonnrpld`\ikhgdgu|���vu~�xwwtn<L^{����k3\u�vcr~?Eh������2SG7456;>ATmonloM=a������6.INy~~�{eB@�W:87655:CZ��������}y������u��gaSb������/Nz`(EDBD@,*GYdsE),W�h:JKKMJ-+q�����2.Yl^BBO\zzj_UBR������UGq�����0F������-DZcjqi418436:7:@I|����V54/+,,0/570+*-219[pkmyt7FiwhR?.*+*"-50-233059?CFE7%(/.-/20,4Naimoqtvttttwxwyzyxyu[ +=.($	BcaTB87Hl]!"))')*,CQev]6]_hsS/***%
+&RRUaAIebe[+<blz�x6C\T;E_f:
+������������������������������������������������������������������������������������¾������������ÿ��������¾�������¿�������ľ�������ſ���¾���þ��ſ��������������������������ſ����������������������ſ����������������þ������������ſ���������������������ƿ������������������������¶������������¿���½��������������������������gH%0CLNVQNLIMOVXZWSY]Y\]\VSSULWWZZVUW]YVWVYee__cdceedgjkkqrppnpnmojhgiljorqkkmklpkjlmnicdgfeedefefjf`ifdb__^bba`__VW[[ZUWSOPUQNGGNZ\RQOMH=C_uqgffkhc[PKWZ^d_^`eekmlkie^abghggdegvXIOWWYWPf|tUWWZPPoacffg_<;bLDA@AA2;i��Y]_hWcBCJJ\^B:-8������`c�`>855631BffX]�fAOVfQ<DeM;>QcE=<;9:%4E>:<<;:.<lws��qa5O[jpir�e0X[�����j7f������d0no\mnbL?BflhehnjOGgv�����7,X?::<>PoNFUY^_Q]XC==>?;5cWIEBCF<4VXWYWZ`TGOTY]qlK?����������}���~}��}�����~�~������~~�����~�������~}�}�~��~}}�|}~{{y}{zz~{y{~}zy|{zv|~��{nnmllpnotoiba]abaddhw|��|wu�ywwunCQ]|�sy�j2\w�����GFh������0QG8467;?CVmponoOD_���{��,<IjdjptgEE�Z>86788;AV��������~z������x��gaY_������)Uv`)GEEB=)+?KKK:))W�e9IKMLI00s����21]r]BBN_tujbUCZ������VBq�����0H������0CWdkti338210359AKx���P64.,--0.5:0-)+12<]tvm{t9KjygS>1-+*$1924:86459?DGE8$'..0672*4Pakmpstvuutvwxxy{xyzxY"':/)#KdceKFjhu\"#))(*)+Hb`aO8ZTM]M1+)(!
+ Ibrj<<ML^Z."1Zitpl2!KkhA0Lb:������������������������������������������������������������������������������������������������������������������������������Ľ��¿����������¾�������������������Ŀ������������ÿ�����þ�������������������ɿ���ļ¿�������������������þ�����������������þ��ʿ��������þ�������Ļ�����žǾ������������Ľ�����������������������������gK+SbYWZVRINUW[XXWTVYV]YVUUSXY[ZVRHFHHGROJRZ[TUXY[\ZXVWZ\^``^__^^]ZXXVVSSRUWUXX]^]````__a_]__`beehfhnokigkhnkhjdbffheebad`afa`^WUY\_[ZWVMA@[shhiiklb[UNVX_d`Yahgihhhgcaaaceffc`fwYGORRSUNf������aKobbighc8<cNDB@@B/;m��Y]\iSeHCKW��L7-9������YZ�]?976531DcWQ^�`?MR`N?C^F<>PdE>=;;6#4D><=;<9*9iugkecb7I]lnes�e.U]�����d2d������a.hmUhlbI>CgkgdglhKEfx�����3*S?8:;@QnMDTZ^`P\U==<=@=8bXHDBDG:4XVWXTX[OCNTRVkbG9z~��������������������������������~�~~~��~�~{{|~}}��}~~�|{|~{|}{{�}|}{x}{{z|{xu|}~�vsokjnoptpjeb``aabdjqu~vuv~�xvwulAOX~�ys�h6[o�~�~GBl������0QF8579=>DVkomllL@b��{��\:K_t�mng>G�]<;:899<?X��������{|������v��haWa������%U}b'AAC@9'+?PH@5*+Z�f:KNLIL15s~����20^q_CCO_�~k_REY������YDp�����(F������(HVbjri558500249?Lu}}}�R64/..-0.4811..21;_f_fzm5OnzgR>0,+)#/837;98479AEGE6#(..2893*6TbkopsuvvvuvxxxxxwxwwZ!$:1("!HWQ]LFehsQ#*'((*-Ff_TD4^hihN/+*&
+&CO`sCGXZaP)"1Ygttm5%[^JDO\b6��������������������������������������������������������������������������������������¿���¾��ÿ��������ļ���Ŀ������������������������������������������������������������������������ļ���Ž�������ſ�����ȿ���Ŀ��������������Ŀ��������������������������������������¾�������Ļ��ȿ���ȼ���¿�������ź��÷�������������������������jO.oxidfgefhfjihgefgffhdbcfefeehd`YZ]ZZ\ZU[^]ZXWYX[UPTXVWVTSSRJHSPNSROK=IIGKJNMMJMNJJPOLOSSTUUWVWY[]^\\^ZYWW[ZYXXZ\__`__^^`a__a`^]\^\ZWTJA=O^Idkljl`[VMTW^`a]bgbihfgieba_ceegfbfqXEGIGNVRj������xJng^hghc<;aKCA@?>+?l��Y\_pWdIDIM`[F8.9������\_�]@:76641AfMKe�`FLReS=>C=>@MbE?>==9#3C><<<<8-;gulpqfa3J_mvw��f/WZ�����i3g������c4ikYkncJ@CjlebgnkPGhy�����6)M>::;@RmJFX[`_NaV==<<B?8_XIDACF<8TWWWXZ[QCMQRVsbM>z���{��|~���}��~������~�������~}�~��������������~}�}z}�xy}~{w}}||}}yy|�|zz{|zy|�}|��zrnpmmnospifb^_`bbcdhn{zssv�yvwvmAL[��uh�g6[ieRWsyCAj������/PE7698;>BXonmlqM=e���}��.8IikmgrfAB�X?==;;:<BY��������w}������v��j_V^������$[za);?@@=)-@H@52*'Y�b9MONLN,4v�����.1]y`ADSZw{h`QE^������PIu�����.L������*IWdjrg04950/36:=Gv|{|�S740--+/.6948A2005dm_hyn7QjxiP=2-+("/817>=9469@DDI3#(./2:82)5RcimpsvwvuuwxwyyyyyxvR';2'!DM464IffrI0/'**,-=_u}`4PgtuT2+)'%4:iq>Z]\sW*!
"4U\n�~;'VWK?Pja5���������������������������������������������������������������������������������������þ��ÿ������������ľ���¾�����������¿��������¿���¿����������������ľ���ƾ��������¿�����������ƻ���ż��Ŀ���ƿ������������û���¾�������ƾ���ž�������Ǽ���Ľ��ž������������������������ƾ��Ⱦ���ȼ���½�������Ļ��Ŷ�������������������������hO0ovommnnmqqpoprooqpqsqqutwxustrrourqsmnponklommnoopmmmjjlh`]ZW``ccbcb_`\ZaVTOFFFGGLGDBDDGJPMEDGJCGIJHMKLPNHONMPQRRQUUUUTQNPSUVUTTTVQMGC<;NjOgcaX]]RLAOTYZ`[Z]Ghhccgb\ZUacacfceo\Y`RLRWTf������oKnmcfd^Y8<dOEDDA>-Ak��\_`r\fD<GMQIE5)7������\j�\=965530EjTIR[EEMRgX<<:;>BPaKB@>=:"5@>;;=<80>jwn�mh:J_m{���d0XW�����m1g������`.ko`mheM<AiohehmfKKkw�����3-J@98:@NmHDZac`McUB<=>C?8]VID@DI=8UXYZ]^^Q?MPOM_WLC��������������������}~�yy��}�~z|�~��|���|}���}��|~�~|~��~}��x~�|||�z|~z||{zyz{yuz|{s|��vnjnmmlmnngdb^Z_afc`emwytsv��yvvtn?LZ{�nt�i8\hjmls{GFi������/QG7687;?EYqnmmlNCd������28Mrihszi?<�W;;=;9:;Dc��������yz������y�j`U]������$\u]&:=ABA+/JgS62**[�a9MNNNO'/v�����2._v^@CQVmj_bPB\������QHw�����,I������.H\elsi237430379>Js{{{�T760,,-0/6:4CV71/;ah^cvl8MnufN=1-,*!/915>?904:?DDC0#(-.1882*8ScjoprvwvutwyyxxzyzxvR)80)"	 LkklKHnmvX)C.(**,/TtvmX5?FgnP-+)%(GZe].KI?TM+ !9bD@�~7'PLGBM^[2
����������������������������������������������������������������������������������������¿�ÿ���������¿����������������������������������������������������ľ���ľ���������������������ǽ���Ž��ľ���Ŀ������������ǽ��������Ŀ��ǿ���ƽ������������Ǿ�������ɿ������������¾���������ž�Ⱦƾ��������������¿��������������������������}lN.kqpooqopqrqpqtnrttstvxzzzyxywyyxyzy{yy|zz{{{{y{}|{||}{}}xywkkmptwx{|xzzz|vvwvmbhhcbffhfhgebac_^]WWURRNUVQONRPKLJHCLKJHFF?ABFINIIIM>BA:22M[Oa][LPVFF=MQSJWYQaRc_]Y_\TSPX_\]fh_cZbreUUSQZUeojjjUMe]fxbWQ5<aNFHMHC,Bc�rec`gUlI.=LMB8.(+OfkkryEr�\D;:976-Ci]KMQJJOWcS=;:;ADTdGBA?<=&7A@<?=<;3?mx_jjee4Haq����b1W^�����d1g������_3jmZkhZJ8BjonnnofIKgx�����9.K?89:=RqMFZfgaLaU@=>=D?<`WICCEK@8[Z[]]`gN@MQRT`YM@��~������������������������~�~�����~~~�~z��|}||~~{{}~~~|{{�{~}}|�}~}~}||}}|y}||z��~ronlnnnmlnjhg`[dikgcfnuywss}�xuxvoBO]}�r~�`8]k|�y||>Eh������0TI7486;ABWmllmiICa�iq{i  7NphRgqd:A�Z<=:978=Hd��������x������y��g_V_������&fu]#:>AA=)2SkH32)*_�c:LONNS.5v�����/-]n\BGQ^��f_PE_���}��MDs�����+M������,G^elsi237624669@Nv~|{�U75/---0/6:1FT611>`dIb~l4OovkQ=241+!0526><7149@CCA2%(..1891*:Wgkpstuvuvuvxz|zyxxyxO(=1)! "$$"" 	 EXQlT\pntQ05/()+*1Sf_OE8Z`kvS/+*&
+
+(HUKJ9A\V`O,! 4_fn�~3#J>2<[cd6��������������������������������������������������������������������������������������������������ÿ��Ŀ���¾���ľ���Ŀ��Ŀ���ſ�������ľ�����������������ż����������������������������Ǿ���ý������������ƿ�������Ⱦ�����������������ƿ��������������������������Ŀ��ȿ���ü���������ſ�Ǽ�������Ⱦ���ÿ��ż��������������������������~mO/oqnoppopononmopqqprtvy|}}}{zxz{z{{|{zz|{y{{|}||}~����~}zppoty}~������~{sonostwxwx{wwzuuutwtronjfcbbfe`abac^cb__[]YWQVX^]^a^U`_\U[`ZV^`_\^]UXR[WTP^_U`Q^[ZLRZOT\XVVUgvXXRDLMNU]NGLJVVLTRO[VK[UNSHGTIAKJCC5GWYVdYSOBoO<4=80($&&7:C?/3*m�ZD@A=:90Ef[KOQKNPVbQFC@@CBRlUGCE@;%5A><=>=;,9p{`b`ae6L`n���`.W\�����d3f������Y4ijYln`L;CorprrphIJex�����4-MA:;<;PmOK\dd_Q_X@;;?B=;`UIDDFI>6WWX[^_dJDNRPUodK<|}��{|��~~�}��~��|~������|}}�}���{z��~�~|~�~|z~~~~|}zyyzz{|{zx}|}yyz{xz|~}{z|}{{~��{mllkjmklllhhgcafhhd_dlrywrs}�zwxvnLSZ����_5\t�xizyCDi������0TF5488<@@XjnmmlHC]�Y0862 ;K}nppf<F�^>:99:8=Fi��������v������w��e\X]������%iy^$6<?@9)/HR;4.)-c�Z<OTQOK(3u�����22\qXAETa~xdaNC[���}��MDt�����-N������)K[ekqa/48439=:;AMy�~�Q75.,+-//69/CF40/Ableo~d1VqteP>3=:-"2925=;535;DMIE2&)/.187/*8Wdlqttutvvwvxyz{zwx{uJ&=1'%:D/4>,#
+ C_UeEXTYvP#+.((+**`slsX9dqyrM0+)&	
+%?cmf?@XppX)# /_qx�|0+\^XMZc\3
+�����������������������������������������������������������������������������������������ü�������¿���¾��ÿ���ƿ�������ľ���ľ�������Ŀ���þ�������ľ���ǿ������������ľ�������������������ÿ�����Ľ������������ſ�������Ľ�����������������ľ�������ƿ��������������ƽ������������ſɿ�ƿ�������ʾ���ž��ż��������������������������nN0jooppqqppnnllmnoopqtuz|}}||{z{}{{|{zyxz{zzz}}|~~���������zoqruz}�����������}xsrrvwyyz{|}~}|}|yyzyusnmjkjikljhihjijllkputvtvuw|}|{|�y{wtsrqvxz�{zx{|tqrnhljhhijmgeiffejja`YV`XU]g[TNALPFK[SSN3EDGHQKRL;@=AMNPRF;KGIONsx`=EDC?60/;4BHHDFz~JACJB?A7H\VV^VQUPRYR[g\UUHMaUgkUUP(5@BEHGF?,?fmok^[]6HYgz���a6V`�����b6g������V7ov`ll^K:DkonrqqgLHjz�����50PA;:<ASkHF\df^Q]Y@<=@C@?`VHFEGL=8XWVY\^dKDORPQSRID~���~�{�|~���~|��|~�||~y}~}x|y}}y��{|�{|}�}{~���~��~~}{�|{||yy~z{xzyyxz{|wxz|}z{~��{lkkllljlomjkkc__cgd_cirvvvx}�xuvvnJP^�����a7Zrd_s�}?Im������.VM4799<?AYnpnnlG@\�X397/;Qx|yw|h?4L�T=86A?5:Cj��������t}������x��b_X[������ gt^)7;>@<)0LO:41'+d�^D[kaRM*9x�����+3_pV@ET_}i^KA`���}��HJw�����+L������+K[hosd02974BI;;@Jx��{�Q62/-*-..59.AH53.Fatux}f-YmxeP:3<:,!6;24;:515<ELKE2((-.164/*9Wdkpttvuvuvxz{zzyyyyvM':/& ";j�WfyP2#
+&@]rp6NN?`L'+((*+0`Q:>=7\\khC/-*'
+
+&PgnpB*#IhV)# 5crlkg+&RT_SKW^6�����������������������������������������������������������������������������ý������������������������ľ���������������������ſ���¿�������ü���ÿ���������������������Ľ������������þ������������Ǿ���ÿ����������������ɼ�����������������ſ������������û�������ɿ������������Ⱦ�������ż���Ƽ�����Ŀ��Ļ���������������������������jL1kmoprrpnnmlmlmlllmnrw{{|{|{|yz|yy{zzyxzyvwy||{}~���������|qrsty|�������������}xrqrwxyz|{{|��~||zyttojkllkijlllpoortwxz}|}~~�������~}~~~���������~}z}�{yzx{~|}}{|||zx|ztyvx{xvtstuptspnib\_][T[SVTKPRJIHPULFIOMSNf��{{y����~��������WA<@GHFUOMUSRWSSXNLQNR_\S]VR\J[hTZa669?Pd\UC.ATLtr]SN5AMPX^`\F2OVgu|�W8h�~���V4jvVYQSK9CkmgjklcLNiw�����.0OA;::?WmHJ_fn`K_W?<>?C:=_SIEEGM:7YWVX[]aNGQRQONOG=����~����������}~��~}���}z�~x|~�|{�~{w}|}}|~ox��|w{~~{}|}}{|~{|~�~{{}}zzz}{yz~}|x}~}yy~}|�ymjkmnnmmspr��bZ`aeccehlkort|�yvvtjGQ^�����a<\w|tj|z8Bm�����|,ZK68:8<@CYklkkmED^�V8XK0:Po_Tdue@5EhM:35>;16=f��������t������z��f]Qb����� iz\'9?MO<)0NjN82*/g�_C_mbTQ*:u�����/1]mTBCR]lpY]KDb�����CJt�����+O������'J[mvsd22767EI;;<K|�~z�S52/-**.,37/;A32-Bftuwza/UpxeO:5A@.!383397216=EKKB3(---0250+:Wcmprtvvutvx|{yyzyyxuL*8/& />O��y��z;+"	%HIatCRa]kF'*()+,-bjW_E;oqwsG+,+'+bbSL1APVqZ+""9_jc?D'%O]WKDDN*	
������������������������������������������������������������������������ÿ�������������������������������ÿ����������������������������������������ļ����������������ſ�ÿ�������������������������������������������������������������������������ƿ�������Ž���¿�������þ�������ʿ���¿��ɾ���Ǻ��������������������������������������mK/kqooqsqpomnkinmnmnpsvyyy{{}yxxxvvwvvwxwyzyzz{|~���������~wupsw{~������������}}xqsuvxz{{{z}������~|||}{yvrrqoponqprttx{{}}~~������������������������������}���������������~�����}|�����}~}}}xywusokmmkpnllknnlnkepxragenigmmnhjkkmgbTREIONGRRHTNHNJJSNIOGGOTNURLYB@DDGQC99>IMJD=9GULW_VOMFDCDGKMLA8GJPV\ZSGASU\o`W\C=]TPc[PM>E]^_de_ZJQaclonlo)9K@<??C[mRNT\s_G_\JD?BH><ZUJGGFE98Y\]_aaaQIQSOMOQH@|�}{���}~���~~�~|����~~���~�����~��~��~����~|��{z�}{|z~||{}zw|}yv{|wy|{xy{~zy}�}zyz~t{~xpljljjjkutv��g_]bgfb_`\]lru~�ywuvmDP]�����];]vuol{@En�����~/RE68;9<?CYponooFE_�U6\G1! 8V~yrprg<6B`I801?</16[��������s�������z��l_Ua��io��b{Y(?Ffe=(1LVD72(/i�]A_e\ML)4|�����-4`uZ=DPW\\_aLA[������DPv�����$P������(K^pyve10887AC9:<Lz�}w�N73-+,-0-59.DD512Bcrsv~a1Wsq`L:3A>.!6;2386303<FMMC1(-/,114/,9\elnrtvxwvwy|{zxyyzytL,<1) 8oJ|�n��qML%#I_p]G4=c�R!/.)*,,,hqm~_9klnoG0.,'-egeg@PhovW*"!0apoZC%&Mk[?HLT-
�������������������������������������������������������������������������������������������»�������Ŀ���ü���½�������ƿ���½���������������������û��Ⱦ��������Ž��������������������������ü�������ý�����ž�¿�������¿���������������������ƾ�ǿ�������������������������ž�����������������Ƽ���Ŀ������ĸ�������������������������lO3mrsqqtrqqqsqqusrqppqsvwwwyywuutsssqsrqtuvwvxz|}����������~{xtwy~������������}yusvvx{|}|~�����~~}||yvwstutrqqtw{}�~�����������������������������~��~��������������������������������~~�~�~{zwvwwuxz|{|}�||}}~{z|zxy{z{yuutxqqrrpnkkjgijha^UXVRPPMENRTRVLHL:BEFDECA<@CC>>B?IJ=HJKILLH@>>ACGJDAFILMJKIL_^GWXVTMJXQHPSMPNMORW\`VSKR`XW_SKJ0=MELSTZio[RJM`VM`WSXURK?CXSSZLGD9;`]^`a`ePGQSPOQSNF���}~�|�}�w}�|{���{��|}��{y}~�{|�}~~�|~���~|}�|{�~}~�}yzy|zz}zyz{{ywww{zyx}zz{�ywwxxv|uljijijjlrtw��pf_emrcUVT[kqw��xwuunDO]�����^7^vnlk~{>Fl�����~.RH97;9<>@ZpnnojF@^T5SD2 "8T���{vd;7IsK831B<..5X��������s�������u�}e\Va��rz��]Z)BOl`=&1<C?72&/h�Y?bi]NM-2}����~(1arY>EOSX[[\HH]������FOw�����,R������)L^n{vf.3855BA9:=Oy�}k�O52-+,-1-450CG0/.Cgrqw].Vij^M93>;,!5922641/5>GNLI2&,//234/+;Zfknsvvvutvy|{zxyxyysM*:/'!E�x~~vh>"#:njM:I^ymA0/'(,,.]YH_V:j[lxK0+)(,bQ=E/CU\fH-!
!2f{��['-Kgb:=Uf/�������������������������������������������������������������������������������������������ý������������ž���ü�������ǿ¿�����ſ�����������������Ŀ��ƾ��������Ľ���ý��Ŀ���ƾ������������ý�������Ŀ��������ž��ƾ���Ž������������Ǿ�������ʿ�����������������������������������ǿ���ľ�����Ž��������������������������������������oO0|�|{yyxz{{z||}||{zxwwwtvususrrpqssprsrtsrtsuxzzz}������������yuw{���������������{vsuvy{{|~�����~~�}~}zywvwusrtz|~���������������������������������~��������������������������������������������~{{|{|{|||}�������������������|{}{zwwxvwvsmlphdacaabbaaaca_[Y^]Y`b_dcdcac\]`]UHPSPNPTVIACB>HOMJFBBIDIRPUHCQUNQPNLDFMNLRSNLKPTTRWRQbVKUURQEFKBCD=FVf`ppf_TT\UUg[SOCK[TTfVMJ>ASQOXZWUJLSTZYSXXF���������}�������}���}�}���~{~}~||zy}~~{|}~|y{}y|~|}|~~}||{x{|}yzy|yzzzy}z{{{uwzyxx~�ufhjiklklruz���{npvjZVUXamrw�zxvwnEP`���~�]8bpdhgx7Lk�����z,TG67:8=@DYpoookGC_�Q0UF3";W����zh<:OzN665DD007]��������r�������rvvh^Xb��lu��]}W)=Emd?)2XXI73(2j�ZBalgQI/9z����}-7cqT@ENQSSTVAGa������COy����,O������-L_oyte-3865@@:;?T��~gN52.**-.-452@K11,Dfqwz{R3Wc`YL86>9-#690254218AJQMH4)03/2650,=Xflnrvwwwvxwyzz{{zzwpL*9/& Pqp��lpB"	%Kijb9;BLJ6)-,)++/Px|wB=dBPd?-*))(`YFC-ZmosV."
%Bjz��o&)XiZCA[O)����������������������������������������������������������������������������������������������½������¿�¿¿�ſ��Ŀ���Ŀľ�����¿���ÿ���ÿ����������������ſ��������Ŀ�������Ŀ����������������������������������������Ž������������Ⱦ�������̿�����������������ľ����������������Ⱥ���ƹ���º�����������½���������������������������nO5����������������~{yxtrqqoquvwwuwvtsuptsuuuvvx|}�����������~{xy~���������������{wusuvw{}~~������~|}}}}{yyuux{~�����������������������������������������������������������������������������~~}~~~�~������������������~���~}}|zyutqmjjikjljkkoopoqsqqpprrnrorpoqqrrqsqusrqssrpprnmmnprlrpjnnjffgcfdca[][]TXU[RD=DDJVRSL?GHILVQPE6;@AHOIKK<=?BCR[Wgsi\PUb^IIOSUNVm]M\USSLJMNKNOORFLXPW\TRFL{w|}}~{~|~�}}���~}����}���~~�����|�����~~}~~zy~|z{}|zyz}z||{ux||}wz|�zz{}|{xzz{v{}{yx�ujjjhhknmruz�����sdSUUWX`nsu}�xvvufBRa�����V:^ko���w4Gk�����q0RB5797<@B\qooniEI_�R4H=0!8Y�z��{c79OgK::3<=.4=a��������m�������kprh\Vf��fm��W~T);BokA.0>:660)3k�[@Xn_LH,8z�����.0dkSBDMPPPNM:D^������DL|�����'Q������/L]nwtc+2863::8:?U~�i~M50.,*,/.34.06/0-Ddptv{P4R_ZVH64>9,!480176416BLQLC2)1415860+=Yflosvvxwvvwyy{zywzyuI*3.& B���gA"$HD4WF>>991(+)*)+5XPitA9\`]d=,,+&	&mpmyCR`[cT,"'Gow��m) ^cN97<7"
����������������������������������������������������������������������½�������¾��������¾�����������Ŀ�����������������ƾ���ǽ������������þ�������ſ���ſ�������ƿ���½�������ſ�����������������ž������������ž�����Ŀ���������������������ɿ�����������������������������������˼���ȼ���ý�������ž��½��������������������������nO6�����������������������~|{||�~~���~~|{zyywwuwyz|~�����������}zz{���������������|yuttsvy{~������~~}{|{zzxxz}�����������~~�����������������������������������������������������������������~}~��������������������~~}yxtpnqsqooqprrsttvwwuwyzwxxyyxyxwvwwxxz|{{zz{zzxyyvvuvwuvusuvxwvwwvtutvtqvpttronpolomknjifheaebedfd`ZXYYUY[S]ULPORLYYIDOSPJOQDCIJGKMW[PNURNNNLNLGJLNRKL[GNRTPQ^yz}~|~z~~~y~�z{�}}z}��yz~{~�{~�~~}��}~��~~�~{z}�}}~�~|~{|xz{zt{}{zx}}yuy|}zyz{xwy|xx|~woljjghjlptw}��|i_`USWUXamtr}�zwwugDSb�����T;]s{���t8Jm�����q7QA78:9=?D^qpqqqFE]�Q6VH0!:Z�gp�{d56J�Q><4<92;Ik��������p�������qzxg^Xc��qp��X�W,<EukF1 -FIB85)1n�aBYh\KK)6y�����(0hnRCDMPOMLM9F`������CPz����})S�����+O_pyu\.3754:979>R���t�J61-+,-0-45.++.2/Ec]Xl}M3OZWSF54>7+! 462597527DNPKB2).21577/-A[flosvxxywwwyz|yxxzzvG+5/(!_��G'!$JclmC^|vnD),)++,2`\nmO8EXq}C.++(&SOEc?US[sY)!
%@kpv�_&-XhS946,!�������������������������������������������������������������������������¿��������������¿������������¿���������������������ʿ������������þ�������ǿ��������Ŀ�������Ƽ�������Ǿ���ÿ������������¾������������Ľ���������������������Ⱦ���Ž������������Ľ����������������������������Ƽ���þ��ɽ�������Ž���������������������������jM3��������������������������������������������~}{zww|{}����������zuz}��������������~~{wrrsutx|{~~~~}~~}|{{}z{{|��������~~}}�~����~�}~�~~�~~��}�������������������������������|}}~~~�~���������~~|~{{}yxunmnmnptvvvyxxyxyyz}{{z{~~{{||||{z{|{|}{}}}}}|{{||}zyz{{zzyyxyzyxxzz{zyzyvyyzyuvuxwuusstssrqppqonorqprppssrlillliikjecb^^]XXVQLPQTTSTYRMQQKNSUTKGKNKHSSLD:BKC=Uk}{�~|~�����|�~~}�}}��|~~~y�zy�}z{}}}||||y|}{wy{}zz}�|{{{~{{z}}||~}}{||xw||{xyyxxwyxvvz}}�wpmkjihkmpsuzyhWUZ^WTYZ[cnst�ywwvg@R`�����T=b{����s8Jp�����q3ND67:8<?EdrqqrnE>a�O3gL1!:[�ln�zc<;I|L;83:73=Dl��������q�������w�{cZXg��wu��Y�U*;DljO3/PTE74)4t�Z?XfdOL)7{����|)4jsQBFOQOPOM:Ef������AOy�����*R�����|-N^p{sY,4634>=88<W���w�L81/+*,/-65,,*-24BeZSg{F3MWYTH7485+!"45/4:9638ENQMH/(./.256/-?\glnsuwwwuwxyyzyyzzyrD+2-'!$./YJ3*!
%Banj7GetvI(*))*+.[vq\@;Qewd>,**($Vppj7ND7UN)!;e`f}V"0J<UKV_N*!	�����������������������������������������������������������������������������½�������¾���¾��������������������������ſ��������������������������������������������������������Ƚ���ÿ������������þ������������ÿ��������������������������Ƽ���ƾ��ɿ���ž���������������������Ǿ�������Ľ���ȿɽ�������¾���������������������������oQ/���~~~}~}�����������������������������������}|{xxz{~���������}y{~���������������zwupmruvz|{{|~~}~}}}}{{{|}}����������}}}~�}|~~~�}}}|}{|~~}}}~~~��~��~~���~~}~~���~����������|}}}}|����������~}|{|zxusqpsqoorrwyzy{z||}|{}}}}|}�}}{|||}~}}~}}||~~~��|}}}|{|{||{}z{|zzyx{yz{zzyyzxwyywxvxyxxwxvwvvvvuvvwuuuutuuuutrtrpsqllkiihccddabcbbb`acddbbbcddgedgeeecfhgcha`cb^]`dosnqpqux}{{}�~~����}|{�|}z�~�}}~�~}|~��~}}}{~~zxz{|wx||zxyyzwy{{zyz|{z{||xx|�{yx{{zzxzwuy|z}ulijjljllpxuoaSWWX\XZdb_dnss}�wuvveDS^�����O=`�����t6Ju�����q4OE6688<?E`upqqlHEb�O3hH0 %9]�~��x^:;KoI943446<Bo��������m�������z�~e^Yb��io��X�Q,;@oq]0-;9455(3s�Z?]ieOK(4����u)8m|U@FRXUVSP:Ge������@L|����z0T�����z/N_pyt`,5632@@8;>T~��x|G41/-+.0/54,*+/22Eiiet}J4P[ZWH7299-!#66.5:974;HR[TN/*01./14/*B\elprvwxwvxyxyyzyy{ywE.51&!AfY[QL:$
+	%@Kby?\ssn?+)'))+1\to^<7RXXM5,+)&	$SKii6CMN[P* %ChefoK!,UW[Jaoa.�����������������������������������������������������������������������������½���Ŀ��ÿ���ý��������¿�������Ŀ�������ſ���������������������Ľ��������������������������ÿ����������ÿ������������ſ�������������������������������������������������˿���ƾ��ȿ������������ʿ���Ƚ�������Ľ���ǿ��������������������������������������lP2{��{x{z{{{|~}��|�~�����������������������������������}zw{{|�������~|{z|���������������}|ysstsvwwxxz{x{|}{}}{|}}}�������}~}|}|}}|}{{|||{}|||}{||{|}}|||}|}~}|~}~}~�~~~~|}~~}}}}~~|~}|�~}}~��~}�}~}}~~~~~}|}}~}zvvuupnlmmorvwwwzz|}}}~�~�}~~~~�~~~||}~�~}}~~|}||}|||}|zz{z||{zz{{z|zy{z{{{|zyyxzzyyyxyzyxwxxyxvvxwwwwwvtsvvussrommnljhgfefghhijihiiikmmmonmmmmmnnnoqqssnnonompopnpooostqnfhllrxxwzww{|}}|||zytwonorutvwzyzz}||z}~||z~}|z{xvwyzyw{{{{z}{{yzxyzyxywyzyvuxzzuxwxvwywvuuxywz}vmkiijijjlwk^TTXVU[^cecckrtt~�yusueCVb�����V?]����s/Gu�����k6RI7345<?B]qsrrlFGc�P5I;. $9_����z`2)GwK943147<?k��������m�������}�{e[W]��du�~R�V*@DlrL),32362(5t�YBandLN-;|����t);pxXAFSYWXWP9Ki������<Ry����x(Y�����p-P`nuwb,572/?<6<9Nv�tH41-++/0.95+*,/00Gi_Pj{F2S``YH6087-!$;607<8519FV`WQ/)/1-.-2-)A]gorsuuwvvvwzzzyxzzxx@093' ?x_mfUB"		%?Imj7_P7J1+)&()+8iURmD5;::@4,**($QOgf>2=\xZ) %Dea`_=MfWH\]S&
�����������������������������������������������������������������������������þ�������ſ���þ�����������������ý�������ƿ���������������������ſ�������������������������������¾������������Ǿ�����������������¿��ƾ�����Ⱦ�����ǿ����������������ſ�ʿ���������������������ɿ���˾�������ÿ���ɼ�������ʿ��¸�������������������������mN1j�zytvusstvvwxvyyzz{{�~}}~~~~�~��������������������~|wvz}��������~zxx|�����������������}{wttrsvvwwxz{}||{}}������}~}}|{{zzyxwwwwywwvyyyyxy{z{||||{~}}|{}~~~}|{z|||}{}~}~~}|}}|{}|}~~~}~~~~}~|||}~~}~~}}{z{{yuqnnmlkilnssuyy{z||}�����|}~|~~�}|~~~~}}|}}~||~}|{{{{|||}zzz{{|}{yzzz{{|zzzzyzzyzy{zz{xxxwxvwwvuuuutstttrnlkhggklgfddfhgjklnmlmmoprqqstrrqrssttuttuvutusttuvvtvvvutuqmhnqrvxuwy{{z{||{{zzxvwpjikjklntuvvxyzxxyvvvvpuusrprsssswwwwwzyyyzy{zyyyxyzzwxyywvzwxtvvustuvut{�ujmkiiikfhlg[PQWXY^cbbinqtsttrxqqbAT]m���MA[u����t5Hq�����l6UH5448>@CasrproLMe�J553. ';`����v`)T�Z976888;@o��������j�������|�|b[Vc��^��~h�Y+NQSPB(+23583(5t�UBbn_QK+;}����r*;pwXBGV]^]^X8Gi������<T����~*Z�����n*N`nsw]-462.237:>QvwvewF71,*-//-<6-**-24Ig\Uo|A9RbjXH419=. 
%7508<8539EQYUR2*00+,+1-,E]hostvwwvwxyzy{yxxywuD.70&!,I>LQMA"		$AQeX6U?./(*)&'++1f]WmLAhspf=.*)'$LkjU;6FlnF##Aa\XT4"8X\MH/-����������������������������������������������������������������������þ�������������������þ����������������������������������������¿�������������������������������¾�������½��������������������������ǿ�������ʿ���Ľ���¼��ɽ���ſ����������������ǿ��������ÿ���������ǽ���ɼɼ���ǿ��ƾ�ƾ�������ɽ�����������������������������mN0Gd`bcceefgechjllmpoopqrstsssuttsvvuuxwvyz{yz}{~~}�~}}~~xqrv{}~������{wvvy|~��������������|xuqrtstwwwwz}}{||}|}�������~~|{{xwvssqppppqnmnoqrrstv{||{{y{|{}}{{|}~~}||~}{~|}}}}|{{{||{|}y||{}~}}~~~}}}}|}||{|||~{zz{{ywtnigijkmortxwwz{|}}����~{|~|}�~�~}||}~|}|}~}~|}|~~}}~|}}z{z{|{|zyyyz{{}{zyyyzzz{yxyxywxyyzywxwxwwvsttssqqoooponkihffecilhghkklmnpqroqrqrqstvwxvwvvvwwvwwuuwvstuvuxvuwxvwussnkmqrruxvwy|{z{|{zzyxwuvqgeggikoruxwyyywvwuuttstsqnmpnpsqrsttuwuuvuvwuuvwttvuuvvustvtutttttsrrrrv{siilllklkjji\ZW_ca_`glponorqkgsqjldITVHTNRR1BU`gmpiQ8M[{����Z6PE:>>=ADFWknlkpOGb�M7462'9[����q_+#j�^:98;;<?Dl��������i������|�xbZXe�����{��X)FHGEF))124:4+7w�WB_h[RK)<}����q,=qxW<FYa`ac\6Eg������?S����|)[�����k.RakprZ/553.,/8;=Ruus\k=2/-,.0/074,,*.24FghbuzE;OadXF33;=- $8506:7749HQVUT2*./+*,2-+D^hlpruvwuxyzzwzzzyywrC/70&!JQMIII;!	
+$,.,2323//&+*(*+-3[q}p6@gy{q@-)*' NkjV25@KI7"!>bODL31]_?-����������������������������������������������������������������������û������������Ľ�������ÿ��������¾��Ŀ���ý�����������������¿������������������������������������������ý�������Ⱦ������������ľ�����������������ƽ���ƿ���������������������ǽ��������������Ŀ��������������ʾ���Ƚ���¿��Ƚ���Ǽ��ſ���������������������������lP*#*.9BKVYW\^[^^bcbedefdfhigihfhghjhijifjlkmmmpqoooorssrqrqpnklosuvwzyyz{zwxtqpruy|~~�~}~��}}}ywspnmpqrrsuwz}|}|}|~�������~~}{ytpnmmkjlkkljjkmmmnqrtyx~}}}zz|z{}|}|{}||||}{|~y{||}{{{{||{||y}}|||}|||}||}|~~zzz{z{{{yzzxupokfeglnqsuvz{y{|}~�~~}~~|}}~~{}�~||}}|{z{{}~~~|}||||}}}|z|||{z{{zzxyyyyzzzyvvvwyyyvvvuwxvwvtvuusrqnmkkiiiigfgfggffiimmmnmnpsrqrsttusttuuttvwxywwvvvwxwvvwxxxvvvvvxwxxwvvvtohinpprvyvuwy{|{}|{{{{xxxtkfffimqsvyxzzzywvstututrsqqrstsrstvuvwvuttvvutuutstsrqrssttttrqrqqnmolmmkljihffhiieff`^bdhdahljklnppmmhLTbjqohbFR\]VJIURQVSN?9QXSZUMU><RKGRSIHEJNO\\WUELYYNB=:6*<Kainpe_-0w�]>?=ABEFGi��������n������}r�o_ZXa�����z!��W+C??CC*(/2373+9y�U>RPOPN)=~����r->qxVAE[dcdd]7Hk������<R~����s'Y�����j3Q^horY+461,+/:=>Usqojm:2-.4<411::.+*-3/Kh_ZprA3PheWE65?=, #862787738M[a[R4,/-,*+1,,C^elpsuvwvwvxy{zzzzzxs@06/% ^^\_e`@ 		"+-.00000.&/*(**-:V[iv@Hksoe9+))'
+'Z[U_<094.2&#?bF6<0"D\F"
+	���������������������������������������������������������������������������¼���½��ÿ���¾���������������������þ������������ľ���¿�����������������������������������ý����������������������������ſ���������������������������������ǿ���ɿ�����ȿ��������������Ŀ��������������ʿ���ȼ�������ƾ���Ǽ��ǿ���������������������������mR,%.7KXWWXWYZ`fe`dfgfeghgfeedeeffddddffdfddeeceacdecebbabecggijkjkkkkghhggiikmoprqrrtsrtqstrppmnlihilmloqpoqvwvwyzz{{{{zzyyyywsokiffgfghjjhijklnpsuvxv|z{�}��~}�}~}}}}~|{}{{{{||z|z{|y|~{{|||yzzz{{{||yyzyzxvuttssrnmjhghkpstvwyz|||�~~}||}~~~~~~}}{y{||z{{|}|}~~||z|}}|{y{zz{xyyyyyyyxwxvxxrusuwwwutsqrsqqqrpqpokjgfgihffghdbegfgikimortsqqsstuvvwvvuvwxwwwxxzxyxwwvwxvvxyyyyyvvwwwyyxvwwtohinpqqsvvvvxzyy|{zz{|zyxvsnhedhnsuvvvyzxutstttuttuvttuttstwwuvwwutuvutsrstssrqqrsttrrrqqpmlijmmjhgggfedddfeacbbaba`aa`diilqspmmkkkjnopnl__cfb\Y_\OUWUMF[lRGSLJDL_eRNRROEM]SOJFDCIVSJIE@9*:@>EDDMUR,3d�hTVSORVUWr��������qj~����h]^]ZUU^�����o��R,A:;;9('./230*?}�PBPMMON*7����l)?svV?G\dggh_4Ei������6U�����s)\�����k1N^gnrY+68/++09;>Rronqq<300=K81-97,*)-2/MiX[rq>4XmlXE64<9+$63067760:RYZ[U1,1.)(*2--C]glqsuvwvxxz{|zzyzzxp=.4/'!<ABGKL8	#+./0/000.%.*)+,-6BPru;HgEDH2,+)&'NHFa<KdjhM* %Eb@.0* 
+�������������������������������������������������������������������������������������������������������½�������ÿ���ſ�������ÿ�����������������ÿ���������������������ü������������ſ���Ŀ��ǿ���ý����������������ƿ����������������������Ⱦ�������ǿ�������Ȼ���ƿ���þ�����ƾ˻�����ɽ�������ɾ���ž�������������������������������mS3 !,9;FVWVWWV_kvzuu||sssutneeeeeedceccceddddcbcbaaabbbba_^b]`^__b]]\\\]_^`^_bab_aabb^^_^`a`_^^`]^]]_aabcbceefeigfhmjmmlljjjhgfhdeedcccbcdfgghfgijnqrrtttvwwy{�������������������~�|�}{~}|}||}y{{{{yz{xz{{{{yxyyxsppoponpnkkegjiotuwwzy}~~~~|}}}}~~~~||~|}{zyzz{z{|{{{|~}||{|z{|{{zzzy{{yxwwyyyywvxwwxtvttvttromllolkmnmkjkjieegighihhighihjkmnqqsutssuuvwxvuvvwwuvvwxxwy{xwwxvwwwxxwxywvvvwvwwyyvvvurkikmpqqsuxwxyzz{yz{||{zyxwrigbdlnqsuvwxwtstuwvutuusrsttuuwxuutuvttuvutsrrqqqppppqsusqqoppmlkijieddecbbbaadcbbb`abba___behltusomlnpnonoomolmnkjkhcdd_`dbfgbgcge`bef`a]\\YY_XQJCEJLSRDDHEB=FD@CEBEJD,#4elgkh\cd[cx��������oaajgZUQIQTOKPUXefdcQ#��X+/(++.#&*++,..F�S@KMKOO*7uy}��o/@ttR>F^jjkna8Jl������9Y�����p+]�����h-OaiorX+67/,,/8:>Tqpqsq>500BJ21/95,+),01Qnijtt@;Xuy\E5299-'74/589639FKOPQ-*.,)(*2.,F\fipuuwwvwy{}{zzz{zys<-5/& #!$"	&,-/0..00/%/+)),/6Wexc7@H3**,++($"Jkpo2H`roS) 'GaD20(	
���������������������������������������������������������������ÿ�����������������þ������������������������������������������¾����������ƿ��ÿ�ÿ���������������������½���¾�������ſ������������ž�������ɼ���ÿ��������������������������˿��������������������������¼�������Ⱦ���¾��Ƽ���ƻ˾������������������������������������nT9'$%4HBASZZY[\do}�{{{|yzyz}�peeceedbcbbbdecbedddbbbbbcbba_``__^\^^[_]^]]\]\]]^Y]ZZYZ][[XYZ[\\\\[\\[[ZY[XZ[ZXYYVYXVVXYY[WXXXYVYYYX[]]\^^]]___`^\\]]^`_Z`b`fefehjlloqprpqtuvvvtvxvx{wuyu|{uy|z{|xxx{{z|z{}z{{{zzz{zxwrmmkklkkjjhcgmnptxyxyz}��}}}}}}}}}}||yz{xzzyyzz|{{z{|z{~|{|{|{zzzzyyzxyzwwxwyyxxwwwvxvvxvvspooljjkmjjkmonjjjhffhiijihgihhijkmoprtuvuuvuuvwwusuvvuuvtvvxyzyxwwvuwxxwxvvvxwvvwwwxxxwvustplijkknqruvwxyzzyyz{|{z{yxwtqkhgikprutssrsvvuttsttsstuvuuwvstuutsustusqqppoommomonppnoonmlkiffeaabb__`_\^`ca_b^_]]^]\\__fkqqqpooppoopqroopnommmliigdcghnkjplonkllkopmklkhihfb`ZZXVWVVWTPQUOQRKKOQF431CG@:5984=Z|��������c[VUSRMP][PHCGOMSVQE=4��^02'"#%$'1>O|�\;BHA:5)<U]gdhT,;SSB=F\cjmo`@Mj������<U�����l)^�����d1QajnqS065/-,/7:=Vspqrn=5/,GL50088,+*,./OdYhup:<W{v\F4010,!%86/467519DEFHK+),.)(+3-.C_ehpuvvwuvyzzyyxyyzzq:06/&!
		%*,-,,-00.# /+)+,,/37@A0.4-)*()))%'EZgv=ObdeN''D\>8/%�������������������������������������������������þ������������¿���¾������������ļ������������½���¾�������Ŀ��������������������������ļ����������������ƾ��������Ż�������½����������������������������ľ����������½���������������������������������Ŀ������������¿�������ǿ�������Ƚ���ǻ�������˻��º�������������������������nU>2/.5VYHOZ^]]_fouvwvuuvvvxy�|lcbefdbcbbccbcabbabbbcccbaba^^_`_^]]]]^^_\[[\[^_\]]]^^]]]\\]\[[Z[]\[]]\XZY\[ZXWWXZZZUSRVWWVVVWUVVWUUWXWWXXWWYWZYWXXWXXWWUVXVVUXUXXWVRMUYWWWUVWW\]YYY_[^^`abbddefhgiinljlkosotomrrqtqoljiihghehgfcgmortvwwux||~~}}}~}~}|{{{zzzyxyxxyy||{||{zy{zy{zxyxyzxzyyxyxwwyvxxxwwwywxwvyvvzxtrmkljgihjigghihfeghiijjjhijghhiihinpqtvwwvvwvvvvvvttuttuvuttuy{yyxwuvwxxwwwyzxxuvwvwvwxuvvtvurojihmnoqrwxwxxyxwx{{||{{zyzvpjfgikrrprsvvvttusqursuuvtvvutuvtsttsstrpoommnmkkkmjkmkkkhhgdccb`a`^_^_^[Z[]`_\_]^]]^]`a^_filmmmnooppqrrqpqrqqnnmoomkigfgilmnoppqpqopnnoqrqqqomjgeccdedeejlgefc`cabbcg[WQLHFEDCL^a^daggh[F>KPNMIFJ]VH?;=AHGII=9<y�z�H"!!",V����\N@>9531>@CKA@:3?EC=?ISVZ]ZTBP_�����tAQisvx�Y/\�����a9O^gjpU19920/16;@Unmosp:51/JN612<9-*+014LbR`un89\vs[D3,*** &53./0241:GKJIH*)/.*(+1,/Gaklpuxyyvvy{{z{yxwxyj839/%
+%+--**-00-$!,,(),,.-,++,*+*)***()% 3?bu:TL092 $CX(,,#���������������������������������������������������������������������������¿�����þ�������ý��������Ŀ�������ƾ�����������ÿ������������������������������������ľ������������¿������������ǽ���ÿ���������������������¿������������Ǽ��������������ÿ�����������������������������������;���ɿ���Ⱦ��ɽ��û�������������������������mYEBGE4RdXS]```bdmwvwwvttsquvx|ujcecdcaababbccccba```___`b`\\^[]]\ZYZ][ZZVYXWZYWYYY[ZZXZZ[[Z[[Z\]YZZY[ZZYY[YXVVYWXVTSTUTSSRQQQPQRONONMOSSRSSSTSSSUVWTVTUVWTSSTTUSUUUTSSTQNSSQOTTQRTSRONROOQROLPVSRONRPQPRQOXVWYXYVXXVZ[[\YYY]_``chjqpmlmnpqrutswwxzxyyyvvvvxyutxuvwyy{yyyzzxwywwwxvuwvvzvvuuvututtvvuuvzyxwwyxw|zvsnmmjkhhliffffceefgihhjhfhiiiihiikmpqrtvvuttvuutuuttussuxuttwy{zyxxwwxxxwvvwyyxywtuxttywvutwuturpmlllostuvvvvvxzzz||{{{{{xvunhhggjikrvvwutsqqurrtuvuttssuvusrssqqronommllljgigffggecc`]]`^^]\[]Z]]\\\]]]\[]]\]^]_cbehjklnlnoopooqrqqqrpqppnmklmmiheahjmoppqqqprppqqqtrrnmlkkigecbceijjlmkjjjjiileca`]\\]^agiilhgc_TNIQURNCCFMKEDA?:>;;@<8-@]}��R51*'%)*09BIO\l����iEK=:9747>C?B>;=EOZPADR]ORQIG@MWgkd]TC<JNROFN?6OnuvwuC3ET\^hN1A@><759>BThhhol;732<:224A<.,-/05Jc`dul69XuoUC2++*)(:5,+*043;MTTVU+.0-(),/,0H`hlquwwxxvxz|zzzxyyvm639-#%+,+*,,...%%/*(+,/.,+,--+++*)()**%	"EPpe/A1 %HT!#"
�����������������������������������������������������������������¿���¾���¾����������������������������������������������¾��������������������������������������������������ļ������������ľ���ÿ��ƾ���ƿ���¿��ɾ���ž������������ʽ���ÿ���������������������Ŀ�������ʽ������������ſ��ƾ���ȿ�����ʾ�����������������������������mYGESU5>_[T[^^__bhu{{{zxvwvvw{~�sgba_______abcaaa`_^_`_____\]\[\YXXXYYXWWUTUUVTSTTSTUUTVVWWXWXVUXWVZXY[WVXYWVWVWVSRSRQTPMNLGGGEAECCABCGJLMNNPPOPQRRRQTRTSRSQSQRRPSSSSSRQRQRSSQQPQPPPRPOOOONNPNONMLNMLMMMOOPOLOQPPNMPOROPQSQQRTVX[\X\[ZY[][]``bcebefedddccdfhglilhkprpmonolprormoornlmpnrooooommoqprstsuuvvtsussvurrnmjggggffedddccedefgghgfhggigiiiklmnpqrsstttstvtttttsspuusux{}{xvvwyxwvwwvvwywwwvvxxuxwvtuwvvwuwvsrooqppstttuwxxyzzz{|{||z|ytpkf``alquvtsqprsqrrstsrsrtussrronmlmljmlkkkihfeddba`_]^]^\\Z\^]]^_^^^^^`_a^^\]Y`_`cgjkjmllnjmmmnnoooopnppnnpnlkmnokkhec`ekmnppoqrppqpprppommmnlkhggc`_`fhillmjjmmmihhghggghknmnnniedcdbac`]WMHNQQUSRTQQQNJFFGCIPIDEDFGCHIKPRSQLGCA=F=6>?==>@@ADDB>>AIWdRJM[lLFIDADP^ZQMGDHITUNMG@>@KQNI>>37BINIF98NMPRKB@AEIWZXUT:;:=>:637B=741/05GWbgla7>NgbMA2,+*) +>7/-+132<QU_aX,.1.*,-1,2Iahortwxwvvxy{{|{wxzwm326,#	&*+,+,,-..$+2)*+++*)*+--+,,**)(*,&	 )127'"$$JV%%'
�������������������������������������������������������������½�������þ���ý����������������ÿ���ƿ������������Ŀ��������������������������������������������ƿ���������������ž�����ſ��������������������������ľþ���ƾ�������������������������������������������������ɽ���¿�������ž��ʿ��������û��ȿ���������������������������mTHIVYC1W`ZY^]``adlsssvtrqrrpuvuug``^Y]^^]^___]\\^]^]]]]^]]\[[ZZ[XWWUWTTUSQRSSQOQQNRRQPPQQPPNQPNQRPRSRTSSSUSSTQPOPQOOMOLJKJEDBBA@?>?==BFHHHJKKMLONOOMPNQOOQPPPOPPQQQQQNLQQPQOMNONONNNMNMMMNMMMMKLMMNLKKLNNMMKKMNLLLNLLKMNPPKMONNNNLNJLLOLHMPNOHMNMPKLNKPNNMOJIKNONQNORRQNOTQSSSYZXV[[][Z[]]]^\]`ba`bdijihgjilikkijhgfdbddcda_baabaccedeefhgeeddccdegghhkmmonprtussrsssssrorrrtwy|zwvvwuvuuwxxywvwwwxxxxwvuxvuxvvvwywwwutrrqsspmrstttvxwyz{{}||z{ytqga_dlttsqppqqqrqrqpopqrqmponjiihfhbdgggfbb`_``^^_[]]\]]__^^__a^^accbccddfdgfhhhjlkklnkklkopmmlloomnoponmoomlllonkgfc[]ejonmnnpoppppoonmonlmmlkjhfd_Z`__cfgfgilliiiiiijlllmmmlkeeimmlkkjhcPGPWZ^\\\[[[ZWPTWXZXSQTXWZYVVUUVRPNLILNPQPQPRRQRMPQNOKMOOSTLPPNLGGFFGJJIHGFCEIIMIAD???BEGB?=;;BEBDA:8=^eNMHB@AQ\RPIB@;=BDB>:5:E?BA6328DGLIAB3ENVSH>4.-+)!2>;642443;MX_VJ/0640/03.2L`hoqtvvwvwyz|{{{zzyxm/.4.$	&,,**++-.+#*2***++**-+-.,++()((+,$$SZ%1.#%!
������������������������������������������������������������������¾�������¾������������ľ��Ľ���ƿ���½�������ż��������ÿ�����������ſ��������ļ��ƿ�������������������������������Ǽ���¿���������������������ƾ��������������������������Ž���������������������������������½�������Ŀ������������ļ��ɾ���������������������������mVHJV]P/Fc]W_`_`acdffcfeeegdfee`b^`^^^\]][]^\]\[\]]]^^]^^]][[[YZYYYWTUUTTTRSSTSQQNOPOOOLOPMMLMMMOPMKMLKONLNMONMNNLOMKKLKJLLLJIGHHGFFFGHGIJJIIJKJGJLMKLLMMLKMONMNNONNLNMNMMMNLLLLMKKLLKLJJKKLKLKJKJJKJJJKKMKKJJJKKKJKLKJLMMPKLMKJLKKLJKIKKJLKKLJJIKLKKJHIILKMLHGIHFIHHHGIJKLLJIJMNLMNOOOONNOORPNNPORNPQRNRTPRTRRPUWVUURVXVVVURWXY[Y[\`_^]`_`__`^^]_``_`aabceegimopnnnnppnnnpopruwzxustuvuuuvvwyvvvtvwtwwvxuwvuwwwvxzzxwuvvvvvvtrstqliopsvvxxz{|{{{zzwpe`elqpopoqprqqqnllkkkjeggfededdd_acdedb`bbacaaededefhhihgggghhihikilmllnnnmlmkllnmlkjlkpplklmnnnnpnmmmmmlkmlmljeeeb`bbikkmlnnqpnmkoononmmlklljhfd`b]]^]a`_bccbdegghiiihihediilprqpoonlmaRU]`a`a`b__\YTY][\]ZVV\]_^^^_^]^]\]Z\]^\]^`_abbc_`_``^b_^_\ZXWVRPQPPSOLLKKIIIIGEFE@DEE=CC@?CBBFCA@==ALPECB??ANXHF@=;;@DC?976;RE?=966=LHEC;:8`sfZA9652-*$(:@DJI=;65:AGID<05;<:87517Oainouwwwuvy{}{z{zzyxl1/1-#',,*)+++..$.1+*,*+,,./130,*)))*,*")WU+2K5%*%
���������������������������������������������������������������¿���������������������ÿ�����þ�����������������Ŀ������������ƿ�����������������ſ�������ƿ�������ſ�����������������Ƽ������������½������������ǽ���¿�������ſ������������������������������ʾ������������ƾ��������þ���������ʿ���ƾ��ſ���������������������������mVGKXb[A3[c[^c`bdegffefddccbcaaac`_aaa\`]_\^___^__`^^^][]\[ZZ\[[\]\[YYXXWVUUUUTSSQRPQPOMNNMLMLLJLNMMOMKMMLKMMLJLMLLJHIKJHJKLKKLLKKJKLMLJKLLLLLMKONMNJKLLLLLMMLIILLKKIKIKJJIIIHJJIFHHHHIFGFFFHHIGGGFGFGHGHIGHIGFFIIHIIHHIJIJHKHIHJHJJIIHKIGHIHJIGFHIIHIGFGIHJGGEGEDEDCCCDFFGIFFFIJIFIKJIIIHJHIGGIJHKKJHIIKLJJKLKIKJJLMLKMMLLLKNMNPOOQRRSRRUTTUYVVVVWVWZYXXWZ]^`acdfgfgfcdeefhiiloromllmnnpqqrqroqrqssqstrsrsssttvuvxxwwvxxxuuwwxuutrusussrstwwzz{z{zztnb]cghjkmmnljijhhdeecb`eecbbdefgfgggiljhkkkmllmnmomnnnponnnoopnmoonommlmnnnlknmmmmlljkionkjjlnkklnmmmlkkjiijljihggdc^Ycimkkllnoppmonjmnnmmmmllifc^_\[\[[\[][]]__aabbb`_cddeilpsssrqqpnkf\S\_caaaaaa\YW[^``_]WW]```bcbbbcb```_bbabacdddddcdeehfhgijfdba`]]_]]]\YYYZZXXWYWSTSRQQPOPPOQONOOKKJGGC>A<>ACA=??>@B?<=A?@>>:79;;<=>=>JFBA><=WhdX<89740.+5GWGIJ?;78???<84/BLDKC:74<Tgloqvxxvuvxz|||{zzyyo211*!
',+*++,-..#-0+*++--+,7@D:1-**)*)*"*WU7Na4")-)	�������������������������������������������������������������������¿���¿���ÿ�������ž�����������������Ŀ���Ŀ���������������������������������������������������Ŀ�����������������ƻ��ƿ��������Ľ������������ǽ���Ŀ�������Ƽ������������������������������ȿ��������ǿ��ʿ���˾���Ż���ÿ���������ƿλ�����������������������������nTGLageV6Ogadhgijklkjjjkiiijifgiffffeghgcdbbbcdcbba``a_]]^]]]\\]\\\[YYXVUUTSTQQPPNMNMJIJLJHJJHHHHIKLJLJLLKLNLKLKLLLLJKLKJLLJJKLKKKLMLLMMMLMMMMOMNLMLJMMMMMMLKJJJKLKIIJJKJGFHIHIHIGFEFGFFEDCEFEFFEFEFDEGFEGFDGDCDFEFGEDEFGFEFGFEFGEGFFEFFFEEDDDDDCCDCCCBCDDDEDDB?>=<<;<==?ACDDDCGFFEEHGEEGGFEFFFGGGHFHIFIIGHGHIHHIHJIIJIIJIHIKKHIJHHJKKJJIKKLILLKNLLLLOMNONQRQPTTUSUVWVVUWXY[[Y[__^]^^``a_ceggfcefegfhhgfgjiklljmmnpoqqpsssrrruustqxwuxvvtsrtrstvyxwvtrbXY[]cdhigea]bgjgikkmkmlkmmopoooqnppppqrqpqqqprqqpqpoprqporrqporqqpooonmnnnlononmnlkjjmljkiijihgjkikjhhhijkljjhggdc][`gjjjkmmonnknmlommmmnmlkkgeb`\[Z[YY[]]\Y[[\][^_abdfgkoqrssttrqqokhcWY]aca`^__ZX^_bbbb`[XZ_abdecffccbcbbefcbdeffddeeeeefefghhgecddddcdddb`_`ba_a_`^^_\Z\[^[[]Z\Z[[\Z[WSQOLJIKKJLIHGJKKIIKKKJIJGD@CCFFGF>>EA@CDBCNNGDB><:887<?NECC>=;<E=98777UiFH?886BZjnpswyxwuxzz{||{yz{yg2!10' (.-,,--..+"*/*)+,--,,=FKB3,++*)))"  *ZVE^`2%-01"
����������������������������������������������������������������������������������¿��������������������������ľ�������ƿ������������������������������������Ŀ��������������������������������������������¾�������������������ýǿ���̿�����������ƿ������������������������ȿ��������û�������ȼ�������ͽ�����������������������������nYLQcnldE>ddekkmnnppompqononopnmlnllmmmkjjjhgijiighgffeeddcb_aca_a`^^ZYXVVTRRMJJKFHJEDDFCHFFFACBECFIGHDFJKMMKLLLLMMOLKKILLLLLKJJKMKLMNMMLNNMMMPPOLLMLLLLLLLLLKJKKKJJJJIKJHIHIHIHHGGGGFDEFEDEEDDECDCCEDEFDCDBEDEDDDDCCCCCEECBCECDBBBDDBEDBA@A?@@@>>=<>>??>=>?>>><976434689:=>?A@A?ACBACDDCDDEEDCEEEFFFGDEFDEFFEFFEFIGFFFGFGEHGGFHHFFFGGGJHIJHGIHHIIIIJJHHHJIILIIHLKKHJLLKMLMLLNPRKNMLOPQOMQQQRTTTVUUUWSPUUXXXYZZYZY]_`_]_`bbbcegighklmpopropnmmjklnqqjgYPNNMNSZa]`aciilmqppootsqrrqpppqroqqorsqppqqrrrppnrppppqsprrrpprrqpooqnonmnmppnnmlkkklokikjgligfgihgfegghhgihgeie`][]dfiijkllplklmommmmmmnkjjkifb`\[[ZYYX\]\Z[]]^\_behilnpssrsrrpqpnmkif`ZXX[]\[[ZX[`abadbbYYZ_ccdeddddd`cehfefdeddcdefhgfhffffhfgdcdffddeddecbccdddabbbbbacbc`b``aaaab``^Z[WWYUUURPPQOQRNNPQRSTSSTWRTSSRQSSTQSRQRNOQRSQRPONLJGD?>CEFDDA;57;:==;<D@><:77Njoqruwxxwvuz{{{|{yyzxk0'1+$	)./..-.00-#..))--+,-2DLE@6-+**++)! &('")WNCX[4
&/00)!����������������������������������������������������������������ÿ����ÿ��������������������������������������ž������������ſ���������������������Ƽ��������ÿ��ȿ������������ƿ���������������������ʾ���ż�����������������¿�������ȼ������������������������������ƽ�����ž����������¿�����ʺ�������ɽ�����������������������������kTLPankj[:\fadkmnpqqsqttroppqponopoppnomlmnlkkkijjkjjhhhhgfiedgdccaab`a`]_`_]`_]]\YY\YVVXUSTSQQOPOSQOOOMPQRPNPOOORRPQNMJLLNNMNMLONMNNNNOOPOKKLKMMMLKLMLKJKLLKKJJJIJJKKLLKKKHGFGIGGHIHGEEHFGEEDBDDC@BECEFEDDDDCDEDDCCDCBBCBCBBCBDCBCEEDCDB>>><<;;88889889:;<=<<;<;::98:;;;;;:::;==??>>@AACABAA@@ABDBCBBBCCCBCCBCCBCBBBCBCDABCAA>??@ABBCEEFFFEEGFFHHFFEHHHFHHIHHGGHHHHIIJIJJIHJJKJFJKIKJJKIIIIIJIKKHIKJIIJKIKLMJHKJLMMLLMLPOQPOPPOQQUTSRVTRTVYXZ[VZZ[ZUNG@94/.8?MOSY_dchejkkhjknkkjonmnlnonolooljllnononmlmmknnnnloonomonlonmoonlkmmnmjmlkkjkjnmikliiigefgedcdgdeeefcddd`^]`dihiiikllmlljkomknlkklljkihgca]\[YZXXZ[\\_`\`cceikklmorrqtqrnomkkjihda\\YZXXTZ[adbcacc`ZX[_acc``decfeeeggefeedeeggfhggggggfffhgffgfeefedeeedeefdbbdddceecedbcdccdccabd`^_^_]]]\Z[YXYVUWUUURNRRQSPSRQTTWTWTVWYYXWYZXY[Y\Y\WQQPOOOQRPOMLLLJJEDBBBA>@?@f~zuruxzxxwvzz{{|{{{zym.)'
+)-00/0/00/" ,1,+.+*++/99=73-***+,*$%.45( ([P9JN3*243/$����������������������������������������������������������������������½����������������������¿��Ľ�����������������������������������������������Ǻ�������������������������������������������������ɼ���ļ����������������������������������������ľ����������������ʿ�����������������ƾ������������ǿþ�ý��������������������������mVLPalklg@Kd]\kloppqtrtssrrrqppppppopqpnllmklkhhhhhhgeddedcbabab`^]\\\]Z\]\^\^]\_][^][[[^[Y\\\\\\\]^\\_^\\\Z^]\ZZ[[X[[YXWWX[YWVWWVUXXVTVUTUTROQQONOOOPOOMMNNNMJKLJLJKKJKJHIIHHIHGHHIHFFFHEGFGEEDEDC@DBACDCCBDCBACECAA@@AB@BBBAABCDDBCCBA=9:;73110/.//002335799:;99:<;:<==>=<<>=>>?@???>@B@??@??AABABB?BA?A@??B@A@A@??@?>?>=>=;999:<==???@AABCBDCC@CDEFEFFFGEFFFFFFFHHGGGIHHFIIHFHIHGIHGHJIHHIIIIGEGHHFHGIGIIIFFGHGFGGDFFFEFFFEDFHFGGFEGHHHCHKKJMLHF?9/*%# "*39<AAHNSQTYWWYWXXZZ[][`][^_]^^`^^\^b``cbiedgehecededfghjjiihjhhhefkfhfehhhihjijkhfgddedcddbacfdcdbba^```\abfjiijljjjllkmijlkjllkjjjjjihhfea_]\][YZ[]]]]^adfimnklmopppqqsnmkkjjhfcebb`]]]^eadecdacb^ZX[_accdcccdcddefebddfeggfghhhiihiigggghgifgeedcbefedeegecccdedcdefecdfffeefccbabcca``a`b`]]_\^][[\WVXVSSRRRPRSSRSQTSTUUTSUWXXWVUSSSRQRORUTVVZZWYWZVSSSPONMI>^��wtuwzxxxvy{||}|{|zyh2+./0021030#!.2-,-,+,,.++-,-,*))*,+# "-4<:- 
&XP0@E-'154.$��������������������������������������Ľ��¾������������ÿ������������ÿ���þ��������¿��»���½��ļ���þ���������������������ſ����������������������������������������������������������������������ɾ�������������������������������������������������������������������������Ŀ�������Ƽ������������ƿ���»��������������������������mYLPclmkkP;e`Xilnoopsssqqrrqqooponllkmmkjhgfdecbbdcbec`__]^^\[[[XWVSSTUUUXVVTTVUSSTVWUXWWYYZYZZ\\[[^\\\]]]]\^\][\\[Z[Z\\ZYY\YYZXYZXYZYZYYXYZXV[WYUVWTUTVUUUTTTRSRSRRPPQOOMMPNMMNNLKLKIIJJIIKIHIGGGDCEDEDEDDBACBAAB?@A@?ABABA@@@AA@@@AA?><8973/,,+*))()*)*+,/23676668869:::;;<==>>?@@A@>@A@AB@@ACBBBBCBCA@@@??A@A@A@>?@@>??@?>===??>>?@AA@@AABAAACA@ABCBCDDDDEDCBFCDEEFGFHGFFFGGEFFEFHFEEFEFGEFHHGCEFGFHHGGGHGGFFFCCFFEDEBCCDECABCDDCEGHGCCEFCDCBA<::61,)*((').17:;;=>CCCDDFFECHIIJKHLIKKKNNMMNIJKOPMMOSRSSRUSRTTUVVYXYZ[]YZ\^ZY[]]]_ba`aa_adaaa`aca_a^^___``___]_`\\_bdfhkhiglmkjjkkijijjhhjkkihhiiggfhgc`__]ZZZ\]\[[^adhikliklkmnnnpqolljhhhecfdgdbddcgfdfbdcc_\XSX_abcdcededccdddddegefffhigiigiihghgggfgggedeeddfgeeefeccdeffeeghggfgghgfeddfcbccbb`babefaabba_`_[[[[]][WVVWXVUUTWUVXWVVVXWYVVVSUTUTRPSRPRTUVVZXVWVYURTUPK?U��xtsx{{zywxz}~|{z|{|f2%$#! &-101103230$(44011/-...+-.-.+++-+++# !15=50!
(YS/9B-**.'!	��������������������������������������ý�����������������������������������þ������������½�������ÿ���þ������������ÿ�������ſ�������������������������������þ���������������������ǿ��ƿ�������������ƿ���������������������ſ������������ǿ��������������������������ýǼ������������ƾ���þ�������ƾ�������������������������������kXIO_gigdY:XbV\hjlomoqpoopnkljlkkljjklljiggd`a_`cdccdb`\YVVXXUUTTTRQOMOOOQPQPPPRQRTSSSWUTUUUUVWVWUUWWXXWXXWXXZYYYYVVWXWYWUUTTTTRQRQRQRQRUUTUURRRSRSTSSSOTSSRRSSUTSSTVRTRRPPPOOPQPPQQNNLLPOKMNJNNNMJLLMKMKIJJIIIIIHGIHGFFGFFDDCDB@@???>>>=<<:865531//--/--./126788688778::9;9;<<=>>=<>?>?@AAB@A@ABAB@BCBAAAAA@?AABB@@@ABA@=??=??@AA@???AAAACBBBBCBAAAACBBBAABDCCBF@ACCDEEDDCCCEDDBCBCDCDEDDCBCCEECBDCADEGGGFFEDCDDBEFDEABCDCCDDCCBDFEGIIGFEDEFGGEDCA@>:::9887:<<=?@>@@AAAABDBCBCCDDCCCCDCCDEDDECEGFEFFDFEHGHEFEGIKIIJIIJKLLKMKIKNQNPOPRRSSTSSTTTTSVSQRQPQRUX\[Y\]_`^`dcghljlljnlkfehigggfhhfgghhhfefhgeeea^[[ZYXX[ZXYZ_behfgiijkjllmnmmlljhiihebdaededbecbbecbbb][WVX_`abcceddcbbcdedddedefffeehgfhhgghfedcgffddeggeeffdddefdefgeggefghfgfhhededffccdacaabaabacdcbbb__]^^^][[ZZY[ZZ[\\\ZZ[]]^\]]][[[[Y[ZYXWVTTSRTTSRTUSTTRSWTT=Q��wttvz{zxxzz}||yy{zze<?IGCBA@=9963/-+)($!! !2;6744326442)'8AE9477422011.01/..0/.-&# %)(&&!
'WM26D4 " "	�����������������������������������������������������������������������������þ���¿�������¿���������������������������������Ŀ��������������������������ſ���Ļ���������������������Ǿ��ſ��������ž������������ȿ�������������������������������ÿ���������������������ż��������������ſǿ������������ýĿ���������������������������mXLQ^feec\JCcZS]ehjhlmmmmmkijiklklkjjjjijhigdddfeddbcba\YYWXTURSTNONJNLIHNMMNNJPNIPRSSSTUSTUUUTTVSTTTUTTSTUVTUTTVTSSSRPTTQQRQRPNOQQSQPOPQSPPQMONMPONNNPMNMKJKMOOMKIHIJJEKKIJJIIJHHHIIHHGGIHHHGHJLHJLLKJMKKOLLLMLMNLMONOLLKIFFCA>;:887665:9:99;=>@@<==;<=;<=;>??==<=<;<;==>>=>=<=>?>=>???>@@@@A@ABAAAABAA@@@A?>@?@A?@AAA@A=???@?@AA@?@@@@ABBACBCCBABABA?@@@@?@ACCCB@BAACBBAA@@ABCABBBBBECBBBAABCA??B@CAACCCDDCCCBAABBDBABDECBCB@AAABCEEFHEEFFFHGHGGEDECDEDCBBDCCBABA?@A?@A@AAABCCCCAC?AA@@A@@@@A@CCBBCBCCEECCCCCDCDEBCCDDCCDDCDDEEEDBEFGFEFFDGIHGHJHHIIGFGILQRQRTUWXYZYaaefdegigf`_ccaba`bbbbbaadcaae_abd^[UUSTTWX\WWX\_bddffeghhkjlmkjiiiggggeeeeedcdcdc`ddaa`^[WUWY^`acddcddbcbbddcddeddggfefgfffeeeffddcgeceeeffdedddddegecccefedegfdfdeededeccdbbbbdbbcbaabbcabcaa__^]_\^`]^b`^_^``^^[]^_`__``_`_^`_^[[[ZXWVWVUSTTTSRPOORXDR��xvuvyzzwx{{{z|{z{yye<MXZXYZa`_\WUWNMNNIEDBA=9765468;>DORJIA=AAB=<=15[keQLBGB:::9:99955388672)%"!!!!"!  
!D>-?fH'������������������������������������������������������¾�������ÿ���¼���ÿ��ſ���ü�������Ľ������������������������������������������������������Ŀ���ľ�������ȿ���������������������ȿ����������ľ���������������������������������Ⱦ�����˾���ÿ������������������������������Ǿ���½����������������ľ����������������������������~lXJP]ffeffZ?`^NTdgiijjllnlllmmllkmljkiiiiiifdefgeeddcdfb`a_`]\\]\Z[WVZTUWVUUXUTYYVWVWYVXYXWUWVUTTSTVVUUYXXVUSWWVUTRTVTUTTQRRQRPQOQQRQPRRQRQPSQPOQQMONOQPNLMKLMNONMLLKLJJLIIJJGGHGGGGFFFDDCFDCCDDDCDCCCDAEDBBBBCBA@@ACBBDDB>:9550/.,,,,),..//0/26:>>?@BBDCCCCDDEDCEFDCBBBDEEDCBACBAAAACA??@BA@BBBEDCCDCBCCBBB@@CA@@@ABA@=??>=<@>=@AB@BB?@A@@@BBACDBBAA@?@?>?@?>CBAABAAA@@A@A??@BCB@AABBB@AAAA@@A@@?@?B@?@?@@BBB@>@A?@BAA@@@BA@?@@?@A??@@CCCCBBCBDDDCDCCCDDBBCECCCBBDCBACABAABBCCECBAB?@?>?A??>@@A@@ABBBCBCAB@@ABAAABB@@AA@ACCBBB@ACAACBAABBAABACBCCDDEEFDCCDCCEEGHEFIJJJHJMORQQUTQPRSUWUVZZWXXXXZ\[[ZYZXYSQOPNPPRSUUUVVW\aabbdcdfhhihhggjkgfeeedcefcbbbdb`ca_`^[VTUV[_^`ccbaabacbbcbbcdeedcffhgfhihffefedeefdccccdfddedbbcdeecbbeecdeffcdcacbbcdbabbbdccabbbaaa`bbbcaa`_``a`a_``ab```_``_^_^`a`aa`accbab`_^^]]\Z[ZYXXYXVVWPQOR@U��uvuvyzyxx{|{z|{zzzzg8S`bdgglkjhjdggbaa__``_]]WWYZ]bbcbbda`^`]^][Y]UQ_d[WVLUQHGFHHGHHFEGFHEJO+1C3-,*&$&$"%&%$#$%$" ,T{b0$"
������������������������������������������������������¿������������¿�������Ŀ���ż������������ž�������ſ����������������������¿�������ſ�������Ŀ���ž�������ƾ���þ�������ȿ�������ʻ���Ƚ���Ľ�������ſ������������ƿ���¿�������ȼ�����̾����������������������������ľ����������¿��ɾ����������������ƻ������������������������~nYGQ[ffhihcHRgUL]hjjkmnlnlmnnnlmlmmkllljiiihgffghggecdeedb`bb^^^]^]^Z[YYZZYXYXXUXYYWXZZZWXYVWWXWWWYZXXXYWZXYVXYYWXSUVUSQPNOKLNJIHJJKLKMLLNMOOMNMNNNNNNOMMNNLMMOONLLMMMKIMMKKKIIHHGIHGGGHGFFFGHGEEFEEEEEDFGDBBBCBA@AAA@A@@>=<987430.,,*+,++,+,,+/04668:=<;<<<=>=AADEFFFGFHGHGHIHGIHIIEHEFGFEEFGFHIIHGFGEGHFFFFEFEFFGFFEEDCACA=???@AA@@@@BABACDBACDCCAAB@@A?????AA??AAB@??@@?>@>@@A@@??????@A@??A@@AA?@?@AA??AA@?@?=>?@@@A???@@>???@@>=><?>??><>=<<====>==>?>>??=???@@@??>??>>@@?@@@A@@?>>?BA@@@AB@AACCBBABACA?BC@@ABA??A?>?B@>@??@@?@???>???@><>??AACBB?@@@AAAABAAAA@AAB@A@<?AABACABBCDHJKLJKIIKKKNMMORMMKKJKILJJILNOOQRTZ[^^]`baabcbbdcddddabcbbbb`^```_^`_^]ZVTRUY\^_^`caabbabbabcbdbbdfcegigghgfeeefcddcdedddcdbabdccddcaddgeedcbddedccbabbaab`aa_aba`baca___abcbaaaa`_`^`^^`aa`b`]_^_____aab`_acba__a``_]\]\^^_^]\][[_WVTRDW��ywuvz{yxx{|||}{z{z{f7Udgjkjjkmkmjkkhjiiiihkljiihjknnqroprrqpmqllllkhhhgdbe``_[YYWVSTTQQPOQRT>-&&1LYK@><71/113600,310-!?vzmD214.&"  
&%�������������������������������������������ÿ��������������������þ�������������������������������������������������������������������������������������ƾ�������Ƚ���Ľ����������������ɿ���Ǽ���ľ�������ǿ������������ſ������������Ǿ������������ƿ���������������������Ż�������ǿ���ɿƾ����������ƿ����¸�������������������������mWIP\ggffggW@dYJQdiikmlknmmonlmmlmnlllljkliijjjgiiiiggghfefeedbbbc`^]^]\]\\][[[ZYWZYY[ZYXYYVTVXZXWYWWTVVYYXWUUWVXWRSPPKMMJFABCA@@@?A???>BFFDHIJJKKMMMOOMNPQPQNPPONNMMOMLMLKLLLKKIIHHHHIKIIHGGGGGIHFIIIFFHGEDEDEEFECEFDCCAABB?A@?=;:7556542113323567979999987788:<>=@AAAACBDCDGCDFFIFFGFGHGGHFGFHGHHFIKJIGGJKIJHHJIHHLGJIHIIIFFFGFFGEEEEFEHFFGFGFGGFFFFEEEBCCCCCBADDA@@B@@BB@A??>??@@??@@@AA@?ABABCB@@@>?@B@????@?>?@@@??>>??>>?@?=>>==>?>><=<<<<<;==;;9;;=<<=;<>=<;>=;;;;;;;;::<;;=;;:<<>==>=?@>>?>=??@@>>@>?@BBA??>??>==?A?>>>A???@A?><====<;<<<==>==:==<==>==>======<<=><<::;<;>>==>@?>?>@>=?@@@ACDBBCA?BCACEDCEEFHKKKOOPSTTVVXZZYY[WWWXXVWXYYY[[Y\\[YXYY[[UQPPTZ`]b_^`_`b`eb`_bcbca`adeceffffgfeeeccddbdddccccfdbbcddcbbddcccdcbbbcbbaa_abbaa`_^``___a``__``bacb````___^^]_````a__a^__``_`bb`aba_`a^_ab`^\\[]\_`]\]\]^[ZZRHX��{wwx{zzxy{|~~|{{{zxc6Q_fiijkijjmkjljklijjijlmmmkmnoqqrrstsssqrsrsrppqmqpoommonomjnmhgfddbcb_[WQQUYXUONMICA@ACFEA?BEIG7 
0Ke]QIBBOF<4/020/,*&%'74�������������������������������������������¾��¼�������¾�������ÿ���þ��������������������������������������������������������������������Ľ����������������������������������������������������������������������������������¿�������������������ľ���������������������ż������������ǽ�������ǿ���ſ������������������������������lXMP[dddceb[CX_PJ^gijjlmnlknjklkklkikjjjjljijhhiihhgfhgfeegfeedccda_`a__```__^__^``]^_^]]^^\[\\\\[]][[[ZZYZYYXWRXXXVTSLPOJFDCDDA??>A@?BDFJLIJJIKILMMMOMMMQPONNPOOMNOOOOOOMLNONKNMJKKKJKMMKJJHGGHIHHIHGKHGFFGGFGGFFEEFEEEDDDDDFFDDBBBCA@@?;;:8;==>>=<=<>>==:;9:<:9;=>?@>>@??@>@@?@@B@AA@ACBADDBBBB@BCCDBBBCEEDCDGFFDCEEHHFGEGGIJLFHKHFGGHHIIIIJIIKJGGHHHHHGJHHIKHFIIGFFFFFDEDEDBDCBDECCDDAACAACFDDFFDDDCBBCBAA@@@AA@A@???@@A@>??>=>?>>>=??===>>>=<=>=;=;=;=;;==<=<<<>><;<;:9;9:78889:98::999::;:88;:9:;<;:;;;9:==;;<;<<;;:<><=<;==>==<:;999888:9878888777678886678998:99:88766789::;<<<<;9:;;;;;<>=>=>?=<<<>=<<?@>>?=;??????A@AA@ACAEFEFFDGIHIJHIJNMLNONNPPQQMMJJJLLONOSUVVTVXZ[YWXYYZ[[Y^]^`__`_ada^babbeadacaacaecbbbbba`_b`ddca^_ccbb`a``_baa_]^___]a`^\__]]\``b`^__^_^`^^^_^`_`__^``_^_`aa^__``_^`aab`aa``_]_]\[\[\]\][ZZWF[��{xvwxy|zyzzzz{zy{{va3O^eggeffhiiihhhiiihhiijjmlmmmonoqprqrspqrqqtrqrrrqpoqqopooononnoommoomkjmmigebc`]]\VTUTNOQNLJIKFA76=>><AAA>>CDB?>AA?=;;<;>B4	
�������������������������������������������������������������½������������¿������������ľ������������ǿ���������������������¿������������Ž���Ŀ�������������������������������������������������ǿ���ƿ��˾���û�����ſ�����Ƚ���������������������ľ�����˿������������Ļ���ÿ�������Ƚ���������������������������������������������lUJOX_aabaa_ONhVLR`ehhhikjghikkjiijijgihiiiiefhfgfghfffecccecdbbba__aa_^^___^________``_^^^^^_^]^_^^\_^]]\]]]_]\\^Z[Z\WWVSNNNNNKLNPNNNQQQVUVTTRUUTUSVWUVUUSTPQOPQPPQQOPRPPONNONLKKLKLKLLLKLLJIKIKIIJIIJGHGHGHIHIHHFGGGFHHGGGGEHGFCEEEDDCCA@@>?@@?@@@@BBBAA??;==>=?@@?A@ABA@A@??@?>>==>>??A@AB@@@?=@??AAA==>>=>>=>?=>>@@?>A@@BDCA@BCAADEDDEEGHFGFFFFFGGGGHGHIJIHIHJIIIHHIHGGHIHGIJJIJGFHHFHHGGHHHIJIHHFFFGFEEGFDDECBDFBDCABBCB@@@?>>??>==?=<===<>;<=<:;<<;<==<<;::;<<<<<;;::<<;9::99:;;;99;;9:;:;;978878888;9779988:89999988887978999989778878886676544555456531123324454210/022257876776679:::9:;;<<=<;=<=;;;<;<;;;:8;;99;;;:::;<?=:<=><;<?>??@A>?>>?>>=??BB?<??@ABABCDCCFEFGIJHHJIJKLLMOOPRSTSTTTUVUW\YZXZWZY\\[[Z\\\]]\\]_]^_`^Z^^^]^]\]^]^^^][^]^_]_^_]_^^\]`^`]]]^]]]_^`_^]``]]_\_]^]\^_a_]__^_^_`__^_`^`^]]_\[\[[[Y[WXZWB_��zwwxz{}zyy{{{{zzz{yc4P^giihihhihhegffgedehggfiiijkkklnmkmoqoonmmoopooomknonmonllllmmnmmllnmlnkijgfefffgfdedbec_]\]WVUWURLJDDCB@>>=<:::<<<<=>?BB>.
����������������������������������������������������������������������������������ÿ����������������������������ƾ������������¿�����������������������������������������������������������ľ��ɿ���ȿ������������ƽ������������ƽ������������Ŀ�������Ļ�������������������ÿ�������ƽ���Ƚ��������������ſ�����������������������������nUGQW^`_a`a_YE_ZMH\cdghhiiihikijjiiijghiihghgfffgfeffffeddddddccb^``a___\\^__]^\]^^_]^^]\Z[[[[[\Z[[\ZYYXV[ZXYZ[Z[\Y[Z]YYYXVYVZZWW[WYYXXUVXWXXZUWYXUXXYZYYXUWWVVWWUTWUUVUSTTSSRRPORPPPONONONLOMNLLMKKIKKIMIJJKJHJKJHIIHIGHGGGGGHFGFFFEEEEECDCCCBBBCDEBBADDABA@A@@AAAACDCCBCBABAACA@?>>?>>??@AAAA@?>@>@AA@>====<<=?=;:;>==<>=????>=>?>??@@@@@@A?@@@@A@ABAAA@BCACDCBEFFEEEFEEEFGFGHJHHIFFGHGHGGHIHJHHIJIHHGGHJGHHEFFFGFFGIGEEEFEEFFDCDCACCABBB@CA@A@@??>??=;=>=<<=<:<;;;<;99:::;;;9999<:::9:==<<:;;:878887699:98998:;::98:767878776677767657766545466775443543321/.00/-//-.////01002432456543576666778;::::998878776757866565566787898887:998:999:98799989877887799789:9;:;;<<>=:<;<=<=>>>>?A>@B@BCBEHFDDFFHHHFILJMNNMOQOOPOPSRTVVVWUVWYVWWYVXXYXYZZZY\[Z\\[^]Z\Z[][\[[[[]\\]]]\\[\\[\[[^\\[\\\^][]]^[^_^\]^]^]\[[\[[[Z[YWUYUTTQBe��zxvxz{}|||}||{{{z{xc6Pagjihhihggijhhhhhiggfggfefgegghhjjijjjjkjkhikjghffghhikjhgggffiiihhhhgfedfeeefhhgfggffggeceaa`_^]]ZYXTPNMNMNIIIEEDC@@@=@B@;75/-)������������������������������������������������������¾�������ÿ���þ�������Ž������������ž�������������������Ŀ���������������������������������ſ���������������������������������ǿ�������û�����������������������������������������������������������ʿ���������������������Ǿ���Ž��ɽ���ǽ�������ɽ�þ�������������������������lULSW^``a`aa\LR_TEYceghghikhhkjjjjiljjiiiihhfdefeffdcdefffffdfbcda``a^^\\\]^_\^]^]]^]][\YYZZZZZ[ZZYXZXVXX\YXWXYZXWY[ZZ[XYXWZWTUWUVUSVUVSUUUTRSSUVUSUSVXWVUUUTTUUSTTUVUSRRSSSSQRQPQQSSTTPPPPQSPPPOMNNMNONONOMLMLMNMLKKLIKJHJJKJIHJJJJGHIIFGFFGFDDEEFEDEDEECCCCC@BBBBBCCCDBDDABCDBA@A@?AAB@ACA@BB@??AAABBAA@@A@>>>@@>=<=<==<===>=<>>><=?@@@>==>=???>?=>>>==?>>>>??=?@A@??A@ABBBBCDDBBDCBCDDEFEGEEGFFHGFEGGGHGEEGIHEGHGFFFFHGEEFFGGG
EDFFEEEGFDCGFEEHGFDDEDEEFDBBBBCBB@>@@>>>@>@???>>=>??>>>=>?=<:99:;;:887776888677888:;889:79:9:78778878655665434355552223310/..+))(&('%%),,-./0111223224542442355553555453442331101123200/.../1422343324444545543345443542364355564665887767:978669877::778:::9:;=<;;;==;:=<<=>==:??@>@B@@@@BCCCDEEGEEGGHJHIKNLLLNPOQRUUTVTRRSSTWWWWVWVXZZYXZZZXWWYYXVYYYXYZ[\[[Z\\[[XZYZ\\[\\[][ZZYXXWXWWUUOJ=m��{xwy{{|{{|}}{zzyyy{`4Uagihhghhggijhhijiihghihhihhhiggfiiihjjjjikeghfcdca`baaeed_ddcdededccdfgddegefffffffgfdefadda^\`_`_^\\[[XYXVYTSTUTUTSPMNNLMOOPRRR����������������������������������������¿������������¿���þ��¿���¿�������Ľ���ſ���½���������������������ƾ������������Ŀ���������������������ɼ�������μ���ǿ������������Ƽ��������������������������ſ��������������������������ȿ������������������������������������������ȿ���û��̾���ʾ�������Ǻ��ø������������������������kTLOV^``aaa^\UF\ZQVbeeeeiiggghjhilkkihigijhggggffgfeegdceeffedbdcbba`^_```_^`^^^_]]^]\\\\[\][ZZ\[ZYY[[XY[YVYXZ[XYWYYY[XXWUWVVVVTUVUSTTUOQQQRPONRQQORNMRRONMOLOJIMKHKNMKJLKJJIJNKKIHJJIIHJJJKPMMMNJNPMONLMNLLLLNNLMMMMOLLLKMKLLMKKKKJIJKKIIKHIIIKKHIGHGFFGEEDEECDDDCACBBDCBCBBAABCDAA@@ABABBACCBAAAAAADCCB@BCBAAA@EAA?@?????A@BAA??@>>B@@AA@@?>???>>>>>==>A>>@?@@??>???>???A?>>@>>==>>@@?@?@?BCBCCBCBBBEDEEBCBDEDEFFDDDCDDCBBCBDCDEADFCDDECCDDEEEECBCDFDDEEFDEECCDCECECEFBDBDBBBC@?ADCBA@?@@@@??@>>====<<:9::988778;;88::78888:98888656775656654454454334421/.-*&$$##"!#'*++,-/100./0//02222202422113333422103211212100-///0./020/22/012002100//000/.0033210/1211233325544445443533445556456764568867767777787697589999:86578879889:899:9;:89;;9;=>@@@>ABAEDCDEGHKJJGJIOOPOQPQQSSTRRSSUUTTVUVXTUVXYXXVYYYYXXXVVXXXXVUUVVVVUWPG<o��xvuw|{{ywz~~||{y{{{\2K^egghiiihijihfeffhggghghhhghhgffghggjkiigigfegdebbddbccdehffegfcddbdddeedcffgfeeecddbbdedbaa_[][`[]][\[[[ZXVXZZXWVXXXXVVWVWYZWY[������������������������������������������������������������������Ŀ��¾����������ƿ���ľ������������������������������ſ���¿�������Ǿ������������ɽ������������Ǽ������������ǿ������������ƾ���þ�������Ž������������ǿ���
���������ʼ���������������������������������������������������̾������������Ƽ��Ĺ������������������������mVMQZadegfeeb`LQ^VRejjklmmmmmmnkmnjjlkkhjkkijihgegggegedeeececdcddbbbaa`_a`_a_^\]^^``]]`^^^^^]]\\]Y\\^]\]^]\[\[\ZY[\\[Z\[YXZYZXXWWVUWWWXWWVVUVSWUTTSRUSUSRPPOOOOMLLLOMKIKFDBEGHEE==9:<><<?BDCBEFGHEJFCGGFGGHIGKIEHJIIJHIIHKIHJJJIJJHHHJKIHIGIHHJJHIGGGHFFFGGFCCCDDCBBCDCCBDCCCCDCCC@ECECCBBDCCCCCCCCCDBBDBBAACCAADBCCB@@AABDCBDCBBB@?BAAACBB@@AA@???>>@@?@AAAAABAB@@BBBA@?@@?@@?@@@?=>?=>@??AA@?@BACBABA?@??@A@@AA@@BCABB@?@?@?@@A?AA@@@A@@A@AA@AAACB?A@B@@@AB?>=<@>>==?==?@@??>><<>?>>@>?>===>>>=;=?=>>=??>>><<>?>=>>>=;<<<<==<;;;889998998865543234443432220.+))))))+-..-./0210101100102201320011112//01//12100110/0.//0010011001012210/000/./20/01010/12/00000123211022332///020001311133311035334315432344434454322343335323453444345412553344576557856665:;::;::=>==>ABA@=@CBEEEHGHGHHIONMNQRSQNPPQSRSUUTSSSUUSSSTUTSRPG@w��ywww{|||{||}{{}yz}{`4FU`dggghhffiffgggfffgghghffhgggdfffedggfgggfecedfccfdaaceefddcfddddcdefgeedeeccedbdfdbacdb`a_``_^__\^^\Y\\][[ZZZ[YVXYXYZYYYZ\[Z]Z������������������������������������������¾������������½���º���þ��ſ�������Ŀ����������������������������������������������������ƾ������������ſ������������Ƽ��������������������������ľ������������ǿ������������Ǿ������������ǽ������������Ż���������������������ɿ�������ǿ�����ſ�ƿ���������ÿ����������������������������~lUNPTijjjjiiii\DZWJhpopprssrttustqrrrrrsrqqqqoopooonmmmlllkiiikieiggegfccccbb_a`]a_`b`aa^^^`^\\]^^ZY\[[\\]\[^\Z\[XZ^^Z[\ZZY[Y[XZYXWVWYVWXYYXUVTTUUVYUVVTTTSUQRQSTQQQOPQQOPOPNLLKMIGGHDDBEEGGGHHIJIIIGEIIKJFGJHGFHKJHHHGFHGGFDFGEDFGFEDDEFCDCDEDFEEEEEDFEEFEEGECCDCBBB@ABDACDCCCDEEEDFEGEEDDEEDEEDDCDDCBCEECEDCDBBCDEAACDCCCDDCCDBCBAAADCACA@?AAA@??A@@??A?@A@@@><>??@ABAA@@AAAAA@@AB?????@@?@@@@@AAA?A@AB??>??@?@@@@??@?>@?@>?>?===><=>==>?=<=<;:::<<:6:::89999999;8879889;:99;::98:::7989979::::88:;:;:99<;;;:;;;;:;<<;<;;;<;;;;;<;::::;<;:;99897766410///..0.+*))))*+-/1/001110213433234423332011111100//112///001/./0/011/0211112332210..0/012/00/././10/
0/0/0121///-./0.-.0/-/0/,,.////.0-./0100///10./0/020/0100.-..---./0//0./.--../././//012101154342235443554544466548765997578:9:=;;;==?>>BBBBCGFHGHIIKJNMMNONOQQOTQR���{xwx{}|{{{{~~}}z{}z`9DGW\dcfffifgggggffeedfgfhhgfcghegffggfggefgeffdcefffeddddeddedfeddfdccedbcdbbacacdeeca_cccadcba`_`_]``^]^][[[YYYZXWXX[[[ZZY[][\][���������������������������������þ�������ÿ���¾�������ý���ü������������Ǿ��ƿ�������������ÿ�������������������������������������ü�����ž����������������������������������������������������������������������ž�ľǿ���������������������������������������������ý����������������ǻ���ž������������ü��������������������������nYPQVtokhhgffhcBLXKZmnpqprsrututttttttrttssrtrrrrrpsrrrppqrrpmnolmnmknnlklkkkikjgihiifegededefdedfdcbdbbaa_^`a`^^Z^^]]ZZ[YZ[Z[XX[ZZXYZWVWYYXUVUUWTUWVUVQUTQRQSRRSQQRPOOPOPPOONNOPMNNMKJLOMMNQMKNNPMNLNQOOOONONJKMLLMKKJLKJJJIIGHHGFGGFEEFFGDCDDFDCAAACDDABA@BABBCBAA@AA@CABCABFBBCDCDECCDCCDGCDDCCCBBCDDECBCBBCDBDEDCBEFDCCDFDEEDECDCBEEBBCBCDCCC@=?@AAABBAA@?A?>?BBBA@BB@@BA?@?@@ABB@@@??AAAAAA??????@?>??>>???>??>????@??<===<<>>>==>><;==<<<;;;999:99::99987987666643235555434334425525668765687777976676567655787787789865766655665566678878987766421/.---++,**)(*,1112324343333422344442343222322201/133311/01111121120022122101222011/00010/1/02100//00011131/0-/..-/.,-/..-/.,-..-..-,-..-../+,.--.-,,...-+---*((('&'')(())))&%%')***+,,--/../..0/0//./10121221222112133223222432554454667777:989:;;=<?ACBAAC@AFDIOKRZm���{zxy{}}z{{}~||z{|{i6HFGRW[_a``cccedddcceedeeeefghhfegggffecddcbdffdacddfeeccdeccedf]_cedccecbbbadaabcddecaacdcc^^cbdeededbaaaaa]]\][XX[YXY\[\ZXY\\[Z[������������������������¼�������¾������������¿������������¾��������¿�������������������������ƿ��������������������������ɿ������������ƿ���Ŀ�������������������������������������������������Ƽ����������������������������������������ƿ������������������������ž���������������������Ŀ���������������������������������������nYNQV�rebbbb_^ZF?WPKijkmmonoqppqrrrrqppprrpopoppponponnppnnnplnmmmmlmppnjllikjjjjkkkjkkjiikgikhgkihhhgfgff
ggfgfeedcdddddcb`_``aca]]_^`_[[\\\[YY[YWXYWXVUUXTTTTTSRQOROKLNNNONMMPNMKLLJKMMNMLNNNPOOMONMMONNMNKLLLMMKKLMKJKLJJJLJJHIHHJHIHIIIGFFFFEEDCABDDDCCDBFFDCCBBDCB@ACACDCCEBCABABEDCCBCDDBBCCCACB@ABAAA@@?==>@>><>>?@>@@?@AB@ACA@BCBA@ADEBBCA@A@?BB@ACBAA?ACBBBCBAABBAAABCA?BCDBBBBA@BBBB@?A@@@>BBAA?>???>?=>>><?>==>??;==>;<;<=<<:;:9:<;;<:;=;::<==<=;99;;<;979898875554321//.--..//12443535676577655676314223344466684445555444544543355445432222230/00./-././/023214555466555453365345544454433322233433425346755336665544443334234431111111021013322211001110//..0//..-/.-./....--,,,.,,+,,,+*,+*,*+++**+)))(%%$"!!"#$$&$%&&'&((*()*)++*+**+---,---,.//..0./001/000.0001000/000/2310123544465799889:?><>;8=FKUXfn���zwyyz~~{{{}}z{}{z}vg6BJJJMQRVZYZ]^^a__^_^`ac`aa`cca`bfeefdddddcdddedbbbbdccdddddbadc`bbbbcbcbaaacda`ccdcdeccbdcbddddeddcccb_`a`a_```^[[[YZZYYYY[YYZZZZ�������������������������������������������������¿�������������������������������������������������������������������������������������������������������������������������������������������������ǽ�����������������ſ�������ľ������������Ƚ��������������������������þ�������ƾ�������ƾ�����ƿ�����ƻ�ü�������������������������lZOOU�}kaddaa[WJATUF^ghjijjjjjjkkjilljkllkkklkjjkkjjjijjiihfheggghggllkieffffeffhfeggihhihjjjliiihighhhighhhhihhffehiggiighfeedfedfgfegdbbccbdababbacba``_^^^^]Y[ZZ\ZWWVTVVRRSTRRPQNOPQPMOPQPQQMMOPNMNNJMKMNLLILHJKLLKJJJJHJJIIGIHGGEHIHHGGEFFGEFEBACCCDDBCBDDD@CEDDBC@BC@BCCACBFDBCCECCCCBDCCCA@A@BBAAADA@@A@?>==<:867899<;::<<<???==>@A?@?@@@?@?@?@>>>=AB>@?AA?@BC@AA@B@ABCBA@ABCBBB@ABABBBCAA@BBBBBABBAA@??@@@@?AA@@=====>=>>=><<?>=<<<;:::::;;;:;<;<<;<<:9::::79::8999773210/--,,-..023565886568976767::632122434545467776735544411/2223433101221/0321221/110001222111232202233334214521233124321214435434434466776567777755676555778876767543435633544334412021211210111000.//---.--++,-/.-,-,-.--,+,,+-,+*+))''$$#! 
!#%%%&%%&((((&'&&'&'''(''(')('))''*))))**++,-*+,,*,,-/-,+,--,,-../.--////2133223129:<?HIOU]pqtw���|zyzz}~y|~||}}|{}yg2598<=AABBEIKLLNPRTVUWY\[\][_ac_``^^`aa_^__aa_bc_`cb``bcdadccbcaaabcbdbcabcbbbea```b`edecehgigeceedcba`__`_`^__`a_^_^[ZZXYZZWZW[Z[�������������������������������½����������������ſ��������þ��ÿ���ÿ����������������ƿ���Ǿ�������������������������������������������������������������������������������������������������������ǿ�����������������ƽ���������������������ɼ������������ƽ������������ü�������ɾ�������ǿ���ǽ�������Ȼ�����������������������������mVNSX����tqo`P>4FL=Ncfedefihhiihgehgfiihggffgdcegfddcbdddebcdfb`dedgfcbaaa`__`ab`^`]__aabaa`babbabc``aaaaaacacbcccccbbddbb`adccbdedfbbbabbbbecaddedeccbbdaaab`_`da__^^]]\^\Z\[\Z[]Z[ZYXYVX[YXZ[WWXVUVUSSSSURTQOPPQPPPNLLKLLLLKKLLKHHIIIHFGHIFEFFEDDCDCDDAAABABC@>?>@@@@><>@@>@A?>?@@@?A?>A@>?>>=><<=<;:;::9985655530...//26568889<==>?>????AA@@A>>@>=<=?A@>>>?><=?A@A@>A?A@@@@AA@@@A@@A@??@ABA@?CCBB?@AAA@>?BCAA@??>@A??@?A?>=?>=?<=>==>><<;=<;=<<<:;;===<=<;9:9:99<;999967656521112336668989:::98998888899:867689778775786778765545230212444431210210210/0/./011002212331012110113331123221////1//11/0200001101/13322223333333554244454354555565764665556555554435534443323322101.01./0----00/-..-.,.-./....---,,**)''('&&%'&'('((''(*(%''&(&&&$%%$&%%&&%%%&&&&'''&'()((()+*)())*)(((((()*)*)**++++,--+,,,*&#*@JKHHT_dmwxwz���{xwwz||||~}}|z{{{xe40011324454::899;<<:=???@CFHFGGKKJSTTVUUZ\[_`Y\\]__]_\[Z]^`]Z^_`a_```[\a_a```aeacacdcefgfhhghgfefdededddbddcb___^^]a`^_]^]\ZXXYYYW�������������������������������ý������������������������������Ŀ���½�����������������¾����������������������������������������������������������ǿ�������������������������������Ŀ���������������������������������ƾƿ������������ƽ���������������������������������þ������������»��ǿ���ȿ�������ȿ��ĺ�������������������������mYQQU�����~zoT8
)D<:cghfgijihhhiihihhijhgggfeeefgeeddeecddadbcbbbabcbaba_a_`a````__a`_^\^_`_]]\[_]]\^]^__^\^\^]]_`]\]]]\\[\_[\^\^^\\\\[[]][]^\__^____^`a_```b`]^a^^_`]__^\[]^]^\]^[\\\Z\[[[Z[\ZZY[ZX[XYYZYZXXXXWYYWWXWUSRRUVSSTSSRQOMMKLJIJGECBELMKGDCDCCCEFGIFFGFFEDHEDCEEECCEDDCDDABBA>@A?@A@??>=<;;9:697655310..,,+*+,/0144545889;::<<??@@@@A@?>==>>>>?=>?A@?A@>>@?=@>>?@@BA@>>@A>>>=>@?=>>@>??A>@?????>=>A?>?===>??=?????>@?<==<==><>>>;<=;<;:;:;<=;:<;;<:::;;;<::<<<9:<;:867689898:9:988:9899:9887989:9897899899768774676786534544323444221110210/../0//..1210111123221223112321111210000//21022101/.//...,.//1//0./0/01//0111/00/021/0110113322122223323221233232332334421232232320330011121/1001/./0//////..-,+*,,+,,,+,,**++****(((**(*((''&(&&%#%%&'%%&&%&%%&%%&%%'&&$%&&'&&&'&&'(&%&(&&(('&''$$%(%#!0X_[YT[bgrz}{}����{zxx||}{y|}~~}|{{|xb801100//212132233343455665587799:9;;>AABB@DDEEJNPPRVYXXSUVY\[_]`][\_abdb`bbaabcacbddchghdgjffgffhffhheffceeeebb`]``]]]^^__^^\[Z[YZ����������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž�������������������������������������������������ƾ�������ſ���ǽ���������������������ſ������������������������½Ƚȿ�������������������������������������������p[STR�������|sV7 
12,Unjimkklmoolmnmnnpmmljjkjjijjighhgggehdedccbbaabaaaa_```^`a``acbbccbaaaaa`__``_``_`__^^\\]^]]^]^]]\[]\Z\^[[\[Y\\[ZXZZYZZYXWVXXWWYYZ[[Z[ZX\\W\\ZZ\Z[[ZZYZZXZ[ZZZZ[Z[]ZYYYXYXZYZWWYYZZYXYYXXXYXZXXXYYXXYXXVVUXUQPTSSOQPMKLJJFIMMLKHHJEIJILMLMNNPNNPNMMMNOPLNMMNLLKMIKLHFIHGGGFGEEDDCCBA@?>=::64444432234467788999;::::9<<;<<<=:<;;:9<=;<<=<=>>????><=?=;;<=?>?>?@@?A@>>A?>??@????==>>>????=>=>>=<<>>=====><==<=<<<=<:<<<=<;;:886988899889999;:;9;;:9;<;;;;;;;::9:;:;:8::9;:9::;:::9:9899;9868;8778887867674679774453346434433410231200032010013102222222211221233421223202/11121211031332/.//,,.-.---.0.--/./-..../..//..//-/..//-10/0..../.-../10-/0..//../..01001//./241/0021101111211221100/00/0.-.///0///0.--.,.,+***,,+*+('())'''%'''''&&%%%&&%$&&$#$%%#"$#$$#"$#"##$##$"$$"""#$"""%##%5^aada`bmw���}����yzxw{|}{yx}}|}{yz|x_:3221020100110011122210212134402445444656677887<=;=<>==ABEECDHIOKJNRVYTVXZ[[YZ_a]a^c_baabcfbacddgdfcbaeba`b`__``^^`^^]^]^^]_^^^\^\��������������������������������������½��ÿ���ſ�����������������¾�������Ž���¿������������������������������������������������������������������������������������¿���������������������ƿ��������������������������ȿ�������������������������������������������������ȿ���ſ�������ǹ�����������������»��������������������������mUOTU�������u_?+(0)$Bcghkjkmllmmmmmooolonklnllkkkjkkihiihgihfefddcdccceadddecbcbdccdddccdcbcccdddcceddeecdcbaacba``a`__^]^[[\Y[\ZY[\\\\[]\[ZZ[[\Z[YYYZXZZYZYXZZYYZXXZVVVUVRVVVVVVVVWWUWWYXUVVXXXWWUVXWVWWWXXXXXZXXZ[YXWXXXXXVTUYWUTVUTQPQQPQQSPRPPPNORTQRSPPPQRTSURSPPPPRRQQQQQQQRRQTPNOPONOOOONMLLLLKJLNLJLKFIGGEEDCCEA@AACBBCCAABBBBBBA@?BAAA@?>>@?=>===??=>?@==>>==<<;:<<;=::<>>>=;:<=<<<<<>==?=>=;=>>==?>=>=;<=;:;;<<<<;;:;;<;;<;;:;<;<;;::;98:8879899999:=;::::9988;;:::<<<99:::;98889;98:9988:8987899997988878777878785798676876435455444433343233343223332222222122321232432111222001101101323124320/01/++-+-..-/10../,.-//..-,/.--...//../...../-,+,+,--././/.-+--,,,,--,,--+,----.-.----./.---.-./.,-.//////.......,..//...--.-,,-..,,,+*+,*****)))('(''(()'&&&&%##%#
#""""$#""""##"!"##" !###$$#"#!#4`effddhr{�������yzxw{|}{z{||z~|zy|v[942332323533322342133312421333234322134355664457556554675568767768;;;<<>ACDHJIKMNJMQRPTUUUV]ZZb_^__^ZZ[YYXXZYY[\[\^[\]^]b^][^^^_^^��������������������������������������þ�������ƾ������������Ŀ���ý���¾��ſ�����������������������������������ɾ��������������������������������������������������������������������ʿ�����������������Ŀ�������Ž�������������������������������������������������������������ž�������Ž���������������������������������������������lXQXZ��������t_=*NP<>H\dijjkjlkkljjjkmlmnlkknlllkkjiihhgghgfdfeeccdcccccabcdccb``edabaceeeddcabbcebbcdccbceedeccddfdbda```_^^__]\]\\\[\\\\][]][]^]\[ZZZY[Z\ZYYWYYY[ZWWWVVTUTTVUSTTSPSTRRQPRTRTTTTSTTSSTRSSUUTTUVUUTUVVVWVTUTUUVUWSSSSTRSQQPNOPPOSRRRQPRQQQRQRRQORRSNQRQPTRRUQRRSRPRRSTQQQNNPPOOOPQNOPONPQNMLOOMNNOMKKLLNLJJKJJKKKJHIIGJJIKKJIIJHGIJFGGGGGFEFFFFGFEDDDDDCDDCAA@AA?@@??>=>@@@??@>??>==>=;<==<;<:;;;:9;98;<<:;:99:;;:9;;:<;::9998<:;;;:98999:::::;<;;;:;<=;8:;::;;;<:8::;;8:88997777686557676546635566554434344545667766544344345555434545543344344433435343313322333201122221.0000//./01/02/0/0///.,.-.///0.//./.-,..-.,,/./.-/0.0/0/...-00/..../.---.-.--,-,,,++++++,,*,-,,+-+,,-++-.+)**++++,*+*+++--*,++,++,+*+,+++++*+)))+***))*))+)*++***+*(*))*+*))((((''''&$$$$&%$$%$#$#!$((&#$$$#$# 
4bcghgghoy�������yyvw|}{zyy}}{~|zz|vY71101/0/021112223322434442244545542313456544434333220010//0//010.00//00110110025367788<9:<@@CDBDKCCEIJLMMKLLMNNTSRSTWWXX[Y\]\ZZYZZ��������������������������������������¾������������������������������������������������������������������������ʽ����������������������������������������ȿ���Ľ�������Ǿ�������ȿ���ʿ������������ƿ���Ż�������ɿ�����������������ſ���������������������������������������������������Ƽ���������������������������������������������lZVZX��������u`GW��lkhjfdikkhikmidiijkjkolhjkijiiigiihgfhhghefccbcccccbcababccbbccbbcbdddeefdedbcedeedcdbcddbfdcdcbaaaabba`^__^][[]\Z[\[[Z^]^]]]][\]Z[Z[[Z[YX[Z\\XYZZZXWWUWUVWVUVVUSTURSQPTTQSSQPOQQPNMMOOPQQOPRPPQRRRSUVUSSTSSSTTUTTTUVTSQPQPOOPNNQRSSOOORRRTUTTTUQSSURRRQRSSRRQPRPOQPRROOOQPPOPONSPOMOPNPNNNNNPNMLLONNOMNMMMMKKKMKKJJLLLKKLKMKMJKIKJJJJJILJHIJJJHHIHIHIJIIFFIJHIKJHEDGHGFFEEDDDDEDCCBCBABACB@??>>>===>?><<=;<<<<;;;==<::;;<<<89:99:<=<;;::;:;;<;:9:<;;9:::;:::<;::9::8:::99898888999888656786433343244332222334433211211000231113433423223223454332336543331223321111112001210/0/00/.00/.//..-...,...--..-.-.++,,--+,--.-,-/--.0///./0//00..0////.--.-./-----,,+-,+++++,+,**,+**++*++)))((***)))+,*)***+)()*+))+()**'&&'&'&&%'''%&&&&'&&&&(&%&&&&'&&'''('%'((&%&%&&&%%%$#%%&)+*(&%###" 
!#'Bdgghhghiqz|~���|ywwz|}|z{}}|{}|{z{xW50/.-,,-...-/.,/0.-.00//0//12432444222466654421221/////,)))()*+*(*+)*)***))**()*)**(('(&(),+,,.//1224445;<?>=?A@@BCFHJLLLPVSSUSTUV�������������������������������������������������¿���ľ���ü������������þ������������������������������������������������������������������������������������ž�������ƿ��������������������������ɿ���Ľ�������Ⱦ�����������������ƿ�������ʾ������������ȿ������������þ�������ǿ�����ľ�����˿ƽ������������������������������������nWU^b��������x`Qyʼ����������~��{x�~�vvtyulvpnohkpgimigfacfdca]\a\ad__`_]]_Zab``^[ac_aca__`cbcddbedcccdeccccbabbbaa`_b``_^ZZZXWWUSUTSVXXXX]\\YZ\[[]]\^YY[Z]\ZZ\\ZXZZZZYWWVVUVVTUUWVTUVWUUTUUQSRQRPQQRRRQRNPRSOPQSRQPPNRSSQPNNQPQRRRRPOSSRQQRQPRPPMLLKJLMMMOOPRTTSSSPPQNRNOMOOONNNNONKMRMNMPNNNONLLMOPRLPNMONNNNNNMNMLOPONLOLLLLKKLJIJKLLNKLJLLMMKJKKKIJJJKKLKJIKKJJIJIIILKJJJIJKKGJKKIHHHIHGFHJIHIIHIFGIGHHEGGDFEFEDEFDCCDDCCA@AAA@ABABA??A@@?@??>>=;=?@><======<<::;;9:;;::;:::<:98999999::8876778:9988876788765556766444345454435532113111014211334311232232324332324421532000011110101000101121./0/,/1//.,./-...-00.----,-,--,-,--,..--,--.,-+-,.,--.,--.--+++,-,,,,+,,+-,+++,++++,,+**++,*+)*)))(**''))'()***))*)**)**()**)()(()*''&'%&&%&%%&$$$##$"!!"#$""$####"#####$$$#!$#""###$#"$$%%&&'&%%$""""#%'*+,1Iehijiidhkwy}~���~{zxz{||yz}}|}~{yz|vT5//-*,+,,----,+-,-/0/.-...0/..-./10/0/24333111/-,,,,,+*(%$#&$%''&&&&'''(('%''&%%#$%#$"#""#$$##$%&')),,-.12246655678;=::<?ADFEEEGHH�����������������½����������������¿��������ľ���½��ƽ���Ļ�������ÿ���Ž�������ƽ���¿��ſ��������ÿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������ž�������ǿ���ſ��ɿ���Ǿ��������������������������������������oXW`Z��������x^S����������¾�������������������������������������vy}�srsprlkoplkkipjfjih_ade`Zab`^`_b_]a_``\]_]\]aZ_XYS[SKED@DFCFCDDHMMPQTVVYWZ[[[\]][[[]^_^^]\\[]^\\][\][YYXZZWWWXXVWWVWVVVTUYVTRSSUUTSRSQRTTRSTSSSSRTSSSQQNQQQQPRSQPQPONOPPOROOLLJKKKLLMLNOOPN
NMMMLJKJGJJKJJJHHIJJFFBCEDGC?BBA?ACC?@BCDDDEGIHGHIIJJIJIJHJJJILKJKJLKLKKMLNOMLLJKLLKMNLKMLLLMNKJJJJJJIHIJKIIJJIJKJJJKJGHIIHHIJJJHHIHIHHIIIHGGIFHIGFGFFFGGGFGFFFHGEDFHEEGECDFEDFECBCDCBABBBBCBABA@@@???>>>?==>===><<<=<<=<<=;<:;:9::99:9:9997777875679776576564775445543444543453333255323443432221113431113221100/0/1/0///0//1000.,.0.-.//.../-.----.--,-.-,.--.-.----../...-,--..-..--..,-...,++.++,--,++*,+,++++***(**(*)))(*(((''()(&')((*)))))()'(('((()))('('((&%%&&%%&&&$$%#"#"##"""##"#$$##"!!""" !!"! !!!!##$$##"! "#$" "##%'*...0/-/Fghjkhjdfnyz|}���}zyyxz{|{y{|~}�~{{}xS8--,,-,*,*+,---.,,---.----//.,-.,-./../00///.0/.,--,,+++))((*)*)*('&''&''&&&%%#$%$#$#""!#""!!  "%%&%(())+,-/132223357656776667868:�����������������¾��������������������������ÿ���¾��ľ���ý���ÿ��Ŀ���ſ������������þ�������ž��������������������������Ľ������������Ŀ�������ž������������̾���ƽ�������������������������������������������������ɿ�������ʾ��������������������������������������ÿ�������ɿ�������ȿ���ǿ��������������������������������������nXZ``��������y`Q������������������������������������������������������������������������������������������}���z{{{tsxkpaa`^WY[UWPLJPUSXVVVV[[WW]Z\YWY[[Z[\ZW[X]ZYYXYW[[WWVYYWYWZXYYTWXUWWUWWXYXWVWVWUUVWWVUWUUUTRSSSSSTSPPQRQQQRQSSSOQPPOOPPNPNMMLMNNLLKLLKIKKLKKLLJJKLJIJJKMIJKJJKIIIIJIJGEFDGHECEFEFHFA?>@BECEFFEBEDDHBHGGGIFEFHHFGGFGHHIJJKJNMLKMKKIIMLLLIHKLMLJKJJKKKKLKLLKJJKKJJJKJJIJJIJIHHHHJIIIHGHIEGGHGGGHHHHFGGGGGEEFHFFGHGFFFEEGEEDFFEFEEEEEDCDDDBDDDCDCDEEBBCB@@ABA@@@>>?AA?@@=>@?>??>=<>@?<<==:;<<;9:9:::::9878899975764455566655434323433443433322110233001000111000/0//00/1./21/0..00/0//1/00/.-,--/.-,,..,,-..--,,-.---.//.-./-..-,-....-/.//,--.,,--,-+*+-,..-++***()+***)(()''&&&&''%&'(((((((('(%%&&&%&''&'%%&&%$$#$%#$$#"#$"!!!!!!"!"""!"#"!#"!!!!!   !  
!"%$#$#    !##%)./342/-,+*)+Gmiiiikfgoy{|}���}zzxyz{zzyz{}|zzyvU2+-*++,+*+,+,,+***++++**,,..-,./.---,..-....-.--,---./00/.-,,-/-.-,+,,-++**(&''((&&'''%&&#"""""###%&'')''()*+----0/120111211/0/012��������������������������������������¿������������������������������������������������������������������������������������������������������������������������������ľ���������������������̿����������������������������������������Ϳ������������ȼ�������������������������������������������������ɿ���ƻ��������������������������oTYc^��������ybT����Ī����������������������������������������������������������������������������������������������������������������������������~��tuxyprrrqjkhkggkgfacf``]_`a^UY[WW[PVW[XXRRRXQUTURRRTVUTWRPVUURTUWVTUTTRUTTQRRSRRRRRTSRRQPSQPOPNNOOOMNOLONLLNONMNNMKJLNIMLLNKJLLLKLKKIKKKMJIKLKMKJJKLLKKKLJIHGFFGIHIHHHGEDCEEDDDFFEECCDIGHJJKJJIIHJJJJJLMMOOKLMKLLKJMMLJIJIKKJKJKJJIIJJJHIIIJIJHIIHGIIIIGFHHGJHHGIHFHFGGHIIGGIHGGHFGIFGGGGFEEEDEFGFFEFFDDDDDCEEECDCBABCBBDCDBBBBACB@@AAAAA@@BBAAA?>@AA?>@@??<>?@?==<<==<>=;:;;9888:9788997655333332223344323132333221222322001/032000111///00000/00/01/0/./..,.--./.-------,+,--,-/.-,,,,+,*+*+,,,+,,-.-,...--...,,,-----,+-,-,+*,+,+))+**)(())()*(())'((&'&'''&'&&%%&%$$$%###""!""""  "!  ! !"!!"      !   ##$"""! 
$*-/235310.*(')'(,Ejlkkmkhir{{~}���~zz{zy|{{z}~}}~}xy{uR.,,*+**)()(*)**()()**)))**+++,-,++,+*,-,,-,,,-*+-,./......-,,-.//-..0/.-,-,,,.-+,,-,.-+**('(')((((()*)*)))(()*)**-,,,++*,,,,--,,-.��������������������������������������¾�������ý��������ý��½���¿��þ���ľ�������ǿ���ǿ���������������������ɿ���ſ�������Ǿ�����������������ž�������������������������������������������������������������������������������������������������������������������������ȿ������������ǽ�����������������ú��������������������������qWWa^��������zcR�����������������������������������������������������������������������������������������������������������������������������������������������������������������������}x��ywy|xrxuvnmklkfjihecbeebb`a_]Y][YWZ[WUVSRVRPSSPRPNQPTNTQQPMQNNQPRQQQPQPPPPOMQOMPPPOOONMONOMNNMNMONMMMMLLLKNLKKLMLKKMMKKJJIIIIIIIHFHGIGEGFFEEEGGFEDGGGGGHIIIJJIIJKIKIHIKJKIIJIIJJJIGGHFFEGIGHJIIIIIGFGIJGHHJHJHGIJJIGIJHGHGGFFHIJHGGGIHFFFEFFFEFGFEEFEGFFFFFFFFEEFEEGFFEEEDDEECCDDCCDBDBCCBCBBCBCBCA@?@BDBBBBA@ABA?@BA?@@BA@@?>=>>>>A?=>?><:::::866742/0/-,+,+))**+-/0244555544414345332231/020011220///..///0/000.20/11/--,-.0/.-,+**,,+,+++*+++***)()))''(*))*,+-,*,--,+-,-,,+,--..-,---,,+,+,+**,********+)()))'))((())((('&''''''&%%$%$#"""#"! !!  !       !  !
(03322/-*+*(''&((*,Ghljklkjjt{w}~���|zz{yz||z{}}|}|{||vN.+*)(*))(')*)))('''((())('((()*)()++(*+*****++(*,)+--,,-,---,,---,.-----,,,-./,,-..-..-,,,,,,--,+,,--+,+*(***+)*,+,*)(**++*)+***,*��������������������������������������½���������������������ÿ�������ž���Ŀ������������ƿ���Ŀ���������������������ȼ�������ʾ���ž������������Ǿ�������������������������������������������������ʿ��������������������������ƾ��������������������������������������������������������ƿ�����������������¾��������������������������rVY`b��������|cS�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���}�tywuynnhokmedfcc]_a`]X]Y]ZWTWUQOTSQTMQLNOQMRPRLMRNJLKLLMMLMPPNNNOPPOOLLMMKLLKKLLJLNLKJKIKJJJJIHIKKIJIJIHIIIIHHIJJIHJIJIHHFJKHHJIHHHGGFFFFEEFFGHGFGJIHEFGFGGEEFEFFHDFGEEDCDCEDDCDBCDDEEDFFEFDCBBDEEEEEGEEEGFEGFHGEEEEDDEGFGFFDFDDDCCCCBCECBCDDCCCABDAACCBABBCABBBBAAA???AB@AAA@AAB?=?@>>@><=<<:986740-,*('&(''%#$%$$$$$)*-/256776667457775455643443232342002000010/0/000/.0/../---/.--,******)*)*+*++,+++***)('&(''***+*)*)+++,*+**))+*,-,,,+,+**,+**+++*))*,+))**)())('''&))(())'&'((()(('&%%%$$$####$$#!##!!!!"!       ! 
 "#(141/,*)('&&'(*,//.3Qmkklkljjq|{|����{xzzxz{{{{{}~|}|||{uL1*(')((()(()((''(((())))(''&'))(')()(())((()**(()*++)++,*+,+++)*+*++,*++,,,++,+*,,,,,,,,+*+---++++,-,,--,+,++,,,,++++()+*++,+****)������������������������������������¾��������������������������¿���������������������������������������������������Ⱦ���ƿ�������ſ��������������������������ľ�������������������������������������������������ɻ���ſ�������Ⱥ�����������������¿���������������������ļ�������ȼ������������ƽ��������������������������������������pXZa`��������|cR�¼����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����}}�y}t~uvsqnlkngghggf_dd_]_]]ZYUWYXXYUSONRJLNKMOOHMMGIJGJNJGLILMMMMKKIJJHIHGHGGGHHHJHFIIHGGHIHGFGHFGEEDEFFFGFEFGEDFGEFDCGFFEFEGDEEDDDDCDCEDBCCBAAAAAAAB@AAAA>>?@ABBABBECBDCBBDA=??>?@@A@A@ABDCCCCABBBA@@@?ABCCBA@@AB@@BBB@A@?>?@@@@=>??@?@??>><>=<<;<;:::8:878445421-*(''(()(('&&''&')(*/0134565555667877666756567668756656765455775657444444323331100/./-,-+*,+,---./-,--.-+*)'&&&)*(()))()))*)*()()*(*++,+***))))('))*)('())('()(('''''''''''((&'((&&('''&#$%&'%%$$###"!#$#! !       ! !!    
!$%%&'&'+*''&'''&&&'+.0/,,/Nkllklmjis{||���{xzyy|||zyz}|||||{|xF.+))**(((('''(&'((*(((('&&'''((('('&'))(())('()(()**)***(*+*+*''*)'))'''))***((),+*++*((())*+))**)*++,,+****+,++*+**+++***+-,,+*()�������������������������������½���¼����������������Ŀ���Ŀ���½��ǽ������������ſ�������ſ�������������������������������������������������ü���Ž�������ɾ���������������������������������������������������������������������������������������������������þ��˾���Ž�������ȿ���Ž��˿���Ⱦ�����������ù�������������������������qYZba��������}dS�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������w|}zx}{}ssnommgfjifccfcc`bf^Xa\\ZZPQRQROPMPQRNKLFGKGGIHHGDFFFDEGFFHIIHIJGFFGFEEEECCEEEFFDEDECDECBBBCBABBABACCBBBACCBDBAA?>@A@A@A?>>>?>;<<98777899::;;==>?>@@?>?@@>=>??@@AA@@>>@???@@=??==>>>?><<?>>=<::;9998685555555856542332100.../010/./01100112355557789::8:9;:8:::;99898888769:88988777898998777788778877777655444111/0//0/.//00/..//,*-..-,,,+,+,+**+*)+***''()***(''(()('''(''(((('&''''%%%%%&&%$&'''%&'&&&&&&&%$$#%%####"""""""!  !#"  !     "%(+****)'%$!! !! 
!##$%&((('&&%&,Jinomkjkku|~|����}yyyy{{{|zz}}~}}|{|pJ.**+*)(('(*(&''(((((()('&&'('''())('()(()))))())**+()*)****(**)))*)*)'('))***)))*)(('&&&&&&%&&&&%'&'())*)('('(((')**)*)((*++**(()*���������������������������������������������ý���þ��Ŀ���ľ�������ǻ���þ������������¾���������������������Ŀ������������������������������Ž���ǽ�������̿���ɽ���ǽ��������������������������¿��ǿ��������ʿ��������������������������ɿ��ʾ������������������������¿������������ſ��ȿ���Ⱦ�����������Ĺ�������������������������nX[`a��������|`R����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{����x}|�xxuyqtvppkpmheffdb`d^`\`[YTWXXXURLSKKNJMIIGEFCAEDEFDACA?@ACBAA@BACACABBBB@BCA>@?@A@?>AA?><96433131/.,.0.134569;;:<<<=???>===?>==?>?=<<>====<<>>==>==<><<<;;<:9987889976677688676652334543544322125567667777654366666768999:;;;9::;;::::99:;:::9::88999:99:98898798877777888898776544343223211134323342223200121///./0/....-,,,,++,,,,*++)*)(()()'''('(''%&%&#%%%%%&%$%%%%$$%$"#"""!!"#"!!       ""!!"   !"'-145/+*'$#" !!"$&%&$###!!! 
!")Nkmnljigkt||}����yyyyxy{{{z}|}~}}|z{sG-+**)'((')+(&'())(&'()'&()))('''(*))**)()(())))**+**+))*+*)(****)*'()())(()())('&&&&%%$$$"$%$%$%$#$%$$&&&%$$#$%&&&'&'&'&())'%&&&''�����������������������������������������¿�������������������������ǿ����������������������������������������ſ���������������������ɾ��������������������������˸���Ⱦ������������ƿ������������ƽ������������ȼ���������������������˾���ɿ������������������������������������������¿��������������������Ĺ�������������������������oWYbb��������|bS����Ȱ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z~wxxwwsqpuhhej`ab\`[[[[VTTRPNOLNJIKMJFGEBADA?@?@><<>=:666-00-/,-,02446889<::<:<<>=<==<>?@>?;=?>==>>>??=@>>>>===?><=?>><<<;9:<;::99;9:;:99988666877877755657767668887765455232134356466764688877988878887788788889898799888998899877898:9788678899975654556654553235643344444344311220////0022211220/00..-.-.---.-,**,+(())(('(''%&%%%$##""$"! !!!   ! "!    
$(+.133540+)&%$""  "&)+)('$#"!!!!"$#!"(Rqonlkjhkt|~}���}xzzz|}|{z}}|}}|{zzqF,''('%&'''(('((*('())('(()(')((&')))))())()'()++*)(())()))((())(()'))(()''('(('&%%&%%%%%$#$$""#$#"##$##$$$$#"$#$#"#$%%$$&''&$$$$##����������������������������������¾�������¿��¼��������������������������ƿ������������������������������������������������������������������������������������ν�������������������������������ž������������¸��Ƚ�����Ŀ����������ʼ���Ǿ������������������������������ɼ������������ɽ���ź������������ü��������������������������oY[b]��������}eT����ñ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������}}����y~x}wytxtqnjjdji`abba^\^\WTRPIKHJGHIJIIIDFFDBA?@B:=??=:>B?>=?><<??;<;?<=?@=?@?>?==???>=<;=>=<<;:;<=<;==<>;=;:;=<9:;;:;:;99988:989:957865675423343330//0/.01001332456455556555555887677766877789878888898667566779:87766777755665434565455555344422221111023232132231110221210/2101111.//./--,---+,,,*)(&'(&$%$$$#"!! !!    
"$'+156542.+*'&&&%%%%%()+,*(&%$$$##"#%%$#%(Pmpommljkv}~����|vyzz|}||x|~}|}|zzyqF+###"""#"#%$##"%%$%&'$$%%&&&&''('&&'))(((()'''()*(('()(&'&&&'&'''()''''(&&&&'(((&&'&%%&'%%%$#"$$$""$$###$#####"""###$#"""##%###$"!������������������������½���Ļ���¾��ľ�������Ž���þ�������ǿ������������ž������������ǿ��������������������������ļ�������ƿ�����������������ſ���������������������ȿ������������ʿ����������������������������������������ľ��������������������������������������Ŀ����������������ȼ���Ȼ������������ù��������������������������q^_fc��������~eZ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������������������������������������������������������������������������������������������������~�x}{yvxusropllegjj^^__^]Y\[UVSSSQINJJJFDGFHDBAB>@BAA>A>>>?@<:@=>?A@?@@@@@AA?A@@==>>>>=>>>=<<=;<<=;:<<99::;:8888765432110232343322345554554444455555454454445435676676755555446688787888777767676655444466553433233322321221000110321114422242222210101212010//00//.,-,++-+()*(&&&%#!!! !#""$###!"$*/123452/+*(&'&%%(''+***)(&&'(&&&&%%%&''''(-Rqppomklkv~~~����yyzxy||{y}~~}}}{{znB*$#!!!!!!"!   !! 
!#" !""$#!#$$#""$%$$%$$%$%&'')'(&''(&%%%%&%'('&'&''((''&&'(''''(&&'&'&%$$$$#$%$%%&%%$$$#$$#"##$$$#"!""#"##!!#"!�����������������������������¾����������������½��������������������������þ���ÿ�����������������������������������ż�������ȿ���ľ������������ż���������������������ǿ������������������������������������������������������Ǽ������������˿������������������������������������������Ǿȿ�Ǿ������������ż��������������������������q[]g\��������{aY��±��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���������������������~�������~��������������������~���~�����|�~���}}�w{~t|{{qu|yspsspjnng`fff_`^_YXXZUVWWQOLGJGGDFFFCFDE@??A@@=?=::?>?=;?>;?@@>>?><?>=>>;;:::9:;9::889998887887789878888968767768655656455444454433332335434555343466654563322343242333543222331/00/.--,-..0...2553342222312221112212111001/.-..-,,*)((&&%%$"###"" ""   !$(,/4552.++((%%%%''&*,+)+'''('%&('('(('''()()*)1Umpoolklnv}}}����~zyywz|{z{}~~}~}zzysB(" !  !  !!!""!!!"! ! !""""""#""""#!"#$""$####$%%$%&'&&'''&&'&&'(('''&%%'&&&'&$%'&'&%&&$#$$%$##$##%$$##""###""! 
!"������������������������������������½���ÿ�����������������������������������������������������������������������������������ƿ�����������������ǿ�������������������������������������������������ǿ�����������������ƿ�������ȿ�������������������������������������������������˿�������ʿ�����ɿ������������������������������������r]aga��������zfY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���|�����������~������������~���������~~�����{�~�|�~}z��|{~~{x��y{�x~z}yxz}{w}{yt{{|hyywrqptonlqkihce`]\]XYYZVVQPOMMLGIGGIEABCE>C><=?=<=?>=>@><;=====>><;9>><=<;;;:89:98898898887867677666557444544553334544423421012301//1112111//01/-//.,++*+,*-+,,.232111243221223222222220./0/.--,,--**'&&%%&&&&%$! !$*01211/,*)(%$%$&'(,,)*)(&'''&('''&('((('&'**)(**2Xnqpnllkkv~�����yxzzyz||{{}|}|}}{{yr@#! !#"  !""! !!!  "#" ! !    !!   ! 
!"###"$'&$$$%''''&&%$$&%&'(('''&&%&&'%%%%''&%%&%%%%%%#"####"#"""�����������������¾������������ü���¼�������ü�������ſ���ž��������������������������������������������������������������������������������������ſ���¿����������������������������������������¿���������ɿ�����������������������������������������������������������ƿ�������ʾ���Ž��̿���ǿ�����������Ĺ�������������������������pV`fc��������|`Q���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���{���}���}~��}}���}~���~�{�~�~���}�~|~{|z|p�}}xyy|yyy|zw~zwuvv{svwzutr}orowmnnoriiqifcicZ^b_WWZYRSVTNMMIDGILFCB@FD?=?:=A=<<==:<<8;=:99<=;;:98889:787866569765676754444312343322112100110000/00.//.///.-.//.-.10//0121021121133201120..0//0-+***)(&',,))(''&&#!&+.0/.--+)((&%&$&()**)(('&''&(((()))''(('(('&&((((*,3Zrrpmlklmw~�����xxzyy{{|zz}~||}|z{{v>#!! ! !!"! !"! !#""#"!! !! !"    
!!!""""   #"#$###"!##"$&&%%$#$$$%$&&%%&&&&$%'&&&&%$$%%$%%$#"""����������������������½�������¾���ÿ�������þ��������������������������������������������������������������������������������������������������������������ý������������������������������ʿ���ľ������������������������������������������������������������������������ƿ����������ƾǾɿ���ɽ���ȿ������ż�������������������������qZbl`���������aT�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���~������~��������������y���~���}{|��~{zzy}zzxyx{{{v|rxw~vzv|uwwwxsswtsvtvqyxypvuxmstwnmu{rqpskilmi`fkd^adcY\]_UTWTRPPPLMMMHCDB?A?@>?>?===::9:9<:98698767489898765565454553466544333321233321112/032221123220011101101/.//.-+)'%&&()1=H;31011/'!((,00.-)'&&%$$%%%&(*+*(''&&''&''')*)**))*)(()**))***),.2\oqpplknpx}������zxzzxy|||z}}}|{|{z{x>$   """#####$$$""$$#!"!!"###!"#" !"###" !!"!"!!"""" !#"#$#!!!!! 
"#"$%###"#""!#$$$%%%$$$#$$$#"##��������������������������������������¿�������������������ÿ�����¿������������������������������������������������������������������������������������������������������������������������������ǽ�������ɽ������������������������������������������������������ý������������ÿ��ǿ���ʸ̼���ν�����ɼ����º�������������������������rY_jY~�������~`O����õ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���|���}���w{�{���|{|�~|~~~~�}}z~}~{}{v}{{{yzzyz|xzx|x{v|wxsxxwstvytuuupquuootwqktvultuvkpptinqugkosifhljhgkd`ab\][[YWRUTLOLKKILHBBDA@>@>;;9:8778799857457477575465556654344555443446643434332200//-'%%##)28;AM[E;;;<=<1*244100.'!!&')((((&%%)&&&&&'''''((())*)'()**))),+*,-3\qsoolmonx�����zwz{wx|}|{{|~}{||{ys=("  !"#"#$$$%$"$#$%$$$%%$$####$$%%$##" "###$##!""!#""!!"!  ""!"" !!        
!""" !!!""#""#"���������������½������������½�������ý���þ������������ÿ�������ý��������������������������Ŀ������������ǿ�����������������������������������������������������������������������������������������������������������������������������������������������������ž������������ƾ�������ʷ������������̿���ü��������������������������s\bk]|�������_L�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���|��}{��|���{}z|~�z~�|zz�z{}~{z}�|z{}}{y{�wwy{yxy{xxwzxuv{xvu{yutuuuwtxsrrvttorqrpsssklpsmkmtllqqmmkqnlqtjforicjmg_hniegij`^]e`\YYTSSRNRKKGHGECC??>>@=>;96;:769:8:68:;:99:967655542/-'$$)5DIHGHQ^H>?AAA?319::8::<.!##$$$%%$##&&%%%''&&'%&((()+)())))***++*+-1aqrrqonooy�����{wz{yx{|}{{}~~~||{wq@,(%$#$"!!"!"!""!   !  !"#$$#$$#"%$$%$$$%%&%$%%#$&$$#%$"$$#$#####""" !!!! !"!   !"   !     
!!!���������������¾�������¿���ý�������½���Ŀ�������ļ���¿�������ÿ��������������������������ž������������ƽ����������������˿���ƺ�����������������������������������Ƚ����������������������������������������������������������������������������������������������������������������Ⱥ������������Ϳ���ļ��������������������������pXdla�������}]O��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���|}}�{~~�w��|{|�|z~{y{yvz}|zz|~rx}}xx{}wwx~xvy}yux|xtu{xtyy{tuxztuwvuvvzuuqtttpootkqtrnpstnorqjlnrmilnmipnkflrndjnphkllehjnjkjneegjhgcjf^]]`^W_\WTRRRMOLNEJLHEEE?@>><9<:8985.0=MONKMT`I@?@AA?32;<=?@A?2  !  #%##""$##$""%&'(()(())))+***++/5bossomnnoy������}vyyxxz{}{{|}}~|}zxn@.*)(&&((''&%$%&%$$#$$"!"""! !! #"#$#"#$$#####$$%$#$&$$$%%$$%$&%%%$%$%%#####"!"###!"""##"! !     !!!  
�����������������������������¿�����������������������¿����������������������������������������������������ľ�����������������������������������ǿ���������������������̿��������������������������������������������������ƿ��̿�������������������������������������������������ſ�����Ȼ���������������������������������������������pU^h^�������}]U���������������������������������������������������������������������������������������������tqurbfdckdo{�������������������ƿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����~������||��~{�~~|}}~u}z}|}y{zyw|}yyz|wux{uv|{ytv}zrwxwtwz|svwwuttwwsryussvvusxuqprsqprrqpnpnkiprmiqqnhmojfkmmfjmreilohhlngijpkkhoigfleigneifmdeeb]`_^][[\][VROB0BUSRLOV\I@BCB@=12>?>?@?=2#!!    !   !$$$&'$%'((()((**,.5gpsrpopoox�������|xyyzy{}}{{|}|}|}{zn<,)(''&)*(')(''(()''((&&''(&&&$%#$%$$%$$$""!   !!"##"#$##$##%$&%&%%&&%%&&%%$%&$$$$$$%$$$&&$##$$%%$$$%&%$#"$#""""##"""!  
����������������������¾�������Ŀ���¼��½���¾�������Ŀ������������Ŀ��������ÿ����������������������������¿��������������������������������������������������������������������������������������ƾ�����������������������������������������������������������������������������ǿ���Ⱦɻ̽���ǿ�����������ú�������������������������oV_i^}�������_T����������������������������������������������������������������������������������������q�ylojggd`TOQTPPPOPYbs���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|����������}�����������~�������}��~~�~}��~|}}|~z}|}{}}zx{�yxx{|{syxvuwuxn{ywrstvrswztstxsrrwspuvsopwtprvrmotwqopuromonkkoonhmnqemnogkmnkmnoejlnjikodilkfhkofhhlddfiddekcd_\XQBIXVQPQW^G@BDB?>2 2<?>>@A?4($$""##"!! 
"!"!!"##!!""#%&%$%&$%&''&('''((+4eqsrqpqppy�������{zyzzxz|}zzz{{}|}{zo=-**+**)*)&())(())(()'()*))()(((''))(()('&''&&%%'%%%$#$##"$#$$#%&&$%&%$&&''%%%$%%%&&&%#&&%$$##"###%&%%%$##%%$%%%%%$$#%&%##$%$$%$$##�������������¾��Ŀ���þ�������Ŀ���ľ�������þ���þ��ſ������������ǽ��������Ŀ�������ľ��������������������������������������������ȿ���Ǿ�������ɾ����������������������������������������ʾ���Ŀ�������������������������������������������������ĺ���������ǿ������������ʿ���ɽ�������������������������Ż�������������������������p]bh_|�������~`O��ö����������������������������������������������������������������������������������tQg}}uokhea\XQNPPPMOQVY[�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����~������}���}���{~��|�}~��|||~{|}~|z}��z{�zz}~xxz}{yx�xvuxwxtyzxvwvwuuswutvusttvunttrptyqksvtnoprmpsspqrvprqroqnqqoorqqmpnnknnngkkkjlnmgkkkdegdbfif`c`^ZVTX\[VSQV[IBCC@>@4
6>???AB@;>C>>@A8541../+*++))'*)****))+**)(((('))))+))))+7irsrqpmsrz������yzzyzy{z||{|}|~~}{ym=/---,---,,++**)*++)))))**+****)))*++'()(')))*(()(&&'&'&('%&))$&'&&'(()((('&''%&%%$$%%$%%$#$#####$%$$%%%$$$$#$$$&'$$&&%%%$%&%$$$$$$�������������þ�������ÿ�������ƿ���ſ�������þ���ÿ����������������ž�����������������ƾ�������������������������������������������������ǿ��������������������������Ƚ���Ŀ����������������;���ƿ�������������������������������������������������Ʒ��������������������������ž������������������������������������������������������oU]h[|�������{`P��ļ����º���������������������������������������������������z|w}t�}zunwm{vxrt��������tGg��xpmhb_[WQOONMMMNTUT���������������������������������������������������~zx}{|{w|y�{�|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������������������������������������������������������~y��|~��~~~�{}~~}z�zz~y{�~xy}yz|~yxyz|y~z{z~xxwyw{y{xwwzwvvtutusvwrpstsorrqjqtqkrtslrqsnqtvnrstprqtopqwqpopnoqqpolonjjkjihmkhega_]YWZZYVSSZ^HCDCB?>3 
5=>@ABCBANSPVXRUPPNQLLLHDDCBDEA=;<875613421/.---/-,,./,-3isttqoorq{�����zxy{{z{}|{z{}}|||{{k=0,+-,,./..+,+++++**,++)*+++)()***)+,*)*+)*))(())'&&('(((('(((%'(''(''()(()'''&''%%&&''&&&&'&%&&$#$%$$%%%%$%%$#%&$%&'%$$$$$%#"###$$������������������������������������������������������������������Ŀ����������������������������������������������������������������������ȿ���������������������������������������������������������������������������������������������������������Ⱦ�����������������¾�������ʽ�������˹�����������������Ľ��������������������������rY`h[�������z_Q������������������������������������������������������lcvupl`UOG@C840./(**)&((-*-007Jb9f��zqkgb_XVSRPNNLJKTVU����������������������������������������������~|zwuspmmligdb[[[^[[[j����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���������������������������������������������}��������������~~���|~��||�}{}��z}vz{}rxz}zz{zx|}wxy}xvx|wswzxvxyxtwxvuuuwvuptvsrwsqmvtvqsttmttrkuttopsupqrrmnrqmmnpmmpokkppigmkd_][\\[YXUU[bICCBAB@3"6?ABBDEB@QSSVUNTTQMRNPNMMMLRPRLILMIJPLHFGII@CD@@ED>@B>;=Cirutqpqttz�������{y{||y{||{||~}||zz|j:0-.-,,,-.++,,,--+*,-*)**)*+)()+)***+**+*)((()**('(())((())'()()*(''''')()(('&%&'&%&&'&&&%&'&'((&&$&&&&&%&&&&%%&%$&''%%###""""#$$%%������»�������¿�������¿���ſ�������ľ���ÿ�������¾������������������������������������������������������Ⱦ�������ſ������������ǿ�����������������������������������Ⱦ��������������������������ȿ�������������������Ⱦ��������������������������Ǿ��������������������������Ż�������˼�����������������ú��������������������������nW`f[�������w\U�����������������������������������������������������}kTYkolf^ULE;730-)'#$#$$$#"!!
'\��xphgc`XUTRPNNLKNTTP�������������������������������������Ù������~|zxuqqomjhfc_[[WURKIV��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���������������~����������~������~~������~�|�~~~�y�~x�||v{}�{|}}{{|}yyyzyww}yq{|yuwzuryywswzyvwwvvvwzvuv{vxswtttwrsozwvrvvvrrqqoopplqqokoqoikolgjqlc__\ZYZ[XWU[^GDDDDBB2#9@ABDEFDAUWZYVSVVTQVQQOQPMLSMKIKNKKMNLJIKJJJKKJJNNNONKLQTmssqpooqsx�������{z{|{yz||{{{~~{|{|{g<00/0./0./--//.--+--,*+,,+-,,**+()+))'(**(''(('&&&(((&&&&''%()((('&'('')'('((''(((&('&&&&%&&&'''&%%&&&&&&'''%%%&%%%'&%%$%&$$"%%%$%%������»���������������������ÿ�������ÿ���¿�������¼�������������������������������������������������ȿ��������������������������Ƚ������������˿���þ�������Ŀ�������˾��������������������������ȿ������������ȿ�������������������������������������������������������������ż�������̽���Ľ������������ļ��������������������������oY`iZ��������uYN����������������������������������������������������picRUkpke^ULD;52-*($#"!""""!
Y��yrkhd`[WTPLMOMMOTVT����������������������������������{���Ę������}{yxusrmmjhfb`\ZWUQKK[�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����������|���~���~~��~��~|���|~��{��}~��~���}}}}}�~~}~{{|}{z|{v|z{{~zxwtw}zvtxxxuwxurwwxpvyysvx{styyruz}uvvywvuvqsvvrtsurqptrphnpomooofa`]]]\[XUS\ZFEDEHEC2#:ACCCFFF?T\\ZZZZVVX\USRVTQSWRNNQOMPPPLKLJJLNNKKLNNOONLNQWlvtrppqrsz�������}||{zyz||zy{}}|||{}gH<:9:878544442220221/-//.//0/..,+,,,*)**+*())&%&&&&&&%%%#$#$%%%%%''&&'&(()()()*(()(()'''(()(&(&'('''(&&((''&$%''&&%%%$%&&%%%%''&&%'����������������������������������¿���������¿������������þ����������������������������������������������������������������������ɿ��������������������������Ǿ���������������������������������������������Ľ��ſ��������������������������̽��������������������������ż�����ſ�������ʽ���þͽȿ��������ž��������������������������qWaiU��������v\Q������ƴ�������������������������������������������scgcUXjmie^UJB<80-+'&%$#"!   
Y��xqjgb]YWSPPPOMLLQTV����������������������������������z���Ė������~{yxtqqmkigec^[ZWTRKE[���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�������}���~~��x���x�t��y{}z|~�~|}{{}�|z|x~}{x||zwz~|xxwyyzyyvwtzzztyyvozyynuzyrvy~sswzuuvxstwvruwxrrvwqurqnoqsppha^\]_][YVU[YEECCGCB0"8ACCCFGE?YZYYXWXSVXWRUUTMSUSPPQRMSSRPLMNNMPQONOOPRVTOORSXlttqpnrrw|�������|yzzzzz||zy{}|}|zywiYVWXYRNMPIHKKKGGCB???=7776653122232/010/..--,+)+,++*)'))''&'&%''&&'''&'(()()())''&&&''(')*))''%'((''(('((&&'&&''''&&&''&%&&%&&%%%$$�����������������ÿ���¾�������þ������������ž�������ÿ���ſ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿������������˾�����������ƾ�������������������������rXaeV�������u[O��������������������������������������������������ojigeTVhkif^VLB;71,)'&$#"" !" 
V��ysmkc\[RPQNMMLLLPW[���������������z����������������y���ę������~|zxuqrnjjhec_][XTPKEX������������������������~�wzy~�|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������������������������~���������������~����}���������}�y���y��~yz|�{|~�|{{zy{�z|z~vzxx{}xxz�xv{{zwxz|wz|||~{}yxx}zxu|zxs{xzsvxyswwuruutquyxrrwynsvqjnsolngf_]\]]\[ZU^ZIGEDCCB0$9ACEEFFDB]]\ZZ[[YXXWPVWVNUXSMTRNMTROMMLLPOTQQQPKQTWTSWUTXmutrpoqpv~�������zz|{{{||}{z}~||{zyvh[TUXVSSVYQRTTRRXTUXTPRTNMKKOIHPLD@CA?>;:889767401330//,/0-,+,,,+++,++)*,*)*,**()('''(''&'''''%''('%&'&&''''')''')(&('(%$&'''(''$$%%�����������������þ��������¾��¿���¿�������ſ�������������������������������¿���������������������ÿ���¿����������������������������������������������������������������������������������������������������Ⱦ�����������������������������������������������ſ��ɿ������������ȿ������������ȿ��������ÿ�ȿ�������������������������sX^fT~�������uZR������������������������������������������������ygjjidUYkkhe^TM?94/,(&&""! !" 
W��xsnhea`l���wUKKMQVR��������������ȋj�m���������������w���ƛ������{zwtrpkkifdc^[ZYTPMDT�����������������t����������vtphgd`bn������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���������������~���~������}���{|���|��z~��}~�{����~~}{~}~�|}z}}|zz~x�{}~{s{||z{{~ywz{xtwyusz|wu|zyvxz{wvy}wxy|uwu{vwyzuttwurtxwwvutyqpsrmtunkofe^]^_]\ZUU`ZJFEDCEA/%:@CBCEGDE^]__]\\[\YYXXWVTXVTTVSPQSQLMNOPRPPORRPLTSRSTVRU]oturpnopw��������}z{|zz|}|{{}~|||{yuh]U[VWUWWVVUWVTWVQSSTRWWTQTURRQURPRSRQRVQKQPIHIHJHEHGDBB<;7:976743232/.0..-..-.,+++***)+''&''&'('''%&''&'&'&'((''(('(''&&''(('''%%&'�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ���������������������ʿ�����������������������������������ǿ�����������������������������������Ⱦ������������������û�������������������������oY^gQ��������uUQ�þ���������������������������������������������lilkjdXVjlid]TL@:60,'%&#"! !"!
V��xsmhfa~�ɸ���]JMSUS��������������ƅ�Ȟ���������������{���ę������{zxvsqkljgdb^\[XTRJES{ff_^]^gbkk|y���nIr����������{unhc]W[����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������������������������������������������������������������|���~�������|�~z�|~�x~��y}�y~x|}z{~�{z{�z{}~xxx�}z{~{zwy{yxyx{yvzzyv}yvu{yzvuwwrwxyuxz}ww{{uvxxuuxytwxwtvutrssvrppphe`^__^]WWTXVIFDCDDB0&8?CEDEHEE\]_]\Y\[YWZZXUVWYRUXXTRYURTVSTUWSQSVSSUWVUTYYUYcottsrnmrz��������{y{{{{|}{{}|}}}|zzuj`]a][\_[Y]^\XYZUVZXUTYXWVXTRTWSMWVRQUQOPPTSQQUQRQQTTONQONMNLIIPHIJHBAB??;;::78643322300-+-,,,+--*)),,**))*)()*(())(**()'(''('&&&'%&������������������������¾������������Ŀ�����¿�¾��ƿ��������ÿ������������������������������������������������������������Ⱦ�������������������������������ſ�������������������������������������������������ɿ�����������������������������������ǿ��������������������������ɽ�������ȸ���Ⱦ������������ſ��������������������������oV`iQ{�������qNP~������������´���������������������������������mjkmnkcTUkkie[QJC=5/+(&$#"""   
+Z��zrmifc��`Q^x�}FLRTS��������������ȃ_�����������������w���ƙ�������}zvtromljeca_]YVSOIA72KA61.22.+*-.175+._����������}uiaa^TX����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������}���������������~�����������~�������}~�����������|�����}~��~}��~x~~|{~~v|��v{}yz{~x{}~xw~�vu{~ywy~zw~~yvz{z{~{{xvwyxxuvwxtzzywxz|vwwwtvwvtuxvsvxurttrotvqmprnhba__`_]WXTZZGEC@CD@.%8@DEFFGFI_^b`^Y_][Z]ZVSVXVSYVSRRUURTURSQSPLTTPRWWWYXYWW]epuusoklqz��������{{z{}{||||{}}}~}zwti`_b\]^`WY]^\[_ZZ\`YZZ_ZVYZWX\ZXU\WRUXWVTUWSQSUQURSTOPSTQQRUQRRTOPPQNSUMMONMKMJIHGIF?BB@?<>988967444533210000/.--,,,-.,,+)))*)'(((&%�����������½�����������ſ�����������������ſ���ÿ������������¿����������������ü������������������������������������������������������Ŀ��������������������������������������������������������������������������������������˽�������������������������������������������������������������Ǿ������������Ž��������������������������pT`gU}������}rSM}����������������������������������������������vhkknomeWTljhd\RH?:4/+)&#"  ""  
+Q��zsmkge�UNPHNarPMQTQ��������������ȴ����þ������������x���Ȝ�������~ywutqonjeba`^ZWUOH?$'PH?886213330/,-+%K����������|sf^]]VV��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������~���}���|���|��z��z|��z}���}��||��{}��~��|}~�{}��}~~�~x�~}y~}}|}|{x|}rw}|yvy|zx{|wt{|xxz|ztvzztuyysxy}wxxyvzx|vvwyvvvvwvtvxuruurqsuroprleabaa^_[YYY]VIFDBCD?-';BEFGFHCH[bf___b]]]^[ZWYXRUZXTSSWTTVTSSOSQTVRNRUVTYZXXY[bquvsleiqx��������}{|~}{}~~|{{z|}{{yvjcabbba_\]`^^^]Z]_^X\^^Y[_[WZ\X[XZXYZZVTYWWVXXWUXUVWSSTTUXWYSTVURTTRPOONPPPOQPNOMPMOMNLLLKMLLKIHJHFFGBBBB?>>?>;:8797652101110/..--,+������������������������Ŀ�����������������ÿ�����������������������������������þ���������������������ǿ������������Ǿ������������Ǿ�����������������ƿ���������������������ʿ�������������������������������������������������̽�����������������ÿ���¿����������������˾�������ź������������������������ľ��������������������������qZ^jRv���~�|ztOBz����������������������������������������������hnmmoqoeURjkgb\QG?83/)('%#!!!   !
+M��zungff|XZXPQQ^QIPUS���������������Ò|u���������������w���Ǜ������|xwvspmkgacb`^[VSNGB$'\IA:75132430--+)&B����������wnd_ZXUT��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~�����������������������������������������������������������~��~���w���{���~{���z{�v}��~}�z|�v|�x|~�|{}{{|�{{{�{z{�|vv~{yyz|yyzzwxzwsvxxvtx|zvwywov{yuzywvxy{suyuqvxxtwvustuvssssqqpqrmeb`_`_^\XUV_WDCBAAC?*'9CFFFGGEF\``Z_`]V[\\W[[ZVUXVVWVVVUXVSSTTTTVRTUUUVXZ\[Z[[bquusiVfnx��������~z{}}y{||{{|{||{{ytlfceea^_`c__`^^_b`]\^^\\``[V[[Y][\YXZZWXZWXZZVWZWSWYXUTVXZYYVXWWVXVTRRRQRQRQRPNRSQNNONONONOPPMLKKKMMMKNMJHIIIKLHGHHEDEFDB@?<>;:9:632�����������������������������������������Ŀ�������¾����������������������������������������������������������������������ƿ����������������������������������������������������������������������������������������������������������������������������������������������Ϳ�������Ⱦ�������������������������û�������������������������o[_lOs�����}vkKDy�´��ŷ���������������������������������������uonnqrndSSslhd]UI>;5/+)'%#""!!! ! 
M��xupigj��ylLNXkOLPXS��������������̑_�����������������v���ʙ������}{ywusnjid_cb`\\XWQK@',�HA88634222/,-.+%C���������~tlda\URO��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������}�������}�������������������������������������������������������������������������}�������������������������������|������~���~}����~}~��}�}��~�|�z~}z}{~t~~|x{}yz~yz|xvz}{xz|yx|~xvx||xw{|xvzyxzyxuytxyxwyxyvwxxrvyurvuvswxvrsuuoosskprpnlge`^`_^][TXaZCDABBC>*';CGFGGHGKeb^_a^^Z]Z[Y[ZWTTXWWVUUTSTSRRRQSSTPRVVTW\\[\\\[eruusiTfpx��������||}~}{||}{{{|{{|{ysjheddc`bbeabc_b`c]^bc]``a`^___^a`^^\]^\Y[Z]\ZWY[XVXXXYWYZ[XZXUVZ[YWXWUWUWWTUWONSSTRTRQRSSSRRONSONMOPNMOMKNNMIKKKLKKIILKKIIJLIGHILKFF�������������¾������������¾�������Ŀ���þ��ſ���ž������������Ľ������������½��ȿ���������������������������������������������������������������������������������������������������������ǿ��������Ŀ����������������������������������������������������������������������ƿ�������ƾ�������˽�����������ǽ�������������������������nZ^eRo����}{vhMFu�����������������������������������������������srqstqgXOkkhe]SG=:61+)&##"!    
+
+H��{tnjed��ЀE^��JLQVM��������������Ǌ���ĵ�������������t���ɚ������~zyvrrYMPN\cb^]ZWTOJ?%'hH?62543321/-.+(#=�������~}xrkf`[SPL�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|����������������������������������������������������������������������������������������}��������}���}��z���{}��|��~~��~}��x���|~�}��~~��}~}~}�}�~�{|z}|z|||wz}�xx{}xwvzwsy}vuxyuty}xvw|ysy{xqwyzvx{yvxyzwuvwwuxysttvvupttuqorrqqqpomgda__^]][TX`WCCCEEC?*)<CGGGEHDGe_]`a]\^\]\[_YXWVUVWUWUTWUTURRPURRTUVUSUYYX[\YWdotrrc[iqw��������|{{}{|||zz{}|}~}yukieddcaededbgaacd^acd^_ba^``_`cc\]`a]_`_^]^]\[]ZY\[\_ZX[[[VZ^ZXYXXXWXSTUUTVVVSSTRSTUSRUUTUUUQRUPPRRQQRQNNQPNNONOMLNNNKLMNOKNLFFKKHJK�������������ſ���¿���������������������������������������������������������������������������������ƿ������������������������������������������������������ƽ������������������������������̿��������ľ��������������������������������������������ɽ���������������������������������ƿ��������������������Ǿ�������������������������rW_iRu����}}iT5<k�����������������������������������������������wtuvxqfROjlkd^QH@:50,)%$#   !!!!
+P��{tmic`ft~jE[lZIKOUN��������������Țo�����������������x���ʗ�������{xuqpljme]eca]\XVQH>,hJB96552331/..,(%<�������xqmkid^ZNOR��������������������������������}}~z}uy{�����}���xyvtrvvwsomkiegfgbdgolmu|�~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���}���{{��{{��}{{��z~��|~~z|}~{}}z��|}�{}}~{{|}zyz||zz|yzyxy{wvyvstxzuqtxvpyyxsvyvty}xtx{{rszxtvzyrvyusuutrvusrtsqrqrnecb`^]__ZWW`V@BBDBB>+(9BHHGHHDK^\___YZ^]Y[[XTXVVVWZUYWXXSTWUVSZVTUUTUV[[Z[\[Y^hqsutplnrx��������{z|~}}}~|{{{}~~|xtjfgfcahhdfhefdeeebcaa`ccb_a``beb\`cc]`a___c_]^_[^__^_^]`]^\][\^^[[\Z\XXXWUXWXXWVTUUWTTVVTUVURUURTUQTUVQPQQQRTPNQSRRRTRQQQSQQPLMNMJLK���������������������������������������������������������������������������������������������������������������������������������ƾ�����������������¿���������������������������������������̿���ʾ������������ȼ�����������������������������������˽���������������������̼���ɾ���������������������������Ž�������������������������rV\hWv�|�zs<&':l������������²���������������������������������ztwuvseRRgkibZPGA960*)%$"!  !
F��sngba[v��VHNMNMRVR���������������Ñrp���������������r���˜������~zxurpnjkfcdc_]ZXTOG@+bKA9788313010/,*":��ZSP><=52,023DPO����������������������������oyxuoffh\X[WRRQPMBH^�z��zxurrpnljfdaa_^ZXYURVUSUR�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������������������~���}���}�������������~���~|��~|~x~�}~|u||~v~~}x|}xz{{w|}|yuy|zxz}wxy{zxwzyvuy{zvuvxvtyyysuwsuyyxryzxptxvmswvptxsotvsntuojrtqnprmddc`^]]^ZWX`V?BCEDB>+(9BFHIIICF_\ba_\\]]Y\YXVYVVVWXSTVUSSTTRTUYTSVUTUWY\Z][\[]gquvussqty��������{x{{zz}�}y|}{~~}{yshdgfabee_dfeadea]beabed`ccadcdd`_daa^a`__``__ba^``\\_^_`X^_`Z^__[_`][]_VXWZXYYYWWZUV[ZVUYWWWUTUSUVRQSTQSUTSRUNOSROQRTSURQSROLPQONOPM�������������������¿��������Ŀ���¿������������������������������ſ������������Ⱦ�����������������������������������ƽ����������ǿ������������������������������������������ɾ�������������������Ǿ������������ɾ��������������������������˿������������������������������Ϳ���ɽ���þ�������Ȼ������������ſ��������������������������rZ^dXy�|{yf0/5<l������������������������������������������������zyxwseTRjmle_VIA:30+(&&$!!!! 
+?���ungbc�ǳ��VMMLOTVN��������������ȸ������������������x���Ϝ������{yusoaXQV]cb`^YXTOI?*"[MB:;KuXWa<11-,+$7���gb^V[YMKF?:;ERH����������������������������vs���wrkib_Z[USPNMFD]n��{zvtoqplkgeea^\Z\\\ZZSRE{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z�������~���}���}������������}~�|{~�}y~~z�z}��}~����~}}||}|�{~{~{w~{yv|z|zzwzuvxztw{|ywwzvtwzxvvwvuuyyuvwwtu{wvxx{xwuvvvuuuutuuswwutprspnssrooqkcab`__^\XTW_TBCBEEC>((;BGJJHJDC__ca_`][Z][XXY]XTUVZUWWVSUSSSSRRSUUTQTVUYZYYZ[]dquuvsrqpx��������}z||{|~}|{|}|}~~}zsmggfeeegaefecfjdbeb`be`^cea`dga_cebabc_`ba`_aab`a____^`_[a`^\ba[Y\[\Z\[WZ][YXXXXY]VU[WTVYXXVYWWYY[YXWUWWUUVSRSVVTSTTSVVTQTTSORSPPPQP����������������¿�������¾�������������������������������������������������������������������������������������ȿ���ɻ������������ǿ���ÿ���������������������������������������������������������������ž������������ȿ������������ſ�������������������������������������ɹ���;�������ͼ���Ƚ��;��������Ľ��������������������������r^]aR~�{zzvc77=Fl�·���������������������������������������������}{{wrePGhnmeZSJB930+'%#"""!! 
+9��}rofce��KflZJMMOPUJ��������������ƭp_0~��������������z���̟������~zxusqfeY\add_[ZWSNJ?)'nOA:?h����p30-,-$/z������~wgouj`ZOVQ���������������������������wgt������uphlacffefXG:.m�����������}}|zuqnsupprqiRH]���������������������������������������������������������������������������������������������������������������~��������|������������������������������������������������~��������������������}��������������}��������������������������������������������������~�}����������w{}�|{�|}�w|�~z|}{�~{{��{{{�w~~|yz�zz~~xzz�vyy{zyw|~zvwwxtvxyxvvvrsxxtovvsntxuswzxtyxunrxvquwtrtxsosurqprrnpqpoiebaa^\]\XUW`R=@BDCBB)+<CGGIIKDD]__^^^]VX]ZX\Z]VVXVXYWWXVWTSTUSRRUWRTVXVZXYZ[\\gsuvurqqry��������}|}||}~}|||{|}}~~ztlgffifdgdedceedcccaadeaaccabfeabcca`c_]_b`^`c^`b_\_abc`]_b``ababaa`_^[\ZZ[[ZYYYXZZYY\YSSXWXYZUVXYYXYVVYYVVWVSUVTUVWWTWUUVXWVVTUSVSSQ��������������������ÿ�����������������������������������������������������������������������������������������������ɽ������������Ⱦ�������������������������������������������������ɿ�����������������ȿ�������������������������������������������������ȿ��������������������������ƿս�����������������ž��������������������������s`^eU�lh^]YI;:?Z������}������������������������������������������}}xqdRNfnnfYPGB841-(%#"!!  
+9���smfbd�_Gt~[MMMOPUR����������������ŀz���������������z���̟������~|zvtrmmggffc`\ZVTNJ>*#aE>;Bm�����400.,&4|�����|yrV;8437MWS��������������������������wjax�}RH68155625(167CG:/r�kMMICA0+)*,,),)++,*-,00@RHR��������������������������������������������������������������������������������������������������������������������������������������������������������}������������������������������������������������������������������������}���|���~��������������������}�yuik�������y}��~��|�~~~~~�}~}~~{�~zzz|x||}w}~|yxz~wy|uzy~sv{}yx{~yxwxyzxxxwx{vvttvutwwvtpxwwvvvutzxuqsvupsspptvsorurooqmlmqomieca`_]]]WVWbR=>>CCC<'*<BFGGGIEIb`__^_\]^\XW[YTPXUVUUTTSUQSSTTRSSWURTUVX\\Y]^^]fqvvutvssx��������{y||}||}{z|{}}|||{tieccgdbee`aegaddd_beccefcddddcaabccbb`abbbdab`ab^]aacb__`a^^aaaaba_^_[][]\\\ZZ[\[]VYZZYZXWX[ZTZZXYYXUVWYVWTTSUVTUWWXWYSUYXVWVRTTUTUU����������������������¿������������ľ���Ľ����������������Ľ���Ŀ������������Ŀ���������������������������������������������������ȿ��������������ƿ������������������������������������������������������������������������������������������������������������ÿ���������������������ǿ̽������������������¹�������������������������ve]bW��WbUNPK=326998>71+.448:A<9:<FN^j����������������������������|yseRPinoe\RHA:50+(&$##! 
+	2���qleaaji]��ROMOPSWO|�������������̻nc����������������y���ҙ�����|zyvtpaMKM[cda]YWSPI?*#`F>:9V�����4-/-+%1z�����{wxmM>:9>U]X�������������������������xada~������vprhb`][ZQYS=/x��������xwukljfgeaaa`\YLISEN��������������������������������������������~�������������������������������������������������������������������������������������������������������������}���|��������|�������������������������������������������������������������������|���~���{~���~��}~��pl^XZg�������}y����~{}�{{�~}��~y��|��|~~}{�|~z}z|z{x|w|u|{~wz{{r|}|tx|}ttwzwwxxutxyttxywvvywruxxvvtttuwyuustrrptrmoutpqsrponqmmmpmlidca``_^]ZXW`M?;<BED=&,<CEFIMKDIa]___[\^]ZUZ]XXY[TVXVVUWUSSTTRRUTSQRSUTVZZX\[Z\cqvtutvsr{��������|zz~||{}|z{{}||{zyskgeeffcfd`dfgbedc`bea`cdabbc\`bebecb\abccb``_bda_abdcb`c_`_bc``aa_\]a\^`\Z[ZXV\^ZZZZXX\ZZY[[]Z[[X[ZVWZYXWYZWVVXVVWVVWWUVXWUWXTUXTQTT�������������þ�������ľ���Ŀ���¿��Ľ���ľ����������������Ŀ���ƿ������������Ž��������������������������������������������ɿ���������������������������������������������ǿ���������������������������������������������������ɿ�����������������������������������������������������������ƿ���������������ɼ�������������������������udR[X��YgaXRH<5555653/.-,,+***-'%%#%#%����������������������������~|ztdMIgome\SI@:50+(&&$#"   
+	5���sjfb`Xp��bFQOOPQXR}�������������ɚCTf���������������}���љ������~{ywtoikgeebba_\ZUQIC+[J@<<9XhWu|(/.-*#/t���o`Z\\ZX_b[\__[������������������������qWY_b��ȴzvqnib^Z[[XZZba>2����|xvsnnlhjjjkgffdekjk{WLN�������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������z���~�������~��������������������������������������������~��}vpmhRNPPVf�������~x��}�x|}�xy~�{y}�~u��|x|�y{}x|~~{~}}z|}~zy|{}z}zyy�wwwz{zxxxzwxyxutvwvuuwtsvwvpyxtsuuvvxwwsuwwssrtsrvuqpvsonqoqqnnnmmlfcb`a^]^[XZ^L>;;AED;&-?DDGJKJBI_\\\[X\\\YXZZVXZZSVWVWXWTUVWUVVWUTUYVUVXZ[\][\^gsttuvvutz�������~||{~}|}~{z{|~}z{yriggegghfcdgecbffcadebcb_acbd_cca_dc`]ba_abb``cd`_`_`baae`a`ba_aa_^^__^_[\Z[[[[\[VZ\ZYY\ZZZYX[[Z[YZVUY\XYXYYUVXYYXXUXZYXZZYWWYXXWTTTU�������������ÿ�������ÿ������������ý���������������������������������������������������������������������������������������������������������ǿ���ÿ���������������������ƽ���������������������������������������������������������������˿����������������Ƚ���ƽ��������������������������������������ÿ�Ž�������������������������ue?AO��Xge^TI>:94131-++**)((%''$$""$&,����������������������������}ytdLCeole\TIA:3/+(&$$#!! 
+	-���rlgccu����WOMNNNVTt���������������������������������w���ҟ������}{yvtokpjhedbb_\YVOI@,YG@==O��~��12/-+%/t��fFG>6667@Z_^]``������������������������}hg[X����swnkgc_[YVSRT[\>1y��tyspnjhfdba`_`[YYYWRTXqZLK�����������������������������������������������������������������������������������������������������������������������������~�����������������������������|�������������������������������|����������������������������������~���|���~�����������~vcVNOMIJLNWg�������n{���~�{~}|}~||�z|z}|x{x{|{w|~|w{}{rz{}yyz}vuw{vwxzxxy|wwy{xuvwvuuwuuutuusrxupsuvvtwuvpvxtrtvooruqmsuqmoqooqpommqkedcaa__\ZZY_O<9:@EC:&+?BEHIIHCO`[\]]Z\\[XYYXWXXVTUVTSXUOTWUQSUUSSUVVUWXX[][Z[^guuuuuvuuw�������~|{{}}y{||{{}~z|zqgdeddgga`efccfdb`edb`dcbcdbcbfbaada_`ba_acb``cb`_a][_a_a]__^]_`_]\^]]^^[]^]][\^ZZ\[Z[[[XYXXYYYZZ[ZWWZYUZYVXWWVWYWTUZYWZ[YZXVVVWSWYVT�����������������������������þ������������ÿ������������ſ���������������������ƽ�����������������������������������������������������������������������������������������������������������ǿ������������������������������������������������������ȿ�������ʾ���Ǽ���ȿ�������ͽ���ſ��ʿ�����������������ǿ��������������������������ug6':��UhhcSG?<9111/.+*(())(&''%%#"!#/����������������������������~}xtcIHdomg\SI@;3/,)(%$"!  !	,���rlfbdywNWmaLOMKNWPq����������������ʿ���������������{���ќ������~zyvslWSMQZdbb_[XXPKA.
]H@@Ab�����0//..%.r��z��}ml{}\[cccc^My�������������������������[L��Ķtvnjga^YYXVQRYZA8}��wzyupmjgedc`^__\[[YVXYxZLM��������������������������������|������������������������������������������������������������������������������������������������������������������}���������������������������������������������������������������������|�����|������}��z���vusbPGIGGGGJJKYf�ó���}t����w~��|}~�}��}|~~��}|}�}||}z{y~{|z}|yv{{zzy}uwyzxxxwvxyzrrvxstxxsptytsuxusyxwsq{xwuvxtvuxustvvrstspqttqoopmlqqjjkmgbcbc`_`_ZWY`P><;>CC<&-=BFIGGGAI_^_\\_`][[]XUWZYVWXXUYYWUVVUTUUUTUTTTWWVX[[XX^]grtuuvvtsw�������|{{z}}z|~~{{|~~}}{qhfhcfgfbcggccfeaabcabbabeebccc``ce`_b`_`a``acb`ca`^bbb_b__^__a`]\]^\\]ZWY\\ZZZ[WY[YXY[XWYY\[\ZX[Z\Y[[YZ[ZWXVXXXXZYW[XW[[X\\XXYXTWXSS������þ�����������������¾��ǽ������������ľ������������ƾ��������¿�����������Ⱦ�����������������������������������������������������������������������������������������������������������ž���������������������������������Ž�������������������ſ��������������������������Ⱦ�������ѿ�����������������ɼ��������������������������sc4.��VhibSJ@:511//,+*(')**((($##" ,���������������������������|yt^GGellg]RI@842/+)'$""  !
	)���tlea_odNJbaNNMLQVRt�������������Ƭk]Yb��������������|���њ������~zzwtmefa`beb`_[YWRKC/aH@@@Ge_j��420..#-u������uPMI?Vghgf`1l�������������������������P?��µsuojgba^ZYVSRZ[B4��z{xupojigd`]^][[\ZYXYaxZMS����������������������������������������������������������������������������������������������~������������������������������������������������������������������������~�������{��������|������������������������������������������������y���|��r[KKABFGGGEDGJLYh�´��z�����s{~}{z~zw��zv~{y�~y{|z{~}vx|~xz}~ww{}xxz~uww}xyxywwvwwwrtuxtswwqqssqqtvrqyvtrsyuqtwvsvxwqsvuusttpotvsrqonoooqonmkfaccb____ZXZ_N???=BC:%.<CGIGHJ@I__]Y[][Y[][TVVVWXXVWUVVVVWWVUWWUVWUUVWVVYZ[WZ^]gsttuwvtrx�������~||z}|{|}}zz|~}|zypjfeehgbceedddbccdcaaddacec`eec]`cb`^___`a\^bb_`b`]^cbcabaa]a`aa```a]^^\[[[YYZZZYZ[ZZZYZZYQ[\[YXYVYY[WX[ZWXYVWWVVY\YWZ[\Y\^[XYYYWXWWT�������������������������������������������������������������������������������������ǿ����������������������������������������ſ��˿��������������������������ɿ�������������������������������ľ���������������������Ƚ����������������������������������������ž������������������������������������������ȼ��������������������������ud4+��Tii`TKB:331/2/+))(,/,&%$"  +����������������������������~}xo[CBeloh]PGA;520.)'&'"     
+%���yngbdx����cMONOTWVo��������������¯��w��������������|���Ӝ������zywtqlmgeffdb`\ZVSLA/]F@?BHyvs�t/32.*#+v���wiuvTIGQfijigc<q�������������������������W@��żttpkhda^\ZWVRX\F4{��zzwtpnligeca`^]]^ZYWY\vXLQ�������������������������������}�������������������������������������������������������������������������|����������������{�����������}���}��������|���}�����������������������������������������������}���{��������������~��|����������~�|�kVKBAAADEECDBEFFJTf�ʭ�kx������y}}}|||~}}vy~~zy~}{|}zx{~{tw}{u{}{rw{yux{}rwz{vwyxvvwwuxvyvwuxwuryvtssuuqnwusrtusrvvtruvsqsvtqrsooqwrmorolprlnppjjgcdb`_\``\XYbMBCA?BD:%->ADGFII>N_^\Z]\Y\_ZWRXWTSUWSUXWTUTTSQSUUSUWTTUUTWZZW[_\\gssvuvvusz�������~||{}|{}~{{}}~{yyphdcedcbddbbfe`beeabeedceeacfeddecaba`aba`]ba`]dc`^_`ad_`aa\`aa``b````][]^[Y[\^^\[Z\\[[[[Z[]YY\\[ZZZYUXYYUXXTSX[VXZXYZXVZ[YWZ\YVXXWXV�������������¾��������������������������������������������������������������������������������������������������������������������Ϳ�����������������̿����������������������������������������ý������������Ž����������������������������������������ƿ�������ƾ�������������������������������������������Ľ�������������������������ue4-��Rnl_RJF>643122-*+*,,($" (����������������������������}{xpY5>bkmg]QH?9518=3032  
+!���xoiccf���yGJMNLQWRm�����������������ȗ��������������y���՞������~ywwuqkXS_geeb_[XWSKA/]JABF`�����653/+#+s��������}khjjiifa=o�������������������������[B����rvokgda[\ZVWS^]H<}��sxvrnmligecb_^]]][XXWZ~WNLl������������������������������w���~����{����������������������������������������������������������������������������������}������������{���|��������|���z���}����|�����������~������������|�����������������������������~~���}}��}���{}�um^QCBBBBA>BECAA@?@@EMa�Ō|}������~}�||}|}~�|zy�~|~�}~~|{z{~|zw{~zuzzzwzwxuzyxs|{zpty{ttwxsvxxtquxusuytruvtttttsuxtstwxsquwusrsttrqpnoqtpnqumkpqmlnmfkgbec_^]`_ZWV\L@CC@AC:#,>DFHIKI@Q^]]_]ZY]^YXXXWVUVWTVWTQSTSRSTRRQTTTTRQQVYYW\]Y]hrsvvuvuty�������}|z{|}{}��|{|}}}|{wofdcdeacecbccc`ddcbbdbadfeaaeacfdabbb`bc_abc_`ab_^_aa_bba``\`dbaa`^]`_\]\ZWZ\\]][ZZ[[\Z[XX[^[Y\[Y[[ZYZZXYXXVVVWYYYYX\ZZV\ZYYYXUUYWVVV�������������¾���½������������ý�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ�������ʾ�����������������ý���������������ƿ�������������������������ug3-��_qqcXQOC852012/,--,)&"! !"���������������������������|zyvmS3:clng\QI?878f����r! "!  
	"���umgc_hzf_jcLONLOWUn�������������Ȳnei���������������~���՟������{ywtp`]cbcfec`\YWSKC/$eGBABU�����642/,$,s��m[VPLMKFGcllkgc?l�������������������������\C����psokggd^\[UXSc]E<{��xuuqonkklfcb^\]\\[ZXY\zYO7N�ż�����������������t~���������������������������������������������������~��������������������~������������~�������������������������������������������{���{���{���z���~���|����~���~���v���z������������������������������}~�������x�nUF;DEBELCIDBEDEA>=;9>G^����������|~{y{�|z}{v{~zx~�~|~~zx{�zyy~|z{{x|yyuzy}xvw|ywszxyuvwwqsvwstuurptvqntxsqsvtquyqqtxtqrxwqsuurttppnrqrpopsnonnmmnlhjgbb`__^^][WZ^L@EFB@@;$.>CGHIJGCT\[[_XUZ[[WX[VUUUVWXVUSTVUTVWTRTTUUTVUSTW[[Z[[Z]hrvvtstutx�������{z{{||~~~|{|}}}}{wndfieedggdegdechbfcbcbabecbab`cd`]aa`_a_Z`ac^_caZ_a_a`bcb_```````_^_``^`[ZY__\[Z[ZZZWV[\[Y[ZY\]ZX]^YY[YWX[XVXYWWWYZ[\XWX^WZ\[YWZXWZXW����¾�������Ŀ�����������������ÿ��ſ���ƾ������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ��ν���̿������������ʿ�������������������������������������������������������������¾���������������ƾ�������������������������tf4-��btpi\SOG8642/20/./-*)&$"#�pboka^NJADCJLFEBCEFDECAIGEHJMNLA*4aonh^QH@;87I_UZSA#!!!  
#���xkga`�¹��~OOMKOUXk�������������ɧpxw}������������������֢������~{xuupdimg^eea`]YWSKD/ iH??E^�����62/.,$,v��dURKLKHEEepmjhcCk�������������������������`H����tsoiecb`]ZWWUc[G;x��{vuromjjiecca_]ZY[YZZ\rUR3M�Ž�����������������`{������������������������������������������������������������������������������������������~�����������������������������~����������������������}�~�~���}������~��x���w���z���~}������|���~���~~��~����~|��}���}~shLAHUYblkxxz}xqmhW?9658BV������������|}{}}�{|}}zx{}zy}wz�|yw{�zuyz|uwyyvyxuqvy{puy{vyx{tvuxuusvuutzwvqtuutstwtruusnsunptwsmqvrmtvsorsonosopssomosokmpljijh`bbb^a^[ZWZZHBEE>;?:".=AFHHIH?SZ\\]XVZ[XWYWTSTTRQWWSOUUTRTVUTUTSUUTUUXYXY[[ZZ_jrvwvtstrx��������|||{{{~~}{{|}}~~~wmggfdefgdbffaddc_beaabccccdccbebb`ba`aabca`a^aca_a`[^]adb[a`_]_a][^^^_``ZZ\^]_^YZ\\YWX[^[[\YU[[[[\YWXZXWXYUTVWVXWXWZYWWZ[SY[ZWXWTW[WT������������������¾������������������������������������������������������������������������������������������������������������������������������������ʽ�����������������ȿ���������������������Ͽ���ǿ�������ͽ���Ⱦ�����������������������������������ɼ���������������������ǻ���ƿ��ɾ���������������������������������������������se2,��\tnfZPJB953441.,..*+,,'%$#!
BAjpjcZPIECBC?;?;:<88:845135422/%*]ooh`SI@;664-+9%$  "   	���yoe]_lk]ZmgGRONRUSe�������������ɮ������������������}���֡������~{yvvphgg_[geb`]YWSKC1eGBAF_����h410/,%+q�������zbT_fqokhfAl�������������������������cD����rulgeb`__ZWWXd]H;{��uxwsnkjiifccba_`\\[[ZZqTQ1M�ľ�����������������a|�����~�����������}���~��������~��������|�����������������������������|�����������������������������������������������~���~��|���z����������y�����������~�������������~�~���������|~~�|���y~�y��|{��z�~hVDDIg|�wx������������\40028N������������z|~�}||}~|z{|~|x|�zzy{{zxzyyxyyww{ywvzwzqyxynyuvrtuuotuxoquwustwrtwvrstutssqrppsturorsrrwvrprrnmoqlmrqllnpmjmnegjle^ab^[_^][WYZFBCEA>>:"0>EGHJIF8L]]^`_[ZXXZXVUTUVWWWTRSVTSRRRRTTRPTUSSTVUUYYXXX_kstvussssy��������}}}{{{}}~|}}}~~}}xnihdcddfcedebddc^aeb`cfc`dfbbbdabdd``babddb`adcbbba]`]ab_\ba^```^]^_^^`\YY\[ZZZYZ^\UZYY[[[[[Z\^Y[[YXYXXZYWWXXXXYXXXZYXZY[[[YXUXWUWVUT������ÿ���¾�������ÿ��������¿����������������Ŀ���������������������������������������������������������������������������������Ž���ż����������������������������������������������������������������������˺����������������������������������������˿������������ɾ�������Ƚ�������λ�����������������Ƽ��������������������������sf3+��VqmdZPF>966885.*-1-+,,((&%$!BkpjcUMIFEBA@?A===;;9798653322(&_nmi`TGB;67\����p$!"!   ���xmf]ifUNRo{KONLPWRe����������������������¿������������ա������}zxvrl_HOcgeb`][WSMC2fGDDBJy���J-2/0-$*r�������}aIYisnjjd@o�������������������������hF����wsnida__]YXZUb]F9z��u|ytqlkjhfdc]```Y\[XZYkVN-N�ǽ�����������������_y���������~�����������w����������������������������������~�����������w�����~�����}�����������������������������������������|��~���x���{��}|���{�~����v���{�������~���}��������������������{��}���{�~�~�wSEb~����}y������������k2+))-B�����������}x|~w{||{|�zy|~zy|}x{|zy{|{yzz{zz|}xz{}v{x{wzw|tvtyvto{tvosusnqttprusnpsrporrpnssqrrstsstsrprrpnonoosqnmonomkkmghimfbcaZS[]]YVWXH>BDB?<8!
0?FFGHHF;J\_]Z]\ZW[[VUXWVVWYXTUUVUUTSRQUSRQTTRTUVUWYYXXX^ktvvsstsry�������{}}|~{|}~~||}|}|~zmghccebdcfbdeibdcfdbccecded`accaadb_ab``bc`b`_^bca`^bdb_`aa``a_^__^a__^\\Z\ZZ[ZXZ\WVZZYZYZWXZYZ\ZZVX[TZ[XTWYVWYYWXYYZZXWZ\ZYXZVVWWVVV������ſ���Ľ�������ľ���ľ���¾����������������ž��ſ�������������������������������������������������������������������������Ǿ��Ƚ���Ż���ǿ�������Ƽ�������Ϳ������������˿��������������������������ʾ�����ƾ������������������������������������������������������������������������̽�����¿����������ǻ��������������������������th4/��Somc]WG<77::971.173++,((&%$AimicWQJFFABAA@><=:;9566542222&$Yqni^SFA:68S��xo[#"!   	��vlhn������dJPLLPXRb�������������ɵnZav������������������ۡ������}zxvrlnjhceca_\ZVQME2$hHCAB\����}F;40,#*u������{yyysmlihbBl�������������������������jB������zqkhaa^^d_^e\C4u�����|{xtsnmiifhggac`__ahYQ)Q��������������������`s����������������������������������������������������������������������~���y���������������~���~�������y~���}���~���������~�������������}����~�}�~�}�y���~�������~��y�~������~���~���{���~~��}����}��}}��}|x������������|t{~~���p.$""':�����������|y}xs{|~|xyxz{zvx}|yvyzuuzzwtvxzwy|zry{xqxyysvz|ruw}tuu~suswurquurottrpssqmqtqnorrqqssrpqsqonqpompnmnsqmnronomlljkjigdcdTHZ\[YXWYI@CBBB<6 -@DEHIHF<N\]ZW[\XU[WTTWWSTVXVTVVSVUUTSSVSTSTSSVVVXYYYZYY_kuvtttsssy�������}||{{|{|}}|{}}}||}ylgebcfbbcf^dfe_becaadedbfcb_dcb`bc``bba`bc^b_`]`a```aba\_`_\aca^_a_`^]\^][Z[]\ZZ\[XX[[YZYZY\ZXWZ[ZXY\WZYXUWXVUVVVWZYYZYWVYWWZZUWWWWWV��¾�������þ�������Ŀ��������������������������ľ��ÿ������������������������������������������������������������������������������������������������˿���������������������ʿ��������������������������������������������������������������������ɽ�������������������������������������ɽ���ο������������ƽ��������������������������qa2,��Zpk^[THB<;>;9832696,()''$#"Aklg`WPKCDCCAA><<<:;9766544340'&[oni^QHD;72K��l&$#    
y��xmgqs����|lMNNOPVVf��������������ƫksx������������������١������~}{yvq_NPQWfd`_\YTPJA1#dD@BHk��Q��rH0.0'(u����xumegegb]dgfbAl�������������������������o=���|wmghed`\[[[WYVNF:r~�}~zvtroljlhffhfgdbbabaaVP,U��������������������eq���|����{���w���{���|�����������������������������������������������������������������������������������|{��{��{~������{{���~���z���}~~��}��y���~�����������}���|�����������}�~�}���||�wz�z|}�{z|���������������}nrzy���f%"7�����������~{yy�||||y|zzywv{{y{ywzyxzxwquwyuxywswzupwxupvvymrtwlsturrutsqtvrprwroswqnputpnrssqrsurrsqsqopnonnlmmqomnpjmpligkkihd`a^IA[][VVX[E@ABCC=5 -=CEHHIG;P_]\[]ZZ[[WVUWUTUWVSTTTQSTURRTSSQRSRQTSTWXXYZYU_jsusuurttz�������||||}}|}~}|}~~}||ykedcdcbced^eeb_ccaa`ecaad_b_dabbc`aabaa``bb`^_cc`a`aba_]aa_^`c__ab_`^ZY]\ZY\]XWY[ZX[ZZ\[WX[[ZYY[]ZYY[[[ZXYWWXXVXYXYYXZWXXXXXXXXXVXVVU��¿��������������������������������������������������������������������������ž���������������������������������������������������������������������������������������������Ƚ��������ǿÿ����������������������������������������������������������������������ɿ���ļ��Ⱦ�������������������������������������������������������������oe3,��_rg]ZSMLE@A<:847995.*(**$!Aike_YQKEEDCA?=<;;:9978654222/%#Yqoj^QHB<83W�Bj4#"   ! !!
	{��vohqv{�t�xqJNONPVTd���������������yhj}������������������إ�������}{xupiikc_fc`_]\VRKC2dFEA?^��z��uI-//&)z��vPD>58:57448TebEm�������������������������m;��{tpnlhfc^^\YVUQQNH9p~tyupomlkjifeba``aa^\ZVWVVK(Q��������������������kr���{��������{����~���z���{�������������������������������z�������}���}�����������}�����������~�����������}����}�����~�}���{���y���u}{y��}v���x���}}~�z��~t��x������~z������|��������y��~|z|����������������}{kpvu���J 4����������}}yy{�x{{~xwz}tsy}yy||xz{zyyyxzxzz{xxxxwtv}wutxwzquuvrustsqsroortmqqroptulkptpoqspqqsqrvxtqstqqosppnonqpnmmfmnkiikihie_`_G<\^^ZU[]E?@@BC=4+<CFHHJE8V][`Z\XX[ZYXWYWWVVWVXVTTTVWTVVUUSSRSSTSVVXXZWYV_hpstvutst{�������~}|{|~|}~}|}~~}||zjbeeebdfeeefcaab`cbadcbcdaeaa^ad^___^_``_`a_Z^c^__```_``_[`a`^^``_^`\]__[Z\][XZZZZZ]Z[\[YZ[ZYZ\ZZZZYY\ZWZ\XYYYTVWWWYY[YYYWZYYYYZVWVUT������������������ü������������������������������ɿ��������������������������ſ���������������������������������������������������������������������������������������������ż����������¾�������ʼ���º�����������������������������������ǿ�������������������������������������������������������������ſ�ļ�������������������������se0+��_of[UPLGCA>;8676532/,.1.%@ekd^WPIDECA@?=;;<:9777543212.#"\tri]OK@;84CdK|1"#"!"!!!
	}��xohp���c���JONLQWVb�������������ɽ����������������������ۡ������}{yuuqlkm`]ecb`[\YTKC1iJE>AN����uB900/&*v��ulpqnm`VY[UPPdcGn���������������¿��������l=����wrnjfc`]YXVUUSOG2x��~zvtojjgffdca_\]]][XYVUUG'N�¿���������¿������ew������}����������������������������������������������������|���|��������}���������|���z��������~������|��|���{���~~��}}�������������}����|��}|���{���}}~�{|��}v���{���~��}{��}z��~x��y���|��|}}}{~��������������}���}qgqtr����H1�����������~y�xyy|xzw|wwx|st||vvy~wx|{yvz{xty{yvuzytty{qtuwtttxuxwvststpooqmrospqmqptlnrtnnornnqrqmornmpsoopsmmlonnpqlkkokiikhihje`b\D5\^]XSXXD@A?AB>3+=BEFGHD9RYZ[XWWZZYYZXUUXVTWVZTUVTQUVVTTUTRRWWTSWWYZZY\\`hqttuuutt{�������|{}||~}~~{|}}}~}yiadfcbde_dgd]abb^bdaacgcbcfdbaccae_b__a`_`_^]_`^`^`^^]`a^[aa^[\`]\^_Y]`][\^ZZXZZZZ\]X\[[[\YYZZ]WXYZYY[XY]XVYYXUXYXWXXXZ[YWYWWWVXVWWVT��������¿��������þ����������������������������������������������������������ǿ������������������������������������������������������ž������������Ľ��������������������������������������������ʾ���·��������������������������������������������Ƚ������������Ż���ȿ����������������ǿ�����ν�����������Ľ�������������������������th4,��_kf\UNHA><:;:8810/0-,110( Afha\WPKFDBAA@??;9;:7753333210$%Yvpj_PH?<86J���7"#"!  
	w��{ngdu�dKQapQOONSWWb�������������������������������������ڣ������}|yuusm[MP_fb``\ZYTNE4cKC>BP����L/3221$*p��}����{mbhig`YbaHl�������������������������n>����yrlid`]ZVVUUVSNF3z��~ztqmiifb`_]][YWXXVWUTTSD+R�ſ�����������������b}�}���|���~�����������������������������������������������������������������������������������������{���~��z��|���y|��xx��y��{~��z|����||��~��|�~�~����������������{��}���{��|x���y{}{urlu��������������������wehswx����B.���������|w}y|zyz{w{yzyzx|z{yzzysx|yywzxsq{zvtuvurtvvmvutttssmttrnsstoprtmqtunppuouqsqroqpqqqrrooonpoqronooikllkmonghllbfijfgfidab]MH\[XWUYUC?BABD?2 -?BFGHHF9O[^]ZWZZXYYXWUWWVSXUVRWVTQTUSRTPQRRSQQSWUXYXWZ\_jqsssuustz�������}|||~}|}}}}z|~~~~~wibcdcecd`ced^dcc`ac]_ad``bc^`bbaae_a^a_a_^__`a`^b_b_``a____^]^aa\^_^[]]WXZ[WZYZXY[XWXZYWZ\YVZXZY[Z[^[ZX[[XWYXYXXWXZWWWY\XWYYYZZYVVXWW�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˼���ɼ�������ɿ���Ⱦ��̿���ʻ���ǽ��ο���ſ��������������������������tg3+��Nee^VNGBA@;6644.-/,*(-,*$ @fld\UQLIEBBAA@?<::9863233321.#!YspkaNG@;76?e^�G!""!  
	t��}oga\RNTPPNNNPPQWWa���������������ĥ��������������������ڤ������~{yvusqheehec`_^ZWUQF5mPEBAY�����65243('q��jID=9::33348M``Gj�����Ż������������������n;���{uoida\ZWSQRQPNMG7}�{xuqkhggb_^^[ZYWUVWTSSRQQE'P�ſ�������~��������e����{���z��������}���w������������{����������{������������}������������~����}���}���}���~����|����~��������~~}���}���x}��}w~�y{~u~{{}��|y}��{}~�z~��zz��~{���{~��y~��{~������}}~��yxvwxxs|���������������������raivy|����=-����������}uyy|ww{{vx{{vxx~yzx{zww|ywzzyutxyvuvwtuvwtwtztssusspwpogsqpmrrrjqrthmnohmorlooqmnotpnpuoosponoqpnklmlikolilnkdhhjhhgjb_`][[[Z[XVWUC=@@BDA1 /=CFGGGE;O[]XWX\ZWVWXUXYWUWYVUVZWUWVTRTTQPQQRSTUWUYXYXZ[]irttturttz�������~z|{~~}~}~}{}~}xkfeeeeccaddbcedccbb_edccaab`_`]^a`\_]_]`a`^^aa[_f\__aab^^_^\^a^^__^][^]Y[^ZY]XZXZZWV[[WXYZXYZW[[[WY]ZTVYXWYXTXWWVXZWXYYZYYXYZZXWYWXXX�����������Ŀ�������¾������������ſ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�������ʾ������������ȼ���ǿ��ο���ƻ��������������������������se0,��RljbYSNHFE<41.0.00+*),)%##=flbYSPLIFCC@?=<<;88853333220.$Zqoj`PFB;78K���.!#" !!!
j��{nf^[YXUSNOQOPNRWX[���������������{bf}������������������ܣ�������|zvuriTPRcddb`_\WSND4]OD@BO�����:6575+)t���pfc\^]SNJEK[aaHk�����Ƹ������������������o;���rrqjea_XWVTSQPOMJ:v�wvssoikkedb`]\[XVWYVRSRRPG*M��������������������f|����������������������z���������������������}���y���������������~����~��z��~{���|���}��y~��w���x���y~��~���|��}���~���~~~�z�|�{�~��{||}}�{|z���}�xy�{|�}z~�}z��y|�||���|~�~y~��{wrvy|������������������������ndlvy~���8+{����������}v{y}ww||rzzytxx}vvuxvvvytty{vttxusvxvqvyvqxyyopsumruymqpuqsouqpmtnpjrmpmmmqnommkmpsnkoumooojmpqlllnljkqnlkrjgkkhjlmiic``a]\\[YVVVRC=@AAC@0.=CFGGGD;S`\YWXTVVXXRUWVTTUVTPUTUTVRPSTUSRSRSUXXWWYWZ[][]kstttsqru{�������~|}|~}|||}||}}~}{zykdbbec`a`cacecadcaa`e`bcb_ba__adc`_ba_]ab`^a^]]`b_\`baaZ^_]\__[^`\Z\\[\[[ZXZ_[[Y[Y[]\[Y\ZYW][W[ZYWYZYWY[YZXYVWTSTXYVVY[WYYWWYYWVXWWXZ��¾�������ſ���¿��ľ�����������������������������������������������������������������������������ƿ�����������������������������������ɿ������������ʿ�����������������������������������ʾ���ȿ�����������������������������������������������������������������������������������������������������������ǹ��������������������������te3)��]spfWRQH>>;40+..12/-..*&## >glc[USLHE@A@=<<<;:7655414220.' Vqpl`SJB;57H���6$#!!!
+i��{le`\dq]UOTS[^NPXY]�������������ʾv��u������������������ܦ������|ywvpdbnfaeeca^\XSKB6SLC?CV����J26567+*v������y~znm`T\_bcMl�������������������������q;������wuojda__a\ZWRK?x��������{vsnlhhfcdcdfbbbaUJ-P��������������������dt���}���~��������������������������~�����������������������������������������������~�y~���{���y~��v}��~~�~v~��y���{|��}{}��|}�}�~�y}�{�|}|���{|}~}}��}{|~|}���{}|~}~�~|�}}x~rouyz}�������������������������nZlvz����v4)k����������zz|v{{zxxw�|yvzxxwywxzyuwwwywsuvxvuvwtpzvrowusorssotvwkqrsoprtnnotnnpvmtoonppqomlssrooopkponimooikjmjklmllkohgjlhijjehd_^`]\^]ZVUWRC=>>AD?1.>DFGFFC:V_ZYYZX\WYWWXWWTUUXRPVTTUSOORSQQRQOPQQSTVVTX[ZY]nqsrrqrrsz�������|}~|||~~}}|||}~||jdbbgd`cbd`dc`bba```c[bba^acb]ab__`e_^`b__`a`^_`__``ba_\_\\```Z`_\Z\]\\\ZXX[[X[Z[XX\YZ\]VVWZYX[ZXYZZX[[XYYWXWZVUUWWWXZZXYYWYWWWYYWVXY��ÿ����������������¿�����������������������������������������������������������������������������ǿ����������������������������������������ſ�����������������������������������̿�������������������������������Ŀ���������������������̿�������˿���������������������̿���˿�������ν�������������������ʼ��������������������������ue4.��_qmbTMI?88940,,-022//.*%""@clc[XTLHFACC?==:;:8654323232.'Uork`TJA<87;\VyI#%" m��}oea]u�������zIOWWZ�������������ɿn���������������������ܦ������|zyvqijmibdeb_][XRKD5TNBACM����u<7210%(s��gHJ;:;902769G^dLh���������������ÿ��������tA������ysojddabc`caYK?z���|xttrnkhgdc_`^__^ddfxYJ&N��������������������bs���{~�~{���y�������v��������{���~���}������������}����|���~���|����~���{���}~��~���������������~���z�~���{{~}�y���x~��xy|~}v{�y|}�x{��{x}�x}�}y~��x|~�~|~�~x|~{}��~~}�z}~�{~~�x}}}{|rty|{~�������������������������jUju}����q;'f���������}y|zuxxyuux~uuvztwy}vsz{utxywttvwstwxtsx|ssuvqsstrtrvtqntsrmpqrklpsknprkpoolnqrmknvnonommornkmpnllojiimjjkmkkghikggiibid_\]]\[\YVVWRC>?>AB?1
.>CDEEEC=XZTYYWVYYWWXXWVTYXVUTVUTWSRRSTRRTSPSTSTSVXTV[YW^lqtrrpsqqy�������{{~}}}|}~~|z{}~|ylbcfeccfbccfbbddaabaa]dc`^ca_^cd``be^\_`]\_a^]^___`^`^_`a^^_``^d]^b^\^]]ZZ]Z[Y\[ZWXZYZ\ZUYYXXY[UYYYYZZZWVXVVXYUVVVXZ[ZY[YXXYWVVXWWXXX����ý��������¿��Ŀ������������ƿ�������ǿ���������������������������������������������������������������������������������������������������������������������������ɿ�����������������������������������������������ʿ�����������������ɽ���������������������Ⱦ���������������������οǿ������������������¿�������������������������tf7-��]ni^QF?:6521.,,*,//.,+%"!!>blb[XTNHFDDA@=<::88764332331/(UrqkaUJ@:56F���?%%"!!!"  	p���qgcas�������yKNUWW��������������ňr��������������������ަ������}|xvrka[SZfeb_]\WRKE7QLBBGU�����G;41/'&r��qcb[^_[RMJKJTbbQj�������������������������rB����owyykede`]]XS[ZH<y��xvuuqoonmifec`a[[XZ[Z_UH&J��¾����������������cy�������~���{���~����u���|��������������������~���y���~���{���{����}���t���z}~�y|���{}��t}��zy���|��}~�}�~{{~�{���~|}�~||�}|}z~~{{w{|�}wy}|w}~{v}�ty~�xw~�{yz|}y~��||{�vy~�yx||v|~|y{vw{}}��������������������������tWhtz����j;$_�������yvyxxwvxysux|rsvyruyzquyzpryytruwtruxurtxvqutsopvsnsrsqrruoqrsnqpoqsoplnkonolnoqjjmrjmonkjnrjknpmlmnjikqhilojijlhighhgekd_`^\\Z[WVSWQC>?AAA@0.=BDEEDB@WYUYYSVXXUUVVTSUVRTVVSRRSQSSSTRSSRRSTUUTUXV[YXY_krsnoppnry�������||}||~}}}~{{}~}~}xi`ac_bba^^_cbbd_^bda_acca`ca`adb`ba`[^``^^^`\^^Z^]\[]\]`^X^b^\^_XZ`][^\YX\]XZ[[ZXXZZZ[ZXX[ZYY[[VZYXYYXYWWVVZZVTWVVYXXWZ[WUWYWUUWTVYST����Ǿ��������½��Ľ���þ��Ǿ���Ŀ������������������������������������������������������������������������ǿ���������������������˾�����������������ž�������ȿ�������Ͼ�����������������ʿ����������������������������ſ�����������������Ⱦ��������������������������������������������ɼ��������������������Ŀ�������������������������vc6-��Zmh\OD?;63//.-,)(()(('#!"!>^jc[YSOJGDC??=:;987643431211.)WtqlbUIA<67D}{pCH&#" ! 
""	m���phdaefDk��^]^ONWXW��������������Ƚ���������������������ަ������~|zwurneXXdffbb_\XRKD9UHBCCN��yzW27520''k�����{|yrgkqpiffcRk�������������������������vA����sw��upojjecZU^_M>z��xxvsqpu{ytjfdoi[ZY[YZ`UG%G��������������������i�~~���{~��}���}�������~���������������������������������������������~���~���~�|}�~{��w�~y}��zw}�z~{~y|}}ysz~y~~}y|��xx|�|w{~zyy�y~}�|}|{yz}|�|}|�y~~~x}}~{|}�}�}}y~z{{~yx{|zw|~{wzxz{}~�������������������������u[fpv���}l<%`������stxvvvxuxtxwzuvvzvwxxrzxwpswvtvxvutuvqqtvqnturmprqhusrlpqpkoqojnponoppjmnplmmqnlmmlnknnnllnoilnlijmkhjkmejllfgkhcffidfff^`_\[\ZZWSPVNB?>BBB>..>BCEFEA9QWZZYUWXWVVVTSVUTSVUTSUTRMRSSPPPOQRPPRUTRWYZVYY^ioqpoprpsy�������~}}}{}}}~|{|}~~~widcb_cba]^_ba_a__ac]^^```_a_^``Z_b_^^b_]]_^`_^_^^\\\^`^_]^a_]]__[`^[Y`]YW[[VY\[XXZXY][[Y[[Y[\ZY[[YYYZXWZXXX[YXXWXY\XWXZZXYYXXWWWWY[UV��������������¿��þ���þ�������ǿ������������������������������������������������������������������������ȿ���������������������̿�����������������ʿ�������ʽ��������������������������̾���������������������������������������������������������������������������������������������Ƽ������������˾������¾�������������������������ue5.��]oh]QF@?:5455/,*(&'**($!""">aib_ZRPKFEC@>;:;;:8444520121-&VpqlcUJB<84AtpmIB&"" ! ! 
	e��~rhe__k���ogbYQSXXV����������������MGb������������������ک������|xvtqkbd`cfga``\YTLD6XNEDDS����o6>:2-$%j��~ZG;:971@_qplibRi���ƶ��������������������z=����pu��yuvnkkcVWa`N=y��twutqp{�~��yj|~h[ZYXY^{\K%G��������������������h�������{}|�z���~�������vz�������|���z���z�������������������~���}���|���}���~}}��}|}��{�x|�{��~{�~w}{}z{yu|}}x�{{x~~tw{~xu{||xx{�w{}}vw}}xy|}}u}~y{|~z}}�{x}~|z{�y~~y{|~xxz~|{z{|}{y{xxz||����������������������~}��xbejr��zo9%_�~����~tsurtwyovxytuuwtxxxuvwyuuvxwuvxwuvwwtqvyyurqtrlssqkysrnqppmonnimnlhmmnemmmhkmokhmmgjnojkonmlmonjlolhhkkkhmljhjhgdffieded\]_][][ZWTQUNA?@>>B<--=CDBCC@9OXYXUZYXVXXTSUWTVWXUSSWUSSTSQURQQQSRQSUUVZXVTYX_iononprpsz�������z|}~}~|{~}{{}~~wkfdcdc`_`cabaa`_aabWb`b__^]\_`^\`^\]_`\[]]\]\[\_^\\^``_\\b_Z]_\]_b]\]_[[\\ZZ[ZYZ\[X[[[Y\ZYWY[\ZYYVZYYZ[\YXYXUWYYXYYTWYVUZZXVXXYXYXXYZ��¿������������ÿ��ÿ������������ý�����������������¿����������������������������������������������������������������������������������������ʿ������������ʿ��������������������������ʿ����������������������������������������������������������������������������������������������������Ͽ�������ӿ���ƻ��������������������������td6-��^pj_UMGEE>:::3/*&&&%&%#$&'& =ekaaZUPLHEA><<;:;97653320021/$YqpmdVLB:81C���D $#  !!!	
+c���qga_i�ϥXFe�vNTZZU��������������̿�m������������������ߩ�������}xwuqgVOPVdgd`^\ZUNG6YOFA@[�����H=81.$"h��xdfie\UVUYptokfQf�������������������������?����ptppomkdde^YW`cN;s��vvvurrq~wqeqrc\ZZZZ]v[L'C��������������������e�䟎��iJA@FCHTelmsuteYbqXYnnm������������������������������{���������~��}��u~��zx{{{�z�wuk�~}|�|v���y��x|}�}y~~yzz�{z{�|{w{y|y|~vyxzy{x~zzwzzzxt|}|z{{~vy|~z||~xw{~zw{}|yz|{yyz}uwy|ywz}zxyz|xy{{|���������������������{|~�{hgbn~��vo:"\�|}����{rmqntuwouturuuvqtwxnrvxqquwsotxusvxvrqttpqtqnqqrkprvmnprnpnolkknnljnjkgnkjgljkhjkjehlmihkmjjlljjmphhijgiinihimhfhgeefdbb_\]^\]YWWSTUL?>?=?A<,.<?BABD=:OXWSQYWTTXZTQVURTUUSQRTRSSTRQTSQRRRRSVWXZYWVY[Zbjqonlopqry�������{|~}|~||}}{y{~�~|wke_``^^^``\]_a]^c]__dadab^^]``___]]_]^[^^]^][]]^^Y]_`_]Z\`^[\]YY\]\[]ZZ[]WWYYWZZZZXYYXZ[YXXXYZ[YWU[YZ[[YVWWWVWZWVWWVXZWVZXWWXWXWVUUWZ��ý������������þ��þ���Ŀ�������Ž�����������������ü����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̽�������ѻ���Ȼ��������������������������vj;-��Rnlf]TMMNG@?>94.'&&&$"#%&)%=cia_ZWQMHCAA?><;976654232022/"XtsndYMD<738^UR!$# ""!
+_���nib[nƒTNRVkqOTZ[Z���������������vnpx������������������᨝������}yvutl]ZZ_hhdb_\XUQG9fQC>AT�����I752.$%l������}onnhdtvtqiRg����������������¿�������A����uuqpmifd`YZYV]_K?y��rttuvwujhgjfb]]^]\ZY\`x\J(F��������������������e��EPE3=+#!" !%&"+CCFP@B�ƿ����������������ļ���ÿ���e[�����������}�}���{ywell^/)*%!.96.956DO@=DXupu{rr}~ur}~�uw|�}w|�yyz|t���{~�y{|�|y~�|}~}}�z|~}|}}{z|y{y{z~xwzuy|{tvxzxuyzzwyxz||~�����������������zz{��mkjo���xi9
+!U�qv����wmqtvxwusvuvwvuutvuvnxvxsstvsqqvrquuqpquplqvjfprpjqpmfnqnjlpqkjnojmmnhjirmkmnikikjhhlkkkhjlghjihjjkfgiigijjfghfbdgebddc^c`]\][XXWUTSVK@>>??@<,/=A@?@C<:T\XVXYXVYXWTUTTSXTQPTSQQSSUPSSPPQRQQSSTXYYVUWXV`jonklmpnow������|}}}~~|{z}|y|~��}}ujb^_^_\[WVVZ]^\_a\bba]``^_aa_^ab^[^`]]^^\]_]^\]^_\^_`]Z\][\\\ZX[\]][[XZ[ZWUXXYZXUXZYVXZXVVWWYZZWW[ZWWZYWYYVUUVWVXVWYZYWWYWWYWVXVUUWXV��ý������������Ŀ��ſ������������Ŀ�����������������ÿ���������������������������������������������������������������������������������ɿ������������������������������������������������������������������������������������������������̾�������Ϳ���������������������Ŀ�����������������ſ���������Ѿ���Ǿ��������������������������vj9-��Knni\SNLJFBA@<4+((&&$"%'''#:akc`[YSMGCA@??<<976643332121.#WqsndXLB:5:J���?$%#   W���ogb_k�HEPRRQYSUZ[[��������������Ɛ���������������������ߧ�������}zwvtg_i\eiidc`\ZUPF9mJA>@Dq��o�<552.''q��wKEC78839]yvsohPf�������������������������|A����tv��kkmnd_][W`^J=y��{vuw���vrrleabb^`]XY[_pYK*E��¿����������������j��EIB:q�]1!"!"!$)/4/+,;DIF67�ſ�������������������������g,��~�}~~���~~��{wk$$=<$#9wyd>5<@;?JC;58=FGKSVWZZc\L�V;44=JEDCIRj�������������������~�~vwzx��s]jieonmqtsw{rwwytux~�~�����~~�������������yty�|lmms���yc8"S�ys{}��ulstwxrtuuquxupruuqrvxqrrrpuorqtrssqlsutyvtvppyutosrmjqnjjmnk_lnlgjllijjlihmnfjlnihiljjklikknljkljjijhigijhgffdacedacdd^d^[ZYXWWYWSQPG==?>?@;*
-:?A@?A<BWXVWYVUVXTUVXTSTVSRTURRSTVURVRNTVSQQSSRUVVUYWVT_jnmlkklnpu�������{}}|}~|{}|zy|}~�}ufWNNIG@=7398=@LV\`eb`[]`^]^^]]a`XX]^[]__\^_[XX[[]\]]]ZZ^]YZ^][\]\\^\[[\ZXXZZXYXYY[\YYYXWXYWYXYZWWYWWVYXXYXVTVXVUXVVXYWXWWXXYWVXWTVXUU������¿��������ÿ����������������������������������������������������������������������������������������������������������������������ſ����������������������������������������������������������������������������������������������������������������������������������������������ο���ü��������������ÿ��������������������������vh5+��NonfYTOJIHCB=93./-**)+)(('%7_jba^XTLIECC@=;<976632233210.&VnrnfXMD;<d��up4&%#!!  V���sib`g���~~y~wYU\^V��������������ǂ`��������������������જ������}zxvsnmoliihec`\YVPF9kLCBBDRQL{�<94/-&(n���tbcb]TQWixvpleUf��������������������������D����rt��uyuqnel]X`]K?y��vuvv������~{xpnopfYXX^qXJ&B��¿����������������k��HLB,1[��P,$#$,;JGH:0<FIG::�ý����������������������ÿ�b1��}~�����������~~|wm"7'fR+   !2  !%7������������������������zF2)D�E%$&"$+4=?9>\n�mXVRRRNNRQY]]e}�����������qmo~znmqs���xf7#P�yqzw��sktrtuusrqiqstiptsmtuvpnrsjnosomptpoq}����bifhhhknmmhpsjgmuoshsllikjiefggdgjj]fjkgghkdhnobikkgilkghjidfhifghheedhfeddegel_\ZXWXVWURPRE9<>=>?;)-:@??@A=?SWTUUSTVVTUVXTSRSRRSQMRRRSTSPPQRRRSRSRSUWWW[WYX_hmljjinmos}������}}|~}}||~zy|~|pZ/%)-:MJ;43/1,2=V]b^^`c[__`_^^_\V`^_\\\]]]^Z\Y[Z_]][[ZZ][YY]][]^[[][Z[]ZXZZVUVXXXWZY\\WX\]Y\]YYXYYYZXX\[WWVWXXVWWVVWWWZXUWWVVYWUSVVOS��������������Ŀ�������¿�������������������������������������������������������������������ɿ�������������������������������¿������������������������������������������������������������������������Ƽ������������Ȼ����������������������������������������������������������������������Ż���������������ƾ�������������������������wd5,��PkjcYTPR[SFB@;99854424.,*)&:^haa_WVKJFEB@=;;:765443222120%Smtmg[PD==YZ�jeB"##"!! !
	P���picdn��������YZ__X��������������ͺjtY������������������ݭ�������~|xvspqolligfd`\ZWOG8cLHHCZ�����?950,"$l�������tf]UYssnkhSe��������������������������A����rszytqjfhgg\X`_M?s��rxvw~���{zvpnkklkaWXW^qWJ+B��¿����������������h��KKA,)9�x�x+"'8@<7>E9AJID5>����������������������������d4���}�����������}|{wh :(% ]-&#  &-�������������������������-'Cm!'&.I�b(fIk^XWXW]]`XY[SFhlknrw���vf3%P�vn{w{gltkil��d]YYUY[Z[[[]bYZ_`_Z]agilprrv����͜S]^]XZ\XSTYpfY]\XX_`g\``e`_`jihjljicjilhhilgjkkfjihdhifaggfcfgedfdfbdfe^dfdcfgd]\[YXWVTSPPRF;;>=<<9).8<=?A@;>SVSRVUWTVTWVPRSRPQUSQMTRNRWQNLPPPSQNMQRPQUWWSZZ^gkkjihklmt{������|}{z||{z{|{z|~�}{rZ%'5N�VDB<9:;;BT]^[]]`W^a][_^WZ`b]\[\Y[^_]\_`_^`^][\[[\^]`_]Z]\[]\[[ZXWZZYUZYWXZWXY\ZXYXYVY[XXYYXYZXVZXVVYXVVVUVXXUVYXVWWWXYZXTUXWSV���������ÿ���þ����������������������������������������������������������������������������˿�����������������ÿ������������ƻ���������������������ƿ�������ɿ���ſ��������������������������¿�������ƾ���������������������������������������������������������������ʿ���������������������������������������������������������������vi7,��MghaWSRc�raH\AMTI=EE<9?.,,'!:aka_^WWNJFDB@==;;756544321020%OotmeZOE>>Nw�m�X#""""!!! 
	N���rhdcehfkaRR]]Z\_`V��������������̪Wcs������������������ݬ�������}{wvsnnnjigfgd^[YVOG8_FBCCQ�����;782,$"i�����s^D;45KppnmkTd��������������������������D����xvsjeea^Z[ZYXcbO=x��y{||unnnlmkb`[_]VXZ[Y\qXL'@��������������������g��LKE./O�wT'"*=91-.>ABMIC46�ĸ�����������������������ùa5��������������}zyi9()$O'&$ %3�������������������������,)Bx&  %##9�_'p3^`]\\[[^[YYVO5jnlpsw���yp7 H�{m�rwrhosBLQmdAFE@BA>?B??>ABAB?>@@=BVsslxz����țS]]][Z[XXXI)$@"!$!"#!!&%%#1Zglhdghkgiijgjkkfghljhhljhgjkjelhjgiggeigd_edccdd`[ZYWWUUXRNNMD<;>:;<9' .8;>BB@;<TWVUUTXVTVYYTTVVTZURTTSSTVVRNPQPNQPQNRTQPXVTSXV\fjkjjfkmlrz�����}|zz}}{zz|}{z|}~}ztZ"(:Q�VHGC????@X^]]^^^\^_\\_]U[``ZXZ[X[][Y]_[Z\^][\^^^[_^ZZ^]]\]\Z[[XVX\YYZ\YVYYUWY[WXYYYXXYVWWXXWXWWYWWUYVTRUSTVWVVXVUWWVUXYVUXUTTW����þ���¿�������ƿ��������������Ŀ������������ſ���������������������������������������������������������������������������ſ��Ǿ�����������������������������������������������������������ž�����������������������������������ɿ�����������������������������������Ϳ������������ƽ���ƿ��ʽ�������������ý�������������������������vi9,��Mgld[SMc�t\B�=cvmDg_>Cf.4Z2(&:Zc`^_YUPLFBBB?=;:65544332110-&Vssme[OD=:>]R#I;$$"!"! !	
+F���rjcaa��ŴV^_]^ac\��������������Ȉ��������������������ᬜ������|{xwsiWNPYffdb`]ZUOH9VI?@A@FKJ|�=763.' e���fK<6CYelkpqnmlQc����������������¿��������@�������}wumjdeedie]QAq��~��zxvrnjjgecccdiab`aoWL(D��¿����������������k��KMG<o�n?%!!"+965/-;AGRHD55�Ż�����������������������úb6���������������}{xj$9%*(Y$($ !%0�������������������������$&@b'(9$&#8�`'t:__]\]]XqoUXVN2ikmssx���ys< N�wiwipqqpoAFNVL@@=>>><===>???@>??@?>=:S`PTVb\q�ˠT\ZZYYXVXWE:&Gff^]ihhbX[iiaec\\bfabhu��n`[_`badkmgnkjfkejicbc`]WVXUSTQROMMPB<<=;==9&0:==@BB=8RWUPRSSQRWYSUVSOSVQQPSQQSUSQPQPOOSSSTVVUWYTUWWU[chkif`fjnry�����}}|{{{{|{|}z{|~}ytU"+:W�YHEDAAA@?ZZ___]]^^]^ab]]_]\[Z\]]_^[\\ZTZ\][\^^]]Y]]YZ]][[]\ZZXWWWYTY[YVWYWY[[WXYXXWYWXYYZ\ZZWUZXUTWXTUVUTUVVWWXUUWVSRVUURWUTTV����������������������������������Ǿ�����������������������������������������������������������������������������ǿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������̽���ü��Ͻ���ƿ�������ɾ��������������������������uf9,��Tfid\RNg�xuB�4ewj@he@Al*2cR-)9Wd``_ZTPLGDB@><<:76433222201.%QmqoeYNE=820L2():)$" "!  
+	D���tkdfn��l�xa`a_add]��������������̦���������������������ᬞ������|yxwvp]Q[`ggfda][TQG9!NJBAGZ�����>453.(_�����yohkhgkprnmhSg��������������������������D����qxridbbc^]]\Z^`OCs��uuvvrpnjigeb`_^^Z][ZYZs[J+=��������������������i��LOF4P.$"!#+?EL9+9>DPJE7<�Ž�����������������������ĸ`8�������������������}yzl <%$#
+`!'#&\yZ/  "'1���������f���������������(%HP!8�2#%(.+*�]+s=aa^\jy|��TYWP1flmqqx��z~j6 K�wopbtvtqkBFLUH@>=;<=<==;;:<><=?>==96&1HKLLTLYm͛FQNNNONLKJ72+1%.8>+''+/1)#(+**,,1;P��SCMJJLIJMJFLMHMJOWbbc^_dX\ZVORUVRFKG==>BAA=:)!#1<<:??B;:TWMQUPOPUTUQXUTSZTQSSQNQROKORQMNQQNPRQQSUSMQURPUagiha[dijox}���~~~{y{||}~}~{z~}{wQ!):]�QCBECDBBBZY\^^Y[]][[]YW\\YX\]Z[^^\]]\ZZ][\]^a`\]Z\YW]^][]]\]^ZYXVYXZZYWXXWUZWPWWWWWYWWYXUY[ZWWYVUVXWUWVUTVVUVXVXXXVUXVSSTUTVWV������������������������������ÿ��Ž�������������������������������ÿ�������Ⱦ�����������������������������������ɾ�������ξ����������������������������������������ȿ���ƻ������������ƿ���������������������������������������ɿ�����������������̽���������������������������������̾ѿ�����ʿ������������ž��������������������������te7,��PkjfcYRl�a�G�2ggiJe^<Bq)4RJN.:]da]^[TOIFCA@>;::87542122322/$MqrpgZNF<96BpLV��7$"#!   
+	?���rldfo�^c{|f``_bff_��������������ˀ�cg������������������᫝������}|{wvjVeg`ffeb^]ZWSG8UOFEF]�����@4562'`��������yi_ZjnokhQ`�������ƴ�����������������E����jy��zrjica\WZbbO;t��vvvuqvtnpledc`__^][YW[|YK);��������������������n��IOG<`cd_]J."'4@E3/54BPLD66�Ǿ���������Q�������������÷c8�������������������}{xh C)$"	@$  5N(SpI$"%1���������������Ƽ��������&'HM /����o"-*&�Y$&-u=f`\]x����ZYWO0clmrtx~�zyi7!O�xqqm~|uriBDIVE<=;<;;<;=:;;<<;;;<<;87#=DJISKS@`ф &&()'%$##
+7,/! !!$'*��KMOOMNOOOOMPONNMON`I0-305>/+,+..,()$$'.78>4*# "&'((*+*'*;748524N\]jllegehd`^WUXYWTTYYVQSTRMNQNIKMG??=62<\ehk_Rafhjuy{���}}||{{z{y{}}xz|~}{tF+=\�VGJGJICDHZZZ[\Z]^[Z\[Z[^\WY\]YZ]YY[\YXYZYZZWZYZ[[YYY][\[ZWYYXYZ\WYYZYYX\YYYZXVXXUUYXXWWVTVWUVWWVWVVSSRTVVWUWWVSVWVUTURQUTUVUVU���������������������ſ�����������¾�������������������������������������������������������������������������������������������������������������������������������������ȿ����������������������������������������ǿ���������������������н�������ν������������˿���������������������Ҽ���̿������������������������������������������sd;-��Wnkhd[Sa�ldCz8`k_Cad@Hv9;M,E69Zi`^`[TPHHF@@>;;976651132210-%MpsofZNG=875J���g&""!! ! 
+
+;���smfih~pn��b_``adg^��������������ɂly�������������������ᯞ������~|xvtlhnkhhgdc`_[YSI;SSICBV�����G267/%]�����~��}wrklmkkhT_��������������������������G����s|���xxl^ma[Z``O?v��ywuv~���zfkrjqmnb\ZY]xXL(<����������}���������m��IOIH�x{���<#)7BIF@>7BPLE53�ž���������=��1����������ĸ`9�������������������~|zi <&&&	:)#%S0_M&3���������qXZ]__bs��������#'@E+tx�|t",'(�X$^iN  +v7gb__dqv�p]YWQ*^lotst~�|wq8 R�{ruo|{urgBDH^D;::<::;<;;<<<;:9::8::3#5IMNOPQOf�'%	1/1!#-��HNPQQPPOQPMNOPONOOW, 4b����������uy�`\^bg`]`d_VSH;20-+)# &*>Yqutsh]ikcjswv{��|zzy{{{z{|{x{}~{yvG-;j�TDUa^JABGW\^\]\^^\ZZ[]\\YY_^\\\[\[Z[XZZZZZ\ZYXYYZXY[\ZZ[\WVYUWYXUX\YWZXYWVYWUWYUUXYVVWWVWWXRWUWXZXUTWTUVUVSUVVVUVRTTTTSTSSTTRV���������¾���������������������ƾ����������������������������������������������������������������������������������������˿���������������������������������������������ƽ�����������������������ǿ��������������������������������������Ϳ���������������������ʾ���������������������н���̾�������Ѿ���Ǻ�º�������������������������sh:-��WnpjaULKF>55;218;6B>33>984)( 6Sc^^^[UOMGFB?;;:9754532321110&
+MrupeZPF>653^�[#$   
+
+9���{okkhti`ck\_b^_eg`��������������γ��þ�����������������ட������~{wxtlaejjhhfb^]\ZSI:ROEEET�wpi{<251,& \��~���z{vrpnkhgX`��������������������������C����r|�p||qk`r]YYY_N>p��tvsr{���uzkornuqnbZY[]zYK*7��������������������i��KNHBu) $1r7"*>CIJEEAESND50�Ž�����������F&����������úd;�~�����������������|yxh!8!))	>*#BB))M?$1�����������}z{uio��������$&=D !8�ut2"&'(�Y#bSB*u8d\ZX[UW[XUWWO-^onqtx��sxo8!R�vlsm~|sqg<AGXC<:::89<;;:<:::;89:8873%*KKKIWAXeσ$���'	3$+ !)��KPOPKHDKMJONNNOOONZ&&U����������s_{OVULUWTSUXTB-%$$$)5Ga���������������wk��~|{{{xz||{|zx{}~{xsC.=j�\JgsbKABHU\][[Y][\\[Y[\[XW[ZZXYZ[][YZ\[[[[X[Z\\[[^[]ZXXY[XYZVVWVWUVWUZZURTYWVVXUUZTSWXUVYWUUZUVX[TUXXTVVSSVVVTUWTVYVUUTSSSSTRS����ÿ��������������������������ȼ�����������������¿�������������������������������������������������������������������������������������������������������������þ�����������������������������������ľ�������������������������������������������������������������������������������ξ�����������������Ǽ�Ľ�������������������������th6+��WjniZOF@9501/.,/230*'%(+*($ 6[c_^^[TPLHFC?>;::8665221/023/(MrrogZPC>649\MJSO%"   !!!9���wqnnv�����kl�abdfZ��������������ͱ���������������������⮠������zwtqol\U`gigc`_]XSJ=eSDEBQ�����B342.& Z��xjj_^^a^]VTVQ`h\a��������������������������=����lz|inllhcaXZV__ODr��vwtsx�~}spjjiffhi`[\\\zZK+9��������������������l��LNHD};#(:{5",8:=C6@?AVQH94�ü����������}-(����������¹^;������������������}zzj <#&-	L"4)) Ht=5C2G .���������vz����ʽ��������$'CM! 5|rt9',&�Z)tMrnnnkohhfecV(dsprtw��xys;"N�vhuq��uoe>@HVF8:9989:>??<:;;;98:9764!6IMPPQZSeщ%[+Q#/)/ $-��FPRKm����PPQPPPNNPW%   !  
"Y����������kI�UVUTVTUVUUT6.3:Man��������������������Pd{zzzxz{~}||}|yx|~|{xB0Eo�YOz~cF=CKW\\[XZ[[[ZXZ\\XYX[YYYYXWZYZ\][XZXW]]]^YZ][VXZ\[YY][YZXXWX\YW]ZXXYZVVVXXXYVUYYTVWUUVWVYXWRUVSPTTTRUUUVWWVUUTVWTSTTSSTT��������������ľ���¿�����������ƾ������������������������������������������������������������������������̽������������������������������������������������������ž��������������������������Ž������������ƿ�������˽������������������������������������������Ǿ�������������������������������������������¼�������������������������tj:0��[mldZOE=6213/++)/00)$$(*)($8Wda^^]VRNHFC>?;::88543211120-"
+NssqgYNC@78E�����?!"!!  
+:���{plnn�����dlcacg^��������������Ȃ�qm������������������௜������~zyutn]UQbghfca^[WQH<dLBCBT�����=2532*Z��v@B7<240.0,2:[gYb��������������������������>������~onieadb_\c]M?p��vupqolikjgeba^[][YZZXYr\M,6�����������������¿�k��HMI6vg-/Ui+"&317B369CTLG;1�»���������b|������������º]<�������������������~zyh>$&*	L(J:1 1fb?(6K%1���������l���p�����������('LI 1��v,#"%�]%O[=+sy�������������3crmrrw~�z{r<!T�znso�|spi:BGSC87779:DKKQ`OODF=:97653#<NNOQMT=\уp�q	)(3'-3%,��INRY�����SGMLNMLNM_"8L%/4467<?<'!Y����������eO�[VUUVVVVTWXGVfrvxz�������������������<Yvyxyzz||||}}|zz|~}ys> 4Fn�SMuqR=8<GSVYZYYVZWXV[ZWVXZZYZXY\XXY\\^[Z^\[[_^\VUZZW[][ZZ[WVZZWWXYXXVTVYYVXVXWYXXUTWWUVXWVWVWUXXVUUWTRUVVVURTTTQVUSRVWQPTURQSR���������������������������������������������������������������������������������������������������������������������������������ƿ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uj8-��Zfgb[SJA74221.**+..)((((''"6Yh`^\^XSNGFC@>;;<86643121100.#Hosqi[OF?969o]F?/'#!  ! 
+	9���voomzhc`qfbfbabeZ��������������ʎax�������������������ⳛ������}{wvtj[jlljhfca_\WQJ?YO?<>O��\��=1212*[���|�xvhbX`ed]V_f__��������������������������@�����n|�kvmmchdshh\N<x��wwusqpmkhfec__^\YZYY\\wZL-7�����������������¿�h��JKF,>����D#/05:245CPLD8/�ü�������������������������a8�������������������zyh 6#&	R'0#-$)L_e3$1����������ͦ�������������#(G>!3��t$!$�W,ha@.q}�������������3dklutw~�uxi5!X�}rrryzsng<@EX=77668<l�^v�U��}C:97641$)KKOFRM[iӆ%u�w#2>���K,/!(('"#)��JROb��qqujklifb]MQZ#ac&-9>DHMLUN2Y����_LR���[SzXSTUUUVVS\nprmlnlpv�������������������1[vwxzzz{||{}}}{{}~}|yA$6Fn�C7KI>96;@DQV]QEHMKJSgVNT\XYYTSWWUUUVW[_ddc\ec`a\``][]_]Y[\XV[\TWXXWVUUWYWRUTTSWVUUVUUVWWSTTSVWYUTUTTVWUUSTTTWXUUVVVSSRQSUTSVWS�������������������������������������������������������������������������������������ʾ���ɾ��������������������������������������������������������������������������������������������������������������������������������������������������������������̽������������Ͻ��������������������������ȿ�����������������������������������vh:*��L^f^WOIB731221/,+,,)')**$!  9Ugb_]_YRMGEB@><:896633233111.%Nstqh]RG>98:wiT $" !   !
+4����vppo�ǽ��sbcb_deZ���������������ä�ƾ�����������������Ი������~zvutmklgehhea_^\XQJ@@C;54?�����4.100(X��p}��iPFZhf_N^f[]��������������������������G�����p{�inuinkahtn]S=r��wyuurpmiffdc`^\Z[YYYZ`zYM+3��Ŀ����������������l��JJE.(,;@* "0<FFEB9ALLE93�ü����������������������¾�\:�������������������~{{h<"	T+8+'@QZ'RqB%3���������gwliw����������� *OA 8�8f># !�S% )p�ǿ�����������4ajmtvy�syr6&^�vqqkkrqoi:=BQ>776563]vmt�adg�@986530"1/0-67Ae΂"^�l	=$hS)>T(*'7@>4$*��KRRW����������ı\N]#Kh_`YV_"
+7LKJFB=7/$]�����Yw���]SqVSTTQVTWTZhjkgfknmnw������������������/Wxxyyy{{||{|{|{z}�}yv:&*8Eu�I>DB>=:>9.))* %")E-$%052.//6?@A@GK_t�{y{zzwsoqlmoldb_`^Z[ZWVWVWSWWVXXVTXVVVYUPTUSSVUTRSSQTWXUSTTSTTTUWTSUWWVVUVVTTTTVTTUVSU������������Ŀ���������������������������������������������������������������������������������������������������������������������������������������������ʿ�����������������������������������������������������������������������������������������������������������Ѿ��������������н�����Ͼ����������������������������������������ug:*��O`f`UJE@74124210.,,(""$$!7^kba_^\UOHFD@>;:888635332121.'
+Nptoj_TH>97@���e %#"  !!!4����womm�~uy�i`_]^cdY�����������������������¾������������䱞������{xureLONWegedb_\YQI=79/+)+Aq��v)--,/+R��|LOX`onglgXLE\fZ_�����������������¿�������F������z|mjjgcd^^`g]LBk��zxvwspmjeddd`^\[YXYYY^q]L+9��������������������n��FIE@nuhb]\1 #+5EKADAANNI<.�ƾ����������������������ÿ�`7�������������������}{yh7 C(5*(KU@ #0/%2���������f���������������"*SL /{"rC$$!�W V`<(u��������������4gmotux�w}q6$]�wrspnppnf><@Q@86877::05:A=64<656872/"%,_·%cAS$	>%@0*+,?MI@"%0��NTROIINU[Z^_eilrOPU!9u�����.<=;5110- b����rJn���ZQ�XSTSRRTUUNKWVWUUZY`emv���¶���`AM�����7auyzyzz}}}}}{z{{~~}zvD63<Fy�JEIFGFBA8 &"M�����������������wwcUVTRQTRSSSTSTUUY]XWVXVWYXWXXUTVVSRTVVQSSTTUVUUTQQUWXTTUUTTUTSSRSTTRT��������������������������������ſ������������������������������������������������������������������������������������������������������������������������������û�������������������������������������������������ɽ������������˿�����������������������������������������������������о���ɿ���������������û�������������������������sf;+��QcfaTGB=76210001.--&!!  7[l___][VPHFCA=;:985443320240.&
+Jsroj^RG?:5:qrnN $#!  !"!
+6����xqkhudX`yo[^]^bcY�������������������������������������ᯞ������{wtrjkpkhhheba`[WRJ? 72',)*KZU��5)*)3>!N���{{{wrikrmkidef][�����������������¿�������H����ltohge\]\[ZWW^\PBl��suvtrpnkgecc`^][ZYXZZ`m]J.6��������������������o��LNE3_o��p�="!(0AMCC>CQQK81�ǿ�������������������������d<���}���������������~{yh%B# 
+;,P=)b^!$%0������������ɺ�����������!-WN7|fl]>"""*�U#h}G  ,v�ǿ�����������1hmnvvw��~r4a�xtrnprpni<;>L<6JWLGRPFG<=D>BJMIDJD40#*aχ%NC%
+<9,-?+,-@LK>!%1��JQSKs�������\;NPQS )	)7;;5+ !d����ncl���VNmYUSMGHIQQD6ILNKKKNKEIH[x�Ķ���gWd�����9auwxxyz{||{||{{{{~~|xoH;4=E~�=AHGFCAA:)@7&-)&9�������������������_DJJHFIGKHBDIJKK~gTSQTYcc[[[XSTUNMRRPPMOKOONPRTQSY[WZWVTRRQSQSSTRSTTU�����¾�������ƽ�������ƿ�����������������������������������������������������������������������������������������������˿����������������������������������������������������������������������������������������������������������������������������������������������������������ǿ���������������������ɽ�ǿ�������������������������vi:*��Nce`UHB<9:50..//-/,%"" 5[m___\YURHEDA>><:86544431211.&
+Gqspj]RG?;55er~g $!"! "$!
++����{spm�����v]^]\ad[�������������������������������������㵟�������|xuspmnkihgfca^]WTMA!ILSZJZ�ƾ��L1.&/3N��|hfc^SPQXYkmigibV�����������������ÿ�������G����suomica\[[XVXa^P?i��uttrqoolifeb_^^[ZZWXZ[s[I,/��������������������v��KMH.'8{m3y8"$/>BBCB5APOJ;3�Ⱦ�����������������������½^8��~���������������}|yh(V"%<0'%<A#$!%3.$G&/('DLL!%8���������dhke\�����������-QJ8����f "!%�X"+,&,x}�������������5nopvx{�zx4
+	!i�zuqmrspni?;?J:1p�lo{�p�vh�wp{u{r�~;3#(fԅ"vwq&6MUANQ(, ,=?<-$+��BOT]����������LPPPV"3{��f"2BBIA5%d����������S[qVTQ>.-/CPB;QLMLKHMPNOQMr�ŷ���r[X�����:euxxy{z{}|{}}{yz|}~zoL<3=I��=DGFFC?@9;5:(%MWTD#9�������������������TGHGGECDE?=@EDAC�mNHJMVel]]\VRUN94346::99::=>@CDQdom^XUQMLLLMNPQTNTXZ¼���ÿ�������Ŀ���������������������ļ���������������������������������������������������������������������������������˾�����������������������������������ξ�������������������������������ǿ�������ɿ��������������������������������������������������������������������������������������������������ɽ����������������������������uk=,��P_f_UJE@<<60-,0200-'#!!!4Zm`b_\ZURJEEA=;:996444332121/'
+Dnsoi^SGA;6;k��n   !" 	+����xroj����q\_\]\bd[�������������������������������������䲠������~{wwsm`abafhgbb_\XUJ@'bij\RSq����6!'*'J�vIE@BAA;7EinkjjdS��������������������������C����rsokfb`\\ZWWW_]O>h��usrsponligba]]]\YYXXWXp\L++�����������������¿���IMH5T����j'"'7=.-.,*APNK=4�Ž�������������������������]@�������������������{zyg&H '|����u'7.3994!"#F.<'.!+;Y' !"#1�������������������������.O=6�km'$""�X!FP0)z��������������1nonrtz��}�y6
+g�{vqlosqrk;:@G71g|mqzxei�jqvwwqwsx8. %'cщ}�s
+1#lz��{),/!,>A3 $,��IQQc����������KPNPW )lwruyY	.6.=?2%h����K<>���OZlWURF7/2BPDHnJQ\WKOSMLPPr�Ŷ���yKG�����@auxxxyz||zz{zyzy~{mJ;6>L}�7BFGFC?>6!]/S?!QUZO$>�������������������VJIIHCACD??CBCBD�sUPPPSU\VXWTRO>  !#&(Fy��eTVPCCCDEFKLOHYnx��������������Ŀ��þ�����������������Ľ�������������������������������������������������������������������������������������������������������������������������������������������������������ʽ�������ʿ�������������������������������˾��������������������������������������������ϻ�������������������������������������������������vi8,��Vdh`WKFC@>20,,.111-)# 3Vnbb_^[VNKHE@AT\9977557C6012/'Boqqi`SG@;68nteML(  ! !!"!
+'����{splgcd^_^\\\\beZ�����������������������¿������������崞������|xusgX[XZgihda_\XTJ@"^odbVULKe[p)&0/$J�x~|�|vfUinmjjdS��������������������������D����qrnifda^\\XVV_cP<l��|utqoppnhheb^\]][YYXX[j]J)0��������������������|��POIAv_)-:*  &6?<3)'&ASNI77�Ǽ�����������������������¿c>����������������}{yf!=#/$l>Nf"=<</-7-0&
+A*.-"6ot) !&5������������ˤ�����������.]F1`!:4$%(�V!m�B,u��������������1kkjquy}�z�x4
+ e�|wsnqsppi<9>N60:<:9;:653113;>F<7::2- #&'g҇%h7C%
+5BAV1,1!-BH9!$.��HPSXv��������zIRQQW8DM!	+3+7<1#h����|y���MWkWUQG736DTC>VFh�zdv�[NQQr�õ�����������5exyxy{{{}|z{|||z}�{xmJ<5=H��<CGHGDA?5%_)T?"RY_K$A�������������������YHGIHDABB@AEDC@E�oTRPOPPRTTUUSVC9y��dSWQCC><878891e�������������������������������������������������������������������������������������������������������������������̿������������������������������������������������������������������������������������ʾ��������������������������������������������������������������������������������������Ͽ���п�������ʺ��������������������������vi=*��T`g_VKGEEA4/-,*-10-)" 8Rjab_crgWOJ:Je��;68877X�G.210'BsurjaPHA;67u����7! !!! 
+'����}tnjhec`][Z[]^ae^�����������������������¿������������⶜������~zyvsmlpokiihe`][YTNA&ehZ]RPRd�=$#+21I�nplja\WQHelljgaR��������������������������C����nqmjgeb_\WVXW]cP>h��wuusponliiec^\\][WVXX[p\H0+������������������¾��ONH37'%($"" (5DM@50ASMG71�Ż�������������������������c>������������������}{{ye!N#%--(@!-3-"#,N\"
+D*6'#7"0Q5  %1�������������y����������� .UR %%"!)')�R8A,*w��������������2hpmtwy}�|v6d�yvuqsspmf>49K4./2-//+//.001000101/-*+857:)gъ$spm)
+2$pm'.. ,@G8"$6��DRRM_XV\[\^][OPSSRS/[A:4>S
+.6787610-(m����������J[hVTQ>327LV@<JMh�wZ]oUIPOw�µ�����������1_wxxyz{|}|zz|{z|�}ynH<5@M��:AIHEDB@5E1G-#WcbL!I�������������������WJIGFFBBEDEECA>E�nWSPOPOOPQRTTRB3|��aXTM?2('''(()$b�����¿����������������¾������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɽ�������������������������������������������������������������������������������;���ƾ����������������̾��������������������������wg@*��U`e`VKHFEA5/.)+-10,&""6Rnccez�י��kzyk�678:63L�I/321&
+Bltui]OHB;9:n�`J%$! !  	$����~vpkheb_]\\\[\^b\~������������������������������������⶜������}zxutrnllmihgea_]YTMC%Z\OTQOQbts}B #), I~�~RLEE:9>;FhlkiieT������������������¿������C����mtolgec`\ZWYW^fO>f��vvtsrpoljjea``^]\[XXZ[s]J/-������������������¾���KNJGz���~r8 !,AXKK7@VNH86�¹�������������������������d=������������������~|zzi?!(QWW=!',.+*(U^J!&&4" 49%2���������jmkm{����������� 1SV+6)'%* (''�T#?4*y��������������1iqswxy~�||r5
+c�yvurrqomc=59G70/.......-.//..,-..+,'R���k+dҍ$cg`&
+2&hT/20)<@8) $7��CPOV������������XPT!^����8	!6ADCEFAD?$(p����I=R���GaiWTPFADAMOB:JNh�^Z^qWGQP{�Ķ�����������1dvwxyyz{|z{{|}|z}~|zjK?0@O�|<AIHFFDB4+?1!$SWJ:O�������������������UFHHGE@DGGFEBB<B�p[UPOOOLLQTUVT>!2~��eVSMB-$&'&'&($e���������ƽ����������������������������������������������������������Ŀ����������������������������������������������������������������������������������������������������̽��������������������������ɿ�������̾�����������������������������������������������������������������������������̿̿�������������ü�������������������������wf?+��R_fbXOIFF?4/-+,230)" !5Rjcde^��_T�y[L^�FC34:7E�J0100'
+>istj]QJA;88h~eV"%#"!    
+$����uplgec`_[[\[]`e]x������������������������������������⹜������~{wtsjXPPZgihdba^ZTMB T\QMNPq����Y(%&%'I�������{xxpklmkihfU������������������¿������G����utoicf^^]ZXXY_`MAj��uuttqooligdcaa_\]ZXXY[oZI..������������������½~��MNIE�ls�p�> !&1CREB4>SNG:5��������������������������ǿa@�������������������|yxd#E 6��i}""369<6)KL	f!&+$#YLW- $4�������������������������3LL"<����s"##&�T#f�K)w��������������4etsutv||uzq4
+!i�xwuqrpnle>67H6-----+,,,++,+*,++++++&*-MPH*eӋ K9N
+/#\]U'.+ -BGD9$#3��JOQO~zx�~������|PPU#!,D?#	,3-060')$#t����QQU���IbhWTQ?/-7FO>8JNfq?aoxXIOQ�¶�����������.duvyzyyzz{yy|}|{{~~{xkE<2?R�}UAHILPQD3%/''Y`R6W�������tahlq�������XKHIIDBCDEEDC@;D�m\WQQOPMMPSUST<5~��aSPMD2&((''(("d���������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ������������������������������������������������������������������������������������������������������������������˿������������������������������������������xj>/��Tbhc[QJGE>4/,+/42,&"  6Phadgc��~�e��_�f�eQ^ck�>-10/'
+>orvj_SJ@888}�u[%%$#" !"!	$�����vqlfcd_\WXY[]ad^u������������������������������������湜������{zwtonnlhjjhda_]XSLC%YcUPOR����}E;74/)N����pkd^ZYW^jnkikhS��������������������������F����osmied^^[YYZV[`NAj��zustrpnkhfeca`_][ZXZZYr_I.+�����������������������KNJF~0Ue'r:$.=E?3*+>PPM89�÷��������wech�������������d@�������������������|zxh-b/M^ls"!-('$">@R"1%#/$ ]?d3"%8�������������������������2NL 3lz�xs%$!"�W 8-)u�ɿ�����������/frqoru{}tzq3'%o�zwwpsrongB47G4*-+*,+++++****,++**+)%02P�P)_ЈGi�(
+*&hr_$
+0+ .FF?/!#3��IPRX����������CGLTS/`NHGJP3:5>F7*q����~}{���DajUTO62>AJT>8ILcY*EgtUJPR���������������.dvvxzzyz|{{z{}}{|�~{lI;4AP��e?]b]TRN4'+*!'^fk@\�������aKSHP�������YLHFEDCCDEDCCA?L�p\YTSRQQQRQVUY?;���_TQMH6('($%''$g��º�������Ľ������������ƿ������������������������������������������������������������������������������������������������������Ŀ�������������������������������������������������������������������������������������������������������������������������������������������������������Ͽ���ͽ������������ȿ����������������������������xl=.��Rcid[QJFC<4.+*-.+&"! 2Nnacfa�ꕤ����Y�2|�tQn�E,21.'
+<nvtk`SI?888hMMZ"&#""!!!" 
+$�����upjb^h_ZTVTW[\a^u������������������������������������溞������{yvtpomniiihea_[WSN>&ZcTNKIt����^B==8/"O���TFJG@:75AcmmjnkU��������������������������G����rrojea_^XZXYX]^M?l��{ttrsqnmigec_]\\ZYYYXYk\L.,��ÿ�������������������IMGAi1JM*f4 "/3./*(-?QPK<7�ŷ���������mNx�������������^=�������������������}zvd#^%'ZV"16662$!%
+D,?.!6 >8!!5�������������������������4KN '#4W(!"!�R;D,-v��������������2htrrrw}}wzt>qM&n�|vuqssnnhA26F2*))()**))*))))*)***))$KjKSB&bԎL�R)"^UQ!2, ,@F6!&2��BQNTwxu{����VIRQRKA������
+ 4=BA?7(z���������IcfWSP67FJNRA:IMeY9D_vTMQK��µ�����������,jwuwyyz{{{z{}}|||}~{fC81>N��aEfjv]DO1&)*"&[Q8)b�������p��n|�������THIGGA==@CCCBA?O�t`ZZZROOPPQUVU;:���^TROI4)&&$$''$i��ļ�������ſ���������������������������������������������������������������������������������������������������������������������������Ϳ�����������������������������������������������������������������������������˿������������������������������������������������������������������������������������ɾ��·������������������������wk=)��Rdha[QJD@:3/,)(&&$#!  3Pieeg_��_G����X�.j�|I<Q�J-21.(	8mvtmbVJ?985\rzwi;%"  ! 
+'�����xqiV.>;<:88RX[`\s���������������������Ŀ�������������㻠�������~yvtmXQLUejgea`\YVMC$J^TQKQq����eC>>93!P���|����qspXclnnmiO��������������������������D����qspkgca][\YWX`bO?m��{uvtsrnmkgeb_^\[YZZXVYi_N,/��Ŀ�������������������MNG/@-)1/.! $1=;@?;5?PNI:4�ƹ���������w�������������ǿaC�������������������}{v`#W$B/ *.-1>+	G '3%"6$ IiX0#!$'8����������j489D����������4QJ :olYU #�W ]�X*z��������������.iutvuw}�z{u?�I%i�}usrsrpmkB14C3('(('(()*)(**((')''(($FsgJ@&i֌`gQ#
+#$dgs'4* *<B+ (3��ETNT���������lLRQRM38'I;-C	/@B47/(|����[3S���EedVVT>-.9INA;JM_d[UVlXNP^��µ�����������,lyvyzyz{{{|z|||{{}yf@94?Y��eLePekOJ2%**#,`_R3e�������G�g6B�������OEKGEAABEEBBAA=Q�n_]baUONNMQSUV7:���_VURM8(%%&%&)&j������������������������������������������������������������������������������������������������������������������������������������������������������п������������������������������������������������������̿���������������������������������������������������������������ɿ�������ʿ������������ɿ�����������������������������������wm?*��Tdka[RJGC:2/-*(&#"##"    1Ofdegi��^����k�Tibve<G�T110-&	8luundWI?985e��|�<!"    !
+ �����vrjS583-/69NTY^Rm������������������������������������滜�������|xvtrnigbekgeba\VUMA#TdVPKW����rH@?@=7"Iw{ujljf`XVSIfnpmkdM��������������������������D����rtsmgdb_^ZWVW]bP?o��|uuuspmkjgea`_\[[\[WWZj\Q).��ſ�������������������OOI>sE5g�`' ';B9A?<4=MMI:1�ø�����������������������ĽaA�������������������|zye.Y)}�x%.649?,I"*8$!8#N)i[I"#'9����������w����i���������4WE #=�����&""�T=E2 *v��½����������1krsvwx|�||qB~4&g�tqosspnk@02C0((&'%%(-0+,1-*'''&&('#D\U3''hԋMii#
+' 9$H5+;/-AD2&7��ENRY���������IPQSK3G3N=0=+9::9'&~����PG���?dfWVOF:4:JQ>:KKJEEDCJKNTm��������������w.fvuwzz{z{{{z|}|{{|~}we>86@U��sB{dbdZM2#)'-`kh:l�������D�H}E�������OGHECBJPLEBAAA<R�h]^ddYQNOOQSSU6A���[XXSJ7(%%%%%((p������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿��������Ļ�������������������������woA*��[fmcZQJIF:/-,)'%$#""#!  !1Wleefy����zCakaoUL.SueTtR332/(	7owvofVIA;8;h����A#$""!!! 
+ �����wpi_]eO/BURQSW[Uk�������������ϴ���������������������弟������|yvurlmkjhkhdca]WUN?&PcSMIQq����c>BCA:!GgYTOGCCBA><DgpokjdN��������������������������E����psqlfea`_[WUW^bN=p��|ttttppmjgea`^Y\[[\YY[o[S,+��Ŀ�������������������NLGF�4P�b~5 (74(,''+<LMG<-�ù�������������������������dD������������������~|{|e/R!0+h%+216=-J#).3!CJ%8WP+&8��������������ĵ���������6UE %>wdlTH $!�Q/v��������������.jnmpuy~�{|oEw+'l��vsnrtome9.3@0''%%$%):noojk;'&&&%&%"?^uF%)n׍a�G -%s���u%:/ -DG6%5��EPOT����������HORTIA������	8EFFB1-����aDx���>ebWVO=45:KR?:GJIFBAEHMNPo��������������w2jwvxyzzz{z{{{}}|~~{e;76G]��{Cq|\[YS.!&#*VPC+q�������NAL�M�������JKGEDJ_l^MEAA@=Y�i[ZbdZSNOPRUVX6E���_XXUI8)&'((&'(s���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ľ����������������������������˾�����������������������������������������������������������ź�������������������������wi>)��[gmdYQLHF;0-+)'&$#"""  ! 2PkadcbXn���BNqzE8Yga/6TaM031)	8qwvpdUKB<67GPMLG)#""#"!!
+�����xoh^VF-6HMNOQVZSh�������������͹���������������������彡�������}zvvogcejjhhca`]YSKA(N^SOOTYgr��d@BC@6!Ey���}�}zsgknnmibO��������������������������A����prpkgd_^_ZWUX^dN=i��wtutspomjhdb`^\[YYX[ZYj[N,(��ÿ�����������������IKHB�l|R-~7 &32(+((*>PJE;-�¸����������Pe�������������_C�����������������~}zxf-R&>%% +0/39,Y&-;)5#HkG2XJ%6�������������������������5RI/j���\4"$�N,u��������������1hihjov~z|mGt"$o�{prnqsone;-0>-&&&%%%(F�����Q&%&%%$%!!12'&qԋ
+'[pW&%?0*<?.%8��EOPa���������~POPSK8O?DCJM	 5?@@>3+�����YD_���7jeVSL1-55EQ;9GJIEAADHLMPn�ǿ�����������t5kuuxxyz{yzyz{~{{~~}wf:54DY�yGnpEWUU-&+%-Y\`4u�������dU��p�������PKIGFL]LSRD@AA;]�`V[_]TUSROQSVV2  F���_XXVF6*')+*)))s�����¿�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�������ǿ�����������������������������������������������������������������������������������������ǿ�������������������������xkB)��agmg]RKGE;1.+)(%##$"!! 4Ooadb`dyq��w�HK��tKPtgY|�B-0+2mvwqdXKB<643&#%&'$"!!!!! 
+�����wnfV2,CQNLAKOSWQi��������������×��������������������佞�������}zxvqnZQ[iigeca]YVMB(Y`TSTT�����f=EC?5">��qcbfca]Zionoi`N��������������������������A����rspjgc_^\ZZVX`eM?i��{vvsqppmhgeca`_^ZZXYYZh^N.-���������������������IKF/ThQ'+I'&399=873>MIE;/�Ķ���������gTf�������������^C������������������}zyf;j+v|�U"6955-
+R#*:*9"B6^+(0'<�������������������������3ULF���-$�O%OZ<'v�ſ�����������/hhilnw{}oHw%'3*'o�~utrttpoh:-/<,%%%$$"%E�J�\�R%&%#$%%"*("(-'oڋD
+$+laR =.):@/$>��FQPOFDLQVTPYXNMQRTJ++2	0ABB@/*�����������3ldVSL6:D9HP>:GHECAADGKMQq�ʿ�����������o4kuwxwyxzyyyzz{{{~}}vf:46G^�m}OikNQXO,Ig:*QL]6|�������ZM����������CFFGEKZFOVEBAB8\�^W^[VX_e]USSSS2&82%K���bZYUH9('****)*z���������������������������������������������������������������������������������������������������������������������������������ʽ����������������������������������������������������������������������������������ƾ��������������������������������������������������������������������������������������Ⱦ��ĸ������������������������zlA+��cipi_WMHB91/,)'&$"#"!# 2Nhbbba`PC|�o�<'n��/-i�l$>�`-1)
+5hwvodWJB:63E[ONO7"#""!"""
+����vnfQ,MVRONIKPQUTf��������������θ�����������������������������|zwupXPZgjjhea`\XUND'VfRMQP�����n>HA<0!?~��S@<?@@9:=fppnj^P��������������������������A����prokhda^]YYXU\cOCe��zuutqpomjgfdba_\Z[[ZY]f^N.,�����������������������LKH4H6&?P6  (:A?BDE>>LIE6.�Ķ�����������������������½`E�����������������~|zzg3W(D+Hx!&%%%$E"'9) 9!KD>3!$&:�������������������������6RM=w�uv:"#$�Q&H7)*x�ſ�����������1kpnrsz~{znJt)Qwk?i�wzuuuoni8),<,#$%##$%G�F�T�S#%$""!!!Q~x�i'iԋ&�+!&(66RC/):B4!<��INNMKKLMPOMOLNPNRSI;\MMPh]	3<<<9+/�����Zy����8laUTO81.9KO<5GEB@@CEFIMPr��������������o0kuuwxyxxwzyyzz{|}~�}vh815D]�cwFjtJIXU- E^5/36'~��������):Z��������:GGGGJOIONCB??:`�^VYVUZggcXRPRR/>QS3P���a[ZWI7*(),*++*{�������ü�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿�����������ŷ������������������������|nC'��dipkcZRIC91-*''&%$$###" 0Qmddc`^]��t{�b?tx}?5b~|'7�E/1(
+4fwwreXJA967`���{M""""! !!
+�����wqgYJUPONONLMQUPd�������������Ͼ����������������������������zzvtrjookhiheba]YTOD&YhUOPM�����rBCD?3$?��hqsqstpqhromkg`K��������������������������C����prnigda__XZYV\eKCf��ytwtppomkhfebba\[\[[YZl^O1,�����������������������IIHGyA;��u/"*85+-1?AAMHE92�·�������������������������`C������������������~{yye.L0yx}n /:<>.#A"";+!: +f99!%?���������mRPU������������6VR-�l�:" "�R#)y��������������/kstvvx~}{nKr)_wsIg��vzotvonq:),<*!##"!"$E�AHE�Q""""!"" QV:IF(oَ %$8RcD/&5<6" #A��KNLLLKNOOOOONPNORWM'),`�i 9CHIG8-�����[Jd���5jaTTQG76FTR;;GDCBBFEEFMUw��������������r/ktvuxyyzxyzz{zz|}~~vf+)5Fa�\vKqwMFJR+ AS1'8,'~��������>A-c�������AKKGGM^b\MCB??;b�[XZTU[ZP\ZQQQQ*>*)6 U���`\ZUI;.-.-+++*��������Ŀ����������������������������������������������������������������������������������������������������������������������˿���ɿ����������������������������������������������������������о�������������������������������ȿ�����������������������������������������������������������������������������¿�������������������������zkD)��_ispdZRKD:3.,*((&%$#"#! /Rndbcaf��£�u�zd=jvcVJ�tpa+/1'
+/gxupeYLB968B/%((5$!!  !"	�����xqg^H313FNOMNRVUf�������������μ����������������������������{ywuqjmjcfhgdca\ZUOB'QlZUTW}�p��S@BE>3";��jmjc`cca\imlge^R��������������������������E����vsnhfga]\ZZ[X_aM@m��~txupponjjgeb`_^]^\[YXi_N/-�����������������������IIFH<fq;y9!)83%%/;??MLI<4�ø���������z{y����������¼�[C������������������~{zxe$M#8)Ht(<.%+:'=$#<!`W[<!!"'>�������������������������5RH:��ms+&�L#TZ;(z��������������(ovxvvx|�~{mNs&bwwHg�|tvlquplk8')5(!"!"!"$@�6"3�B!!!"!!!DB%)u֋:55
+#&eaA/'<DA) &F��COMKLKMOQPOOQNMORUFF��e%5<=AA4.�����B`~���4fbVQND::ETN8<GJIHJLKKKMQw�ȿ�����������j2luuvxyxwxywxz{{}~}}~t`)#4Db�StVopQIKK) AA44[VJ(��������k2Kp��������FJIGFJXdZMCB@?8d�VTXQSWXT_[RPQT*7$*;R���b[ZTIFDB60-,+-~���¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�����������������������������������xkG*��ejtpg[QKE;2/,(&'$#$###!!! 0OnecbcdihQJD>HC35=I?43MYO4-10(	/nwxrf[NC:64AQ+(K=&!" !"!
+�����yqi]OMQFGOPMNQWU_�������������������������������������Ý������|zxum\TPUehgeda^ZUNF$KoZTST�����^A@?=5$>���SC>;7;;6@bljfd^Q��������������������������G����mtnjiib_]\VXX\`P<g��}qsstppnlkghf_aa\\[ZZUi_O-)�����������������������LLG<o�;2r3%3:1/6A<=ONH;5�ƺ����������������������þ�[A�����������������~}{xf!N-~||K$:1).7'
+ D%    'C#7`;'"!$)>������������Ÿ�����������6NBB~��x6!!�L"*,z��������������(nwwtuw|~}oOo$cywDd�{trprsolk:&(4'! !!!!#,?'%):+""!IdB.&(y׌q��)
+ OM)B-$3>8# #GAOOMKGINMIMJHLNQQSA,q��C 3 7@BDA/0�����~�����3m`TSN8+29NM;?JKLKLOQONQNw��������������h5ltuuxyyxyzyzz|}~}|}w_)#8Aa�PqUnoMLLM*"AK28_fb/!��������?n����������FHHEEJQ\TGA@A@9i�\WUQRUXY^ZURRR*...0T���aZZTNXFD:1-,+2�����ÿ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ս�����ʾ�������������������������������������������������������������������������������˿�������Ͼ��������������������������voI-��iktneZPJE=3/+)&%##$"!""!  0Qqbbbb^\SKGC>=::9764320.031/.*0fxwrgZNC<85<n~�t4%##"""#! 
+�����znhZSXVOINQONQYUb��������������¤�����������������������������|zxvqooo^diifda]ZVNC)HmYRTX�����xAC@@8$:|��uxxtsqdikliied_N��������������������������A������}urnjfda`aaf`Q<i��������~{vsqpnjjfegehifp]P0(�����������������������ILE03G6)5  *:@EA?/<KJH96�ƻ�����������l�������������[A����~���~~��~��~}}{xyh C'C-787/ ;!"/9856646,%<#/&8+!$'C����������tlnk�����������9VE.p�v�9# �K"[mC/x�ɿ�����������(mxwonsy�~}lRk!axuF`�{utqsspli5&(1'!  ! #%&$"#    Jq`7%(wՉERC#$J+"'#%CȔCMJF[poqpkmjdaTMLSD3mfE@PO"$#$$3����y_Qx���2l`UTH25:7HN8<KLMMNOOOOPUu�˿�����������`7lsvuwxywwyzz{|~~||}}x\(&7Aa�FmYzeJGML+"Je48VK@$&��������;h����������FEEBCJbjcKABBA:j�_\VOOXimeZTPSO' EMN7 V��|aYXSPQ/290-+)3�����ý���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ�������������������������������������������������������������������������������˿�����������������������������������zpM3��gltnd[QKF?4/,)&%%$###"" !0Tw]bba\ZWQLFA<==:76644333321.)	2hwvrgZNC>94=__b*$&#"!"! �����xnh]MNRDFPSSQSZW_����������������������������������������������}zwurpqoijihedb][WNC%OtZORMq����p=CB?7$;~��wwtvplmopng^]baO��������������������������F����zxtmjjgb__^_a_WM?`�����{zvrrtpnjgfgddd`cbdeWP0%�����������������������KMG6-""$%!$.9@9.)<NKH:<�Ƽ����������z��������������ZC�~~{|~}{��}~~|~}�|za#D*opuD%.*,&#C#&LkkjkmjiN#"4 &!
/N$#(A���������n���������������:]H  ZnC~1#!�K$oyB*x�ʿ�����������)iyvonvy~~}iYi#dzsAb�{ttqstqlg2&)0'!7geeec7   $AWj_.'*x֏o�y)"$!,G,#%""HȌENIa�����������sIU?# "  ""6�����n;����4k_TTL5239JN9<KLMNNNNOPOZt�ɿ�����������`6lsuvvxyzyxzz{{}~{|~~xY'(7Ae�MhOvsXINO'#Ud59XXV,%��������k0Ct��������FDDCBDFGJDBCBB9q�`\YOMV_faZROOU(0DD0V��}`YXVPK46:0-,+4�����þ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������˾���������������������������������������������������������������������������������������������������������������������������˻�»�������������������������{rP2��ilwnd\RLG?3/+(''#%$$$$"! 1Mvaaad`YUNID@<;:9774432122100(	0hwuqgWOE>84PpIS)&%"!!!!
+�����xpi`Q617NRTUUV][]�������������Ϳ����������������������ơ������|{vup^[Y]figddc_ZUNE'QwOHIKWr{e�g;DA=6#=���UGA>:<69<_V7Ka_M��������������������������Gv�}okohddfc_[[XWWSOJ=Yusnpmmnjjkigfbaa`]\ZXXZXUO1"����������������������LMI<bl`^bZ. (.1*%#*;NLG:8�ź���������i���������������W7_E??<BA@BBDDBCGFCDFFDD7"C=z<Rn!1<@A3
+%B#!KnjillhmW$> shC,-$&@�������������ƽ����������:]B7�}�m:  !�E$")u�ɾ�����������,irtttyz}}{gWe"cvs?j�zrtqssole2&(.%"D�����A!  /\hf[5('uؑN2! 2����\6&%4<4$MȀJOGm��fg��uyy��OT;5;@@?>?A7!5�����Lw����1n\RTN?12DON;=HLLMMMMNPPK\�ʾ�����������]:lxsvuwxzyxxzyy}~|}�}xW$'6@j�QkFfp]OMN)"\^1<__Z/(���������=<+j�������JIGFFOdd`H@CBA7o�c_XNOYdkhYPMPO*2FE,Y��yZZZXOH@=70-./:���ſ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ����������������������������������������������������������������������������������������������������������������������������������yoK/��nnvof\SKF=3-*(&'&'*****&#! 0Ipdbcc_YWMGDA==<9853432320231(	2evurhZOF?92Hui_,&$"!!"!
+�����xqibZWYPPRTWWZ``^��������������ƹ���������������������š�������}}yvqgf`bghfed`a[VNG(AtTKOTn����uDFC>8#8|��snolii`YXc`OYd`J��������������������������Mt�}wqolhgeb_^\YWVUQL>X��}wtrqpojiheeb``^]\\[YXVVO1$~���������������������JLH5Yifjrb0#.7:2(#(;JLF:8�Ƚ�������������������������Y8W#!%$"! !!""" "! !  !D;�M\z&+=5!#@$)6;Kd^LdS& F"Lk^bHH"(?���������|���������������9YB7uddC, !"�J%WV>*u�ɾ�����������*durwxy|�zfX`!fut@k�ytwotsqom8&(/%$F�C/1+! (N`Xr:'.{׌S�}! +]MMUH4&0FED,%K�~FOLq������N|����NS9/3,"?CEDCEHHA(:����jDYz���/u\UUNB;>EPM9<GJHIHHJKNPLW�̿������wno|�U>muqtvwyzzyy{{z}~{|}zS%&6Al�VcMrkRJML)'OU2;ZKJ.)��������f0Po�������HJGEDLeheNBBBB8q�aYTORYagdYQQPQ'CNN0 \��ya^[XQTWV<.-11>���ƽ�����������������������������������˾�������������������������������������������������������������������������������������������ȿ�����������������������������������ɿ���������������������������������������������������������������������������������������������ǿ������������;���������������������������������������������������zqM,��nqyqeYOIC;2.,)&'*036<95/)$ 2Ikhcb_^YVMFBB?<:985555321011.(	1cuvsi[NF>82Ir\Z)&$ ! "!! �����zska76:.9RWXT[b_[��������������͹������¿�������������ƞ�������}zxustpmlkigfe__YXOE';sSKOW�����rAFA?@*2v���z~���tfeffa`a[L��������������������������Iz���vqljeb`]ZYXXVUQK:W���ytqppjhfcab_]\[ZYYXWTTVQ.%}����������������������FII01$(\pJ!!(3FL>5-;KIE84�ȼ�����������Ŀ�����ÿ�����_9V#!" !! !#"!   0L5�feI1DC6(
+#;%3aR-aX##F#EI.c^&)?���������bhjcm�����������8`J 3���7"#!�H'inJ *w�ÿ����������(`oluvx��yg]d!dwtBj�uuworrnno;().#!G�12Uba5)*}֎pqK
+)N560& 1HCB3 #L�~ENLX�����|Jq���cKW9)e���e 	7<9<<9:;8"7�����������2v\STH.,1:KM7<GE=<8;@ELPQ^�ɾ�����������Q8otpquvxyyxy||{|||}�xL$'6Cs�[SGogPLKI%'RU/A]\Z0-��������Q`���������}EIHECM]Z[FBBAB5s�YQMKQXZa^WQRQO$---(]��u^^a\PEI@2//10;���¼���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿�����������������������������������ɿ��������������������������������������������ӿ�����������������������������������|rM*��mqzrhYLE?:2.-**-4:=@B>:61*"1Ilgeec`\UOKDA=;:876654321110.)
+-guvuj\QF>65AVOd-%$!#"!!! 
+��ū�~uodVUSMFVYZY]baa��������������Ǒ���������������������ȡ�������~zxuoWT\Xejhfd`^[WOD)6oSNOWito��tBCCC>(3{��pZj}v[9ASefca`[N��������������������������Kx��zrnihb_[[XUTUSRPJ;W�~usqnjjgdd`^]\\ZXWVVVTRSQL0'���������������������|�PNI7jbOwb�?!!)FRHI8<KGF:9�ȼ�������������������������a6X%!!!!  !"""   !?_)#+40/.
+$<(7iP2`W!O%"VhJ"("(E�������������������������<`W 1H+n9"!�I$."(xL������������t*ctnvyy}��zfaa!gvvBh�ywwsrrooo7%'-$G�+NS_9(.�֏aNL "g���81%-CC>, !QɁEOLSqcdggfgjeMGMOS5LdPKNY=	/BHHHFHB3=�����������3p\TRH0=DHNO6;GC;9557@MPU`��������������N9lvvsvxx{{z{}}|~~}��vN&(3=y�OENynKMMQ&"\\0"=NIO*,��������dMEi�������sEJHDBKPAHABBCA5y�RMMMR^llmXRSOM&$I]T7!b��t_^_\SSYN5/.0-:��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǽ���������������������������������������������������������������������������������о�������;��������������������������{tK)��hkythZMB<81.-,/37<=@@;<:4/&/Ikhefd`[TNIEA>;97544433111010)	)evwui]QG=75Aeip3$##""""""
+��ê��xqlife\AUYZZ\bb`��������������Đ�ϑ���¿�������������ɠ�������{yvnbjjceijgdb_[VOH(6tOORWfyt}�j?CCGB+<������Q?Yxqcdcbb^J�����������������¿�������Lx�{oomfcb_\[WVSOMNNJ<Y�wppnkkhgec`^__[[XYWUUPPQON2'���������������������~�NLG/K�|e7�@ #0GOEA38OHG9:�ƺ���������^hj�������������d3V%" ""  !"!"!!!!"" Cj(v~��V'=&3gO._Q"q'<XOqa-'&B�������������������������@f_ 6���<! �M(HN3'yAT[[VWUUXRXVJ&_rryy{}��yg`]"btu@`�ysyossqol4$&/"!@�+- /,Y9((�׎p��/&J^o;;$':?/&ŃENQu���������jJPOT6I1)"K	&4787.&"@����|rt����4o]VSH;EMPPM7<GFA>;:;BLOR`�˿�������]k��K;muvvwyx{|zz{||}~}}~wM$'4@~�FDKhtUMLK'>T+!B\f['-��������ClvDu������pJHFD@OlfgJBACB7}�SPMLT^ijoVQPPP%CXO7 b��ta_]ZRKGH7-,.+>����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�����������������������������������������������ɿ������������������������������Ͼ�������Խ��������������������������}sM,��hkusi\OC=7/--044698=A?>=71, 0Goiefdb[VLJFA?999653433223210*
+*fzwtjcSG=75Ea`T$%#"! !!#"��ű��yonmgbWGWX[Z[be`��������������Ǖ�Ǜ���¿�������������ʡ�������{xwpipk`fihgcb^[UNE-4tSX[W�����vDECGE.9�����sLD_xmY_fgec_B��������������������������Ny���~{volke`_^]ZYYRK>Z�������~wusttolhfdeehkojn]N4'����������������������NKH0&Aiyzi(#.?C</(,<NJG;4�ú������������������þ�����a5P#" !#""""$#%##"#!N�S-J1?_0)@$-eS1gR#!�+!L"@B''H������¿�hfcdm����������{BdY4pl! #�K*mvM)xD^`_^]`a_^^YN*dtqvvz}��xgbZ$cus?c�zuysstrpo7#%/! $.  !<"Qgq7*)�ؐqfa%' 5��gH1?&.@>' &J�}FNPSbecj|{���]JQPQ3F��!ZE	4BCFA+@����Kbi���}3t\WUH95:CNK8<JJEB@=@INQQZ�ɽ�����������M5ortwxyz||{{|~|}~~~uK"&3H�CJNroPKMM&#?K+@`gY&3�������xO��rN������hHIFD@FJKQGABDB;�ZRNMUde]_SOQPP%#G9=,"l��q_\\WGD?>7/-.(C������¿�������ȿ�����������������������������������������������������������������������������������������������˿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�������ϻ��������������������������}qN+��gkrog[OD>60/0553444;ABA@:2-&-Jniggd_[ULIFA?<::65312323200/)	+cxvulaSI@70;TnU0(%#"" !!v�ȴ��wFB4:;,4Z[\Z[acZ��������������ͽ�ƺ������������������ɝ�������}{xurh]V^jjffcb_[UPE-7z[XNH�����xBFA>=)6����kSUr}lWWghigc^L�����������������¿�������O~������zsslkgdgfhf\M;X��������|zvutqmigihijlnpaS5&���������������������|�IHF0&&)/2$ "-1,'"#)>QJG?6�Ƚ���������`mq�����������ü`5S!#! #)+&(*+&')'$ ?�P,!%)H%)H</eQ$!S4EB%!!%H������������������������~=VJ4u}~:!
!�N'!!&{E]___^^^^^\]O#`pqpux}��{ie[!aurAg�wwvrsrpnm8#$0 *==<97%*5=_>&'�֏k[e",(yzE5$!6HD/ !Q�vDONLIBER�����fMROR3$CR&O(	!789B?0>����}�����w8s_VQH4(%7KL8>IHB>61<JOQN^�ɻ������tad��J5kvsqvxz|||}}}~}|}|tF&3J��HeKofNMML% )$ C]g[&4�������wV��xV������gDHIFDHL[UDABCA;��TQONSfib^OMQRN#$4.2&!m��iUORNCYXY:-),)G����������������������������������������������������������������������������������������������������������������ɿ�������������������������������������������������������������������������������������������ɿ�����������������������������������������������������������������������������ļ������������������;��������������������������|qN)��pptmcWNE?93/28522217>DB?<3-) 1Hphhfd_ZWOJD@><;975532321222.)+]zvtl_TIA93CkcA-+&"!"!"!w�²��yhpadbXOZ]]Z[be[��������������ǝ���������������������ɢ�������~zwvqokelkifdba_\VRE2=�QGKO�����zGIB=<*6���������zyyljjje`U��������������������������G{������zprmkgedfie]P?X���������|ywusqomkjjklkllcV3$|�����������������¿�z�MHF0#"###" !)*&&!"'=QNF=/����������������������������e6U###"%?a\KTeYScW0#*[Fxx�r'+&584"(C%)046203RC! B"?BO8\mZ'J��������������º��������|;QK -�f?# �K 5=6&{F^``a___]]^_N#aprquy~��|ea^"dyw=g�wuuqsuuli5$&0#C�����>EW[ce*ٓ cRf..
;f@DA(!1FA+"%Q�zENNJGCKG[ekmbCJQOT4'	021CB+C����]WY���w:u]UPOK>9AKJ6>GIB6).?JOOLd�Ȼ������eh|��H6dwvrwyy|}|||~~~~|}|rD%6I��AoTe[SONH$%\H"%>W[M#9��������g��tl������`FFGFDLdd[B@BC@7}�WRNNP]ghbPQURK$&EPV1%q��P1/-..CLK3)(("H�����������������������������������������������������������������������������������������������������������������������������������������ɿ������������������������������������������������������������������������������������������������������������Ⱦ�����������������������������������������������������������������������������������}mL(��lpwm`UNC=62016411205:DA@=3,&.Fohgee_ZVOHC@=;:9755422223221)
+&bvvtmbVJ@:1CsmppO&"!     s�����{wsimjeTW]\[]cfY��������������̻�|���ÿ��������������Ȣ�������~{{voa_\^ehfcba^\XRF.9wMKOXu����dHGDB?,1|��}aTOPMGFENjiijfQ��������������������������M�������zvsmkhfeiii_O>X���������|xxvtoqnjjjjllmncO5#y��������������������x�JHG9KNC?A9%"-10-*-+;PKF;3�̿���������qzq�������������b5R$#"#'F�{GjemlsnP#  #:hU;)507*.&
+#4#&OhehgfaaI"!Q oZ_)22(M������¿�WWXV\\Kw�������};XO5.&&!!�N HYI (|J\`_^\[[[]^^O$_nqwx{}��{f`Y"czy>g�wstnsrqmi9#%0"'JNLPN*T�s[P-�ّUii$*'/H4&,@6#"%U�wEMMo���������z�ZNT1_thcai?+.,41'##H����������s7uWTRRPHHKPM4>HI=/.9@JORO`�ǻ��������~��F6Xhouwxy}~}}}}}~}|||v@'7K��AcuYc\QNG!&aU# 1@^aL#5�������������������[CEGF?EUNMC?@@>8�XSMJKOQUUPRTQG#!()1&%p��I*)*'.?4;/1/0$K����þ�������������������������������������������������������������������������������������̿����������������������������������������������������������������������������������������������������������������������������������������������Ϳ������������ʽ���������������������������������������������������ξ��ķ������������������������~oM)��dozkaULC>73224356547<GGAD9+%/Elkhfe`ZWOID@>=:876642231221.)
+#^xywncWJ@:7@eGQ;.$#"   !"
+e�����{wqW@IDBZ]_\Zdg]��������������ͣ���������������������ˤ�������~zzwpc[X\dhfd`a^YVRG,5zUNNT|����lDJI?<*/z��f[]YRLHHKH\eknlR��������������������������N~������}xsnjifgiig`P?W��������{zxwtonlhhmllnnnbQ8$w��������������������{�MJGAj�����<%6ADJHJ:>RLI=8�˾�����������s�����������ž`5S&#%'+Ezv_rknktkL$" (D4!!45,!
+.;&$UmgjkimqO!U!297*&'M�������������������¿���{<]]"'"% %�J(|K]__ZKORR[^ZO!bopvyz}��|h]X"buw<i�yvupqoonl<#%0" / WPG3'�א,S>*)`WaRD5"1GB.""V�sGMRd�����������\QW0m�����X	.369;=AF5E����m\c���j6xZSRSSQQTRM4?LH<//6=IQLSc�ȼ�����������E9]knrvxy}~}|}}}~|{~{w@ '8J��BI{hgQUNG#/")H[ZP%8�������������������cHFEE?CBHVF@??=6�UQMGKW]i[RSSQG (6DL*%r��aORQEK^`a[YXZ9S���Ŀ���ſ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ�������������������������������������������������ʿ���������������������������������������������������������������������������������}rK'��fkxi`RIB;63224689899CONPPD/$,Eokefga]XPIC@><:996542221010/('XwwvncWJ@:6CiIHM5#"!!!!!"o�����~zqhL@G^]^_\]df]��������������ǥ�˵������������������ɢ�������}{zwsmngggigec`]ZVPH0<�QHN[�����AHD@;-/z��}|w~���zptponkR��������������������������O�������~xqnhggggff`R=W��������|ywvtqmkklkllmno^S:#{��������������������t�HGD5*-TQ3�A!(9?<GBEA?MKH;7�˽���������jr������������Ŀd/V'&/0-3P@E@RG>WF2)*",R1!%%#5C"'UeCJgXJcN  B Meh4#"$%R������������������������u@[X"-:9=9) &"�K,rzF&x K^__XIMPHZ^ZO"`mkpvy|��}g\[#avuCe�}utosrllk9!#-# =�8.@i:'+�Ւ1(.z�tvM-" 7GE<* &W�uKLHLk}~LBjrsVCROOV/'O16/9?DDIJH5E�����I<���f<tZUTRNPPRSK7>JI8,),2FRMTg�ɻ�������er��D6]mtswz{|�}}}}~||~zu@+=H��QyysmYZPC6V^I$;�������������������_CFCD@XojfFA@?<5�zTPKHMXjaZTUVTG(FK<+
&r��p``^I>QdqkmkrBT�����������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ�������������������������������������ɽ�����������������������������������������������̾������������������������������������������������������þ�������������������������}rO%��jmwg`VLB;622447:;=<=GS[aVE1$0Hsjhhha^YRIC@>;::855321442200(	'XzyulbXKA:6CytpM0##!""!!!
+e������zsndRYc```_^cd\�����������������Ѱ������������������ʡ�������}|xwsnpoolhigea^]XQI1=�IEP^�����IEDD>-4{����������}zurrojN��������������������������P�������~xsnjgihihf`Q@W��������uvrnoolhghhkmlmonaV2$~��������������������w��LHE1&&EhV�8 '695F<?;=QLG<9�ʽ�����������������������Ľ`5T#+RujKacSX`ROi_\\R@ 6v":L3!37350
+2B#*S`+5hL0dO" <!N^\W'&&O������ÿ�pzvttum��������|B^X"*GflgjP)$"�L%:93)�J`__XRY^\^^\N$]hfltx|��|i^V$^rn>i�zutpusmmm;!$-"!# A�>J{_2'(�֔q��,#6pE7/#"2IF@0'U�iILL{���~�����XNRQW/T��{s~?"<DHFA:0)!L�����R����`=uXUTK99=FRL6@HHC<1/7HQQTe�Ⱥ�������qu��A7S[ptwy}~�~~~{~~{v?-;M��X�fipbZR@ %K;?Z^O%G�������rI����������`AGDC?ANMZHB@>:;�vURNJMNQQZUTUTH+%+((v��qc]B)%"/YglmsFT��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̽�����������������������������������������������̽��������������������������ʿ�������˻�����������������Ž�������������������������|nK#��dbmf^TLC:655458<>ABBFPY`UB0%-Fnjilgc_XPIDA@>;:85431032120/)	$Y|yuldWK@<:DfNP''#!!#!!!!a������{uP/@?Q^aa_adc]��������������̱���������������������ͣ�������|zzwqij[Zbhgfc__[VQI/6xHJPYp�|~�oBCBD?..y���������~|wtqniO��������������������������L}�������}oibbghhge]P<T��������kmkjihfcc^ZflmmnnbV8,|����������������¿��u��KIF4&&)Wv^'#2;CJ@C5;RKF89�ƻ�����������������������Ľ^4R%)W�sjn^qw}USzcdzzO!*R$a�zF")(*(!
+4E )Qb,8gL3dP# @.Z8D[-%T���������{��������������tAXT!%>VaRPSdI.$�E#"'|K___YSWZW\]]P&`lljsx}��|j^S#_sq9e�xurorsqml7 %.=�}}~�=%)S5&(�ٍ>@\-$)l]24%!1C=/  %Z�fIJP��zv���v��dPQQT,"y�[C�#"59=>=>?<,G����@0Z���_<|XROB.%)>OK9?ILJH916GPRRj�Ⱥ�����������>0IVkyxy|�}}|}~}|~}s=+=R��G�y\od\Q?)XL&Kc\I%I��������F.Y��������`GHCCBEHGOGA>;:8�yURMLIXY^aWTSUI ,0A/'w��mdV*!(9KP`jnrJV������ƿ����������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ�������˿�����������������������������������������������������������������������������������������������������������ȿ�����������������ľ�������������������������{oK#��fdkf]RIB;78988:<@@@AGMSUT@7'.Gwmihgd_YQLGA@>9885642.23111/(X{wumdXLC=9@cej6&$!!"#"" [������}qYhq_Z`bca`ef_��������������˫���������������������ʞ�������}zyxsg[Yfdigec_^[UOH51vOMO[y����u@DAAJ5/x���������~|{zuqqlP��������������������������Rz������}nd`dhffhf[K>U��������mmjigffca`ahmonnmcU:-���������������������u��MLI5,;:55. !*6B90.*:RLG;7�ɻ�����������������������ż^8T&(So`qibopi^{d^mwN!'>g��.@�1",120"	2E(Ua)5dJ0eP""E S_pP)WX#W������������������������tHWM -OY?%!*;VL0�E(mlN&wJ]`^XIKJJ[]ZN#_rusrx}��}i_S `xt9b�xurmqvrmm8!$-B�����7YqkA=+�ێEKS**AqbD8#!4JE1 #Z�gHMTs����ym���]LRRX,0(@ABDKHRS@I����������]?�URSM?2<MRI9?GKJGA57KPQRo�Ǻ������y����9(:Qdvyz|~~|~}}~}t=-?X��?i��xbdP?
.XL& 1@7L��������`H01������[FGFECZqp\C@>:;>�xUOMLI[chdQTUQE-RRY1)x��leT($8>:[josNa����������������������������������������������������������������������������������ɾ��������������������������������������������������������˿�������������������������������������������������������������������������������ȿ�������������������������������������������������������������������������������������������������������������zmI!}�ieqh\TIC;9;=<<??@??ACGJRMD6),Cnmlhhg_WPKGB>=:78643210101.-)	XwxuocXME<62P4_5#"" !#""!
+S������}wgmnUN`eeb`efa�����������������ո������������������Ρ�������}{yxvmkpmkhgfda^ZWOH6.zWMU_v�����GHEDG3.x��������}|zxtqnlN�����������������¿�������S~�������~ne`ajhhhibO;S��������plhihgfcaa`hnnnmk`T9,���������������������m��ONG7U�����= *49;632?MKF;8�ƻ���������{�������������Ļ_4T&%3F>0AJ8:<A>HKB8</"/&>d~}X! /7A>)
+-< (Q\+1eK2fU  :!EFXY.+ U���������eLM^~����������sB]V$2XI-#+\fH(�A#]d<(yJ\_]\RWTQZ][S Vtusrv}���e`Q `wq:\�vtqnqtpni5$/ 7�3GSR11"�ڑ^oi&+&.@@4"4IE>*!$\�eIOKMdhd[JPii^GOROU)PD><6F+#47756354%M����KOQ���VBrYSPG5.4FQK4?HJGE?69KPRSs�ɹ������o^g��8*4Cjvx{~~~~}}~~~}|�|o:*<X��Co�rphfP9 7]O&Q��������E4Y������~�]FHFCAFLIHB>;::@�tQRONIKGGVIRTTB!#"5++{��mdP% !% 2\mmrJe����������������������������������������������������������������������������������ɿ��������������������������������������������������������Ͻ���������������������������������������������������Ϳ��������������������������˿������������ʿ��������������������������������������������������������˾�����������Ż������������������������|nO#~�ifoi`VOE?:<>?BDDCA?ADDEHGH8+.EnlmjhcbXQKFB>=;9854311130./-*WuyvndYKG:7AnOR3$#""!!!! V������znokTTbdcaafh`��������������͸���������������������џ�������~|ywuusqomkifc`_\WOJ4+yWMTZo�����HGFHC0/w��������}{{yuqmjmS��������������������������Vx������~lf_`gihgfaO=S��������pmjggfdccbbjqnmnmbS9&���������������������o��TNJA|a,+/.#+:JVKNF<MKG<5�ȼ�������������������������`:V$"$"""#!###$# !"! &7(4'%6=.<C!)Qb+&G:-^R!:!Hk(0K_3&W������¿����Ş�����¿���vAfV#>V='!-YmU.
�A(}F]a`[TUWV_]]O$`ttqtx~�}`bN!evr=X�ssqnqsnrn:"%/>�?Log1%)�ڏNPh/.ZbO3!1EDA,!%\�_LNM������������nPU)!������M&8?=>=)/)U����\�c���OCrXPO@*&+?PI6@IG93/,7JORSv�Ⱥ�����������;38Cgvxz}}}�}~~~~~|p;+<S��Ij|eN[YI7/OI%S�������^O���������ZFEDABS`PGB?<9:C�sQPOIHT^\]RPPPD(138(1|��obW2(-ADSfnoqNh��������ǿ������������ƿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɸ����������ǿ�����������¼������������������������ynK!z�iemh_WOG@=ABCFHGE@=??==?BB:.,@qmmjid`ZSKEA@>9:963532210/0/)!VszuldYLD<5<zxp:"#""!    S�������xmrnhbbedb`eg]��������������˪�w�������������������ϣ�������~|ywvwqqpijfecb^]WRH3*�ZQQ\{����vDGFF?-.u���vvrnkbb]\\`]hkR��������������������������Xu�������{ldcbgfhee`M:R��������hmjigfddba`lonmnleT9.~��������������������r��TMJE{=  #&.:M<=A;MKF;4�ɽ�����������s�����������ºZ8X!$""!!   !      (:2z}o/$7C?7,	A@"#DQ*(I< %=!SE]%&SM"Q������¾����Ĥ����������zAb`5Y3!"5^bD+$�D#GdL+}F^__YGJ?F\]]N*`tutvz|~�z_gM bto<e�zsroormpm8"%,!&;$AdS2%.�ؓb�b19sw`
+6 !/>=(##Z�]JLLftlpqut����bQT%5'!#'1$%5><=901-W����aQM���LGqZSMI:9CJUL6AHD90-7BLPRWy�Ƹ�������w���6-2Agwy|{}~~}|}|~�~~}}r9!FDY��H_rfTJKH;2^S#X�������Z{�q}�������XJDDEFKgnW@;:<;A�nRRPIG\hi[PRNO@5MMG#-}��jb^I31EKglppsRe�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ�����������������������������������������������������������������������������������������������������������������{qNz�hblgaYPHA@BBBDFGE?<<<77:<>9.#/Fpmljhe^YRJFBA>::86433122101/+	UxxtmdZOC=9?cTV,!#$#!"! " E�������yD+:;4\bb^\dh`���������������ǟ��������������������Ѥ�������~|zy|����umefcb`]XQG8%�\OUYe����IDFC@?.'p��i@?8745.6135BijR��������������������������]y�������~nbddhffge`M;S��������lkjjifddba`hopompcU;)���������������������o��WKJ@wQ$#$$!(4GQFF;<NMF=4�ʿ������¿����ſ���������Ǿ]7V "!    !   !""$$#""(=Th+XcB"$&%&"79"!:CBDEEAB0$�=$E,[3#()'Y�������Ĥ����x����������oBa_ ,Q?)##-KdU.�G0>8'zK\_a\RMJS]^ZK%auttty}��|XgR!awq<g�|uwpqvrqw8!#*")F1,4'ZcwQ(.�܏S{E;/LSY07IE+##a�cHOO����������VCMQT#WSGY8	#35455544%Y�����o����IIqVQOF2-4FQI6CJHCFFCFNROQy�ɶ������|v��,!.Inwx}}~~|||{��~|s?.[S]�Fj�kYLIG7/ZM"X�������T}s51�������ZHDBCH\g[K@:<:<D�qWSOKLYSOQRRON?,0&(.���ib`_TJJZjmoqrSi��������������������������������������������������������������������������������������������������������������������������������������������ȿ��������������˿������������������������������������������ʿ�����������������������������������������������������������������������������������������ɼ������������þ�������������������������{oO!z�hgng`XNHD>==<<>ACA>:7226<A;2(/@nnljiiaYPKFA?=;886423113200.)	VwxuoeZPC>8=YWlD#""" !  
" B������~xfhi`S]ca`^df`��������������ʢ���������������������ѣ�������~|zx�����lieba`\ZSH:(zZNSRf����oKJD@C2%f����~uwkRK\hiihjlS��������������������������Uu�������nc`agfedc_P8N��������lljihffba`agoopolbR;-~��������������������m��WIG5]��rvl0  +8C=53*:KJF<<������������n�������������ƾ[7T ""       "#$%&%!/9Xr>`hF$)22/ 8; $OhhijjmlF#�nqO.S$ !$(*]����������D;M�����������nFe`$<L>/7HbaF$�C(�H^a]YB@MRZ]`I$`zttuz|��{^cQ#ctr<d�xttprxuq}:!%* C�j���*$$&(.�ُk}y&;>�xI
+0"6GB)!!^�[ENP`pmr{����fMRSQQ$W54�jI&&=CBBCGCD3[����mYy���FHkWSR:'708HD4CJJEBBBINOOa��ƹ�����������)'B\qvw{||}||}}~�{o9,\Va�|?_w~bCGG4:_C);$`�������J}UvG������PGDDCGPOHA?;;<;C�mUPMHJYfiXTSQN< ,GN7!6���icaa_dttnlnppMm��������������������������������������������������������������������������������������������������������������������������������������������ʿ��������������������������̿��������������������������������������������ɿ�������������������������������Ƽ��������������������������������������������̿�������������������������������������}oO!{�lgqibYPKE<:9687;BC?:8659HKB3+!-@jlklkibYQJGB><:9755432221//1+	PxzwsgXMC>72Hcp>$%#""!!""!@������~xpsvqZZ`ba_bd^~������������������������������������ѥ�������|zwy����yhgfb`^\YPH9'xXLOT{��x��EHDAD0%h�����o[F?>Xa`kjomQ��������������������������Vn��������lc_`hhgegaO8K��������nkkkjgfdba`gnoonmcS;-���������������������p��UHG.0}�rpd+'058.((9KIG>>������������qf��������������]9W!""!!  !!"#%&'&%" 8?L�Zb<&82-50A>%Nd][a\Z\=FQ=NqjR!&&'[���������ɾ�������������jB`[ -ShZZggN1%�C"#" 'J\`_\VWQO[^dN']}wxwy{��z_iQ dvt=e�xwurvzts�: '.P�T�h�7 &,�ݓqwr"5,Hg<
+1 4A<$ %g�^LQEU����~hxq]HORSR!!JS�)</!.0/0/042%`����M�`���HKqXRO6/<08LF4@IJD<<?LPPQY��Ź������sio��-:Wkuxy|{|~~}}~~�~|s6!FMe�vYs{ntGHD/!@bB 5pqS&i�������W@D�J�������THECEJWawY<:9:7B�kNLKIOYoqYRROO7$KdD 2���j_`]WX\[\]dnoOj������������������������ʿ������������������������������������������������������������������������������������������������������������������̿��������������������������Ϳ�������������������������������������������������������������������������������������������������������������ҿ˿����������˿�����������Ǿ������������������������|pOz�fbpkcYQJC<<;9986<CCA?<:BPUK:,$-CkmkmkjaXPLFC><:8553223211111)	QvyxreYOD<9AyWR8$%#"" !! !;������}xP5?95X`a_^`c_y�������������ε���������������������ӣ�������~|yvv��vqojihca_]XPG9hXNNW}�����ADAAF2$i���ZJ>GYlpwxn\VhiR��������������������������Wr��������od^[ffhec_O:O��������lnkkhdcba_`foonnmcS<,���������������������o��UDG;V�}SG?''0DB;7+7NJHB@���������¾�����������������]<S"!  !""%'&$"  1<.7'#%5 
+>;$N]3#&''*!> GNCO\e;*"'[���������dSQSa����������h@X^ $9OW^\D*&  �C0B=@A<3'�H\_^]XWWS_]_M%]zwxxy{�y\hN!f|s;\�|wvqsuto�:'-P�N�K�7!&/�ۓ _hb'0.�m6	*4@>$!&d�XHON����������TNRRODb�z1r%	&;=>A>@DA/ e���@�h���GLqXMM?-),AKB5BIJD>?CLRQTU��Ʒ�����������-EWgrxwy{|~}{|}~�~��zu1=Oe�wJ`{wyOAD- ;B1/88[4j��������[|�h�������QFEDEFL`eC9:::8D�gNLJDL]`WSSUNO57A2'9���dXOA=HJ=:;7R_Fr��������������������������������������������������������������������������������������������������������������������������������������������̿����������������������������������������������������������������������������������ſ����������������������������������������������������������ɿ��������������������ʿ������������������������|pNz�heoka[PHA?@A=::33=ELPPQWZVH8.%,BtqnoljbYNKFE@<;976521111120.+
+RyywoeZPE<:<T-];#%#""!!!!!8������~yfchaaa`a_^`c_t�������������̝���������������������ҡ�������|{wt{���sjjgcc_\UQI9k[PPUf�����EDCBB.$g��pUgy~�{z|{wpjijR��������������������������Xu��������md__hfgge_P<O��������nnkkjf`ba``goqqoofU;0��������������������q��RGG:a~v{��D",:>67F8<NKHA=��¾����������Ĺ������������X9R""###$$&&$"  ,=8����D%33+&+
+=<%O^( CJ2sq;&\��������}���������������eB]]$@TGHN@2$!�D[������2+}H[^]XLMHL^[[K$_yvxyz{�~^oO!_tq;c�vumqvwt�=%+O�T�W�4$).�ݑI>A/.!	&-=>'"(j�UHNK����������UMSSP!*17!%	#69;=:1/-$i����?Ad���FOpUSQJ>:?LSD7BFCA@?BGPQR\��Ǹ�����������+H[mvyyy{}}||}}�~}xq/%PWe�rAm|spBEG.!?YC(`P@*n�������ZCLN]�������PIFFFG]jWD=<==9F�dMLKKMcddYTRNL6.8?5!@���XC5;J\nufK3/DDv�������������������������������������������������������������������������������������������������������������������������������������ʿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ������������������������ylQr�penjbZPIA?><961/-3@NZ`YXOH?91&+?rrmonkcZPMFB?<;96663110011//)	NuyyqfYNC<89e`o>"%##" !  
+7������{xidkPQ]_^][_c[t�������������͚������ÿ�������������ԡ�������|zxz��~�~phffc_[WOH; q_PSTOq�iWSCFGD@2"h����~~~|{{wtomifO��������������������������Wo�������~kca`fhhge_P9N��������mmikiheb^a`fnrrqpfT7,{��������������������n��SLH3)'_c2p: $38*')=BBMKE;?����������������������������Y5Y"%%#'''%"  *;7Z=>T*.& 
+=:$N[($F Sl`#B;&]�������ß���˿����������f>d`'P{_dum[9!�?^�����N" +~F\a`WLVXY^\^M%axuvwy|��}[lL#cxu:_��vtoqtuu�:&-J�w�f�/$# *-2�ߌY�q#:	*,==%"*g�XLNJu���������NSSQP"^JCC@O 
+"5=CA7 l����������?WqSTN@5:8HRD7AD=996:ALSP`��ȶ��~���kWe��(Garxxy{|~~}}}}��}~�yq.*kfg�oFk�nmMDG+!<RC+mvO$t�������abaXg�������LKHFEHSZUJ>;<<7K�dLMOOT\gr_TPJK2+@[D ?���S?3?F[faQEIXgBw����¾�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̼�����������������������ľ������������������������zkQ t�nepkbZPIC<85650-,-9IWXOHFA?:1%*>rrnnpmd\RMHA@=:97554210/12/.)MtwxrgXLE=:;VN`D!$"" !!"!"
+3����{wfl\2TX^_[X]a]r�������������н��}������������������ץ�������~|{y}�sox�uhgeb`\UPJ;#u^IIKLJ@EMJGFGD?3!c��qRQLg`HboqqnkdR�����������������¿������Vm�������~lc``eghhe`O:K��������{wrrqlkgdfgjnpqqqeR<*|��������������������p��UKI8=k�~~}0%42%&+8A@MGD<B�����������¾���������������_:`"'('&%#      -F"?E.!1630%
+	>7"Sa+"<!FZ6"%(']�������Ēi��������������kBbn"&=YOb`U`R+�CG����k*}G\_^XJMKL]\]M#cyvxwx}��{]jH%evt:`��vtqswws~8&.7}�qcm'k���U/��DW$=	+!;IB,!#d�TKOKfzv{zvuyygFTSRT )������&
+$46>?:%p����lOp���<XnWSO6)02@ZD8BD?=:58?KRTf��ǵ�������|���(Iiwxxzy~~}}}~�}~�{p-"hth�kBiwieMED)% =Y@5X^/x�������Y`����������HIIECFDB@?=<<;4K�bMPSRQ\mh[VONQ3*NI*C���c`ZXLSUMP_gouEx�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ�������������������������zkRv�lcpke_SKC<9941--++.<IG@9;;;8/"+>nqmmokbZSJFA?=9976343122221.( KvyxtgYNE<8;fgtD$%!! ""! -��Ĝ��|thW<OYV[][X]^Zi������������������������������������բ�������}|z|�~z��qggdba]VRI: kVDILNNGFGFEB@>?<&`��riujisbRgirrokgR}�����������������ÿ������Xl�������xqkgdfffgg`Q:L����������~{wspmlkhjmqqpofS:+x����������ÿ��������k��RIG?lv91H2!07/02:<=MGF=?����������������������������[;\))&%"! !"   ! 0CM�v�F*57-!	66"O`&"8 (YY:$%%)\���������jyup�����������eA\�)(;AF`XH\Y. �BH����U)}I[^][YVTR]]ZL#eyx{xwx��{[iG!bxy;X�zvuqsutq~1$-&" L:4GB-�ߎ
+4%	+!8HG:%"#n�NMMR����������RSTSQ&D%<C&@
+-)/>6s����1Y����:[iSSM:/8=GRB7BEA@?<=CNQPb��ö�����������)Pixxy{z~�}|}~~�}|}|yj*Kcp�d?\[N`HDF'$ 8_>.F]0|��������P#A�������LIHEFGEB??==;92J�bSQRRQhkh^XQOQ06\F3 K��zdccaZX_ojilmsB|������ſ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ����������������������������̾�������������������������������Ǽ�������������������������|lPv�ocqne_ULB<;91.,-,+(.9;5.0395, )?nromnjc[TLGB@><;86433133321.*	
+Krzyti\ND>9=b[`1"$#!  ! ,��Ǟ��|vlQTdVWY\ZW[]Zi�������������̰��{������������������֥�������}|xz����skhfcb_[WRH9!nXLPLNJEBBCGEBBB;%^���~�}pd_eb`pqnmiLz�����������������¿������Sn������yrlhecehihaS;K����������~ywtqmljjilmnongX=+p��������������������k��NKH<@,)*#'8=<;=1:LGE?C����������������������������]?_(&$#""!"    !!->^_FR&27'<8"Nc, #<H/ "%%'_�������¾���������������dBY�0 (Qo[\nf_W6�Ec�����**|I\__[VWUY^\_K%dvwyxwx��~beC$ezu<T�xutqttsq}*#)8l'C/B>1�ތ1N"&/>C<*$&u�PLOPp�|~{y|���JORSQ!B%=A8
+	'"$u�����lc���9YdSRMA<A?KRA7EGB=::9DSRV_��ķ����������*Rnsvz|}~}|}~��||~}xj*>Uo�`@WWHXGEI$!Cc1<ea. ��������FDx��������JHHEGOPKJD>=:9.R�_RQMONUWYYVSRS0",)&G��tcd^TD0H[X\Z^fB�����������������������������������������������������������������������������������������������������������������������������������������������������������̿��������������������������������������������������������������������������������������о��������������������������˿���������������������̾����������Ƚ�������������������������znPq�qetod]VLD><81/,++0-)*,)(,340))AqtolnjcZUMFBA@;:9644322121//+
+NoxxujZOF>9:Rl]8%$"    "!!!��Ǣ��|vpkhbZYY[XUXZZi��������������ǿ��������������������ק�������~||zxxyzrhiihc`_YWRL8rXQMNRIB>>GKGEE>3$c��wNJIF?568WppnkeKw�������������������������Wn������}xrlhdddhfd_O>H����������|yxspokhjljkmoniZ@*r��������������������m��PHG79g�v^0 *:B:4,7MHC>A��������������������������ľ^=T#""!!!!#"!!!"! 5B_�|�Q1491*?:$EWIGJGDF3$?30"$%%`������������������������cC\s'$DVD:KODNG�DL����n!)�I___VMOJP\][K%^twyvx{�|deD%cyq<R�~utrsust�.$)I�,:n�q9/�܏	-!!&&"!!s�QLNQ����������OPQPQ/������#	!w������}���4\iTRJ700.FPD6BF=,,.0>LOQa��Ķ��z���jfq��.Sltvy{~~~}~}}��}||�ve'@Xv�cAKQIQGCG%0=*@hS4#~��������GJ���������JHHFCQlkiI?=:7(W�YORPOQQMNRRRRW/H��q^`RRH<PP]`NK[>������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������λ�������������������������������������}oMr�rgund\TMC<9720,+.75-&#"*452,")?rxropjb\YNEB@@:9666333111221*	
+Krxxsh[NE=:;VtI2&$!"!    
+#��ȣ��{rmhg^XXWYWUX\Xe���������������ő�������������������أ�������{zzz����|hgfdba[YSK?wXPKO`_igKJHGD<-%d���uuuorufissookfNv�������������������������Zt������|urmifefghe`O<D����������~{xspmklllllmooiW>&t��������������������m��TJG;_�\Gm^$!*3553<PID?=����������������������������Z?T "##""##"""#!   <B46
+
+93&TmgioefjE':!E8W&"%&'^������������������������hE]\%BWMLSQG3.�@>����P(F]^^WHHOOX]ZL*_wuxux{��|]kC'dxo;]��vsqqvrr�8&)1KDJf�+2Se=.�ڎ	(!"#"!v�PKNLpuwsss���FRRPN)TE>AB_	%y����OLf���7_iTPK6'26EQ@6BJB4104BOQT[��ĵ������~~���.Wkuyxy{~~~|�~~�}||�xf%[^r�\BLOLMFEG# :^jK ��������xR*>��������GFHHBHL:<>==;4(^�bXUROSSRRRPPQU,!%&$!I��lZO>GHFNOXUUck@�������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ��������������������������������������������������������������������������������������������������������������������������������ɿ��������������������������������pPo�uguph^TMC<:732.3211,&#$*86.%)>ptqoplc[SMGDA>:8664431011110(
+Htxxrf\QE=:38.'*%"!!"!  
+"��Ȣ��zplbg`ZRORVVW]Zf�������������ι��{������������������ܦ�������|zxxz~��nggfcb`\YSI:lTMMQy����sGGE@6.*f���������ysorpplfKq�������������������������^j������|unkjgfgihg`O<J����������}{wrpnllllllllphWA'm��������������������q��XIHDzQ8l5 '5C?;2:NIE?9��������������������������ƿ[@\!$''&'%%#$$$$##"! <?	
+93#[kXX[X^hE!+=!bQl=!$&&]������������������������eE]h$%?ZPYbkgG&�> d�����%+L]\][TQXT]]YK&\yvxxyz��~aiB&bsn9]��tustvst�8$(K�����-,Po}S0�ڎ	1""&79&"#y�KMPT����������ORSQK"5
+!y����������2cjRPLD;CGPRA6BGA1//3BNOUU��Ƶ�����������,^nuxxzz}~}|~~~~��zh&#�`w�SEJNLLEDD#!AYX>%��������ZZw���������IIHGCGS_RB?<;1+a�]TSST]efgXPNPT)+ILG4O��hZWVYRPNPXRN[r?����������������������������������������������������������������������������������������ȼ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~rVn�wftpl`ULC=;733495,*'&&%&0/#)?otnonjeZQNIE@=:9853333200120*
+Gvzytf[QF>:5cYK<-$#"""!"! ��Ȣ�~yo[387,/6?QTW]Xa�������������Ͻ������¿�������������ڦ��������|{xv}��pnlifddb]XRJ;h[MKR������DEE?6.(c��}^ROMHEA=RopnjdIp�������������������������Zr������}wokjggfffd]N8G���������~zytqomjkjjjiihjgWB'l��������������������o��THHE|C"".xG'>JI3'(7QIF?;����������������������������]>R#()1/),)'*.'-)&)$'DJ	
+;2%Z]/%()=fE"(?I'!"$$a���������������ƾ�������\A^t#$2C=Xaf^D$�DU����}$+~M^`^][__\_^]P$Yyxzxyz��~emC$bto8Z��wxrxyvw�=%+)C=@`�,e�U+(/�ۋ	5 2@M>(.JI4%�KOPQVbfglsyy{fMTSRK,dI7# "$!!%{���iqGb���-gmNPOPQNQSO@6CF>2348FPPXS��ķ�����������.[oxzz{|}}|}~~~~~~xe#,�\|�X@LMHFEDC%"'1(;\]=!#��������|SLo�����}��LJJGDDXmcE?;92.f�UQQQPXa^]TPOOS*$:>>1M��_ZffgL]ejb:#0bD�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˽��������������������������������������������~pPn�sesrk_TJC><614651+&#%&#"&#'?pvooojdZSNHC@=;:75432331110.)
+EsyytgZPE><8dkg_:%"#""!"! 	��Š�|vm[DWSAAJMURV^Y^��������������®�����¿�������������ۨ�������zxxx�����nhfeda]XSI;e[IKTw�����DJIE<4+f��ydgeb`WGHRonkhcNp�������������������������Vi�����~xtnkfda`ab`[K:C�|{tutommnkkjhedc`_`]]\]\[T?)k��������������������m��QIG=sJ"2~=$6BD7-+6OJG==��������������������������ſ\=S"'B��bx�k�a�}cwc`"
+>E
+	0*%XZ#1fE"(@Zb/ #'d�������������Ƽ�v�������[>_j&Bc`b]N>7%�> ,VTPN-,uL^`_]Z]YV\^\J%Xuwyxxy~�|akB#_wp9X�|vvs}vx�P(*D�)+:VjG2�ً--|��T6&8<'!&��GOPLFGIGJFEGFJPTPRI#*>R[X2I`c``gdR7$~���i@ru���0fjSRTRPTSQS?<DIA9;<>JSSXX��ŵ��~{��rSx��,_ry{zz{~~}~~|~~~~~}sd#<�Q��TBKKGFEEA#!GT<>d]2#��������F`oGl�����{DGGFFH_M?>>;60/i�[UTRSZhbYRQQMM(9</&!R��VXbfgAenr]0''OG�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������;�����������������������������������������������������������������������������̿��������������������������ž�������������������������nQl�vitpi`SKD?>6363.+(%##%%")>lwrpole\UOIB@=<:8554442211/.)@qyyui]QG?<9`\P;$"#!!" !  
+��Ƣ�{rl^aebPOVYXUX\X^�������������Ͼ��p���ÿ�������������ި�������|zwwwzvssljjfba^ZTK:``NMS[q{`�z@MLEB;(g��������wB^_mnigdMl�����������������¿������ZY��tuusklifb]Y\YZWSJ78�}wqsrnomjgfgfecb`]^]Z[[[ZW?(g��������������������m��OHG9OF#(1T'$1?CE;;PLI@@����������������������������_CR#&K������������ô�&
+9>/*%W]&7dF%.CD:Z)!#'d�������������y[MH�������YA_u )Kia`ZSMA)�> 2A+nH\^]YQSRL[_]F'Ztuvtux}�}cnD%ctr;Z�tww�}rw�V'*8{)P<':94�݋
++$>01"&}�USSLNPOSPOOMPPQQQSJ$Lj���c,`N@LMJGBD*)�����������+hbSSTPMORTT=<EG>4135DNQTZ��ŵ�����������+^qwyyy|~~~~}~ubK�H��LBLKHEECA!"=I7DYP%'��������E��|I����}�~GJFDB@]aOA;9326m�\VTPSWfppYSSMG'1]Z.U��V\cefFltwa5+)FG��������������������������������������������������������������������������������������������������������������������������������������������������������������������˾������������������������������������������������������������������������������������������������������������������������������˽������������ƾ��������������������������oQi�xivoh_UKA@@734/+('%%$$#(>mxsqple^VOHA>=<:86433212110/)
+@tyyvk\LE?;8bvth'$"!"!!!! ��ĥ�ymhX>IJ?JYZXVZ\Z[���������������˖����¿�������������ݪ��������}zwx~{|xnkhec`\XSK>lhMMUd�����HJECC9&g��������{X[cmmjhdOl�������������������������ZY��}ytpljgfc`^[ZXVRI45��zyutrokjjifccba_\^^[[YXYW?)g��������������������o��PJI6GSQTPH&%1:>B9>NNJ@>����������������������������W?K$%<���������������#997,%Xa.$BhC#/L!6V." %%e�������©�eSLPTZL�ȿ����WAXl '=KDLNOIA+�? =ttro?,rK_^^XHHHIY\]D&^wsoouz��gqE#evq8`��uyy}{vx�W'*9q+@XiYLD�ߌ/R�uoE0"$�װ�����������������n7��s@!
+'XPEM2%CH+*����u]V_���*kfSRNGBBFPO;8EF>99:<GPQRZ��ĵ�������}���,_sxxyz|~~|~�~~�~~ubX�E��PFIKGDED@%!@W8`wV .�������|I���B����y�{JJFEBG_LWF;6049o�[QQMOUik^SSTPI& BTE+Y��RW`aa8]nshI0)BF��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ծ������������������������������������������������������������������������������������nYh�qasoh]RG@>@:40-+(&&'&!*:hvqnqme_VPIB?<<976422311011/*
+	Buz{wk]PGA97VPMZ%$$#!!""" 
+��Ȫ�xmg]I;7GTZXVSX\YZ��������������ȸ�����¾�������������۪�������|{yy�����mkhccb]XRJ>naHKTi�����DJCB@5'c����wsplle_ZhkjicNl�������������������������^c���~tomjgeb_\[[WTRK56���~{uqnjhgfdba`_\[ZZYYVWUO?,g��������������������o��TOH;Yr~�qc-#8LG9/,9OOIB@��������������������������ƿY=I$&/v�^qewxqV��Zolt"	CA
+4($QhM0->Zb<"0D"1O>W6#&(e��������kFQZWX`ol�������UBUk)A]KIMD6+"�Ck¹�Ɩ%.pNa`]Z\[SO[Z^H'_trjit{��|il>$bur7[��wzx}|xy�R(,:~�*5[��^T�ߏ0)':SX@	;!7%:1$$�ݾ�����������������}.A4LMK'0K9+�����qh���)ncSQK;,-=LN>9DHFHJIFKSRTZ����������aCn��-_sxwz{|}||~�~~~x`ZpF��KGKKEFHF= 'EW-UfH/��������D��Yb����w�xGGGF?IO>NB84/;>p�YPNKKNZ\PLS]SJ& 7>2%`��OT`YR3\rqh\93AC�����Ŀ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ��������������������������Ͻ������������ο��������������������¿�������������������������qZh�ratne[PH>9764/-)&&))% (9jxsqrlfaXPIDA=<976411011/01.+		@ox|uj^PHA:6cwgD""$#!""! 
+��˪�xoh]P]\XNTNRSX[\]�������������м�����¿�������������ۨ�������}|{|�xqx�pkheca_XRL:v^IKMWwst��GHDA?7(b���XFC@@>807RchhaNj������������������¿�����a^���tpnjfda_[YWURONJ27��vpmmlifdc`_^]ZZYWWRTQSTQM>*i������¾������������n��ULI7*'B_"$&9IE5+)7UNH@A����������������������������Y:N'&&#%$#% 	TF8(Ahf`ahk[2"/@!,@>]oN3&%c�������ĀTQa��ſ�������UH\g (Qm_fi`W7$�C mO��%,tJ\`^[WSUW^^]H'^stuuv{��zfm:$`vo9^��wxvz{ty�K!'--BJ~�P"toK1(B�ܐ*<ce!=,]G`\+",�پ�����������������u)>'(#&66FFJFCCE=-�����������)pcOQK=00>MN@;DIGDECBKRPWk��´������o`i�0]uzy{{|~}}}}�~}��v[_aI��KELLGGEFB(Lc.T`J"4��������lF@H�����w�vFGGC=FLIR<632==s�OKGDAKh\\J_aQH%#HTK,^��OV`I/%]tvl_A=AG�����ſ�������������������������������������������������¾�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~pUi�uapnfZTIA9511-+(%&'$!)<gytspkf_UOKEB><966422111010/+	
+;lw{vk`QKA<9hFEF$$$#!!#"! ��Ȭ�wpiS,510*4>PRVZ^Y��������������î�����¿�������������ݫ�������~}|yx��v��mkigca^ZRK;t[IKQ]w���kAGD@?7)^���tusv}xxlX\bfe^Mg������������������¿�����]Y�zqopjkeda[ZYXVRNKI2;�|yxzxrlkieba_^]Z[YYSVPUUPN?)k��������������������k��RHG6((Ef##%3BA>1<PNH>A����������»����������������V;T#%4[nNXYVKKNjLGEI7
+]@#B*)MfkojaI'!(C-Q^0'f������������������������VLcd&R^CWZH`L(�C<��]�R-{M^a_XKKIQ_`\G$_uwxyy}��|fq=$`wo7[��yww}|v{�E!(+S���U R73J43�ݑ	),=B:/^YQ^3!(�۽�����������������v,WIVk��'YWKRRNNTO'2�����������+qaNRJ:6>EOQELFJEA:5=LRQZu��´����������x.btx{z|}~}���~�~uY*eOH��LGMMHEEGA+#OI'ZgW$7����������Y?����y�qEHE@>DbfS8549A=x�I@E@<PL9MF;BLH%*D$24_��RUbD%(lk^l]?:<F�����Ŀ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̾�������������������������~oWh�rbqof]VKB:51/+)(&%"# 
+$<fvqqpkf_UQKFB>;:654432210010*	:iyzvj_SK@;5LNZJ!%$#"!#"!!��ı�zrl^Zf_YQTRXTX]\X������������������������������������ޫ�������~|zy~���ujiifca^YSK=s]NJSk����{AGDBB6(]����������zmljid]Mg�������������������������Qa�����zwplieb_a`]YRM5>��������|xttpljhhecdfloopjMB,n��������������������k��WIH85>Rp-/$#/3;5;MKGAC����������������������������SAQ"%:��������������z	U=';*&?QUKKN3!)D#))(f�����������ż���z�������UI`a&KX.EK'UV*�Ca����}!.vP]_^XNZVL\^[I%]quwxx|��|fr>$atm9P��{yzy{xz�C!')/FIz6$m��~F5�ޓ(3\a#	?#S\?F(!(�ݼ��������xg���P6<�u Z����R*SD76324120����|w{����(q`OQI87FMROGTGGC:./=LST`v��¶����������s1bsvyz|{}}����~|�|wW>iDO��GHMLHDDE<+#]c3Xib17��������~R?c�����v�mFHD@AR_SU;62:B>{�?@B?<OE<RB=QXN$)9(24 e��TRdK#%leH]Q55:I�����ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ����������������������������������������������������������Ǹ��������������������������oXe�wdrog^SH@93/.,+*'%"" 
+$8awpppmhaZQJFA=<:665433201120)
+	8kzytl`SH@:8^uwS"$""!!!!"��±�|sk[<KE:CUMRUU\ZX��������������ͦ�����¿�������������ݫ��������~|yy{~|vnkihfeb^YUN> yiMJUu��e��IHHE?7)_�����~zupieYhjib\Jh������������������¿�����Bf������~vsokiggghh_P9?���������~xvtslkighhjoqpmlMB-n��������������������k��VGH7i���u�>#.2B9<68KJFA<��������������������������żW>O%%0�������������À
+M=*6*!"<dA#,I $&)1g���������������ŧ�������VIa^$=H28</C@% �Av�����(1vO]`_ZKNKQ[]ZI#[ostwx{��~fo@%ctn:N��zzxuyuz�F &);~~#3bea;3�ޔ!EqI"	!C%D/#*�ܽ�������R38U��5#3�n(Qja9@OOLIRQRP#/����EAAs���)s]RQI<9CRTOEFFH@3/9DPUS\u��������������t1csxyz{|~~}~�~}�|wS?\AO��@FOMFEGD<,!N].JQV9<��������C2x������v�bEFA@ASLCR@43;C:}~:A??@FNRZHKk_P#!699- e��SK_N#&oiOKD35>Q��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿������������Ϳ������������ƽ��������������������������nWc�xbuoh_VJ?920-,,,)%" $=]wrqoojb[PJGB>;97543211130/.(		2izzulaRHB;;?4;Y1(#" !"#!��ǵ�{rjY6C?>IVPRVV\ZV��������������˘��������������������ଠ������|zy{�����oiigea]ZWN?kKKWq��w��LGHIF7(\���^VHC@935Biggb]Me������������������¿�����Qb������~wtpligghhgaP9<���������~zxvumljiijkoolppSE/h��������������������n��UHI:@7/038&&7=SIJ;;JGD>:��¿����������������������ȻS9R&%,��������������u	J=4$!>eB%/M!'#&'))*.e������Ŀ����������������WEZb #>PIKKID2�=P����k.zP\`_[UOPZ^^`I%hwvvxxy~�}ir<%bsn5T��yyyu~vw�D ''9o(!Z^Xc44�ٕ	
+LsuD">1aWH2"$*�۽�������$+:C_�t/\�j4sa_oxZ!;7969659: 7����}������-qZROJ;4ASSO9;DH>78<DPVTbz��µ���������r0drvzz~~}~~}}~~~~|tP;D>K��CFOLFEGE=."Si.YnW6A��������RUj�����x�eGB>ADefiX<34=B;�v;CA?ARhe\G@ENI!'JSH+"i��VI]L$)ifMB8/9JQ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˾������������������������������������������������������oW`�{buqi_WL@941/.,)&%$! !$:b{stppjc\QKF@=:765532233100/*	/fy{vl`RIB=7]�~td0#"#"!"! !~�ó�|rkeZUUTTUWZVTZ[W����������������������������������ૠ������|zwy����~oiigea]YVM>�`GNUc�����GGGFA6'Y�����vligMLUeefc_Pb�������������������������_b������wtoliggijgaO9>���������~{zxuqplkjklmonpmTE+j��������������������m��VEH9clG%!$-<A<08MJHA>��¾����������������������ȺQAX"$#*-"$(%(''8+(#&&NA
+;$5=<<>;Nc@#*I'dcd]lnod?(h��������bLKQOPNUO�������TG\h!!:TOS]a\>�<e�����"*zO^_]WORWV\^^I$hwtqwwxz�~fs?%cvm:V��y{yx~xu�A!''# #<de@$4�ؘ!OqI!%G PS_c;.&2�ٽ�������_>C?HSNF��fF������
+-bWOMKORRG":����sl�����-v[ROK<6DSUN<DCGA;31BNTU`u��ô������iml�l1frxz{}~}}~�~}zsO2:@Q��FLQMFCDE<-#YN#ObK,E����������tM�����|�cCD?AN_TUF938AB:�yCB@>=WKJ]IBMZM"-;&5.#l��XL^K#(ji[=3.5PO�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿��������������������������������������������������������������������������������������������~pV`�|guqg^VND:400.,)&%"! %5a|rropgc\RJEA>;776532111001.+	2fyzwlaRIB;9\|VA,"###!"!!  x�´�~tla?.57ITWXTT]\T��������������ǧ��������������������ᬡ������|zxz~yxwvmiihe`]YUM=�jORRU���`?IHA>7*X������~{qCIUeihgcRc�������������������������^d������ysoljgfikheO;=���������|{wtqqmfhjlnoppmWE+j��������������������m��XII58\�wI+""0<6''7QJHBC��������������������������ŹR?V&$%" ! !"#"!  "! K;
+=&'[ngfhhhg?!(B!:Qzysh]H1&i��������eKUYTZVVI�������RJ`^  0=@Qb_V>"�;a�����%,yN\]]UIAFK[][I%fqxwyy{�bqA&fwl<Z��{zxv{xx�C&(B\dbbW  ]f^g;4�ْ	!"EDV*L(XY^X<+$/�Լ�����������6$"3t�g%(
+.\)%))(*(#;����Ju����}1qZSQK;6ASVNB\DIC5*1CPVVYm��³����������i0fszyz{~~~�~~���ysL-4C[��AMSLEDCE;-&]b& GH=D�����������������|�dDC@AQSB@=62<BA9�xF@@DBbSO^CAIWL"8H5D/%n��]L_L&$sv]9427MM�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο�����������������������������������Ͼ��������������������������������������������������������������������Ľ��¾�����������������������o\`�guqhaVME<50.++)($ !! %9[xtsnmgc\SIEB=;98754332001/.*1by{wnbTIB:9SXIC&%$#!!""! 
+w�ú�~tp`Ea]MKSZWSV[^W��������������Ř�z������������������᫢�������}zyz��z|wmjigc`^ZTN>vnVRTh�����RSKBA9)Y�������|xifckfdhhUg�������������������������c_�������zpomjjjkkjeO8=����������yvsnnlgjjjmoqrnXD(e��������������������m��WJH3%.lt~p/#1==.&(;QKKED��������������������������ĸU>Q'%%%$$#"$$##%$$# 	M5!>&"Mc[`ecik<",=/`dqxyzre8%k������ƿ~}�p{�up�������NKj_!%>TX^ZG:3$�9g�����!.wN[^^YLKJM\]]F$iuzzxz|~�dp?#gun>Y��}|yx{z|�I!(+R�����('P#C<6�ٔ		&$1Gb
+-R9`P:)$1�Լ�����������) $S�]+K#("=����Lh����t/wXQPL@9AOQOD[CG=.0<FQVWbu���������{�k9csyy||}~~}~~~}}yrI$/D]��BKOKGEFF:-+OJ&eoCF�����������������{�aEBA>MI?;;33>DA?�pB@@@KbhmV?;BPK#"GITF&&r��`GE6$.qm\B;?DMS�����ƿ����������������������������������������������������������������������������������������������������������������������������������������������������������������Ѽ��������������������������������������������������������������������������������������������������������������������������������������������������������������������pZ_�|cwrkcWNE;3/-+**&#""!;Yzsuqnia\SJEC><:8743131/0000*	0`}{xoaVJ?;9KjgM&$"! !"#"" x�ý�~rqd\e[MKUZWVY\]Y���������������Ы�������������������⬢�������|{yy�����njhge`_ZUN> ln\TSg�����KKE@C=$V���tkb^WQMKMg^I^fVe�������������������������c]�������yvmifilmkkfQ==��������{romkihfd``coqqrrmYG(`��������������������m��XGG9-6tquh-,BEA:89?MJICC���������¾��ɾ�����������ƺTBR$(''((&$'('''%&'$""!	Q:#&
+C""+,/01Jh;&0<7�{|||}~K)m������Ž���Ѥ��;�������RFi\%D`Z^YMJ=& �;f�����#-{ O[_^ZT[RV^]\G%huwyxz{~�eo="gtm9T��{{{}�~}�G'-*wqP�% ci1S;6�ܙ
+,#-cm	,P#0)"%3�н����������i &<.
+.\MGHGEKOT)>����������j2uZSRJ84@QRNAYFI;5:CEOTSax����������yp�^7itzy{|}~~~���}~|xlF"0F\��@HMKHGGF:*#VV&UuJL�����������������~�[DAAAAD?>:04AEA@�i=><;TK<>AA@\oJ"'?#"!)y��`D;75Difd^YbaLZ�������������������������������������������������������������������������������������������������������������������������������������������ɾ��������������������������������������������������������������������ɿ����������������Ѿ���������������������������������������������������������������̾�������տ���ƽ��������������������������nZ\��dvqiaXNF;4.+*,(&$"!  ":ZssqojbZQMFA?<:7655322212/0+		1^}zwm_UJB;8H?6>)$""! !!"$!o�¿��uqd[c]JEU^YTX^[X��������������Ц��������������������ᮢ�������|{xx�����pkggea_\TOF%suXTXf�����KHE@E:$S���WNOJH@;=OhY>[eVc�����������������ÿ������e]�������zo`baijijhaR>;��������upnkhfdc`][amrrrqpYI)^��������������������h��[GH:Qz�W7#):;@<<B?JHG@D��������������������������ȿQI[,.31/-.1./0.1---.-)+&F9%)	>?g:(+;0{zwuusxsG)o������żr�ȭny�Ė�������PFe`"3EADHFEB0�8J����Y/yL^`^][^TR[][H#_rzzxy{�dn?"fum9S��|z{~~xy�D&-?�jJ�%CQ,D.7�ݔ
+
+7#37K!
+2M#LcfC"$`������������u)$!#!1
+1Q946553;X.=����YM����i4rZSQK@:DSSNCTDE8./1:NWUd|��������������a<gsyxz{}}~��~~~~yrD"2F[��?HOJHHGG<(&PG"+aa=M�����������������~�TCA?@AA??6.8AC@@�f9<<;JA6:;A<HQE '0&~��`F\bbeqqgjabjN_������������������������������������������������������������������������������������������������������������������������������������̼���������������������������������������������������������������������������������������������ξ�������������������������������������������������̿��������������������������ʾ��������������������������p\Y��ixqhaXMF<5/+)*&%$#"   %9[�tqpojcZQMHB?<96534332321/.),byzvm^UJD=9EquG$%$#"   ""n���wobIOKC@EFPWW\]W��������������Ƈ������¿������������௣�������{yz���{okhhga^]UNB"\nVTUe�����KHEC>2"O�������umhkhqpgjhUc�����������������¿������f\�������~t]bahjjjhaT9<��������tqnnjfdb_]]bmrqpqm[J&\��������������������f��ZGK;\[8$$"'16=677;JIG@:��������������������������ɾUW�mnpmljjliihhggggfc``Z7A8!&=9^6%.@+dPPYs`UYJ+k������ƿaM������_�¿����NHfe $+#$($%/3�83|Na`_ZHHJM]^^H$ar|xwy{��fm<%fsm;]��|zxyzwy�@#)):}�mX�%"fikr85�ٓ	7"Qu\5
+9M<_BI`'!#T������������{>0)#! 4mHG<8942dF	)MD?<<::AK!D�����i����e3rZROG47ERSOIOEE:345?OWVay���������|jfx�V@fsxy{|}}~~��~}}vsE"3C_��BIPKFFHH;'*QJ#!>O3P�����������������y�P@<;=@=;920:@BAC�e:>>AJNLMGC;?IL##&���[AefdZ`jH[OWeLc������������������������������������������������������������������������������������������������������������������������������������̾������������˿���������������������������������������������������������������˾������������������������������������������������������������������������������������������������������������������������}m^\��dvrjbZRG>5/-+)%%#"! " !%7Z~utrojdYRLGB>:975433312202.+	*cz{wn`TLD<8WyrY'&""!" !" i�¿��{qa:;288>APVX^`[������������������������������������⮠�������~{yw~yspnomigfa^[VM>%UqWRN[}gq�tBCGC>6$O���~oqdZXTTUhqnmhXb�������������������������e\��ß���}r^a`gkjihcT==��������rpomkhda_^]emprprn[B%Z��������������������j��XJK;>9#3I=#!1>@PD@=:JHI@A��������������������������ɼY\�������������������}zRA5P"&>'%/F5�������M'r�������Ů����Ƶ���������NLjq  %! $""+2�6*<8#.wM__^[JJQW\^aB&ct|wvy|���er=#ern;`��}zvz{yy�>%'+b�|���%#f\pj?D�؍
+6#csD:L@]EAg-  N�����������Q3>:.'##>���������R
+1DEFE>>;/D����je����b4uYQNG:6FQTMCVJGBEJNNTXVez��������������ZCiryz{|}~�~~~�zuA#4Ec�m;GLJHHJH7"&VM"#$ \�����������������v�M:556:643,,6;<<>�`6<BFKNKLOLJHUN#0~��\<ga]GKO4NAFXCd�����������������������������������������������������������������������������������������������̿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ľ�������������������������n\ ]��`trkaXSH>61.,(&%$! ! !"7U|wtsohdZRLGB?;986432213201-*	'\|{xocWLE=8HFK]+&#$"!!#"! f�����|sl^F?RSZ^\YY\aZ����������������������¾������������ᱣ�������}{ywyytmonlheea]YTLB$YsTQNb�����SAF@>7'J���VKDD==<>>UejkjY\�������������������������dX��ŧ���zqabahkjijcRA5��������srojlheda_^empppqoXF)Z��������������������h��YJL@n[2t��>"3;6C99?9JJF?C��������������������������ɺV^����������������~}xxOK;	R%%"N /JrxqaOG2%s������ƽ����������������HIas!+42582,1+#�8,C<&-z M\^^\WVMMZ^_E$fw{wpsz���gt=!guo<a��{{w{}xw@"'*NH6jhD"T.IQAD�ߓ
+58;O6F/``OU$ N�o�������X/ 4MA4*'$;vHNLVXSvF
+%:;;>98522M����hc����]6tUSPI=6DQRN@OJHFFIJOVXXi}�˿��sx��zaZv�XGis{xz|~~~�~��}}~�~wvB%5Ge�oAGOLGJKJ1$0_D 6m]<%^�����������������w�4$! !%(()7�Y,4@<>D@>DCBDJ@2~��]>gecKNX;bOG^Cg��������������������������������������������������������������������������̽������������Ͽ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�����������������������������������������p]Z��`uqh`XSI?61/,(&%%" !  7\{uttpieZQMGA>=:97432322/..-(&\zzyncXLB=8Rxvd'$##"!!!"!a�¿��}tl7<WUEV\]YZ]aV������������������������������������Ქ�������~}zxy}�ytnjgfda^ZUJC#\�UIM`�����RCHFA:#I���ppo�{meiehkkjjW\�������������������������bS��¬���zmca`gjiiiaO@5��������usnkjgec`_^copnprqVK)X��������������������l��TFI=sMO�WzL!/3+91279JHF@A��������������������������ƺR^����������������{xzMF:
+Q )/%G2cgpqvytc9)p������ź�������TF�������BIYj&=XSX\XSQ?!%�:-A?'.yP^`^XLSYV\Z]D%bvxxtvz���it>#ftp7V��}zu{~wv�B%)* V42D/8�ڔ
+5Pci&8H4M5* Mx?El��c@6\i��S.)+/ %.6�HU@*OKLLJGCHCI����Ea����V:qUSRI:7DTRM<@LLF>:5@UXTg|��������������VEjsz{|}}}~�������}ys;#3Fa�h?HMKGIKL2$(<S\c8d�����������������y�&1�S    ##"-|��]Be\F-'- /--E7n���������������������������������������������������������������������������������������������������������������������������������������ȿ�������������������������������������������������������������������������������������������������������������������������������������������̾������������Ȼ���������������ǿ�������������������������r`[��`wriaYSJ>72,+*&$""! 5W�tstpid]SOFA?<9:7322222//--))Xy|znaUKC=8DQL])$""!  !" ]��ä�ujE_a_VY_ZVZ]aZ������������������������������������䱢�������}|x����pjgfdc^\VNC#]�QAOUOe���RGKEC;"H����tp��~topqpoljZ]�������������������������i\�������{kd_^fhkihcQ>7��������}romkihfa^^blpppqvTG,V��������������������k��XFI6e�O$rH&'#,'(-5IGE??��������������������������ƺR`������������������~{wzHJ5!R"()"")/"G6���~}��I,u�������þ��ȱ}ZYS�������>F[l &Aa[bc[X^=�8-;;)0{M``_YKDJV]^[C#cyyyuv{���lv;%hwm8O��}wz~xu�A$)*$&')%!&ihr�?8�ۓ	,6Hl6	>K R��r;n�6a������`Ws�4/c{h%e?
+0?IMMH8/%J����������N>oWTQH=7FTTM:>KF5000DUXXp~�����~~�������WHjwyz{|z}|�~��~~zr:!4Hb�i<JNLJJHJ-)(13&5Y[g<!l�������8153.�����|�#1�N.}��U?]?0%  !!"R7q����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������p] Z��avpkbYQI>71-*)(%#"    !3U�uuvrhd_UMFB>;:864331220//.(+\||zqdULC=;TstX#""!! !!!!!T��Ƥ�~todea^YZ^\UW^a^������������������������������������ⲡ������~|zy��ku~rkhedc_\VME!g�LBM\�����OLI>E>%H���cSKW{|UVinnnnjY[�������������������������iX�������{nc`]eejjhbS?3��������zpnlkjhgc__blooqpvWF*W��������������������j��TEJ42@9!%/ %4GGFA=�տ���������ž������������÷S^�����������������|xyMT4&
+R %FWMJB4#&,!A.*)+/045/(v�����������rJPZX^�������=G]}A #4=1NY94E4�:+>@/0vO_]]WNFBPZZYF#gxywmny���js8&gun:N���~z|�wwE$)(Y����x"'qb*=�ݖ	
+)	>H bȾ�}h��N�����������C)`�A
+,TKD7*!!..I���0AY����O?oWQOG;5DRTL<BHD;<FNTX]\k���������������YLkwzy{|||~~}~~wp7#4Bb�b=IMKIJJN)0+.[A Mkb3j�������L���F�����~�##$7�I!0%"! ##$$ '$ "%$"5��|*&$"!;.j���������������������������������������������������������������������������������������������������������������������������������������������������������Ͻ�������������������������������������������������������������������������������������������������������������������������������������������������������Ǿ��������������������������o_W��`woi`WOF<721,*)&#"  ""7T�vuvuna[RKFC>;8875343330//.*'Wy}|qeXMC>;GRcI&&%" !!!!!"Q��ƥ�~wrjjf]XZ\[WX^b_������������������������������������崤�������~|{z�|twsljihea]VMB"Q�MGYk�����XNJGG=%G���m}~nroR`jjnoojXX������������������¿�����hX�������}mc``ffkkhbT>3��������wnmlkkhfda^aooqpspYJ)W��������������������j��XHL<Nhcb`c5$0FFFAD�۾�������������������������N^������������������|y{MS6&R"&@QR\\N/&/(I $%&v�������ßmOWZ[a���������9E`s$ $>`V[fYPI/�7%:=(2x L^^]WENU[^]^Egx|zpo|��lt;)ktk9P���}y{~uv�G%*)Hu��xX$9uU/%A�ڐ
++YW:*
+<D#"! iƻ�����������������@>X['FA6/*++-3P���qcj����MAwWSOG::JRUN=BJJIMSTTZ][]x���������qSPk�YQmwzx{z{|���~~~�}xp5"4@i�b=LQMIIJM)0*4Q<!>wkD(n�������Q~��J�������!:C-8)1(/133.%6�?0W8H4G>>=BA=)L<87;7@><H>54��tn���z��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ��������������������������p_W��^wqjaYPG>73//-+($##! 7Q~vvwtle\TMGB=<9974331221000*
+%Ty|zqeYNC<6EhmE*&#"" !!"!V��Ǧ�|vrkhc_YZ\[WW]a^����������������������¾������������ᴢ�������~|zx�����okiifb`[ULC)M�NKW]�����WMHFB7#H�������lddg\dopnk\W������������������¿�����gS��������td`aghiiibT@5��������umlljiedd`^blnoqrpZM*Y��������������������k��^IM>l���~}G!'/JGEB<�׼�������������������������Na�������������������}yvKU9!
+<*HMFC73$#"%G" !2?('w������ƾWFSXi�����������6Gdw (Iyh`lf_M*�1 )(0wM^^_YUYZ\`_^F"gy|{vv{��}mp:&guj=O���~vyuw�D%),A��X"y�ny@;�ܖ	
+)Zv��I	BIEhG)mɺ�����������������<8G8#0SRJKEDFLBU����Lm����J?vVTPF52FSTL6BMKJOE7CZ[\_u��������������VOmvxx{}{}{}~����~yp0"2Eg�g<KNKJIJG%*"1:;"%4UJ:!s�������vHC<t�������$>G6M;LG?8>9:1<�<:cFZJTLCGD=E.DB@IFQH<@C;:7��mo���n���������������������������������������������ɾ��������������������������������������������������������������������������������������̿��������������������������������������������������������������������������������������������������������������������������������������������������Ͽ����������������������������������������������pZ!S��]ytk`XSG>8401.,*&%!#!5M~wxwtmh^UOHB><99555312130/-)	$Tz|yrgYMB;8DmZ]2%""""""" U��̩�}wrkhb^[X]\WW\aY}�����������������������������������൤�������}yyx���lmmjgeb`[UMB&Q�VSY`�����TCB?:2$L���hTJGD889DmspnlZQ�������������������������fS��������sb`ajiihhbP@3��������wmklkifdc`\`loprrq\K([��������������������n��\KJ<fQ7x,S>! %3IHF@@�׼�����������������������·M_�������������������|zyKO3!
+E1TIHNJA,$&#G#SZ#.)O'%|������Żv]ge������������=Ic�&;J>;?;::,�9$04+2vL_`^QEIGK]^YD$gx{zyx|��~gs;&htn>P��~|y~~wvE#(++��j�2!%5*>�ڔ	
+*-, CG.b_aVoǻ�����������������8I����ymjl�3
+7C0+)(),,' W����s�����FAuXSOC56HVRM:EPRMC3,D^^_a{�ο��wv��rrr��SQmvyz|}|}~~���~��~xq."0?j�`?LQMJIIF'.%0Q@ H4pdE$u���������b~��������9E99=B>87@7+/A�;(491;6459;83(%+*-11034:/(?��g13-*-,.*3)& q���e���������������������������������������������������������������������������������������������������������������������������������������������������˾������������������������������������������������������������������������������������������������������������������������������������ҿ�������п��������������������������������������pY#S��ZttmbZTI?7501/-)''#"7Q{wwtupg]UMGB?;8765432011/.-(
+"SyyxpdWIB<;Tyc�=%"!"!"!!""M��˩�~xrlga]X\_]WV]b`~�����������������������������������ඤ�������~|yx|���zqljgbc_\XPF%R�JQSb�����KDA=:7'C��������tsqxuonjYP�������������������������jW��������tc__ghjgfcSD4��������unlikhigddaajnoppr\N*Z��������������������l��YJIAiH4n+YC1  #4LGHA>�Լ�������������������������Ja�|�����������������}yzIL0$F2F*""$!)3 < HVOj[.G%(}�����������Ž�����������?Hf�!$@]SFD5,-,�;%394!5wM`_^SKXXV[^[D#bu{yyz}���cu8$grk>N��~}y||wu�I#*'`�@&{y $'I�ۓ	
+/
+c?&"@C*XP][#tǺ�����������������3W���������2	$FHDGFHDGHV���pVZ����FDrRQOH;<KUTN7FOM7&)>S__`d��Ͼ�����������PPmwzz{}}}~}��~~��}wi,"0;p�VBLOLKHII$0$3cG$5^W5%{��������P;W������� " &$!F�8!! @��d5=>?A@?AD78!p���\��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ξ�������Լ��������������������ǿ�������������������������p^T��`xukaYRI@741/.+*'(% ! :Q}xxxtof]VNGC@<:8643321210..+	"PvxwodXKB;588#9*#"#"!!#!""F��Χ�|upkhd]Z\^^WS\c\z�����������������������������������两������}{{x��}z�uljgce_[UPG!C�QPY`�WBCHJFDA?+B�����������wuwpom^R�������������������������kV��à����tdabfikihcUC1���������|yurnpmllhhlnmprmYN(^��������������������i��]IK;1)&'''*",JHHA@�ս��������������������½���H^�������������������}{}MG0'C2K@@DA8(*7:2H(i`k&'~������ƿ����������������=Id�-OUSb]M:/�3%9><#5xJ]_^YT[^^]^]C#cx||zy|���iq9&guo;W��}{y|{xuQ$)(!sb>�$ #(F�ؖ
+4_`1	C>GlgT8sȻ�����������������2GKa/+94#&558* ]���lKi����DHsQSQC68HWSK;FOM?=FKS]abk��;��u���eQSx�RPovz|||~~~�~��~���xd)"/Bz�WCOSMIFKN). 0U:&Ii0%}�������Qd�`H����}�~I�:11.,*#!"$'*(=��c)1-.63/07.+${���S�����������������������������������������������������������������������������������������������������������������������������������������������������ͽ�������������������������������������������������������������������������������������������о�������������������������������������ж��������������������ľ�������������������������oZ"N��_xskbYRI@940./,,(+($"""!!7O}yvyuog\TMFB?<;854312132///+
+#Ox{yqeYNC;52/,)&##"""!"  H��ʩ�~xrkif]WX][UV\c]u�����������������������������������䷡������|{{{�~os�vmigbc_[XOD&B�QOWb����sXTDCA;%<���qTOHF<:4:lwsqn]R�������������������������hS��è���|rkgggiihhcTC2����������|xvuuqnlklkpoqsnVQ+`��������������������e��`FF7'#$"" $ ".HEFB?�Ծ�������������������������K^�������������������||{LD/&A4N98<95'*:5[1.e7H%)�������Ǿ[GMOHFNGO�������<He�%#1JOM`bXI- �5 )+&.xM_a^XOQUW]]^E!cx{}yz|��hw8#cun;W��}|zz{yuvI#*+<)%&+P�ۓ	
+/hu`:#
+D::="!mû�����������������/$ %
++HI35IHG8!c����������>NxVSPH>8FTRJ@GRY]YXYZacbe��˾�����������PTnuz{|z}����}�~|f)"/Bx�QCQPJLIKR'/"C_@:Oa+#��������=���-\����uJ�4"OH@L@7*7'D;:E)H��]#z���V�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������οѾ���������������������������������������������r`#P��dvrkbYRIA83///,,*.,&# !"!5N�zvxtoh\TPID@<;7534341220//* Ns}xpeYMC;42.)'%$"!!"!"    B��̬�yqlje^YZ[[YX]c\u�����������������������������������䷣�������~}z{��rw�tljhdd_ZVOE&@�]QUc�����{lHGC:)>����|~{yqaV[ovsrm]Y�������������������������kT�������yskgddgjjgbRD/����������}yvusqpllkjkqqptWH.^��������������������h��[JG8/3',1-$!"0HFFB?��½������������������������L\������������������}{|MI/ $@0G;41*&(5#8LM0]MD)*���������bQXRW\eXT�������<Mg{!'CZX[]SK;'"�3%1/"2}N\a`SJMEH]^XE"bv}|zy}~��cq3$etn;N��}|yz~xtwW)-*K=%*E�ٓ
+
+0BG[eL@;<TTVR uƼ���J;:@CCFCHHM���2=;=	6B.7AK906K f����������<OnSQPC07KTTL:JTZXT]b`cbcg��˾�����������JWmuz}|}~~����~�xc)!1H�MEQULKKMP$5$ G]C8dk)(��������A���1e���~�s&/9345502.R�7<15:B<)+#4@4E(L��X!""*~���T��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿����������������������������������������������������������п��������������������������o]#J��`wrj_XVM@8410.,**/,$"  ! 8M�yxvtnh_VNJD?=<85442310000.*
+!LpzxpdWMD;53.(''&#"#!"!!!!<��ͳ�}voifc\VY][WXZaau�����������������������������������溡�������}xx{xstvjmgffc]ZVNG)>�YOYg�����wsOF?=+?���������VIcmvuqlaT�������������������������jL������trmifediijeQD1���������~zwusqpnljkmnoqpZS.Z��������������������e��]CE>d`+Kx{3% !"-GBCB>�������������Ľ�������������Ia������������������}y{IN0"#$	<%BYQWUH.$% 3!D>4GV%$�������ȹtxytxvvsg�������:Ici)DSK=;/*)(!�5,@?-1zM\]\VLXTO[]\D$bz|}zx|~��gl.(dwp?L��}z|�vv~Z&--e�g+$(D�ۖ
+1amQ?<Fk__`!  yǻ���}pxx;4qqm`6���,T��{yski�#
+(EED;'4II%f���mFy����<SqRRND7<PXSL?LUQ=-1G]eefn��ͼ��{r����}{�FZkv{z{~}~~~��~�|w_'!3E��MCQUMKNPK#1!5B6SkY%*��������Sc�gS����w�m,6=659;112 W�/! !!$O��M'05451/1/'(����N�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ҿ��������������������������������������������������������������������������������������������������������������������������ú�������������������������sc"M��^upj`[VNB:533/-*)-,'#""## 7N�|zxuoi_VNID?<;86623201111.)	 Kw||seXNB<73-*(''#!" !!""":��ζ�}vmeeb^YY\]XW[dbn�����������������������������������庤�������~{yz�����tniffc_[WPF&?�SM_i���{`QPLGCD.<���������yflrxxrncO�������������������������nR��­��zxvmhefhhhicTC1���������~{xusqonnmlnnnroXP)X��������������������k��\DJ?pI:uJw[$ !+ECE?D���������¿�þ��������������J]������������������~}||FV6'*&	B$CPGKF7%)%!3"72!)#+�������ȹ�������ӹ��¿���4Kdj$$EPGENC5+( �5/D>'/wO`_^\QIJR[^]>"ey}}yx{|�gh-'evnAL��y{vvyV$/0%z��S$%(K�ݔ	1esKFCG^! wɻ��������7w��tH���,e���������$
+$(%&:H%m���be`����7YsRTN=48HSSG>KRA1028Wbcfk��ξ������~K|��A[kvx}|}~���~��}u])#4G��ODRSONQOH/GV>\xM!,���������J3P�����u�j"-.)'(+('%Z�.),,--,'%"$$'$*%'%""!S��O&15;98@<62,����I�������������ǿ���������������������������������������Ƚ��������������������������������������������������������������������������������������������������������������������������������Ҿ������������̾���������������������������������������������������������������������������������ڿ�����������������Ⱦ�ƺ�������������������������q^"Q��Xrpi`ZTLA9423.*'(-.*&&#&%3Dzxutoi^VNHD@=:77742310010/)
+Gu|xofZOD<62.+)'$#  ! !""!=��ε�~vrhda\WY\\WX[c`k�����������������������������������丣�������}{wz�����plggeb^[SOC*?�TVc[��`q_<>IJC=*;�������yojjs|ztqfQ�������������������������mS��Ʃ��{vrlhedfhffbSC0����������}zwutrnmkkmnnpouYO0Z��������������������k��XDG8f}w`$}V !.FCC>I���������¼������������ľ�µM_�����������������}}z}GP2##!
+C"0+%$!+%#6:RiS1$&&&�������Ƶ������ȴ~�������2Hfg&FbW_ddZ='�50?7%/wM_]^]ZPSY_\[A ]v{|yyx|jj+&esn@R��}w{}vvzR'/*j�h�p!%*G�ڙ	2,/5B:/:#%!"!�ʺ���������bRHP|���%KI!!(c-))u"
+(A=>>:;;CO o���Z?f����0TlTQO<42@OQI=KSSY]_S[fggj��м���~�������A[nux{|}}�������{u['%6E��JFPSNQQJE *29'GWF!.��������wg\l�����v�c_�.IEL@?BFDAE'15:3>?HB>=<!V��I! /����B���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο��������������������������Ŀ������������������������˼�������������������������q]#I��VtslbYTLA9411/,&$'+,&$%$% 4Q�xvsuskaWPKE@>;9755410.10/-)
+Ht|xpg[NC<62.*)&#" !!! 9ðɲ�{sngc`\VY^]WV\dbj�����������������������������������伡�������|yyyvq|ypnlhhdc`ZUOC+D�_OPQ�����XCHD=7*?���kLHH@@:9@q}zvsgO�������������������������nM��ͣ��|toljfdeffhdSC4��������}{wrqqnljhijlllkiWO0Z��������������������g��_HG61i^/$M-"0BCB>C�����������������������¾�ĺP]�����������������|{{JI. #	5);;<>@@,(&(;1=/dXL'$#���������gUrxzqbN[�������4E_s$6CFYYKYM7!�1/A<(1v N^_\UQSPV\^aF#[pz{zyz}��hl.%irk=S��w{~yw}Q',)h~g�l&(H�ږ1
+OD@
+A8Ceei]�˺�����������������' "-nt
+%;99679<BP#q����������.[lSRPA46BOSH=NVUW`XSZgegk��ϻ�������VY��IXkuxx{||}~~�������{vY%$6K��GEPURQNJF"-ZsW"4�������*HF=;����q�`#/,21'c�-?>D76=D:>@)5883<7E=;;>!W��@" .����B������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿���������������������������������������������������������������������̿�����������������ƾ�ɾ������������������������q\!K��Vtsmd[TMB:6330+&$&+)#"$"" 4M{yxsttkaWPIEB>:76632102010.*
+Fs}{tfZMD<73.((%"   !!!!! 4��ȱ�{smeb\ZVXZ[XXYb^g�����������������������������������༣������|z{x�����tjiecc^ZVMA)E�aISc�����kCEA>;':����������~fv|yvqhO�������������������������oG����wqnkhgda`a_aWNB.y�xuvurpplmkhhd]^_\[]]XYVXWN-Y��������������������g��\CJ9%$ #&J6",EFDB>����������������������������N[������������������|y}GQ2 $56PAA@FP>+($;A7 =[Y%"��������ɚRKSRTR]��������0E\y!#9RHVQJUM-!�20BD01vQa`^PMLKN[][?$]qzzyxz}��ho-$iuq:P���~x}�~xzQ',)1w��V+$'J�ڐ/
+cmZ+B7J\I[["!�ʺ���1-10!\N5N�����$-|����u[m�
++G%q���e9s����.izVQO;1<GTSG@MSKBFEG]kegg��ξ�����������=akswy|}|}~~�������ytY%%4H��BFSWVPHKJ"0OZS!4��������\���O����s�U-F4>C9d�(!"!!%#"$! !"%$$%Y��<*435,0+20+!)3����:�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ҽ��������������������������о����������������������Ľ�������������������������pd!I��]wtng[TOB95352+%%(*(""$!!!5GxzyvuriaXPIGB><96632022020-*
+Fs~|sgZMD=94/)&&#"! !!"!"  /��ʹ�zsmec`XSWY[YVZa`f�����������������������������������ἢ�������}yyx{�~}zqjihfc]YUPB,H�\IQ`�����eCFB@:):������~}xvp_mzzvriO������������������ÿ�����pF��zsvqljheaa]ZZUVPMB-v�ytuttrpnkjifedcb_`_][ZY[YO+V��������������������f��aIJ6)%$%)oc"2HGEA?��¾����������������������üR^������������������~{y~ID0"$9-JJKLKK4(#$41el"!6g((���������͡r_^_t���������/Fam$GkXU\\TE-!�00B@*2y N_^_ZGOTQ]^^@#`s{xtx{~��kp,(fvq=M���{��}yP'.)o�b.#)K�֑'[dGK8G`-OW"'�ʹ���@���h{�yI����� t���j?���l
+.HFFBC>CMK't����������*fvPSQ7,8JRQHAMR7$&+5[khhg��ѽ�����������:`msx|~~}~�~������{uX&&7M��KFUYTNJLG9DgX 8��������G��\j����x�W!1*++'h�($()(!"&'%!# !#]��;,<64436>>@'.4����8��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ��������������������������������������������qc$G��Wstme\VQD;5121*%%(+($$%"! 5M{xyzwujcXNID@<;97641211/1/.)Fr|zrg[PE=83/)'&$##!!!##"!.��Ƴ�xqjdb_WTWXXVWZ`ad�����������������������������������㽢������~}{zuvrnpnnljhdc_ZWOF)F�[HQY���XWBFEF?8*;���rSLILC>:Bq}zxrjN������������������¾�����qB���{xqnjgfa`\YZVSRO@*u��}zwturokihgedeb__]]\YYWVN+W��������������������d��_IK<E]OKPnK.EFFCC�������������ɽ�����������ǹMb�������������~�����}zzGF)!9+AFKLJE0*&(7.YL."*'*�������ƹ��ų���Ȯ�������2Ffo 4O@38@238�3/BA-0wL\]_]X\\[][\@%auzxwy}���ot/)dul=R��}~{~�{x�N'/'OA#(J�Ґ*]hIQ8.`jl="&�ĺ���S=83-.(1i�����Z@%/L6
+9E**<H,*7A*y����������*]lSQP=4>OTTI>RYKQWTObjjin��Ѽ������l;R��6`lsw{}}}~�������|uQ"&6N��DGW\QLLMB=aiN9��������lH=Y�����r�Qp�&8AICB>FG?06>9;<<B>2\��7 "# ""$%&'!;����4��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ�������������������������~sa%D��Tuung_YQD:2133+%&+.)%%$#$!3O�yxzxtlbWNJFA>;86541211120/*
+Cp{zrg\NE?830+(%$""!""#""!$��Ƹ�wpjda^YSTXZWVYac`����������������������¾��������������������|zxvussqomjhgec`\WPH(B�WMRY����xFFEB:9.7����������~|~yupgM�������������������������rC����yojfed`^ZXWVVRN?+v����yuqonigfec`_^^\[[YYWWTJ,P��������������������g��^JL;Sxssps@ !/HHGAF�ڿ�����������������������˻Mc�������������������}yxHJ,"'=6G?JLAI7-*2@:5#'&'�������ƳPRZY]_]]]�������/Icu"2!"#""0.�8/@>)0uN^]]____^]_\A&by{{{y{���mr1)ewp:L��~~y{|x�K$.($(N�ؑ,VsV
+Q6,5. "$�Ļ����������������� 3
+(/#+3*y����|�����%eiURO:3:OTUG>PVGSiogmnkjj��̻������umm��8_ktxz}~~~}����yP!(7R��FO[[MIMNA!D(;9?�����������������q�Ly�",7FBNF>@A6-F@DDFFE3f��:# ;����2�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͼ�����������������������������������������������t`#@��Yuwpg`YPC94453+'&)+($%%%&"3H�zxzwrlbZQJGC>;965323211/1/)
+As|{sh[OE<81-,*&%"!!!!"##!*��Ǹ�vphe`][RUXZWSYac]�����������������������������������ྠ������{yxyz~{zwpiggeb^ZWPF(D�[TRUu����gCDC><14�����y���r{~}|wrkR�������������������������r=���vlida_\[ZTRRNKJI=.w�xwsnmiiiebba`]\\[YYWQTTUSG/T��������������������h��]KJ6%""#"K?"0KIGBF��������������������������ǹJe�}�����������������}zxJH*#)I0<(-4&5617,9 +du2"$'&)�������ƹYZXX\]XZW�������+EZy"$,""$# 3,!�4+;:+2wM^`_^^__]_]\A![u{{|{|��nt3(gwp<J���}x{~zy}O)/* $(K�ُ	'X5A`[TN �»���80142461T�����!QE)/)'"%.f"6568<B=>C'z���JC�����"daURNC87NSTI@O[MDLQUemmjc��̼�����������7]jtzz}|}~�}~�����~wO &8Q��DQ\TKLMMA#9?�����������������p�G{�#")'$&').)"!%$%"!f��@471,$+,*+03.&B����0������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rd"B��Xstme_XQC:5342)%$'*($"%"#"3J�{zyvrjaYRMGA<;9754431100/,&>s{{rh[ND;71-*(%$#"!""!#"!%��˸�{nid`[WQUXZXVYac\������������������������������������á������~|{xy�����ukhgea_[WQI*H�_WRP}����fCGC?;,5���w[W]q�YHow~|ytjY�������������������������tB�|oslmjdb][XRSTRPNJ@2o�zwrqrnljgfdccba^^ZZYWVUVSM+R��������������������g��_GF:&$$"%;5#"2LIHAG��¿����������������������ƶNc�������������������}{vIP+%@,D:18;>22=&2!"P1$$&%(�������űt}|zn{wyn�������0D^�! 
-946863<+�5/>;(0x"Ma`__^____^aA'_s{|}||~��rx9(gvn=O���~{|�{x�U*1+!'.O�ڑ+!_G/*N0<b`aZ#!�ź����������I`�����edSXSU_o��!"17BCEICEF-�����[�����(deTRPB7;MTRDCSU7((%5anloq��̻������y����=Ykty{||~~}���}wO (<V��GQYSMPNL=,<D�����������������s�G)'$#   "! !!!��"f��G@>7.'46?<@?<.?����.����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ͽ�������������������������������������������������������������������������������������������̾������������������������¾�������������������������q_#=��Suwob^YPC:645/(&%$('! " 2I~}{zvrleYRMGA=;98643312100,&=o{yshZQG=61-*&%##$!!##$""��ɹ�xnic^ZTNSXZXVXac`������������������������������������Ģ������}|zwxvx}�pnjjfc`a]YPF)D�^WOT���vQ?DFB=;05��������|uzwr~~zwmQ�������������������������tI������yrqmffe``_c[QD3w������zyxsommmkihfccdffefYN,R��������������������h��ZCE=?JFB?=-""/GFFBG��������������������������ƵMa������������������|{wEW/=1F>AGBF201%4![K %'%'�������Ǳ���ȟ��̰�������+C_�$BbTXbYXS5 �02D>$2v"N``b]___ab_]:"\ux|{z{~��wx9)nvm;N���x{�{x�\).*#,/R�ڐ.$u���9R0B`^ZU ($�Ĺ�������V;=+\�����6&%6Wt����7VNVK>832)4�����|�����%fgQQM@6;NVQCEUWPVVSShnloo��ͻ������llh��6^muyz{{~~~~��|xN!(=X��IRSRPPNP@7IJ�������|OJQPx����q�^D?;8746996445//�~*+,/,%$&&%%%'# !!"l��3%%!
"%%)-00+F����.���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ý������������������������p^%?��Pvvnd`ZRC8644/)&$#%$ 0Ht|||zunfZTNFA<<97543210000-(AkzxshYRI>82.+'&$##!!"##!"��ʺ�|pg`^\VOSWZXV[ac_������������������������������������ä������}{xxz���|rmkfc`_]XPG)E�WJJZ���~zUEFB>=46����ormhWKVTh��|ysW�������������������������wM������xsrmigfiimiVE2w�Ź����~|zxvttqmmkhhlonqv]R+R��������������������h��]BE>`���y}K"-GEEBF��ý����������������������ɷKa�������������������|z{FU.?-LIC:=E/+"
+"25IVE&&&$�������ʭz�ȣn���������$?c�'JmQQ_UQL1"�/-@8"0u"Saf�k]`b`_`]:$dwv{zzz}��uw6$ism<K��}}z}�zw|X'1+ &-Q�ܒ0:$\6LJJc:&&�ĺ�����������������=������kC %4440)(,3-0����hbx����*gdQQL<67ITSEBV\[cfi\fnmpn��˺�����������7bksz{{|}~~~�����|sK"):Z��NQRPOMLN=0FO��������eF]F]����w�aJBA?;:>@?>>@@<;��<5=T=4257676?5$ %'(#!###'(''&"&l��)M����,��������������������ȿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ��������������������������pa'>��Ruumgb[QB75341)&$"##!/G{~{{{wmbXQKDA=:9753331/00/-)
+?k{ysj^RH>94/*('$""! !#"! $��ǹ�zqf^^[VRTVZYVZ_c]����������������������¿������������£�������}{yy�����vmigdb_]XRJ'B�UJQ^�����rEHC@=58����ff^X[ZW^|��xpS�������������������������sB��é��zsplgghijifTE3w��������yyyxvspnmjjiknrrn[T0Q�������¿�����������a��cDD6,2Wy6aQ .EDDBI��¾����������������������ʸKf�������������������}{yDU.R/ED4;EB**&3$K2T%(&(�������ȮNl������c�������%<b�(DZ1(5,-7-#�1-?9"/t!Oa`������ybY>%`yz||{z~��l{6$evo;N���|y{�uv{L%.+-3",*  &+T�ړ3idJC]23&-%! �ù����������{~�����:g��f*
+3�����������'gaSPMIGGNTTDGV[[b_]Ripnoo��ʸ�����������8_nty|{{}}~~|�����|pF!*9Y��OLRRNLMQ8)@R��������qF�`j����u�YCBA?=<>@?=>>>>;�}B4L[;799<:;<B;("&('%#$&(+*++*'-m��2'$#&"*..++)+!U����'�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�����������������������������������������������������������������������������������������������qb'?��Tttmh`XSC75222*&%##"   3C}z{}yod[SLHA=;985332/0110-)
+
+:n{{vk]RH@:71+)'$#""!"#!  #���Õzqf_][UPRUYYVVac]������������������������������������ţ�������}{xuyxokklkhgbbb^ZRG)=�YHMZ~����q@MEA=5=���������������wnP�������������������þ����vE�������ytqmjghjkigRE0z��������}zwurqpljlllnrsn]S.J��������������������\��`EF91S~�lw>/DDCAH��������������������������ȸLk�������������������|{wDl0	M9L>?C?B30$	(4 #H<*%'$(��������¡����ě���������'>bz"%DU-#-##-*$�0%1,1w!M_`ZV_fvbuoZ@$^y|||{z}��o};%ftj<M��~~x{�tw{N'1)"o�d��L%)[�ؗ2_��>]-NmjiQ#'�»���/>4621/%C����M."3LPL:"4�����������)h`SQRRQNQTRDIVR:7;=Gkpnni��͸�����������5cqtyz{|}�~~��~|tD*;]��GJSUOKLP65>S�������|DE5<�����s�^CDA?==ABA>>>><:�t>9WS878:><;=D@&!%&%$$()++*++)(/u��H4=@Y\`baeiggDU����2���������������������������������������������������������������������������������������������������������������������������������������������������˾����������������������������������������������������������������������������������������������������������������������������������������������������������»������������������������pb)<��Poqmh`YSC:61//*%$!!  #".B|~y{{xqd\TKFB>;984433010//.)
+
+7my{vj\RG@;50+*'##!!!    !�����yme_]ZVOPUYYWU`e[������������������������������������Ơ�������~|zx}����tmhfdbc_ZQJ'0�QFOb�����jHHB@>5=���������������}wnT�������������������½����tH�������zupmhfjmjjeTE1x��������|zusponlkklnoopp]O3I��������������������Q��aDH:R}Q;QG  -DDDAJ���������¾���������������ɻKk�������������������}{zFi+X3RLC26?,/!
+ -MD8"$&&'��������������vn�������'<ai#<SB;;/+*"&�1$,'7w"M_a^n���|^_\A!`||||zz|��r{<%huk<E��}x}�uw}L%1)!~jqv�p%)X�ܖ'PdM_+7?DB5 "$�ú�������������������������
+2����JL�����(udRSMD;DMOQDGUT7/,.Emqpql��˹������sxr��:bpty|z{}~}~��~yvD!)>_��AISSMLOU4@<]����������q������u�ZDB?>=>AB@>>>??=�k4;K;678;@A>>G?%"&%$#&+-+'(,-*&.u��C8BBdjkkorrrzO`����G���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¼������������������������rb)5��Sqtpja[QC:60//)&%""""!$!/C}{|{wsh\TLGC><974234311.0.*
+5mz{uk\NE@94/+(&$##" !!!"!���ŗ}oga^]WRRWYXWV_cY������������������������������������Ǣ�������|{xy�����ykjhdb`]VRJ+/�]LQZ��p��THC@?=7=���������������}voU�������������������������wE��ñ���|uqohfjljifTF2t��������|wvtonmihhlonoppoYQ4M��������������������J��gBF=F>! !/KDDBI���������½���������������ɼLm������������������}||E[( 	Q.TSIDA4 ,!
+!+1eW[%%&&'�������ƽ������жt�������'=av! 6STR_RJ4#%�,.?9#7u!Q_`^x�csi`^]@"i}|{}{|}��q{?'lum9J����|}�uu|L%2*}Rmeeu&'U�ܔ!?\Cb,$DP?! #'�ƺ�����������������"����������
+
+4����^Y����|,maQTM@8BKPQEHYVEEELTqqqsq��ʻ��������y��9douw}||}}~~~~~���|v? *Cf��9GSRKKNS3I:b��������YADe�����u�XGC@@>???====>?C�v?>JD;9=@@ABBI9&&'&$$(..)%)-.*'1v��:4@>gnnnpooprNi����N����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͼ������������̿�ſ�������������������������re'7��Soupib[QE:521.)((&(*(""/?|}{|zvsi[SLGB?<964322200./,)
+
+1l{{uj]SG?:5/*(%##$#! ""! #���Ø}ne_^\WSTWZYVW^cX������������������������������������Ƣ������|yxv|}y{xrkifbaa]USJ-2�^TTQx����ZBCA>>7:���������������~xnS�������������������¾����vG��Ư���xloiiklkihUF2n�Ů����xjkigceeca\^jnoppmYQ.H��������������������O��_?G>IY[`TI3 -IECBH���������½��Ķ�����������ȻNi������������������~}}Cg+&?*DON_\J),%#/KK4 #''%�������ǫ|���������������'>`t!.ENIjaeH)&�,/?9#5s Q_`^t���_a_];"d|~ww|{}��px?'hrm7Q����~~�xsvK&2'�Nkbiu')Z�ې##TG<
+`'HbSaD#'�ú���2256714 b�����> $O	5����S`����u+n^UTPKNLRSPHM[Z/(2*Mprrtr��ʼ����������Adotv{zx|}}}}~~��zo< .Cf�u7ISQJINS2J6f�������kR��F�����w�OCCB@<>A?=<=<=:?��J@IR?;>B@>?A=2#%)(%&(++)'*---,5w��84;:gmnnog]ZW>f����J������������������������������������������������������������������������������������������������������������������������������������������ȿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rf)7��Spvric\OD95210+**).-)# 	/>|�||yxujZRLHB=:97632220.//-(
+2j}{uj]SJA;4/*(&%$"!!!!!! �����xpf]\]WRPVZZUW_bU������������������������������������Ǥ������}yxrzvpsrojhfdbb^VQH17�cTTV�����l9A==>7:~��������������}yrS�������������������������}D��������{debjkkjhcVG1j�ð����|nmhigeebaaakopqqn\T5N��������������������T��bBG@l�iyU% -GDBBF��������������������������ȻOm�������������������|~D'c+&	J*BA94-%/')/:$'%'�������ȫNSRSRUPQ[�������&Agw"$>Z_`v`\F/"�-2B;#5b!R^`]t��pa__:#]uslpyz|��qxJ'jtl9M��~�}�ytvL'2&�gzkt'+V�ْ#ej7^&TI*QU $)�������������Fl����{D 
+8����imy���m-s_SRM<7DPTQDL]cTXc\huuttp��˻������j]c�~?cpvx{zz|}~~}~�~~��zm=.Ej�n<LTOJJOY,E1h�������Pv��Qv����y�MFDC?>@BA@>=>><A�o?BSY<>?D?<:@B7&&*)''*--+''*+-.8|��=567bnmk`NFGM9g����H����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Խ���������������������������������������������������������������������������������������������������������ſ�������������������������sd*3��Suvrkf]PE94232/=\WZ[C$	.C{�}{zysj_VNFB=;:7333420//.,(7fz{tk^QGA<50+('&%" ""!!"!���śxmg`^[VQRXZXUW_d^������������������������������������ǣ�������|zxv~����xjhefda^YQG56�g[SNo����O7A>;:3;{��������������}yuY�������������������������|@��������xfcbjkjjgbUF0n�������}nkihfedcaa`jnnppk]M3F��������������������R��c?E<_F6�%RO% 0DAA@C�׾�����������������������ȽMj�����������������~{|E?k-%	O#-,*)*'/$//$&'+�������ȩW^\[^YXX[�������#8]y%&DaTPV>3.) �0/?8!/JO__\v�e�\[_`:&_spoqz|~��rvK#hsk9J��|~y|yu{N%5%!k��]|S&*X�ؒ&!az<f*OaSVJ"$*�¹���?9887543n����v z?(
+
+
+
+;����_`]���i0s^TQN;(2APPDO]fnvqpsvtssq��˻������`3[�zAcrtzyzyz|~~~��~}��zp>-Hj�h;LWOIIMX*D- p�������Qp��J~����w�LECA>>@B?=<===<C�a5;[[BH@@=:8MM<&')()()-,*))+++/:���@557^ojX@=AJL7h����J���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ջ�������������������������������������������������rg*9��QquqjcZOF:6358N��tp��? #"
+-Aw�|{yri_UPGB>;965332320/0-*
+4hz}wl^RHA;6/+)'%##!"! !"!���ȡ{si^^YUQTYYXVY]c\������������������������������������˦������}zxx{����wlhfdba]YSG52�q\KO|���P@GD<;99y��������������~zsW��������������������������F��Ķ����{jcbjkmljdUD-j��������nlkkhged_`bknmopm\T2I��������������������P��iBE@[F6n-DH !.AAA?J��������������������������ɺNj�������������������||FBa"%
+W5KHKLGM.1"
+2-##&'�������Ȯ����������Ŀ����#;[q $@MC:?>9.%#�+->:#6J!P_b\�����^`a;&dwqrxy|��ouL#gtk:G���z}~zx~S'5)!/*!! '+]�Ԓ	.$S3.
+k'1YY?-!$/�ú����������������v$uE^j:%
+
+
+A����WS\���b4o_USO>00:QPDQ[knrsssqrtql��ͺ����������{Cdpvyz{{z}~~��~�}t</Ls�e<KTNIHMR.<)p�������vEnbJ�����z�GCCA??@@>=<<<::C�]48dQ?F=;<:8@B7&')++)(+,)*+,+--5��y?78Eam[EA@AEG6l����K�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͽ���������������������������������������������������������������������������̿�������Կ���˾�����������������þ�������������������������tg.=��OrtnibYPF:64:QW��S.s�M%$".Ax�}{zxtk^SOEC?;8543324300/,(
+-gy}wl^SIB<5/,*'$"#!! ""!!���Ȣyqh_]YTPSUZZWZ_d[����������������������¿������������ɢ�������|{wvy����rlhfca`]XSH4&�fSKY�����v@HE?>;:x��������������zsV��������������������������;��������zhdcjllkkgTE/k��������nljhfedba``jnmnol^S-I��������������������S��f@E=94+2%+(!-??B@G��ÿ����������������������ɺMm�������������������~|}I1`"$
+Q6L=8<DL3.	1. $$%*�������ƿ������λ��ÿ����"@^|#<VUWbh_<$!�,*==#0E#T`a^x����^__:%dwvwz{��~oqK"erj;M����}}yx~S)9)!@.&(Y�Տ,(��2
+b$!' ",�»���TIGNKJN\�����s -8o;PXG#	B����q�����\9l_TRJ40>KQOCH@7@WbTNZqvst��͹����������rGbpvzz{{||�~��~~zq;2Kl�[?KOMJHNL)A$t��������nBQ������~�NGB@=>@???>==;9E�\3DY@FE>;::9780$$+.,)(*,+))+*,+5��u<47[jdEA?@CDF3p���J������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ti,;��Lrsmic]TI;7<_N/w�N-r}5&#!	-Aw�~{zxsm_TPIE?;9765444300.-)
+/i~}wm`UJB;50+)&$##!   "#"���Ǟxpfa^ZVRRTX\WW\c[������������������������������������ˣ�������}zxx|�uqsljigdc_]YTI2$�iSRS��yuPBFD@@<;x��������������{vQ��������������������������;��������wjeajljlliWF0j��������pnkhgffba^_kmlqrn\X/B��������������������R��bAG@RnrhegD"-?@CBF��������������������������˺No�������������������~|{L$_$! 	I3OQJNF?(/
+21 &'$(�����������������r�������A^�""1>@OegcP.!�-&66 .J$S```^qVje^\^:'dyxy{|~���omK"etm:U���~~�|xS&8(&s�F&()[�ӑ	'0nO)b!!$,�����[=zt[(lw?c����m7 8oZ��ĢA
+C����Zlr���X7r^SSH:=LTSOFN:(BWZ<:Hmvuu��ʺ������ePp�sDdqw{{{|||�~~}~~xm76Oo�Z?JPKIHKN-D)#y�������kXKUc�������LGEB???==<=;<<8H�]=OYHMME?:<=98/##(+)((*++&&')++>��w<31clO<BBABEE2p���}C����������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ�����������������������������������;���������������������������������������������������������������������������������������������������Ž������������������������ui07��Nrrnid^TI=6T{4.~�Icx@'""
+)Aq�~|{vsl_SMEC?<964433210/.-)
+2b{~wnbUJC>60+(&%##"! "#"���ʢvnd][[WQPQW[VS[d]������������������������������������ͦ�������|xwx{����piigfb`\WRI/&�fONN���mUAAHD@@:9s��������������zuS�������������������������{<��ƴ���xke`lklllfUH0g��������pnlhfedb`]aknmnnk\W0C��������������������R��hCH?iqa�XgU"-BECBE���������¿���������������ʼLm�������������������}{xF(_# #	D/OSOSK9$.	.. %&$)�����������������������D^x #9XVabXI7#�-!'$-F$Q]a^y����e_a6$fzy{||~���qoJ cvq9W���}}�zxO'5*4��n8&'^�Ώ'+�ygH
+l#!$.�����g3Zl�3SZBp����j%}|�����c>	D����cap���P9tYTPF46HSRPCLEMeefFDQuwts��˹�������{j�mJeqwyzz{{}��|~xl17Ns�TBJOKFGJK#D*#�������ER2Q7�������KFDA?>@><<>;<<;M�]AMWKFGORQNRH:1""&%%'(**)%#$(-+B��m42.eiB<AABCIN9y���yD����������������������������������������������������������������������������������������������������������������������������������������������������������������������ǽ�����������������������������������Ϳ���������������������������������������������������������������������������������������������������Ľ������������������������uj56��Pqrojc[PG=9q�)2��]��p<$""
+'Bp�}}{wun`ULGD@<864542210..,)
+1`~ypcVJB>60,(''#""" "$!!���ʥwne][ZWQPSYZVU\d[������������������������������������ˤ�������|ywx����zkkgeca]ZSI1�iIEMx����tKIB>?97p���������������|uS�������������������������~<��Ǹ���~zjebihijkdVF2a��������qnmjghcaa_`koonnj_V1I��������������������S��oCH=eT2w,@G ,DGE@E��������������������������˾Ln�������������������~{}F(_ $$	C,GTP[\L+++*$($&��������������ǧwa��¾���A]�!%D]`_[GD?("�(*41/J'W_aa|�f}�c_^6%i|w|}|~��loO!\qm=T����}z~ywzL'6(xtx�I &*k�Ԏ'%.E_/
+a#8@>?0  #.�»���p|��wn}�����e+���jH
+
+K����f=|���M:pYTOF6:HRSOFK:49ZfBATvwvu��̸�������zk�gLgqwzz{zz|~~��~~}}wj3#6Ks�QDJNKGHKJQ)'��������N�J�L����}��JFCA@@BB@?>>>?;M�Y8IMA7<GKDEJE8/! $$#&(*)'"!#$**G��i00-\RA?A@@EUV9|���sE�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ӿ�������������������������������������������������������������������������������������������������ľ������������������������sj95��Orqoi`XNF>>��B/��:Gt�o !!
+)El�}~|xsn`WMJF@<974332101/--)
+*`}ymcVHC;51,'%&$#"! #"! ���ɡxmc]\YVPQTXZVV\d\����������������������¿������������̢������~{ywvu����wjjfc``][SI6�oDKP[����{?CBB?87r��������������~{tS������������������ÿ�����:��ȹ���~{hecijjijfWJ2b�������onlggfa`a_`mmnolj_T3G���������������¿���U��h@F?]O-j2@F (HGE?>��������������������������̾Ml�������������������{|B)S &$F3NMH?7,.*'$(&*�������ǵ�������ǂ�������$B_�&(ARTZ`ZWL(!�+!7B:!.G$Uabbeb\dc__]6#czty}|~��mlS bwpAP����||~uu{L)5'!xdl�h!&*u�Ս#
+
+
+^!XhdhN!#0�������������������f/m6
+
+N���}Qi����I=lYSNE42CRSPBPHKUb_DFWqwz|��ʷ������oo��dPfrwyzzz{||}��~}}yf.!7Hu�LAKPIHIJH!O%$��������W=,;Q����z��GFB@??BA>=<;;;6Q�U7KL:8=HA331/3-!#&%%#%('#""##'(F��l7.1eF<ACACLSV6z���pA�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͻ���������������������������������������������������������������������������������vi23��NuroiaZQD;<m��A��-(?��%!"	'Ao�}~|yupcULJE@<97545201000+(	*`|}xncXKC=60+)(%$##!"##"! ���˟tof][WVPQUY\VV]eW������������������������������������ѥ������}zywv{��~�ljifdb_\ZTI8�oGIOz����W5BA><85q��������������}ypS�������������������������~9��������zihejnmkifWI4d�������qokiigdb``]jonnnn\T1B��������������������[��kAH:00&.%20 *JIFCB�׿�����������������������̿Mt������������������}{|@5[!&#=:PINKGG,.
+*(#&$*�������ɝfw~}���z�������!A]�) '8JF@@=:2$"�/"9A8"0D$Raos\^_a_b__6'byz{|{~���qlY_trBL����{{�ww|T'9),}��b* ''r�֏
+]!&@Z]K""0�����j4224440+����b!0
+	
+Q����t�����G?nXRPF87ETSQETNDDiofSUsz~���ͺ����������cKfpvyz{{{|}~��~�~ze,$9Ft�H@JPHFHIJV#'���������hr{�����}��IECB?@B@?<<:;;9Q�V?LH;>DE7---*:0 #&'($#)' !""!%&E��n=+>t]@AEEJLNQ7{���mF����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������;��������������������������������������������������������������������������������������̿��������������������������������������������ug03��Jprmje^UK??FaX@��2$8�U##!	%@l}~}}yrkdTMGB?;87565200//,*'	+]~~yocWJD>61-+)&%$# """!#!���Ρqng^[YTNQTX]XV\dZ}�����������������������������������Υ������|yww|�pnnkkjgfc_]ZSL8�jFGMr�uN>99AD?=84q�������������}xrU��������������������������7���ű���yifelmnmiiWH1_��������onnjigeb`^_knlmmq[S2G��������������������\��m>E:&&&'#DH+GIGBG�һ�����������������������˾Ko������������������~{z?C\)'	7:N@D65H15	*,%&&*�������ɞT\_VUWTQ^�������C_y  )BaY`khW9$!#�.6A8!/B$Qaw�k`\feWZ^7+bsuy~{}��nlZ`sm>M����}|�xv}U%6)#u�h4'%n�Վ	
+
+U /70!#5������������9�����`#_)! >K
+	
+
+
+
+Q���lP|����G?hZRQF37DRUQDQC4IhlG8Vu|{���θ����������eIhsuy{{{{|~|~��}}zg,%:Jx�H?JPIHHJGS"(���������SAJ�����{��FD@@?CCCA>>??=:U�O;A?8:?>698,*9/ #')(&&)'"#$#!#%K��_3#PomSDCELJPI1~���iF�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿�����������������������������������������������������������������������������������������������������������������������������������������sh0.��MpponjcZUNJC<9?��w^`X+!!
+%Dr�~}yskcUPKD?<:9655100./-+(	)]~|zpdXMC<88540*%$"!""" !v��Υnof][[QINSUXWUZc]z��������������������������������}��Ч������}yxx|���|okhfeca^XSL7�kMEFUP@=;<@EH><79q��������������~zvV������������������¿������6���Ȳ���xkgcimomjdSL3f�ǵ�����yxvpomjigaennlmlpZT5F��������������������^��n?G>5A<56TQ ,IJFCI�ι��������������ÿ�������̼Mo�����������������~|zz?Eb!++:2SLH607':
+%(&''*�������ʜ`ed_df\Z_�������"Ab�& )IdMPUYcP*"�(,<8!,A%Taa�������]Z4%etqx}|���mn`_vo:K����|�tw}T%8&)fJ#%&j�֍	
+JSC+"#4����������D127�����^/�[VXWZXd�W
+
+Q���hmp����FAnYTRJ98KUTNCTK9Eniequy{t}��͸����������bLgsvz{}~{}}{~���~yh,&9C�K>JMIGHKER&0��������Of�hI����z��EE@>?EB@=;==<<6W�S@??4576044..8/$%)+*())&#$&#!%%L��Q(5bnpfL@DIC9/2���gF���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¾�������������������������th1+��Lpoookd`[QMIJMK`ssU6)$!
+%An���}{tkaVPJEA=9:645521/---)+^y}ypeWKB?ED54:?*%" !!!!!k��ͬtpeZZYTONTVYWUZa]q��������������������������������|��ͥ�������}yxwty���tkhfeca^YTK6�eGBBHC=;=>AFE=:77o�������������{yvQ������������������¿������7���˰���xnifjllligVL5d�³�������yvvpnnjhmllmnnZT3C��������������������a��{CF?P��~{["-EGEBH�й�����������������������ͼOs������������������~}|zA<d!'%=/MTMNH=&0	#%$'()�������Ȭ����������������(BG9a�(%Jb534@[I* �$*97#+?%O__]U_eq}�j]6'd{zy|}~��qic ]vm?P����~|�wx|N$6%&'q�Ջ	  
+	BRb`R5!$8�������������������Z)LI��O
+
+
+
+X����Z�����B@lYTQG55BUSNDS@6SkcFB\xzt���̻����������_Ogrvx{||y|}|~��~�}xe)%:D~�DBILHFHIAN&8��������:���:����t�}ID>>@CA<;9799<9U�a^cWNRHA<=7;5G3%&)++**)&%'($"'(P��K,G]hnmQ4362)#4����dE�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿�������������������������������������������������������������������������������������¿������������������������vg4,��Opqmf`VKHHGHGIDA20*)+(&$!
+%Dj���}{tkaUOGEB?;:7644210..-)([|zxqfYMCBW?).2F6%" !##!! h��ʪ{pg\\XTPNRUYWVZ`]p��������������������������������~��Ф������~|zxxy���vmihffda^ZWM8�iDBDB><;:;?FA<:66o�������������|yuV��������������������������;���ΰ���xsohhkkkhcVI2b����������yusooljiklnmnp\S6E��������������������e��{BF:2;972RV!-AFEAK�к�����������������������̻Nv������������������}{}C)c$+$#L-LXS^\I).	($%&))�������ƾ������ͬt�������?��>c&#<fRY_\T:$�&4G;!,@'S`a`x���[si_6$d~||~}}���qjc [wp@N����}�vxO&9&!SZD[X( ',x�Ԋ	$!C?UP]K!"9������G>s�xLL[�����Y"R���i#
+
+
+_��pjl�����=GtYQQF=?EVSPFV:9AacZep�}t~��˹����������\Ifsxz||{{|}~�����~|g(%<H��AAKNGEGI@Z 2��������B���=����s��HDA@AA@>==<==?=^�EBNDEFFGGEHR:I3$())*+,*'))'$$'(P��J,/+0ZX5)%$""!1����aJ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο��ǿ������������������������t`4,��QsrkaXPHB=FKFFC<;9213.++&
+&@m���ztk_TOJD?;63345421000-)([||wqdZLDJ_;+*/C?$#""#$""!a��˥slh^\XTKKPWYWWW^]l��������������������������������{��ӧ������~}zwxz�qjlnihgfcb^[WL:�nLHFGEA:7;BD?;<82m��������������~{uS��������������������������9���ά��xrmghhjihdVM1a�����������ytqonlhhiinopn^T5D��������������������e��u?F9'$$!AT!,EGBAH�и�����������������������ͼLs������������������~z~B!c ,%
+!>6RL>;5-"4'& %())�������ƨ�����yy�������;��7f|&#2NSU[R=-#$�'/B7/='V``]�G�|\`a6'g~|�~}���skcZvo@P�����~xy|Q&7&$�x���X!%)q�Ӎ
+
+MXb[I+!$E�����@Y��rBo|C}����TM���j"	
+
+_��{JZp����:DnXTNE88>LSOEQEboq[CK^|~u���̷����������YKetxz{|{{|~���~~~y^''<H��;ALMHGGH>^8��������NbY[����q�zDEB?@CBA@@@A??7g�KIOEA<A>956<68,#%'')+,,+,,&#%)'U��O3<FOR6&" #2����aM��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο��������������������������������������������sf5.��PppkcYNGC?SQ<7?/;;*)5+%,#	"Cn����zuj`VOKGdZQ`X>24220/.,*	'^x}yrfZMCD]B.+.E9#"""#" !!`��ʡuqh][XUJJOTWXUV`_n���������������������¿���������{��֥�������}zyzy����{jkgfdb]ZWL<�rUJHFHD<7=FIB==90k��������������~{yT�������������������������:���ϰ��~vsoiigiijdWO2_�����������yurpnmkkkhmmom]S5B��������������������`��q=F=-,-*)01$&'%))&'/5DCCCK�ѽ���¿������������������ιNt�����������������|{C*c +"
+!:4OQ]ZNO4>
+$&&)',�������ǭ����������������B��6dz*#9ULG?6-(#"�(0<9#.;&V`aat���w^]^5#l}|}|{|��umeYun@T�������xw~T+5%$�Nyd�h %%l�҈		M K7! #@�����O<XNM��ZE�����MR���g(E1
+	
+a���^O�����6FjVROF927GSLFJ79Eebcmq}}x���˺����������XJhuxz||}|~��������x_');H��;@MNGFFI?&o=���������P@Jw����r�zKDCABDCA@===:81d�KP\HA=?<:75882'!$&%'*,,,-+&$'*%Z��phioc;' !#;����`K��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ�������������������£�����������������������od1(��LpplaTIE@A[\:6H,KM30<+'4$!	"<f����|wlbWPJD��[^m{@/310..-(&]x|yqeZLB@NK713B,#"""!!$"!\²ɥwpf^YURNMPPXXVV``j���������������������ÿ���������|��Ф�������}{vwx����~lhgdc`][UL<�tRJBBHC<8@DC><<70f��������������zywY��������������������������4���չ��}vuphgghhjdTL1\����������}{wrrmlmjjlmlno^T7A��������������������`��s?EDACB?==>=;><>@<=CCBAAAI��������������������������̸Ir�������������������}{x='\*"	!>.INQP@A0>	($%'&+����������ͣ���~���������:��2ap-"3MONUQH5#(1?:'2G&X^``g~~�][\`5$hvxz|z{���tokUtl>R�����|�ww�V'2$$�N{T�k $$t�Ո	 
+
+
+CDBFI.!>������m`v���dx�����K7���jSW^b�L
+
+
+!d����{�����/Jk[SQH84FSTMHNEU\cYKPj{yz���̺����������VHesx{{|}|~�~���}w]'*<I��>OVYTRRO=+j=������������K����n�tGDB??@@=<969;=8g�@KVEA=<;:75851%!&*''(-.,,+&$%((]��kfinI*$   #('=����YL���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ�������������������������sf2)��Nmoi_VJE@BUP=:B*CC/1>)&3'"<i���}vmaWMLB��/49�s0420//.'#Wy|{sgZMC;=A<83/($""""#$#"T¯ɦvpf]YVSMNMQXYST_^b��������������������������������w��զ������|zwww����xkhedba^ZUM;�lNA<CH?67BED>;;:4g�������������{zvW������������¿������������4���٫��yuoiffhiigYL3Z�ǻ�������|{wqoommljlnhln[V3>��������������������W��r>FEEFGEA@CCCCBDGADHGD@B>I��������������������������˼Kt������������������~}{|:+c +!
+!E*5*)6<4#8
+*%&'$/�������ǰ���~k^[Xh�������6��-e=",ELJX__K,�(.F=(5E(V``bp���}[]^1%h|~|{{}���vkhUtmCK�����{�xx}X$2$'�[}U�g!''{�֊	
+
+	I!U_\e=" C�������������������IL=#g���VY�����.LmVTOI;:HSSLJR<AGiV7Bg}u���ʹ����������WQgrx{{}}�~|~~}u[(*<H��KebhchfZ5*a$23+,'!A��������ODG;F����n�mBFE?@>>;;99=@@9p�<DG?AA?<96473.$"')&')-.++*($#'%Z��L_ne4%# #&**?����TJſ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʽ��������������������������������������������tm4'��Mlnh`UIE?=BA145*-.'*,&"&%	=c���{si_XPLC��6:-t�/22/-.-'$Z{~|rhZLB9521/,(&$!!!""##!Oêɝqnd[XWSKLPSUXST^[^��������������������������������q��٧������~yxy{�zrrijjgecb_ZSN8{iE4=DB96<BBED;75/e�������������}{uR��������������������������2���ġ��zurlfddfgc`UJ0Z�����}{urpqoljhdaa`__`]\ZXT5<��������������������L��r<GEFFGGCDCHGCDCIDEHFE@BDN��������������������������ȹFs������������������~zz<"X,	>3
+,&$'%'�������̢Z^Y]bce`z�������2��)_�E%:QTV\[WC&�'$+&1='U`a\_W]�u[^`2"i|}}|z~���vfhWujFL����~�yy�^'1#*���n�`4!()z�҇
+KDWMX< # A�������������������E	
+$i���ae�����+RiVSLH57HTSMJM44@ot_e~��z���ɺ����������RTgsxz{~|||}~�~~~~~|vX%+=N��;NUVSRVV3,cgvqlfX@@��������woo[K����p�lEGC??A?=<=???=5o�BKHACHF?754530%%('))+++))*(#%$^��EatU*##   %*+)G����UIſ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rj1*��NopiaWMGA?@I767;1%&00%#,-	>e����|tj^XRK@~�221~i-430./,'%Wx||tf[MB;30-*)'%%$$"!!##  Q��Ƞxod[ZVRKKORTXXX_]\���������������������¿���������q��ק������|{yxvwqpnkiihcca^ZTK8|rF>CC@>@=;<AB<544d�������������|zuT������������������¿������4x��}tzurnkid`[ZZXTQK3S�}|{wurpnkihgdabb]^_]ZZYZV69�����½�������������P��v?GGHGHSPQRTVTOR\GFHGFDEDM��������������������������ʻIw�������������������}yz=#_"- =4*#$%%.�������ΤYggaez����������/��6^�I%EUOKIDDA*"�&3B'Tano^`^cl_^b2$gz|}|���xclUwnDR�����}�vy�Z)2$Mg[5Q&6"')v�Պ	C#[W[^. # A�������������������A	(k���TN�����'VkXSPC/4MTVNJSHVYw������{���ɹ����������STgrwz{}|||~~�~�~~|yW%*@K��9EKJDEIJ2,f{��}�~[!E������������e����r�fHCA@@BBCCBA;863r�HWU@HUJF;75621$ %((**+,+*()+**%d��GbtN(#!!"'+*(H����SI������������������������������������������������������������������������������������������������������������������������������������������˿���������������������������������������������������������������˾�����������������������������������������������������������������������������������������������ο����������������������������ug0(��Olpj^TLHB>>L:5>@9'*99$'63)#	=`���~ytk_XSMC{�JNfD2200.0+'	$Xw}ugZND:31-*(#$%""!""""!
K��˞yndYYWTNLNOSVTU^^]���������������������¿���������q��Ԩ������}}xutrpnnlihgda`]ZTL;woH=CBCB;545;?>720d�������������{xxT}�������������������������5t���yxvqokgda_ZXWUQH0J��|zwvtrqnkkjgddcda__^[XYXR78�����û�������������N��y@IFEGHl���������bAIIFFFEU��������������������������ǶIv�������������������|zy9%Z +	#?-	/% %&%*�������̣Yj���������¿���3��2c�3$<C744-TR3"�*5G&T_x�bXSb_\_b3&fy|}���x`hLunAK����|}�ux�O(6$! (*w�ҌX*@=
+=+$0!#!G�������������������<
+*k����������$WjUSOB14KRTQP`x���������{���ŷ����������NUhrw}}}z{|}|~~{wV#,@V��9CLJCDHH//c�aYp{`A�����������������t�aEC>>@BEFC@:6664w�HU^NUS@E<52533% %&'(*,+**''*(++e��I^pF("!#&(*)L����QI����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦͺ�����Ͼ���������������������Ϳ����������������������������vf3#��Qnrk`WJHC?HX;3=A<&-88''35/'	<_���~yrk_VSMC��dmU;04201.0+'#Vy}~wg]MC;51/-+&%&#$"""## M��ΥtpeZWUTOLORUVRW]]X���������������������¿���������s��ԩ�������}zwutpmnnnkgfea_\ZUM;qnI9CHH<5337AHE700e��������������|yvRw����������ľ�������������4w����|uqmkgfa_[XURME/G����}{xtpnkmkhfdbd_^]][ZYWS7:�����º�������������G��vAFEEHJj`8=8�z2<�j?IJGCCDN��������¿����������������ƸJt�������������������|y}:(X'	$?-
+.#!'&',�������ʧ�����������¿���.��-_�,&CTMN`[dX5�%6D)V`h������xY_4(j|������zajNpjAC�������xw�P&5%")+�Έ+�cu�
+	9!8Q1!!Q�������������������5	APPN`QHHD(m����������%WkWUPG99ISTORfu|��������{���ǹ����������KYhpuz~}z{|~}~}}~zsW#-AO��7DMJFEDD.9g�|Z;Ow^N�����������������p�_HE@=@EDB>979<?>�>QVR^MA>;86542%!'')*++*('%$&%&+m��O\t9$!"$'(+*S����NK�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������տ�����������������������������������������������������������������������������������������������ѽ����������������������������������������������������������sg5#��Tkqk`WKHD@CB4144-%(,)$&)+'#	
+;a��}yrjaVYJ7~�+02222232/-,%	Lv}{tgXLA;5/++)((%#""!""#"K��Щond[YWSLLNRWXSU]`V��������������������������������p��׫�������}zxutroqolihgeb^][TM;smC@CGE:674:CLB51/a�������������|{vPr�����������������¿������1r����ysmjfd_][YTQPMC0H�����zwsoligfdb`_\ZXYXXVUSS79��������������������N��tBGEEGJj\>C5l�^0�q;HGGCBBK����������������������������Du������������������|y}:R"vs%=9	)$!)*(/�������������������������.��-c�, &B^UU\]`Z2�(6='Ubb\[\Zy��c^3%h}��������xakNngAD�������xvzT)1$%*.��͍#�Cw
+BF[VO&!Q�������������������0	
+L;2>p�C<X,r����������$YgYVQF9:AMQONdt~��������|���ǹ����������G[hqvz|||z{~~~~~~�{xX".BV��:CKJFEFF,B||tllyWR�����������������p�Z@CA>?A>:::>D@@;��=PSSTDD@:95212#$)()*+.+)&&"#!#&m��]_x<$ !#'(,*W����UP�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ð�����������������������th4"��Njtk_YPJG?<9300*'$%%#!##$# 	9\��~}ysj`U��:w�.@s<432331.,% P~}uj\MA;30-+*)(%$#"!"!#!K��ͩvmd\YWTNNORWZRV^bZ��������������������������������k��ܣ�������|{ywtrnoomjhhdba_]YS=nfA@AD@8687<CC9.4.]�������������~~{xUq�������������������������,m��|rrnieb`^[XUPLHIB.G���{uvsoiigeea_]\YVUURSUSSP69���������ɺ���������M��u>EDCEGic<DHm��0�n;GGGDBCP��ÿ���������������������½�Ew�������������������~xz;`S�_!";8
+'% %*,),�������������������������/��-d�&!,847;EXU/�)3@%Wac`m�s�e}f]2'b}�������y\rPrhCH������yxT'2$$ )1,*��ҍ%c*|�M#]aF- %!P�������������������.Z\\WiYJQ*
+R51:P�r-K,q����������'Wp[UND98CQROYhu}������������ɺ����������E\jrvz}}{z|~�}��{sO$1DS�|5DKIDCDF)Igeszz{KV�����������������p�^FAA?@B@<;>@?=92�z8JMNLIJC=;42-3##&'((+./-*(&%$#)t��aQw@%! #').-Z����OK�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ò�����������������������pf6�Ulrl`XRKE?=6113+&&$"""#" 	:T��wsi^Yy�Xh�*_o7632310/+&Ny~~ujYJ@;62-**)'%##!"!#"!C��̦xmd]ZZXRUORXYTT]`W��������������������������������j��ڧ�������}{ywtrpnmkkgebbb^[WM9jmEAAB<256:>B?8450]�������������}|ywSk�������������������������.l���|~toliea]YYZSNKG0K������}tnlhffa^\[ZWXVTTWVTR<9����������Ĵ��������H��w;DECFCd\8C@�ʑ/�q@DHFBBER����������������������������Cw������������������}y{6d3��	!7B+'("&/+(,�������������������������,�/g�'  "'+CL1 �)5A(Ubc]����tc`_2'h���������{VuKpjAI�����~�xw|N&3#"+&6K>&��Ӎ/�Z|�#I*Fei5!
N�������������������,"q=:C�AA8	W845i��1L*ttHNU|�MT��%[jVTMA33BPVNWjt}��������}���ĸ����������A\jttz|}}|}~�}|��{wM .D[�u5DKHEDFF)ZB#-8?DF. [�����������������o�SBB@??BD=;B>9788�vAIJLOOKB<<8302""%$&'),1/-*((('*t��bApE' #!"&+.,\����QF������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ս�������������������ǿ������������İ�����������������������uh:��UlomdZRKHB<6663+&($%$ ##	:Y���ysiaYS��n�HyF4662330.-'	Py}{shYJA;62-+*)&$#"!!"##" D��ɠrnd]Yax[���gWWU^bT��������������������������������k��٩�������~{zxvutpnmkjfccb_\WN=o{HHG@7288=@B@;85.\�������������|{yxTk�������������������������2n��Ő��ypjged^_`biYM3P�����~ysnjiifb`]\\[[XVSV^ZU<7��������������������K��x8DDEGIkd?E=��}2�w=GFEA@ET����������������������¾����<x������������������|x{3 h]��+ 	'9C
+($[d^]pqXed(.�������������������������.~�/b�+ $%+/!~&4@+Vaa\t�hU[___4,j���������|Ur!IolD@�����{�yy�R)1"%+*+-/(+#B,5FR.��Ҋ �LNW 	)T3(" QƼ�����������������-#p=87_�I31
+R69C���5H,yl664j�G)��$^iYTOB@AHPRPXju{z{������~���ö����������D`jquxz}}}~|}~~}�{vK /E`�k7FLIEEGE(`8"c�����������������r�VDCA@?DSiG>977:=�wCFFLQOH>9:9420%!&%'(*/53.-*)('.|��h@iO-" %&&(-.*`����NI���������������������������������������������������������������������Ǽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο�����������������������������������ı�����������������������ug8~�Upqmc\SJGA:5545-)*'(%" ##
+:W���wqh_XQc���rc36554330.-'
+Ov|ztgXKB;62.,+*'$###"""#!7��ʜoke]_|�z�q��ZVV\aS��������������������������������g��ڪ�������~zyxxxyvwqlhfdcaa]YP>iJK8/3667:>@=864*Z�������������|zwtVi�������������������������,o���||uojfda^ZZWWcXF2P������{upnllhdca_^^[[ZYXf_U=4�����½��z����������N��|8CEEGHic?Pj��P2�x9GEFAB@S����������������������������;x�������������������|y}9#[$�̹/ 
+0A@#$c?45M�MBl$/�������������������������,|�*a�+###"%!~#?:4=-Vbb_����p^a`1)g���������wSs#HojAA����~v�ww�Z(/#KeS:OqQ $'E,>AK1��̈(�Q_n
+	*Y&YH(#!XĻ��������n��������-"o899s�f90	N724}�e3J'~_2=<f�p1�� `fYUOCBEMTTLLkx{qhw�����}���÷����������=^isw{|}z|~}�|}~zvH .G_�g8FMIECDH'c6 `�����������������t�TEBA@@DU�f558;?C�o@EFIMHC87:;421""&&%'*3hT/.*)%%,|��hAcX3#")1.-0/'d����QK�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĳ�����������������������ue;w�Slsne[TMF@843440/-+++&" " 	7T���}woi_YQE{��r;65334330-*%Ht~|vfWMC;71/,+))&$##! "#!7��ʢvof\b����z�NWY^bS��������������������������������f��ګ��������|{{y�����sfeeb`^\WP@K�P=289726<A<:863.[�������������{vtSf�������������������������-n���w~xplhda^\ZXWe[L3P���~��{vrpnmjdda`][[\\[VogUD2�����¿���¼��������Q��~:DEFGLjhCz��S7A�t<HFFACEX����������������������������8w�������������������~zz6)j e̥4C<+&j<41;��2p$.�������������������������,{�)^�0$$! " ~$d�T2;/Uabaj_Ifca__0(e���������zUr#FqjCD�����t�vtU'.$L�WH�I!":#.HX4��Ί&sD:K
+
+*Q!Sc`S5,$Vż��������pb�������(#s::E��d=/	N<^��w;;P+�]7AJ��u4��#`gXSM926KVTSYmzzy{������y���������������D^ksx|}}}~}~~~~~{zJ!0G]�`;FLIDDDC(c4$l�����������������v�TEA@@?DH��:8:?BE�q;ADECD=699=73/!"%$&&),f�<,)(%%,}��c9^_6(%9OM>95-d����OH�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ű�����������������������td8{�NjrleYPLF?742210/-0/0($
+7N���~xog]WTLO��E186422320,)%	Fu}}teVKB<43.,+)'&#$$#!""!8��ʠume_n�������_XX]cR��������������������������������b��ު��������}{{|�����vffedb_[UO?B�G6;C:2/5;76>>74-W�������������{xwuSb�������������������������/r���xwrlid`^\[XUe^K5S�̭z�~{xurolgfed`\\Z\\ZZpeVG.�����¾�������������Q��=FFFGKr���������xCIGGBBBS����������������������������;x������������������|zy5*`"M��]=D;(!k<5.b��;n"1�������������������������*w�*`�0 ,!#! ~"(��@17-Xb_bv���t]^_3,g���������zUu)GkiAB�����x�{t�R'1#H�TE�D#;?:IS0��ςI*l~
+ +LFPSe=.$![Ƽ��������Wa�������&(l60G��M6/	`Zhv^KM\T/�W49C��Y4��#dbUSL@=;KTSQZmxzuw������y��ӿ�����������@[lsxyy{{}}~}~~~~}zwE 3I`�Y8DJGCDDC%d0o�����������������y�QBBA??BJ��C>>@=B�o=>ACABH[<;?<3, "%""$($I�T+*(%$+y��\-Bb@(*=RWUG8,g����JJ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѿ�������������������ű�����������������������th=w�QlsneZPJE=84212,--0/-)%	
+8T���zpf]UONH��<387633431.+#	Es}{sdUIB=63.,+((&$#""###"1��̟qkdY�����dS�lUV^cT�������������ͯŝ���ÿ����������c��ܪ�������~|z|�������dfdcb^[UN@<�F>FA8334328><72.V�����������|zyxuqP_�������������������������1t���x~xsmic`_\ZXWk`H6U�شu�zxtsplhgfba`^Z[ZZ\pkUH.��������������������U��;GHGGKt���������w?GIGAADV����������������������������>w�����������������~|yx7"X 6�Ĺd	>@9*#j=4.k��Dn!5�������������������������&x�.c�! 0 ""*��k3:-Xdb^a[b�ea^a0*g���������vOx)HpiBC�����x�wq�S(4!I��a��B#*2BMB,��Ј#tJHP
+ 	-R*ccTJ/*#]Ƽ��������Su�������$)j@j��h.9+���������3|V<[���9A�~$edVSPLNNQTVO[nw{rw������}���������������<aluwy{|}}}~~~~~}yvE3Jb�]9EJGDDEE#i4"t�����������������}�NDA??CDb��C@?<6?�lB>>??CF�d<@@9- "%&##(,o�f(+($$3���]36\T.TWQSVVH.m����JF�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ñ�����������������������vh?v�WmtoeYQJC=74210*++,+)%$	
+6R����ypf_XQHd��h267543330.+&	Bs{{vfUG?<72-+)'&$""!#$"!!1��ʟqmfl�gKd��{�dVV\cQ������������Ӽ���ȗ�������������h��ܪ�������}{z{�������jedc`\ZWR>6�KCB;634656:>;61-W�������sr��v|yvwpN[����������������ÿ�������.p���x~zrkgba_ZYXWg_H7R�״u�}yvspnkgfdca_]\[YZ[mjTM)��������������������S��}9GFDGHp���������y?GGEACES���������½������������ƾ���>w����������§~����~zxt2&Z$25$
+=G 4
+&$iA9>m�j?m#5�������������������������(x�*`�-!! !~"^�ǃ2>+Tdca����w_^`1+c���������Qr,HskCA�����xps{N(5"K�ww�H:upxyxF��Ї	/T*E/^¼��������_��������'&r_osaHHZ3$���������6x`Jt�jB2R�q(efUUQQOSSUSN^oz}rn������x���������������:`ltxz~}|~��~}~~{uB3Ge�P:GJFCDED"g+u�������������������MDB@@B@Y��CA<74A�lC>>CEC2�HGC@/"%&&&*8��a+*'$'8���e54Zj9K�KLTVV4n���}ND�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĳ�����������������������wf<u�WntoeYQJC=85430+)(&%### 	7N����yqg^XOK����I47654330.*$?l{{ufUH@<72-,+)(&#""%$##! -��͝slfv�_RX�yf[[S\cU������������Ӱ������������������b��ݩ�������}zz|��q���yheeca^\XS?7�\?;;823548:>:50*U�������outy�{vupS\����������������ÿ�������.l��݂�xrkfca_[YX[iaK4M�ٴy�~zusonmjfdc_^[]\YX[jmTI0��������������������M��8FEEHJn���������z;GFFBCEQ����������½����������������=w�����������ʉ����~{x4"W	GL!;
+)$fCR���BGf#<�������������������������'w�(^|/ #   |%k��}2B(Ubcammijb_^`0*d����������Ur,HojEF�����x|os~N&7#L�85�J0`[YUY>��х#.)	5R$@?DD$ "^Ļ�������q���������&2��������L	 z�������w;����������d*hdYVTTSSSTTP\q||wy������x���������������=_ltz{}}||����{r?!4Dj�Q>IJGDFEBr/$|����������������|��KCA@BCX��[@967:H�eA??GGBC��^MIF1#')()0{�H,*($%7���lH5UgUK�U9GQX/q���xEH�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǳ�����������������������xi;r�[muof[QID<73200.(%#"#!!
+
+7L����zph]WKm�n�jp75643330.+#=nz|seTHB<52.,*)&&%$#$###! *��˗sjg^ZYUOJLRSXZT]cT������������ƞ��zʸ�������������a��ڨ�������}{z{{trtoplfefea^\XP=-�S.8<732119;<84/,T���������r`v�}yusQW�������������������������.h��ۀxqjeb`^ZYWXecH2M�ܸ|�~zuspomieea^^]^ZYYYgjVJ1�����½��ý���������P��;EFFHNr����������>JHGABCQ����������������������������Av������������ʜ���}y{t6&`	HK!33'xrq�xhbqr)8�������������������������$u�%]�2 #u#G��Y%,(Wcba`[]\_`^\.,e����������Sv-Fnl>C�����yxpt�S*1#Q�K?�K+$&!-(8,��у'*UJ&"
+6Q)accg/!!`�������������������%=��������Fn�������f?���}������_,a_YURPRQSTUN_p{yoh������y���������������8_ktyz}}}|}~�~~~��zp;6Ho�L=NTFAHHA�2"������������������LDA@D]~~`D;559=H�aAA?EEHb��`KKI5$''/Fy�{3,*&#$=���p]?Yej|�_*/<J1y���u@F�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ���������������������������������������������������������������������������������Ӿ�����������������������������������ɲ�����������������������wh=r�]jvpgZOFA;73//01)$"##"  
+
+4O����yph\XU�xa�B�P165343/.+%<fz}sdUJC<620,,)&&'$$$$%#" $��Ɨshd^YWSPMOTWYYV[cU~����������ղ���vư�������������b��ܫ�������|yxwtspmmihhgfa^]YP>*�M;B?963/16<9541,R����������s|�}{vvNV�������������������������1_��݂xqiecb_[ZXXhhG3=�ܽ|��|wsqmkjfcba^\\[YY[ihPH,�����¼�������������U��;GGFGKi���������u@KHHDCDP��������������������������µCt������������Μ~���}{w6(a
+EO"D2)���������+;�������������������������(u�-X. !")$u#6Y�ō&(Vedaaaa`_^_a.*b����������Uw1HmnA@�����~yuw�U*2#E[H@GWDADHIXN]Y��΃4 �n4&
+:O+`3#iĻ�����������������$7��������B	"��������o@����������]*f`WUUTRSTUUUbs~}}|������y���������������>_lwyz{}}}}~��~���xs; 7Kp�GS{�urraD�/'�������������������FC@@A?D>>;557;=H�_D@@D@BX��ZMNO3"%'&5cuY3/,)'&#;���qkX^dh��_**+./x���s?D�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ư�����������������������wi<o�^jwqh[OG@;73/0/0+&'#"!"
+
+1O����yqjaSx�Gh�0ot5433320/*$	7dz{rdTJE=74/.-+'&&$#$""#"!'��̜ljd^YXTMNNQXZZWXcQw����������Ӱ��̣����¿���������`��ܫ�������|xwusrqnnihhfda_]YO>/�]@>9540/2464413+T�������������}{vtKO����������������ÿ�������-T��݅�xsmfb_`_]YXc_A09�����}{xupmjjfgdb_\]]\Z[iiUD-���������~����������O��;EEEFHMVWZQRVPTcGBIGHCCDM����������������������������Cu���������}��ʒ���~zx4,d;D##"D/#���������'<�������������������������*u�/Wy- #  ''}%1�±q+%Scb___aa_^`_/,_����������Nr1GjhAA����~yyx~S'6% !  /NFRIXQj���!?K��.J#
+@U.#vƺ�����������������!2��������<	>SPSQWQR5B~|��������U/kbVkjSJUTVTVcr{wy|������x���������������8bmuyz|~}}}}}����xq8 8Kt�Ij����fC!�/*�����������������|��EA>=;;;==6247:<J�T>A>AFQ}�tTHPM.!%&&)*)*+*((''"=���qjb_bg��kC.(#){���hCD��������������������������������������������˿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿����������������������������������������������������������������������������������������Ʈ�����������������������wk?n�Zhuog]RIA=620/.20,,'&%"  
+
+2N���{slb\��3p�3H�K145411/)$
+:dwyobWHD>94/.,+(&&$%$"!!"!3��Μglf^YXROMOQU[ZVYbTq�������������������������������a��ޫ�������}{xxusrqnnljidc_^]YP<-�W:9610..0245520*X�������������}|xuOK����������������¿�������/G��ި���}uqljghfbfcC+6��Ѻ�������~�uvtlqtoppljohUB'�����¾�������������S��@GGGEFHBDA>=?>BOFCFEGCAAO���������ÿ�~���������������Cx�����������ö���}yv3-h7?=:+#!#>($���������%6��������Ñ���Ƽ����������)t�2Zv-   #   !!z$&')Vebba`a`a_b_0*^����������Pt5Goi@A���}��yzwzX&6%:GIKMXWT���}!@h��?L%
+DV$"sȺ�����������������#7��������=	H����������P4iaW�}qacj]TTeqxwx}������{��п�����������?amtyy}~�~��yp3!7Lx�Dd���|sP?"�*,�����������������|�821121103../224I�X<?>Bf�nQMGQM,!#%%%&&)*)(&&%#C��|mfI>]��`IB/#"-����hHE����������������������������������������������������������������������������������������������������������������������ſ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿű�����������������������yi>j�\gupf]RKD=7230246540,+(%"0F��}yrj`��U5i�4:hj645221.)#
+	7_tvlbWKFA:50-..*'&%%%#!"#!(6��ƞnmf_XWSPLORUZYWZaZg�������������������������������b��ܪ�������}{xvusrpnmkhgdc`]ZXP<-�V8963520015:940+S�������������}{xuME����������������¾�������-D���yiaYME===:=B<=J@*5z��pe]XVQHEC=<<<=<>::9:<STE,������¿��¿��������Q��?FECCCEABA???@BEB@DCECABP�������������������������½�@w����������ö�~���~|xt2-eBD:0/-.%%F&#|�������'9��������ƌ���������������%v�.\n,  !+""#"t$+Sccb```_`_`_0+[����������Vr5Fol?B����{|ywN*4#RJMFZJ];���{8S¹.7#
+DP $"pǼ�����������������"#@HMPU[^Y)
+M����������L1m_Y�cnwix`TQfpsqjt������v��п�����������Gamuwz}|}~~~~}~~�~}xr/"5Lw�DTornidK?�%/�����������������~�s !   !"D�Q243149FCCA=KE%!" ! F��}n`/+APC'%&&$$/����gHF��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͽ�������������������Ʋ�����������������������za9k�dgxvh]TLC>;75566:::752.+($ 4O���}{slm��@Ah�79HyL34120.*%;_yxn^RLDA<5/...*(('$%$$$"!;C����oni`Z][RNPSX[ZWY`ao��������������������½���������h��ݬ�������{ywusrpnmkhfgdc_\XO>)oM68;642.147641/+Q�������������zwqP?�������������������������/?�����n`YTRPORPNKNK?)-r����zsni`[ZXXURQPRXQTWY[UQB1��������������������O��@DEEDEE@?A?>>=@B@AEEFBAAM����������������������������A!u�����������}}~��~|{w23c&:)#HP%&"##" (E((|��������$8��������Ɨ���������������(}�-c�3 "1"!$" %& {$+Tacb``````^^/*X����������Us2Gll=H�����~|zw{P,8$=LFKOTTW���w3=zs1$	BF""pż����������������� 
+P����������K3l]Xl[\dab[SRhu����������w��ο�����������Gckvwz~|||~~~|~���~wn/ 8L�>?KIA>EE:$|$%'&'*''' ,������������������m#F�A"#$#%"#%('&%%J��|jZ*%!  0����aBD�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ų�����������������������v^31YvKJMIC>;87975783898875552.,(&$!  #7Mt|yxvha{�WC=[�465``3421//,*$&?XbfXKFC?:50/-,+)()&%%$%%$!!Cr���~}�}pjlti_ad`^`_[[Xs�������������������¿����������s��᭡������{ywuupqonjjhgddc`[SD$A;434.+,00.,-/./*R�������������|{yfA>��������������ľ��������w)6}��ʰ���vvpmlljddO=()d������}xupjfhe`]\\Z\]YZ[VT:*��������������������K��}?FFDCEF@>@><>>?B@?DGGBBBJ����������������������������As���������}�~~~{u2?^$4:2" /AnhJ"CJ1$(6%&D][_dadi[$8��������ų���������������+<W�;#'"!!$"&&'z#+Xdbaa_`a`^_[.*V���������Qs6Jmk6M�����~xw�M+5&6OIPI\O]���z#%#$
+BE$"rû�����������������!
+T����������G4q^WVPNOQTVURjw����������z��ξ�����������Dbkuvx||{|~}}~~���}xf/";F}�=BIKDBDC8'y$#/7?B<;><*0�����������������~�g  2MJ�:"%!#"- (!%'L��cNA% 1����Y8=�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƴ�����������������������vY0'YdX:)))).,18AJKIGFHGEGEGDEEFDA?B=@GNJGMGDBABGQR5H@FQ<CCJR=@::;;862002/5<;8759;7;9615544001200..0.,))*'7r�~{nsupga^]ZRSNHD@EJE@V����������yrspii^Z[VQOJFB??;=?6C��̣�������~xtqqqmmljjijjhdha_]F! *.535.%$())%'()2/U����������}yrh[OG)&a��������xxsokj`[XWXQSNH<.h���srmb`_ZWNLDGJ?6%!S~��vopmmheb`_\ZZVTPMOMLPUT;*��������������������P�ڈOOQMGFLB?BB@@BEHFEJOJEIHB��þ������������������������@t������~~�}�}~}{xt3%Mc%M{u]4(Bh^|�x6NJ7/&(-&&!"&/+>������������������������� JsL!"$*!#$$+*#!$t$/Zcaaa_`_``__0(Nx��������ySs;Gkk2U�����x|ux�I,7'KFM@PL\I���y 
+>D"%#uŻ�����������������T����������E5j^WTSRQRUSSRkz����������x��н�����������Acluwx|}||~}}�}��~|g.";H��;AIIBBBC:$h'&+<[[Y^\48�����������������x�a:[N�;8P$I-2])c+DJQ��;#6����D('�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο��������������������������������������������������������������������������������������ĳ�����������������������wV-*jleE<303:BO]abbffdfcabaecjfcaa`adgpqjcc_a][^_][]`_\Z[\[]^]\XXWQOLIOSXVNMJGEMLNNRQOMOOPOKKIIGIJFFHHEFJPZWSDDAC<?;:@EGHLE@<9DNKFDEELIFJVWZWXNLUUXPKBJUWVSRGINMLPWfvysoolga`^\\_]`]]\YYYXYXTVYVKXIFHLLRSPQSRRQUUW\_dhu��|od^b_`X[afikiijnpuxyzujkaYXXUZ\_`Z[]a^\\X[npgjhecWYhnne`VY^REIJ`vvmjgha^WWRRPECA?DE?>@@EDGCDw�������������������\}��{{piXMWSWPNPP]ZO[lg`[XYXG�õ�������������������������B(y�~�����}|{{{z{|~||{{yr/1l�a|���I(.+Pedbn~NzLD=952650.+0225573*'(39/+%"$)-3*A������������������������w <��8''/(())-835&:={&3Y`aa__^_a_^].%Mw��������wTr>Gnm0]�������y|�B+<)!
$IGMITR^R���q'9= !")#{»����������������W����������A7g]VVRQOQTTTXj|����������w��о�����������Fdmuvx~~~|~~}~~�|h+!:K��;@HGDA?B5,^*)HORWL*4�������������z���v�V7\S�8@h(c07\!l%JOS��2 +'#";����=#�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ô�����������������������uW+.fec^H79AN]cjnqrpsquvtyxwvyw{y{|z||��~||vssstrpoqonppmmllljjhhgfgghnoqpollkmllhilkhlghjijjikllnkkllqswvyqniihgffgdcc`_`adgihgfklieiihiloprvtttqqtsspooqsxwqrswzz{snliijkhgge^adigjkijiijmgmmqqx~���������������������������������������������������������������������}}|y~~{y��{sv~����������{zurqqmmmnorwxz�{���������������������������{{||xutyzwysu{zvy|ywxzyzy����������������������������mj�������~zxyyvuwwywywwoPS|�jnvrkUNTOWVPQW[Y[B;nvaWONJHKIGG???<ACFJ?@;;8&-Z\LF@:;69?LFJ������������������������R"8HCMzUECI@9DFPVQVCUi�+&'$###$""###$8Za_^^]]]^\[W/&V���������hCuP:hg1^�������}|x;EzM3*+'%&$$$!#"#  "#$!"$$1FXGVIbWj���v#&7<.41./<YcX55JTw·����������������~.&!Y����������?8o[STPMOPTUX[n�����������s��ʺ�����������Idnvx|~~}~~~~~���zh)"3H~�:@DC?=?@8L�67����������wskny��r�R9[Z�3?]
b1=Uk#NKR��23MFK$>%A����>(���������������������Ҿ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������õ�����������������������vX*.gmn_9<DKVahntwwvxwx{|�~~~��������������}|z||{{{zyyyxxyxwxwwyxyyy{{zzz{y{zzz|~{||zzyyyzyz{|}~~���~{zzxvyxxwwtstsssutvxxvtstsqrstuvxz|�~�~~~}|}}��������������~�������������������������������������������������������������������������������������������������|xy~�����������������������������������������������������������������������������������������������������������������������������������}}zz{yogehhcdhljkigjmmlqyyvsronlkggb^WORQMMIEILPQM?GNMF=;849<BELMMSKEA;BHJIGDEBGEHIHGKILJJRX^SOG?ABHHDEBDA><<ADP]iLJHCCBACA?@@?@CMV]ZXROTSJTSVC;FPRVTOOJKLGHf\JUWMY^XVSROFFEKGl�n[LJNIF<;9976432124249CTdg]cbj`bw��qW:./.28:DD&;,*))(++./1/''(#)ei4/05633Bdlnibbh|�ZZo���������������������HFFAA;:51,,+++**+-,7EA-(#  )+""$%/?b����������CJ�h\\VRSVUWgmx�����������~��ò�����������Wgqyy{}~}~~}��xd&'7PQ410//.04;b�W  9�¯��������������p�J1A`�,BZl->P!q"NEW��56U5Z5?"C����@&
���������������������θ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿����������������п�������������������ô�����������������������vW,-kvtM:FNWbkqv{}}{}~}~��������������������������������~~~��~}}~�������{|}{|}~}{|}}||}~|~}~����}~||}~}}{|~}}}|||}|z{|zyvusuwvvxy|}~�������������������������������������������������������������������������������������������������������������������������������~{y{��������������������������������������������������������������������������������������������������������������������������������������������~�������������������������������������|yyj[NH72136:@EKRY\bimntw|�~�������|�}�~uqrrpllljmnmpjlqrtttvnmliimnropokkkmnmmjlilikhkjtwy{wusllpputuvvv{yz}ywrqojieddabWMCIKILJIIOIGIGHHHIHILJLOPPSRRKLNNG?>=GIHIKIHEGMIMJGIGJJHFHJJGDFIGRQJIIJJIFHF@===<8;@AHOMKDDEGJLJLLOOOMORZWQL9;;:<=?:==<;8798:>>EKD==<9788887638;@A;6:=?KRH?DPO\ba_cgDN`TSROKILNNNKDOUXXXYPMRXN?LSRKLMQZ\cmler�|tw{{|���~����x]!'(#!'Bs�zU>/*'$""=�����������������s�Le�,CO#_+18G!:5\��/9R!\I2I����8$
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͻ��������������������������������������������������������������������������������������������������������ó�����������������������wV/A{aBEOV`iqw{��������������������������������������������������������������������~���������������������������������|~|{yyxxyvuwwyyxy{z|���������������������������������������������������������������������������������������������������������������������������{y{|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������q\RA62/.9DHOTalv������������������������|z{yxyxy{z{y|}}~}|�~||{y}}|||zz}~||}|~~��������������������������~|{{zwwurotsqrsqoomnkjlnqpppqtxwxwx|zwtopohkiiihijliiefgegddccedcbbeeddgdefgececcdca`abcbaaa^[[Y[^Y[[XYWVZZWXUQTXWYZYZYVZZYWWX][X[YXX\_[[ZZYWYXVUWTQRTSNPNRSPRUVVTWYWQPOOWYTRQRQRRRPLFB<<>8<;=AHGIMOJE98:=@@@;<@95Y���{||}��������z])FgkfphiaI516-    ",TdGP�Ű��������������v�E f�.
%&#c��(;PRU$O����- ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ӿ��������������������������������������������������������������������������������������������zX6c�vGCLU\fpv{~����������������������������������������������������������������������������������������������������������������������~}||{zzyyyyvuvwwzz|�������������������������������������������������������������������������������������������������������������������������{x}�|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yodZG7/7GQ\bbirz~������������������������������������������������������������������������������������������������������}~�}z{||}{|{z|}{{|~}�������}}zyzzxwwwywwwyvwuwvuuuwvrvwuvwrqrtutstsqrrrtqqrqnpopqpopqonnmlnoonnmmomnonnnllnlmoommqonmllmnossrqqrttsrnoprpoonpnnqqpttsttqopppmmnmljhhfdddgjlmlonlnmokmlmlgdddcbadbbddc`VP���~|z}���}~~�����x\'!&(.2632318=DBKpV?=@EBHLFHG<@UviKSy����������������s�I.%""!"###$$%!$(j�1%#$ #!##$ (   a��, 4@29 S����+ 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĵ�����������������������yTW�]GJQY^jqw|�����������������������������������������������������������~���������������������������������������������������������������~~}}||zz{yyyxy||}~~����������������������������������������������������������������������������������������������������������������{~�}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}ytph_RP[_cjhlnrw{}��������������������������������������������������������������������������������������������������~}|}||}{{|{z|{||||{}}~~~~~~�~}}|{{{{y{zz{yyyyyz{yyzxwxxvvwxwxxwzywxxwwwvuvuvvzvutxwuwxuvutttrsvwuursstsssstttsttvsutsussusuuvtvyzwvxzwuvvwxxwuutussutsuttttrrsspooooopnnplmonopqoqonmnnmmllliihgfjjgiigikgaS���}}}~���~�������xb92146545448;==AA?>@BBBEFEFGGDCCHLLHILLNQVXSUPD<@?<;9;@@:DMKHONRJB;87:=<;=:<<;B@Tf=51142122476478=@::94652/132114Y�|30./01+---/-+(V����2&
�����������������������������������������������������Ͻ������������������������������������������������������������������������������������������������������������������ξ����������������������������˿��������������������������������������������������������������������������������������������������������Ʋ�����������������������vVu�qKKQW`fnu|�����������������������������������������������������������������������������������������������������������������������������������~~�~}}|{{}||{{|}|}~~}}�~~������������������������������������������������������������}~{|{zz{{}|}}}������������������������~~�|nz||{yzxz}}~~�������������������������~~~~~}|}~~�����������������������~~��������������������������������������������������~~�������~~~~|}}~~~~}~~~��~~}}}�����~y{zyvrmlhdccfejjmnpsvwy{|}}~����������~��}~~~}}||����}~~~~�~~}}|}~~~}}|}}�~z~~}}~~}|~|{}}~}}{||}}|}||{{{{z{yxyyyyyxxxxyyzzyxwywyxxxxxz|{{yyywwyyxwtvwwvwvuwvuuutsrsuuvtttttttusrtvvutusrssssstsstsrrrtrqpqrrqronoqrrrrqqqrrrrpqrprsstssrsrtusuttstssrstrpprpppmnoooopoqpomnmmmmlmkkkkijlllklkllmkjkjiikkhfgfdefefgegfedX���}�������������}wbGGMRNOONPPRNQPPSQSTSUYXY\ZWVUUWUUWWY[YWYZYXZZXVWVZWWYSSUX\]^b`XWUXWVTTSTT_\\\^c]XRSTROLMKMKIIMNOVSV[XWXYVYOQXVVUVWYMILLMLLUSLQQMISaWUW=53�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������´�����������������������vh��TEJQW_jqx}������������������������������������������������������������}������������������������������������������������������������������������������|}|~~}}}|}}|~~~|}}}}}}~}~}}~~�~���������������������������������������~}~�~{yy{ywxzyxzz{|}������~��}||��~�~kttvuuurtvuwxyxuz||||||}}}~|}||{{{{{yywwuwwwwxzyywz{|||z|}|{{{{|zz|{zzyyxxwxyxwxyyyzy{{|~}|}||}~~~}}}~~||~|{{}|
~~|}|{{|}{z{|}||zzyxyyyy{{y{zyy{zzzyyyxxzzyxyxyyyzzyxxutwyz{{zzxvwxwtrqqoljgkmkknnprssttvxywxy{{zzz{z|~}|zyyxzzzyyyzuwyxy{|{{{{{xwxyyyyzzyyxwvxyxxuqvvvwyyvxvuyxxvwxxwvywwxwwyywwuwwvuwxvvwwuvuvuutuutuuuvuututtvvtstsutsuuttttsrsqstptsrrsssqrrqpooopqqrpppppqpqqqpppqppqppoqpqqopppnppppqonnnmmnmnnnppponppopqoppmnppqopooppppopoopqonnomoonkmmlmnmlkmlmmkhiilkkkifgjgfghjihhjjjkjjfeghhgefebbabbeffghhgW���~~~������������}wbHNSXVUXXYXZZ\^[b`^`_^bbacccbb_^__^^^__```^]^^^_^\ba^bd`_]_`b`b_aabaacecb`_^bfdccda_b_]ZZ[]ZYXWXWWWWWYZWWWUUVWZXWWYYYWXXWVUTVVVTUUUULBBFCB�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������п��������������ó�����������������������v}�{IFKU]gou{��������������������������������������������������������������������������������������������������������������������������������������������������������}}~|{||{}}}~|}}|}|{}}~~���~�����������������������������������~}~�~|}|{yyyz{zyyxyzz}~}}}||||}|}}||z{������pvxvtvvvwwvwwxwvwxwxvvvyyyy{{yxzyxzzzyxyxvxyxwwwzxxwxyyyzz{yyyyyzzxyzxwwwvvwxuvuuvwvwxwwvvxwwvvxyyyyzyzyz{ywxxywxxyyxyxxxxxywwuvvwwuuuuwutxvuwwwvvuvvvuvwwwvuvuuuuuvurqrrppnrttuwxwxyxvtswutqonqqrpqssrststtutuuuvutuvvwyyuutvtsuswwtvutustrvvttvutttvuttstttutsutssutsqsqrsutssttststtssttsutttuttssrsrttstrtpsrtssqrrqpqrppqpqpqqrqoqqqppnoopooonponpnoopnnponpmnnnmnnmmnmllnnmnmmmmnnllmlklnmmnmlmmkllljllklkiijkjjgejklkkiikmlkmljjkmmmmmklkkjklkjkijmjkkjjjijhijjjjhgghijhhhjghgiifgffehjkjhiihhjkhfhiihgfgfdccghhgghihX���}}|~������~������}udLQY\XY[]]\^^`b_aa_adbccbbeeb`ca`__``_aa``__`aaaa_\]^`_aa^`bb`_abeebbcbfhgdaadeedcbbcdfd____[[WYXYZZ[ZYXYYWWXXXWXXWWWVWWWVVVXWUUVVVRRRSRPR�����������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������³�����������������������}��T7BHS]fntz{�������������������������������������������������������������������������������������������������������������������������������������������������������������~~��}~}~}||}}~~�������������������������������������������������~}|||{|{zxyyzz|~|{zz|||}||}z|wy������mvyxtvvvxwwvvvuuvwwwxyz{{zz|zyyyyz{{{y{zzxxyzyzzzzzzzz{{|zz{zzyxxzzzyyyxwvvwxuvvvwwxxxxwvvwxvvvwvuuwvvwyzxtvvsvvvvtvvvvutvwxvvtvwwvuutvxvuxvrtuvvvuutvttuvvvvustvstutrqqqooprruwxyyxz{zzz|~|yyyzywvwwuvvuuuvvuuvuutrrutuvutssrrqrqrrrsrrrqrrrrqqooprrrqpqpoopqqqqqqqrrqppqoprpqrrrrsrronpqqprpprqsttrqpprppqqolporpolmmnnnonononkmmnnlmnnmmnmmmmmmlnklmkkklllllljijiijmkkjkkklmlmlljklmmljkllkmljijjihiiiigiihggjhggeiighhghhfhijjjghiihiijjhjhiiikkjhhhijjigfhhhhhggigfefgfeggffghghhhhhihiijikjihkjklkklmlkjiihggfiikjkmkkf���~~�������������~vbPUZ^]__a`afbbbbbdaeebdebddcbdedddcdbca`aa__`_]acdegebefigefeeeecbddhfddfffgdb^eeddc`\_```bb^^^_ZY\[XWVXXRWUXWXVVYZVVVWXVVUUWUUUVUVTUUTVSS�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ��������������������������������������������������������������������������������������������������������´��������������������������47AEQ\eov}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~����~}|~}�{~||��~��qz}|zzzz{|{{z{zyxyyy{{|}}||}}||~�~~~}}~~~~~~~�����|}~{|}z{~{|{yxxxxwxyxxyyyxwvwwttuuwvvwxvvwxvuuuuxzwvutuuvvvuwxstuwvvvvvvuvyxwwwvvwwwwtvvuuvvuuvvwvvvvuuvussrqprvxxxyyz{~�����|~}~}|{}}{zyyxxxyxvxvuvvvusttsqttssssrrpppqrrpoppnnnppooooonnmnookmoonooompmnronnnoppopo
opopoqrqrqstqoqqppqqooppomnnnmnonmmnmmlnqmloopnnpoklnmknomnkmkkljklljijkllljiikmkjiikkkkkllmkjmmlljjigkmlkiiihhiiiihhjlhgiiijhjigfhihigghklkiijhihhiiijiijiigfgfhgghhgffiiijihkjfgjhhiihhhhihgfhiikklknmnppooolmnoopolmnnkkikllmllnmbb����~~�����������~ucMUZ^abb``cefdgfefedfdfeddeeccba`_^bddbcdefcbcbd`ba``_adcdcdefkjigdbbfigfdcccgfeaaab_a`__^\\\\^`^^[``]\[[VWXXWUSSTTROSRTRUSRQSVUTSWVSSTTQR�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ŵ�������������������������q!6@?DL^lty|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������r|�~~~~����~~���������������������������������������������������~}}}~~}|}}|}{|zyzy{~|z}|yyxxzzywywxxwwvwwwxxyxwvw{|{xyyywy{zyyxyyyzzyy{}zyyzzzxxz{zz{zyxxwwtstwxy{|~��������}~{x{~~��~~}|{{z{yyxxxyxvuvvutuutsttsrrqqqpqqpnopoqqqppopqrpopppqrpqpqprrsqqtqpoqppnnqqppppqssrroorsqrssnrpppqpppoononnlknpooopqoppprppppononmmnmmooonmollmlnmlmooklmolnnmlmnnnoonlmlnnmmkkmnkmljkkkjiljhiijilkhgijkjjhghijiiiiihjjihjjiihiijihjihgegihihhghhejiiijjijgiihghjjghhhhhklkmmmmnnoqprsrrqnoppomnlklnjjgkklkilmmlk���}}���~�������~ueUW^`ebdfghgjgihijklliijhjligfggeggeceecdba``__b^_\[^]]^_`_`_``fjjhhdbbfgffcccffebaa]]_\]_]_\__^`^]]]^`]\]\WYYZ[X\ZVWVUUTVUSTVWUVTSPRTTSNR�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ô�������������������������
]!6BIMI[flqtvz|���~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������v|����~�����������������������������������������������������������������������������~~}|~~~~|~|}}|}}z{{{{{{|{zzz{{z{|}�{|{}~|||||{|}}|~~~~}~~}}}|{~~|||}z{z||zz}|~����������������������{||{y{~~|{z{{|{zyyyyxvxuwwxtuturqsstsqpppqssqppqpqoopqomooppporsstrqnqrsppqoqpqrrqqprrrpqrstsuvuuusssssstqqqqsrprsttsrtstrstrsrrrssqrrrqrrttssspooppqqrqsqoponppnopppqrttqqponlmmijllkjklkmkjkkkkjkiknijlmllkkkjhkjijiigjkiikljhhhijhhjjiighhggjihiihiiiihiighkkkjlmnlmlmnmmmmnppprppoqnppoknmllmmnoolqmljonlmlkkji_��}~��������������}udQV\\\^`\aba_adbebcfdfgfehhfffeda`a_aacca_`a``__Z[]^]`[__`bbbca__giljjdeddghggd_`bba`bc_^^^^[\\ZX[^[^[^^`^__\ZZ[\][\[Z[XYZ[YZZ[[\XYZ\WWWWU��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������R15H_cbeigkmlihv~���{���}y��x|��xy~|�{z{{}y||�{vwzzy}x|xy}wvw{vy|w{{xwyzrxy{{zwyyu||}||y{zvzwvvvvrrnpoutx}{zx|����������~{�������~}x��������~~|{}}�����������������������������������������������������������������������������������������������������������~z������w{������������~��������������������������������~~���~~}{��}~����������������������������~���������~}~~}|~~~~}|}�~~{}|~��~}}~�������~�~~}}�}~}|~}||zz|��������������}{wxyw{z}~�������~}|}|}{{{z{|}|z}{||yy{yxxzywvyxwwvwxwuvuuttuuuttrstuttuuusttvwtttuyvuuutvtsttuusssuwwvwxwxwyxwtuvuwwuvtrtutw{vvuttwvvuvttuuvvwuuvvvvttruuwuusvuuttusttuututrsrsvutstrrrsrqqrrqprqronpppnpnnopkooopooonnpoqronprpoqpmlllnopnmnonlmlmlnnlklmmo
polmqomommmlmnlmmmlmmmlknmlllmooonnkjloknpnpqppommlkgijkehkgejihjljf��}�~~�������������}vcPX_`bca``caadcbb`fc`bbdcefcbdcc``a`a`a_a]]\^_\_`a`baccbba``acghebehijkjjgfdfijhecc_\]a`_``a`a]_^_`[[X\Z\[ZV[ZXZ[]]^a]_`bcb_\]]_b^\^a`]Z[Z�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѿ�����������������������������������������[B9W��������r]h����ʯ��ƺ�����������������������������������������������|��}����������������������������|z���}������{�|�����y{uzx|y|uyvuyrrpzttvzwsywyswuussrqtntoplosrssvwnruswtrttvtotustsqvprsvsrwssvrxxuurutwvxvvuwyywy{wz{xxzyyvzyyyyyvwttvzyyyyxyxuv}xw{{zz}|zx{yzwvr����~mrxz{{|y|}}�}�~��||}��}��������~���������������������~~~}}}��}}�||}||{yyyvxyz{|{||yz{{|{}|{}�~}}~{|||}{{{{}||}|{zzzyx{yyz{z|yzz}||�z{}~~�����������~}~{zy{}z{zz}}}}~�}~~}}~||�����~~~yxurxyyxy~}~~�~~~�~|}{yxzyzz{{yyzyyxyxyyyxzwxzxxxwwxzwvvwvvy{xwyy{~vuvuvxzvwwxxxzzzwwxvwyw{z||wvwxxy}~xvxywxzwvwuwwy|zxvuvvwtuutsquuuutqsrsrutqqrtqrqpooppusprsttustrvtuwutttsrsutststrrtsrqqsqpqrportuspqorsqppqlmpooopqqsqomklmoonnmoonmmnmnklmoqnrsrmlnmmnonmlkmnnnolnmlkmhihijkmklnrrruutrnjjliiilmnokkknjjlkklmpmleg��{~��������������~v]LQY_ZV]][[_`_`]`^abbcefhggeefec__^aaaa`bb`cfd`addbcfdhihhedbb^bfedbdegijkkiieededgfdegdccba_^^^_^_^^^__^\_^]_][\]]WUUUUXZVUXXYWVTRWVVXYZY��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������WRSa��������y[e�ƿ�̼���������̷��������������������������������������������������������������������������������������������������������������������������������������
�����������������z����~��~���{����}���}|���||�~}��}x��|z�{|{~xzzyxs{xytqvywvvvwmoxxrqsqmsutvmutuqsvtopn{��}�}jemnppsnqlqrtqpqqunoqspuplnoqkoproljhpklmooprppuljqorilkqgonqjqrnmmnloqtpsmpuqutssstrsqmoqouoopslopurtnsrssmliomqoroqlqpnonnoonnnmnrunnnpmmppopmmmnjmmonjiggklgkhlmkijffbekknpsroqtvvxxywttvqtsxmusxzyxywsutsqrrklnnnrqpuzzzxuuspsstrqoppqqtrnmlnpnmmnnonoroosqppoqqnqrplnlmorpqpoonnqpppqpoqoorturqnottvsmqtuopusqsvwuwwy{ututxttvuwtxtrtpqttvvuuussuqqpolqusqssutsnnqqpqooomlokjljlnmmmnnmlnoppqnmpqommmrqnnmmnmlmprpqmoponmopmokllllnpnnnnmkkkklmjkkklmmihihighhggihiffigdifhjihjknmpspnmljihegggnnnoorpljjfefkhjkfeegdgZf��z~~��������������v`WY`ecedegkfhffhiieegijhhghfdghecdcbbcadb`]^][ZZZZ]YY[\[cfghigfb^`fcbbedcfeehghgfdebdedddffegeddcb`__`]Z^_^^]]Z_^]`_^]_]^_\[ZYZYVUVYWWWVSV�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ξ����������������������������������������~M__\��������Wf����ƼǸ��Ž���̶��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���������������������~��������~��������{����������|���|}��|��z��|{}��{���z~�����|y���}���|||�{{~�x��uvz�tx~�yt{|~vz}~vvy�uoyytqvv{stt|sqsytnrrsptvqkopyorqrlnnonppjojjnjnkmqgfnmhjnhlanek_kakgngebgkblhahiifa`ehac\\Wc`e\`ei[^bca]^`V[afe_d^`bdbeac]_Zb\\W[Xbd]]a]__X\[`][Y\_c_X[\XU[YVUYXTXZ^YW]]\YT\UVXTRRUZ_UWVZUSTVT^[XQ_ZXXZXYTWUXWVS]Y[\]YXY^U\\\TUWY[Y[\[_`^Zb_]`a_^]_a__^baba\_^]\c]]]^`a\\_[]`^^[WZW^^X[][]^`\][\_`\\_[]_a`_]]__ba]ccdb_]\][^_]]\Y\^^[^Z^_\_^ad__a\][[[[VYZ^]bdgcdied`]\\[ZZYXXWWWY^^`dcd]{��{�������������~x_RY\cacbbc`baa]]_bab^accbbbba`\]^]^]\ZY\[\\]^_`a_`a`aa]W\\`fhhihbb\_bceedea^`bdei
hkffeecb_dbcc_dccbbccddeabacbac```_a_`^]_`__]]\Y[\[\ZYZYY�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xQi_^��������Zi��ųĿŹ��ľ���ŵ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������������}��������|��������~�����������������������������~�������������������������������������������������������������{���}��}��{���|���y}��zw}�~{�~{��x}��u�|w��~{|�{}��{~x|~x{{�x�y�v~w{�|x|}}t�}|||zzt||zqruyqyvzszrutzurwopvvruurmnnzotqtspnumqruoopsmmljmnmnmjnnnjoiokhhpfllnikimglfkfnjkblllfgikbgfmghfgcgfk`eeidcjj``cd__dg\]cgcabcb]cc^[`fb`^e`]`e_]bd`Y_c^W^bY\`^]^^^Y]XZ\[]\YW[VVWQTTTZ]HEFKFE=82:GLIJJFLCKTPMNJJKOLMLOMKLKLHKKKHJDLLLILMKIILIKIJOMPONOPSOMSRPIOTUUUPW[e���|�����������������}tYHSZZZ`caadda`]cdb^]`cccbjjjccbaa_]][\]^``_ba`_bddc``accb^\[]_fghjfeca`_behflfge_cddecdeffecbbb^\a```aaa]]_`^_`bbbccccacdaaa`ce``^]^ba[[\]�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oNhb^���������Uc����ø��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����������������������������
��������������������������������������������������������������������������������������������}�����������������z|~�zz�}|}�||��{��v~~�w{��yx��{v|�~x~�xy}�r{�v|~|y|}{v||~xx|~x{y{wxxyvwv{wxz{swy{vswxtquwzppuwrxwzrqvwjqtxnquvtquvqmryrnqspmtsrjtsojtpsoqqnltpohimqmrnvooonlqilisnolnklhnfnmmmlkohijifipnjgjojijjffijigklihjljilnkhjmijkthehmhhijggjhcdggeeee`aad__`^\^llFCDCC;0#)<BGIHFGA?TTURPORRNONMKLLLJKKKGLLKIJKKIIHIIIJJKMMOQQPPORTV\]^TSY\[\\bs����~�����������������}rV?EIMPOPPPTWUTPMPQURSSWXWVQSRXWVTSSSTWXWUUUSUSTVYYZXWYXYWXYXUPOW^abaaa\Z[WWWVUXciec`_]\\[\\]]_da^`a__adf```_`^^__]]][^[\\Z]^][[\`a__`]^d`[����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������п�������������������������������������������������������������fRjeZ���������^j����Ͷ��ø�½Ų�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������������������~��}�����~z}~x��}�|~��{~�{~}�y}��|{�}z|�~z���w}~�q��x}}|}|z�|w}}�x{~�uy~�t{|{www{wx{}uuyzsr{{ppx|xqtvwkv{zqtwwoqwyqpsvssszsps|rnprqmsurlrvuntsqkosrmpqsikqulonqknqqhnonipoqjjmpkmnpljnojlkmjjhljklogijlhllkfgkijjijjkljlklnmmmkgnligljfhmijljfdehfffe``bcb_``^_nlMEEFC:-!';DEGGGEBGSWTUPPTPLNMLIKLMIKLJHMLIGKKJIHHIHHIJJNQNQSQNNRTW_ac_]beeebi�����{������������~saRTUUTRQQRTTNNRPTRRQKMQIOOOPPPQOMLMMLFILLJGKOFGIMGJLRPPRPSTRPN=BQWVTVWUQPLONJHF?<HSYZZYVUVTSTSPOSVQSNLJKSVTTUZZ\ZWYZXYXWXWVXXXYVXYVWXVVVSS������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������ξ�����������������������������������õ������������������������\Rkd^���������Ya����ɳ��ǹ���ı��»�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������~���}������������~����������������������������|�������~��������}�����������}���z����~���~���|���|���w{��xz��~|���|���{���z��z�{}|~~|�~}~�v�y�u��|�}�|x�z|~�z{~~z|yp}{{xyx~yyyzvvz{rsyvvu|{xqxsypzxwpvvtqvvxrruxsqoustvtonrqrmpstpooqksrsmnrrmooojoosloptkoqphmmpkommkklpikoqmlnphlloihkmjilpihlmhginggkljjjmjjnmjjopkhmmjkkkggkkgiilggiifhjiaaacfbb_ac``rlEFFFC;.'>EEGHEDDLUYQVURPONPMMKKJLMJGIJKIIGIIIIHEIGDHJLOQNOOSQPTXYbfhgghihhim~����~���������|qb[]\[Y[^[X\]YYZ\Y[\XW^\XWYYXVXWWYZWUWZVVX[WVZWTY[VTTWVWTUXUUTXPOTWSTRPPSQLPNPMKKLJLOPPPRPRRQQTSMPPLNLKLJGFGIMLOPQOQRSQTPNNRRNOLMPNPKMPPNMO���������������������������������������������������������������������������������ο��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѽ��������������������������������¸������������������������ZVphb���������Zc��¿Įȼ½�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������������~�������������������������~������������}���|���y|�}~���z���v���}���x��}~~�}z�}x��w�|�x�~�|�z}~�z�}x�
}|w}}{uz�{{yzy�~|txx{xwvyuu|{zv{vzruv{rwrwqruyutwxstuwqruysotvqmurqoutrmnoqotpqrtoplplrmpmqquoomromkqpolnmmknlpnqnnmnmqlmjjlkinmnlkkihjknjijljlmnkillkkonkjppkjkniflliiknhgkjdfji_`cdcbabd`_csoIIFGB9.!*AHGJJHECFX[QRURONRPLNMKGJNKHKPJIJKKGHIHGIJHIJNLMPOORRRQV^giljjkjklnu����{����������~��|qb^__\]Z^][X][YW[Z\]ZV[YW\^[XXXYT\[WUXZUWXZWY\YVYYUTUZXZSWZWVWYUUWWPVURQSTQONSJQPQNOONPSVQQRPMSSPOPMMRQNKMLLNONMNPOOQRNPRPNQSNLPONLRLNOMJIM�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ����������������������������������������������������������WVlg^���������Xi��ƻĴϺ���·���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�y�~�������z������|���}�~}��w��z~��w�|�{~~�z|��|y��{u�y||~x{~qyy{ty|wyy~wz{}wt|}vpw{zqswznw{{osy}pqw{tsuxpou{qnuvqnvvsouvplrttkqsrlqqqlrrpnpqrnrsqlpqpiorpjmoqlmnskmonjknoiknnkjmqjkmnihlojkklkmkrnllkllklmmllnoljhnkgikjkllhihjgijhdeefeb`_ba^csqKHGED9, 
(=GHKJIIEFWWQUQRQOOPMMLIHNOJFIMIKILIHHIGHJLIJLNMPRQMRTTPT]ikmkjnmnopu�����v������}����~��zse``^\^\]^`X]]ZWY\[ZZXZX[]ZYYZZZX\ZYXXWVYVVWYZZYWUVWV^VXVXYYXXXVYUVVYUTRQSQRQQMRRPNNORQPQPRRTNQSQNQQNSSQNPNKNONLOOMPOROQRPNPSONPOOLOKLPOKKM�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¹������������������������UWomh���������Te��ˮǸʶ���´��ɮ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����������������������������������������������������{���{~�����{~���}���y�����~�}���~~����}�|�y~�}z���}~}}~|}{�{~|y{|z|zu{z|yzxvvzzw{|rtyz{wst~sxxvouxypwxzrsuxqqtyqrtwstvtrntupnttqksrvqqqrqrqplqproqqoknrrlrosjnotnoptkoqnhmpskkrslknqgjmngjmnllnplmoskinplloqlkmmklonimkkgjllkjiieiikgjhgddfea_ba`cvnIIGHF<. 
*?GGJKIIEDTUUWOPPPLNNMKKMNMJJKJJJIJHJIKKJIJJJJNMQRRPQVTTXcknqpnpponpv�����{�����~��~}}��zsd_`^^_[]^_[^`]Y[^^[\\Z[]]YZ\\Z[Z[XZZWXZYVVXWXZ\YW\\WYWZYXXXZWXXXSVWXTTTORTURQPRQPOQRTRPRQRSSRPRROQUOQRQQNOPNLNONOQRPSRSRRQPPOPOPRONNPONJKM������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ӿ���������������������û������������������������RTmkc���������Vb��ǴȼȰ���Ʒ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|��~���z���w���s��{~�{{��~y�}~���z���x~��w}�z{��{{��}z~u~y||}w}zxy}}xx{}{zz{yysxyr{{wn{xyuuvytyuytvuwstvyrwuvrsqvspstoosstqooqqvsrrtsrnooppqoqoporoplpmphpnsqqmngppqhmlrlknnjmnslnjmhllojjjoiknojiprliltmjlnjmpofilmijomiijjeijjghjggedeccdabesoMJJIF<.
+AHIKKHEBHTSUVRQSQLMONMKOOHIMJHMLIIKKKJLKJJIJIMNPPSTSPPSValorqqqqppqw�����{�����������{qa\`_^_^__][]_\Y\[^][[\\\ZY\\[Y[YZXZZVWZ[XXXVY\ZXX[\WXTYYYVXWXVXWSVXWSUXSSSTQQRRMPPPPSQNSQRPRSQPSRNSTPPSROLMMKNOLOSSOSRNQRROPPOLPQMLNPLNMLL�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ��ż������������������������\Xmjf���������U_��Ϳù��ȼ�ʻ¶�����³�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������~���}�~�~�~���~��~|���y��v��|~�x~��|{�y}��x���y{~�u~~xyz�tt}�uty~{rwxqz{znxz|tv|zlwxyptu{tsy{svvwsqqwurswqovutnurpmuxsqstrlpsqlprsmoqsppsulopslnrrlomrjonokponmlmnkomqjmkokplnknnnlmmmmpnlktnklmjnopklmollmnjhjlijhjhjhggfffecabadylOMKJG=,
-CIKLLIFDIUSTXRRSNLLNMJNMMIJKJJKKGHJJKKMLIKJKILPPORTSQSUW`oqrpqrtsprx�����|��������������|pa]b`\]^_[\]^]]^\[\^Z[^[[\\][[Y\[\[YYZYZZZ[[Z][XXZY[XZVZVZYYYZYVWVXVURQRSUTURRSRPPQPRSRRURQQSTSRUSORSQQRRPPNPOQQOQSQOSTRQQSRPSROOQMLPPNKNNM������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ���������������������¿������������������������VVnkd���������Wb����ó��ž�½´����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|�������������������������������������}�������������������������������������������������������|���}����������������~������������~�����{���~�����������{�~|�}�}�}�}~}||~}|z�|�{x~}~t~}x|x|w{}tx||{wz�yvy{zvwu�vuvwwxwvsvt{uvxxsvsssspuuurupltsurutsksusqttrmqutpuutnnqslnqulnqnkosocqqrlmqoilopkmookinpiimpgknollopkknpioorklnoknnokjmneimnkklkhjjjhjkhdegfgdbcccbdvkMIGHH>+
-CIJLLHGDHUUSUQQOMOLLMMSJIMKJJKIIIIIHKKLKHHJJHKMPRRSSPRRUcrsrpqstrrsy�����|�����������~}���{o]^`a]\_^Z]_`[_^]\^^][]][\^]\\\[Z]\ZZ[[W\\[Y\_[YZ^XWVYZ]VYYY[[YQYZZSVUQSVXTSSSRQRQPRSRSSTRSSTSTSSRQUSQSPRNSNPQQQRQROOSQRPOSTOPONLPPPNNNLNML����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ePZrlb���������[e��ɼĹú�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{���������������}�����������������z���}���}�������}{���z���}~��|�~�z~��||��}}~�x�y|~�{}{zzzxyz~zzw{x{~wtz|{xzzxr}zxuxyytvyyrvwwtsvxssvvsswysotssqsvtstutttturrqrqqpstqoqrtnqnrmsroloornwoqmmnrlmopgloqkknqjjosnknsmlpsijmrirssnqqpkmpqmlopikoojklmijllikkkcfghhebdebbc{mPKJLI>(
/AHILMHHBHVWRSQONNNLLNNMHJNKIJKLJJKIHLMJJJKLJJLNPQSSTSRVYeptsrtvusrsz�����~�������������xoc````___\\_^\_[[\a^[]\^\\]_Y[\[\^^ZY[\Z\\\Z\]ZZ[^W]ZZ[]WYZZYZ[VYYYSVXSUWTRUTSPRUTRRTTRUTPSTSTUUPQTSQRSOQQSMOSPRTSQQRSRTQNTUSQQROOQQMNPMMNO�����������������������������������������������������������������������������������������������������������������������������������������˿���������������������������������������������������������������������������������������������������������������������������������������������������������������������¿������������������������LL[rn^���������Wa��ȳ��Ŵ��ƹ�¹»���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������������������~�����~�|���}������{�������y���|���}|��y�~�z}|�~|�~z}��{~��x{�uz��xy|�vw{~xxz�xryyuz{zsy|{nyyzswxxru{{ptvxusw|rsxzrtu{qprtspuvtnsvuqsvvorsqnrssppqsorstktqtoonpmrnuklnoilpnipimlokollkmnqnlmpmmmoikiplsprpqonknptnnkmjjmnjklnkijkikkideeihebdeccc~jMJIKI>#
/@IILMJIAISUQQSRNMMNKKKKHKNLHIJJIMKHGLJIKJKJJKNOQSSSSSQU\gsuttuvtqqt{�����~����������~xndb__aa__`^\]^]\[\]]\]]^]]Y\YZZ[]]]\YY]]\ZZYZ[[[\\Y[XYXYY]\ZZY[YXXWWYVUVWVSTUROSSRRTTUSRRQVTSTTVSRSRSTSNOSQOOPPQQUSQVSQSSORTQOOQNPONMMOMOMO�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������o=J\ppd���������Y`��ȸ½ɺ��ž���ĺ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|������������}�|�{���|�~�y���������~���~|��|�x��}��}��{}�{z|{y|�w{{�{yy~zxzyx{|vy|z|x{{|sxxzsxxzvyy|tvuyutt{rtwvrtsytuspuqutuptwvqotvpsqtqsvtoqtuopttitttoqrrmrrxljprkqsujmnrmlnpkmnokopojnlpklmoinqrjostoloqjnqsjlnlkooljkkggllhfiiihihgdfecdf�oNKLMI=$
2DJJNLJFCLTTSPQSPLMOLMLKLJHJHHKKJLJIFIJHKJIHKJKORRTSRQQV\hsutuvuusru}�����������������~��yndb___a^`a`]\__]\]\]^\Z]b_Y[[]Z\_[Z_]YZ][XZ[Z\_]Y\[[WZWYZ][Y]YZ[XXXWYUUWUUVTSTTVTTUVTTURRTUSTSTVUTSRSSQPQSRQQPRRSTTRXUSSRQRQSRQQQQNPMMOLNNN�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Y<IZqqf���������Qc��Ʒ¶ɷ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���z����{���z���y���{���{�������~~��w���w���w���{~��||���y~~}�~|z{{�y~z{~|{v{|~z{||zytzyt}{v|z{uvzyrwzzuwz{pvxwtwvxvuvyuquwtruwuqsrtuvvrruttmqsrqsrqpspppnlwqspoorooouponqlqqpommummnoholnkpqpgjoumklnglqsmorwklpqlonrikoliooliimlhklhgjljiihhefedgjgOLLMK9 
0DJLONJGCHPTTQMOOMJNMLHJLJGIKIJKJHIKHILJKGGILIMQRQTTROTW\jtwuuvvusps~�����~��������������yma^`a``_b`]\\^]]_[]`^]Y\`_[\\\[]_]\^]X[\ZY[]\^[\Y]\YXYV\][Y[^Z[ZYWXUYVSUSUVTRSSUSRRRSTURRUSRTUTTUUSSTQQRSPTSQQTTPQSSSRSRQTSORTSPRRMOQMMQPKM������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Û���������������������KDH^toc���������Td��̸į̸Ľ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|����{���|������{���~~������}���|��|w���v���z��|}��|�w�y|��w�xx|�vz|�yyy{xw}xu|�ut{{zvw{{pv|{rw{}ouyxssvyqty|tttztqs{omsvuquysotvtosvrlsxsmruqntttkrrqlqsqnsrqkqnnoqponrnonplolomqpnillsqnmnkopsorovnmovnoknjjmmlpnlilnnjikkiiikjhhifeedej~jTMMNL;"
2GKKONKIDIQVTRNOLKJNLLILLIGIJFIKIGJKJIIIIGIJLLQQOOTVROUXVjvxwututtrt~�����|�������~������~xl``ba^^`a]]`^]_a_[^`^^[]Y\^^Z^`_]__^^\\Z[[[[\\[[[^\ZW\ZZ[\^[[Z[YYX[XYXWYVWVVTRSUSPQSSSTTTVSUUTSVVURTUQQRTPTSPQTSQRTSSRRQRSTTTSQQRRPQPNNNPNM�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������CDFYsod���������Xc��ʾ±Ŀɺ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������|�������}�����~������~~~���~�}��~}�~}|����~x~�z~��t{�xxy�w||~yyw�{yy}vw|�vtx{xux|zrw||qyz~vvwwtuvwsuvzsutwtqt|tqtvurxxspttrnqtqmswsmqtslpusosrqkorpjqrsksqpnqqrloqqloqojnopkjnrmnpqlmoolopslpopkqqoknolkmoqjikmjlkjjjjhehifhgfffddh�jQMNKE:! 
6HJKMNMIGMRTQPPPMMLNNMJKKIJHJHJJIIKJJJJJIJJILKNRQOSUQQUVTkuwvvuvutsv{�����{�����~�������~wk_^_`^___Z]`]]c_][__[[baZ\aa]^_^[]^Z\_]YZ]\X\]X[^^[ZY]][X\`XWZ^VY[XVXWVTVVTVUSSUURRUUTUVQSUUTUVWTTSSUQRPSQRRSRRRSRSVURQTSRUTSQQSPRQQOPRNPNO����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������l>BDYnoa���������\d��Ƚ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z�������������������~���{�������~���{���|���z��|�������~���{��|���z���|��y���}~�z{}zz~{~|~|{{|}y�~}z}s}|}{xuuxxzxwuyy{vyxywwruyzwuuzutsysrttuwtvtutuvvrtvtsrsssrsutqrrtqqtuprnsnsnspsqtmvqqnplrkoqtmoookooqkknpjlqqkmoomnqrnorqkoqqkkonilorjimmhknnijliciihhfdgecdk�eRNMNI<
7EJLNMLIGNRRNNOOMLNMKLMIGJJHIHJIIIKHIJIJHIJJOLMPSSVTSTUUZqzxyxwwvvtuz�����~��������������vk`a`ba__`^_^__b]\]`a]]`^[\a^Y_^^]^^[]]]Y\^]\^\X\\^[[W_^\Z\^W[]_[[[WVXWUUWWUWVUUUUTSUVSTVUTVUSVYUTUUSRSUQSSQPTTQQVURTVQPTTPUUPPQSQRTQMOPNPOO���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~XB@BWrn\��������[_��ȷ»���Ǽ�����ù�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������~���~���}�����|��}~��}���{���w���~��{x���w~�~x}�z~��yz�yz~xy|�tz|wwwyx{|tox{ztxz{ux{}svz|rsz|tuu}stwzuqsvsqvwqqrwuqvvsqrssqssroqssqstpnwuthrrtoqspnsqrpppmirnolrlommnqmoimmpkmlmknmooomspppplqpqklnojlpphinnemnljhklchihhgeedd`p{ePOLNL=
5DJLRNMKEKQQNMNMJJNMIJKHGHIIHIIFGIIGHIJIKJIIMNOOTUSPSVTTXpxyywvvtttv}����������~������~��vjb`^`cc_^a`\_``]^`\___\_]^]^^^\]^_^]_[\[\]Z]]\]][_^Z[^^]Z[[Y^[\][[UYYYWUUXWUTSSRVUVUUSSTUVUVUVXVTRRSTUTSTUSRSSSRTUTVUQQSSRTTSRPSRRSPNPQOPPN���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kKDADXtn]���������_^��ö��Ƶ�ʿ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������������}������������������������~������~��z���}���~��||~~�y}�|}�zz|�{{~�{{w�y{z}uyw�|w{}wr{{zv{{zwvwztxy{qxzyqvt|rvvyvruvssvxprrytuyvsostqntwspqssqqstpsssnpstnqtplrqqlqrpjossjmorjlprlpnnfprqhmpoloqoipqqkqqqoolonmlmnmjklojnkkkijjikjiihfeeecq{gUNMMN> 
6FILRPMIEMNPQPNNLMKLJJIIIGHHHIGGJHJHHGKKLKIKMLOPTTSSSUVVYqwyxxwvutrv|�����}������~��wkca]_`b`___Z_b`]^`Z_ba\``_\`c]Z^_]]^_Z]_^\]]]\^^\\]\\\Z\ZZZ[^Y\\[XZZXVVVTWWSTUTSUUVTUUURVWTVWWVVXVUTVTTTTSTTRQRQSUTTTUTTSSVUUTQQRROPPNOOPPO���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ����������������������hECAEVuq_���������dc��ű¿ű�ƻ����û���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���}������z�����������~��������}��~���{�|��{���|�~z�|y�}||�|}|{~~~z}y~|~{xux}{{xzw|xxz|vyzzwzzytwt{xwtvpxrzvvvvutuwuvpsuuqswprqtsqqsrssprtpqqursprrtsrmqqpmporlqqrjorsmloqhmoskommfqrqhnonlprtjpttkmqpjmknklnlkmmljnmjijljijhijiiffeccfq�eUOQPN< 
4GKMQOMJCLKOPNKNPKHKMJIKKHHIIHFHKGGGIHJLKIMKKLPPPTTVSRWY\nwxyxxwuvsu|�����~~���}~�������xlaabc^aaa^``_`a__`^a`_^^__]]^[]_^\_^]X\[^[^_\[]][^^ZY]\\[YY\[\]Z[VZXYXWWTTUSSVVSSUTSTVVSTUSUWWTXWTTVVRSTRQTTQORSTVVTTVVUUVWSSTQPRSKPRONSPMN�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿��������������������mFBACTtrb���������d^��ȳ�����Ƴŵ�����¯Ũ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���y���~������������~���{���}���|���x~��}|��v��{�~{��z{z{||�vz~�zwy�y{x�|tz}zux}}sy~{sz~{sxyysuy{qxrzusuyustyussutstvtorusprusptysoqurntuuotutqqqsnnqtnrqrnroqpppmlspnlrlonmnsookqnnnppqkrpqmloqkonqlnnlkmmkjokjjilihichjigfeeedfrfVRQNL;
!6FKNQOLIEMNRSNLMLJHKKFIKIGHHHGGHJGGHKJKLLJMLMLOUOQSXTRWZZqxyxwxxrxvw}������������~���������yk``cc^`aa^^b``a`^_ac[]a`^a_]^^`__`b`[^a^^^_^^^\_`_[Y[`_]\]^]\\]Z\][ZZZVYXWUTSVVSVUUWVYWSVVTYX\SWXWTUWRVSQRUSQOSVVVXTVVUUUWVSTTSRRUQQSNRSNLM��������������������������������������������������������������������������������������������������������������������п�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ž��������������������oGCAAQsra���������dZ��ƹʿ��ƿ�ɺô���â³����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������|�������������������}}���{���~���s���|}��w�||��~z}�xy{�y}}}xy{wvw~zyx�}tx|ywz}zsz}vt|}{sxxyrvwysxtyutqwuusvsrutqvqxroutonrutpsvokqvrlrutntsslrsvpnqrmpsrkqqrmnppjprphosnilqpkpopjjpqkpqtnooomqoplomnkklmmonkjlmjkjjc\hjigfgfedes�eUPONK: 
#:ENOROMIENQQOLMKKKJKJDHJHHIHIGEFIGHJKJKKMKJKLMQTSQRUTVX[Vlwyywxvrxww}�����~�����������xi__`a_ab^[^a`aca_``a[^a_\`_^\^b_]`b][^^[]^\Z`_[\`_Y\^__]\\_Z]\\Z]]YXYZSWXVTWWVVXWVWXVVWUUVWYTTUVVYWUUSVRSUSSUSQRUTVVWTTVUTTTSSSSPQSQQPTPPON���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѿ������������������������������ÿ��������������������r@C@ARtra���������i^����ʻ��ž���Ĵ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{����������������������}��������w�~��~~��}�~}}~}~||�zyx�|�|z|}zzz|u}z{y||yxu{{{zyy{xvz~zxxzwyxztvuwsvxyuvuwstuwtss{txvsuutsrststrqtsrnortrqsqvssstrpopnrrpnrrpinqrjoqqhqrnhkpojpoojjotiqurikrumnpoklmlhlnmlookilojhieZ]jkjgffggddr�fUPNNK:
%:FOORQNIENRPMNNKKLOKKHHGIKKHGGGFIIJJKKKJLMKKMNPSPRSTVXWY^pxzyyxutvvx|�����}}|��������������xiaa`a`aaa`a``bbba^__]^_aaa_^^^a__`b_^^]]_`^``^Z\aa[__^]_^^_S]\[[[[WYXWUXWUVXWUUWVUWVUUWXWVXXVVWVUWYUVUTPSUTSTTQRVTUWWSUVTSSWSTVUQRSQQPQKPQP���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uEDAARpt\���������k[����ĸ�����ķ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������x���~���|���}���|�������z���y���{�����{|��||��}|�~w~|v|�|x{�yv}�uy{�xu|~wrx~}vw}{uv|{vx{xow}{qux|qsw|stvyptwytsu{tsxvpqttqqsvtprwsopsqqttroutsorspmoqqlosropmoptmnoplqpoklmmlqmnlkmunqqlmmnslmmmlklkhlnpnnomihmkjb[V`ljiffhggefszcSPNNK9
&<GLOQRPJFMSQMPNKLLMJKJHHIJIEFHHFGIHGKKKJMLIIMMPQRTTRXXUW\tzzzxwutwvw}�����}}|����~�������xjcca`ac`ccb`cbaaa^_```^`b`_^_]__`^b`^Z]`___a__^]]^^Z^`]]\[^Z_\\^[XYZZWXYUXXWVWUSVVXVVVWXWWWXXXYWWYXVUUUSUUUSRTQSWTUVUSUTSTSUTSSRORPORPOLNQN���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yEDACOln]���������k\��Ķùɹ�ŸǮ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���������������}}�������|���}���}������yz~��}|~~{{��y|��x~~�zxx�zz��vzz�uv}wuy}}ww|zvx||vy{ztx{xrwv{ruu{utuzsvvyjtt{otxvrqruorsurquwpnrvqprusmturlqwrmovpkrvpiptqjnrqlmoqnnopjmopjmqokqrsjlstiropmlnnjlpkkqnlkomkkkh[XW_iiifgigfefx|_RSQOJ8
&;GKOQQQLHMRSRQLMNKKHIKFIKIFGHHHDHIHGLILKLLLLMMRSSTTTUWTX`uz{zxxwwxvv�����}��~~������xldcaabb^b`__bb_aa`_ba^]ad]^_`]\ab[aa_Z\_]]a_]_^^[]^[\_]^]\]]^Y]`ZW][YWXYTYYWSWVSTVWVWZWUXYUXYWVXYSWWUQTUUTVTQSSTTUWUUUWUTWTUUTQRSSQPTOQPQQP�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿��������������������|@D@@LmpZ���������k\��ɴ��ϸ�ȺƯ��ƹ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{����������������������������������������������������������������������������������������������������������|���|���|��������������|������������������������|������������y�|��~w}�~{~�{||}�}{{t�z}|{wzzu{~zwwxz{zuw|zxwz{yvywzyusytxuyrvvwtvtvsvnwuzsyvssrpupvttrqsspqtumrptqpusslovrmoskkqtqkprnjprollppklppjnnmkmoniorqimtvhpnpiknnjkplknokinmgehgWVU]iigffgfffeu{bSPOOJ7&>FMNRPNJEPQSRNJNOKHKJHFIJHHHIHHGHHIMLHJLLKNMLLRTSSTTSVVZcty{xwxwwyus�����~�������vjacbbab_c`]`ba`d`a_a_``_a]]_b_^_```_^]\\YZa_]^^]]^\[]^\]^^^\\Y_\VW[YVWYVVXUYUVVTUVXUWYTUYYVWXXUWXTSXUQUVSQVUPQTTUWXUSUWSNTSRTUPQUSQRSNRPRNO�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ���������������������eUIGOjt`���������lW��̷��ϵ�ż����¹�������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~���z�����������~���{���|�����������}������x�����|��|{}��|��{z��|zx���{}~~u{|�zyz~u|{�|xy|xwz}zuy|zty||sz|}xsvyqtwwnuv{rsuyqtuxsvxzrosvqrrwsruvrpsusrqsqqqssqtrqpstnpqqppoopopnpoqmjjommorlqimjmmpnoonjlmtnrlqikmnjholkmmjkmjhgii_UP]jjfffggefew}_VQMPL6&@JMOQMLIBPQSROMNOMKKKHGJKHFIJGHIHJJLKHHKKJKLJJQTRSUUTUW[apz{xxxxvwuv~������~~������������wibdcb`ccc`aeb```_`a`[aa_^__]``^__bc`]_^]^_``_^]_`_]\__]___]\_^_[Z\\ZXYYVYXVYWUWUUWYWWVUWYZWVVZXVZRVXXTTVTSTTQTVUVVVTSVVVUTUTUWSRTRSSSORQSPP������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ye\os_���������n^|�̺ǸŰ�ó������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������|������|~���}������y��|z}�|y��vz��|z}~~w�~y}~r{~�zw|�sz}�yuz|xxz{wqxzwtx}|q{}{qswyquxxk{vwtts{ruvzrvw{rptypotxqptvpnuvootuqqrutnqwrkqvpmrtojorolnpniqonhlnnkoqpiklkgqpnklmmjrpsjomkmlkilknmlmljiilgkh`S\hjheefgfefw~]WPLPN4
(>JMNROKIFSURQOPONNNLLJJJKIJILHIIJHHGIIJJKIKLMPSSRWTWTVXZ`syyxwyywwvv|����|�~~��������~vic`bc_dbaabbaaaa_ada]`a_^aa]_`b]_bc^^`^^__]``^[_b^[^_]__[__Z^_][[^[\[ZXX[XUXXUXXWWYYUVYYVXXXUZYWVWXTTVUVSUQQRSRUUSSWTSUWVSVVSUTQPSRQQPPPOPP�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������k]po]���������o\s�ʿ̺���Ȩ��Ǳ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�|�������������������|�y}�}w�~}|}|}w��y|{u||�~{{~v~|uw{{xzzxwy{zvxzy{t|xztxvxuvw{s}uttttzuvswuxxxrvrxsrtwquuwprwumquwopqvuqoupnrromqrqjqqpnnqolqomhlnnjoslgooldsuqinonjprrilqnkjmmjlpmimnjjmpgfknb`jjh\`fhfec|zaTSORL5
)AKMPRPLIHRXTPQRLMMONKJKIKLJGLKJGJHIHKLMLKKMNOQQQTWUVXXV[euzzxxyzxwww}����y�}}���������vgaa`bbebcaabbccaadda`_ba`ac`_^d`bb`^_`[^_`[^_]\``]Z_aa`__^^Z\^[[[]Z\\[WWWTUVWUWXXTWVVUXXUUXXTXWUUYWSUWUUTSPRSROTYTSWXUVVVRVXTTTSPTURQQQNNSQ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ä�����������������������mZnpZ���������n^q�ǿ˸���ɨµ±�¹��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������y����������������������������������������������������������������������������������������������������������������������������~���|���|�������~~������|���}���{���}~��|~}��y~�~zy�~y|~�|~}w}}�z{|xyxy{x|{yx}wvw}ztw|ztxw{uxw{stxwtuy|otuwquuwntwytutumruurttursruusqsqsttpqtumpqqqnrqpmrnnnqqnmllnmoklnjipnlmqmkkqpojmokimpohmkjhklkijnljjolikkedjmeehheT]deffg{zaRQKON:
(BLNRROLIIQWURRSLMMONKIKJLLIFKKJGHHIIKLKKJLLKMQRRTWUTXWT[duzzxwyzywvx����y~�������~��~ujee`bccadb_`cbacbc`_b__aa^`baabdb_aa_^_`_`^_^^``]_]]``_`_^_[\][[[\[[ZXUYYUUUWXXVWSXVVVUXTTYXVVUVYVXUVVVTSSSUSRQTWTUWVVXWTTUWVUPSRRSQQQPQPRP������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������å�����������������������kWns]���������q_u�͹�����̭ͭ���ů���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������{�������}�����������}���{�����}}}��}��{y}�|t}��y{~�z�~v{{�vxy}uy{xxy�wuv}ysuyyuuy|wyw{tvxvsvw}nwuurstukvvxrutumquvorstpprusqsupovxposxlhrsplrtngormlqvmfnsnionlilonjknnjlqkgopnmmqlkmlliklmklmkkmkhlmlihhiiliijie\_geegix{YTRNPT:
(ANPRSPMIIQUVTRPLMNOLJJLJLLJIKIHGFHKIJKKKLLJKLPRRSVTUUWW\fv{}xvxzyyvw~����y�����������~tidd`bdbabb^abccbbb_ac_^`b]]aa]aca\`b^^`a^__`[]`_\_`\`__b`]\\]\\\\^^\WXXXYWWUUYXXWTXVWXUUXWVYYSUZZTWVTQVTQQVWRTVSTUUSVYVUSVSTWUPSUSQRRPRSPMO������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¨�����������������������l]ls^���������s\x�Ӵ��Ź�ȹî���ħ��Ȧ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������������������~����}����������������|�|�}��}}w}z}~~||||�{{t}y�y}z}u}z}yyv{yyxzxwvw{zxwyyyuxvvtuwuoxryttsutzrzrutxququstpssurprsrqpxoputqqmuokpqnnqpkmprimqrmjoqlfmnihmqmgkpogkpkeqvlglrmikojckpnflliimlihknigimiiijigaX\deeffvz[WRMPO8
(CPOSSOMIGQTWVPPONLNNLJKLMLIJLJLIGJIJJHILJKMNNORUUUUTTUX[exz{xvv{yyxu~����v�������������~thbcccbdfdb^aaabbbcabda`]`^_aa_bba]``^\``]^_a]]__^]_Y`^_]b]\[\\\\[^[\[YXWYXYVUWWWXVWUVZYVWXWYXUUWWSVZVTWURRTVSUUPSWSPUWRRVUQRUSPSSRRRQMQTOLP������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¨�����������������������m^nu\���������sY~�Ӵ����������������ë�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z��������~���|�������������{���~���|���z���}���z��{{��yz�}|~�}w}�y~��txzvz{~wzz�ww{}vuz|xuw{xwy{zuy||uuxxqvyxmtx{stvvqtvxrsu{qsvxqsuwoqrsqrttqurrsrrtpsqqomqromkrppnronmnokjlpjjkmoljllmjklkimskhkrojjlkgjokjlkhikkhhhljfjkegfiif`JZfgggfzx]WSOPM<
(CMNSQMKJJQVXUOQQOJLNMJKKMKILLLMKJIJLHILMIKNOMMRVTSVUTTYWavwyxxz|xwwt|����|�������~�����~tgbcec_dfdaab_adb_bba_bca_ac_]ab`acd_^^__`a^```_a`\]^cba^d`_^]^^\Z_Y^]YWYXYZWXVVWWWYXVZZWVXVWZYXXYWWYYVVSPSUTVVUTWWSTXWTVVVVRTUSURQSRRPRTNNQ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĭ�����������������������nblu`��������r\��Ϻ·�ý��Űŵ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������|������~���}��������}��{���}���|���z~��yy��}{}�}|}�}w|~~yy��vyzxz~~pxy�pu{}vvxzwtvzwswzytxzztuxwpwxvptvyluwxmrq|ossznpqsmruuoqsuqsvtortspprvoloqlkqrmlnrjlotlgmqmgjolglpkimmlhlpjfikjhlmleoomgkjjkkmkhgljiijfkiheijldgiga=Wgggfi|pZVTMOL< 
)HMLQOLJIKTYWTTUQOKOLKJMMLHIHLLNKIHJKGJLJILMMMNQTSUWYVUXYcvxyuuw{xvuv|����x��~~�}~���|scacdc`bbb`ac`adb``b``_c``^d]]`_\bc^]`a^^`_Z^a_\``\]^``a\^^^]^_\Z\aX[\XVUZXWUXUXXXWZZWWYXUVVVWWZXY[YSVWQQQSSSURTWVTUXZVVYWVUSSTTTRRTQQPQPOQO���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ҿ���������������������������������������������������������������������������������ο��������������������������î�����������������������oWht`}��������s_����Ǻ��ľ���ʼ���ǫ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������������������������~�������������}�}���{{�|��~}~~�~y{}}y|�xzy�y{|{tyy|r~x}szwxvxyywvxxwwxwzwstzvutsrtttnwtwprpzqsrwqtqtosssnsrtqttrmqsqnqqsmqqphprpjporjmosmjmqnigplhiojjopkhmnfdlnhjnpnboqibljegilihilhghldfjjdgjkdfhfcZcggeehzvZUXJPK<"
,FLMRQMJEHUWUUUTPPOOIKKMKLKKFKLKJILKIIKKKKMMLNPRTVXSWYZXYguyzvuxywvut~����{���~~}~�����|rccccedbaab`baabcaa`bc_a`a_da`_a_cbba_a___`]`a^`ba^]\b``^^^`\[^[]\^Y[\XVX[ZXWVUXYWUXWVXYZVVWVUXXUVYTSVWTTUSPRTQRVYTTWWUVVTUUSSTWSSQSOQQOOQPL������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������°�����������������������p[hq\}��������t[���˹�������Ż���í�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������������������������������������{������������~������{���|���~��~~������|���y��y��x~��~}~�{|{�~}|~~{~�|w|{}s{~}xxy|vyxwyw}xvw{yvwyxvv|xtw|vpvwvssxyoswvrutrqvuuqsuxnquurrqsmqssppptqoqronosqnlrqolqnqmnnokmnomkgmnllnkkoliklkfihnfnomldkphhjmbnojgfgkgihhbegggihhahjggghhfddjysXWTKNJ<" 
-ENLQRNKEITWVVVTOPOMILJKJMLJHMMIHILIHKKIJMMKLNOPRUXUWXWVXixyyxsxywuvw�����|����~}}���}sgeb`fdb_db_`bc^bdc]ae\`ad`abb]bbca_`^^ba^__^\^a`a`_]b^__`[a^[\]`\^[[[XWWWXWWWWXXXWVWUVXVUSTWWXXVWXUVWVTUSSUTRTVTUTWWTUTUVWTSUVSSTRROQQPQNNL������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ı�����������������������rbgn^��������tZ��ļö�������ù��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���}���}���~���{���y��|���x��{|��x{�|v{�}y}�~wz}~wz�sx{}vwx~tww}usx}xtuywsv{toy|uquvvtrvxltvvrusuqxvvnrutnqtuoqsupqrsnoqwmmprlmornjosminrlnpqlllojjnohhnoljnnkhnnkjilgimnkgikkkjoiihpligigghighhfcfjiefgjjffhgfddeg{pZUQLPL=$!0GMLQQOKECOXWWTSRTNNLMIMMKKMKLLKKKKJIKJILKKLMMPQUUUUUVUU[fwxzxotwtvvw�����������~�����zqgebadc`_dc\_cc]ba__bd```c^acbYaca^``]`eb[_b_]]ba`___a^^`]\^^[Y[^YZYYXZZWVXWTVYXWXWVVXXWVYUTYZVWWVUUVTTTSRTTRQVXURVWSVYVUXWSSWSPSUQRTQNRROOO���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѹ�������������������«�����������������������q[gma��������v\��ɻ�����Ŀ��»��������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������������������{�|������~�~��������}�}���}�}}�q��~z}~�z}|}{xz~}x{|~wzwzw{}}tywwtxw}tvs}xuvzvttwwvwyys{wuuuswtsrtrwtwrulvrxusizrtnstqmqqrppqqnrpumqqnknnpmjlqljmqlmpokkoqhjmoggknggnmhgolhhkngeklifjmidlmeekmfefi^cklaehfceihdeiihhfffddddgwkVSTLNN=&!1GMORQMI@16MTWQPSSLLNNJLLLILMMLLNKHILJHKJKJLLMPRTSUVVSWX\jyxytnwytuuu�����~~�����~�{rfdbddbba`_]badbbaab`a`__a_aab\_``^___`^]\``]^]_\^^^^b\__]Y]\][[]\[WXWYXVWYWSRYWTUVVVTUUUVUWXVUTXWTTTQRSRQSSRQUVRPTUVWXVSUSQTUQQQSPQSQOOONPO���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������п�������������������¯�����������������������p[gn\y��������xZ��˹���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������������~�������~���~|�������~���z���z���|���}|���~���~������y���}�}{�{{|{y}{z~~|v~�~py|}x{z}tww|xxyyuwz|wvxztty{xuywuryxulxwumvvunvuvltsuqsrumqqqopsunpquopqtqrqrnkmrnmlonpoqlnmpmmmpnjmmllgjllhlmliilihjkkhhkmjijkegmkdgiicefhafgidfedehgjfffggfhffffdchzkYVVONO>$ 
/IOLSQLI>.)7LWPRRQFNMKIJJLHKMLLJKIIKKGHLKHLLLLOSUPUWVTXY\jxxvlnyxsuuw�����~~��~����|pfbcfaaec`_ac_acba_db`aa``bd_^_`_`a`^`a^`aa^`a]_aa][^a_`]^\_Z_^[^_^XX[XVXYWWVUVUWTTWYTVWWSUYWUVUWTRUSTVSQSURRTUVTSRUXWRSUTSVUTRTRSRQQQNNOOPM�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ������������������������q_jna{��������y^��ʸ��Ⱦ�����½�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���~���z����}��~~���{}��|��~q�{���xx�{z{�|y{{y|~{w{}hzzry|ynxx~tsx~ptyxoru{ssyytpxxrpzypjzsuluurltvsissvnoswkpqpiourhpstnmstmoqphknqllmokloqjknqifmpjhkmkkimifiqkeinigjkigjjhjlmhhhkjhfjefghdedfeghhddghfiffddegffddffah{iYUTPNL>&#0GOMRQNI?,&)3FPUOPMQMLMMKKKKKKMHHIJKJHJJJIKNNLOSVTUVUUWX]jwxxgfsvtuut�����}~~��~}~~}~�����|od`ccbced^`adcab_\ac]``a[_ab`_`^\_a^[^_]`c_]_aZ^a`[\`a[\[_^^[^]Y]__WX[VTWWTTWUTUWTTVXSWYWRTWTRVWTSWTRRURRTSRQTPQRSRSWVSTVQPTTOQRQOSRLMRNNNPL�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ������������������������s_knb|��������x_��ʼ���þ����Ƶ�������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������~��������������}�����������~~����}|��|���|{}~~|||y}y{}~}{zyzny�v{xxqyw~uvv|qzwxpuw~svxvuuvwrxzrpr{pysvutrvtrktqwppsupusolqssjsrtoppqlqqnemmpjnmqlomqjlltkkmmjiimhkijgejnidjliejleejkggimggnmgchjcfhi`befcdffa`efadfeadghgfddccck|cVTSQML?%"4HNLPNLH>-&'(1@SMNNNJKLLJKLIJLMHGIIIHHJIKKMNMNOPSTTSVVVU^m{yu^[ptuuvx�����|�}}�~}}��}���|ogecadcab^c``cb`b```^d`]`ca`aa`______^]^`_^^`a]_`_[\_`^\\__[[\^Z[\YVYZVVWWUTVTTVWTTXXUXVVQTXUTXXUUWTSRTTSSQRSTNRRRSUXTRTSRSTSMSTPOOOMPSLNQPL��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο���������������������������������������������������t]jo`{��������w\}���˼���Ƭ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������x���~�������������������}�������}����}���y���}~��|~��||���}��||���z~��|~��}}|�{|~�yz~wxy~xy{}yvxw}u{{}xyzxoyxyqtt|utqxvxrywsswvrrxumrxsivvsoqutorsvostsntounkoqorrqknqrloqslkmolrrrkonokomkjphphjkjjmfmjkihfjiiikjjgiffhddfglhgljgeeg^mifabcdbdef_abebegdbehhffdfgbbieRUSTMM<! 
3JNMOMKJ?,%$').=IMMIKKIJFJIIHIIHHHIJGJLIJILKOQNORUTTWXVVZm{ywicrututw�����|~�~}~}~}������zogd`]ac__`b`bd]\`b__bfY[b`\_da_`ba^a_]]`a]_`]^]`_^^]\_^\`^\Z\\[\\YZ[YWVWWWUUTRTWVUVTVVXVUUUVSUVUUUUTTSTURNSTSQPQQRTVUUURRSRPPQRRSSOOOPONQOOL�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t`mrb~��������~^}���˻���ı��ö������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������������~��������������������~������������|~��z���|��{���~}|�|{�|x~��u��~z��y||�}y}�yx}�wrx|yuz~xv}}tx}ztwzxquuyqtx{nmsyusszportqoqvsptwnfyvmhtwxonssjqsqmposknspgqtpjnqrmmpsgkooloqsiipqjknngkorijnlehiphhkmhhjoiijmgfikgbfkfhimlghibb_lefcc`cdebbac^bcfeddeeefddddcck}aSURPNL="!3GMNQOLI?+%$&&)/>LKJKKHIIMJKKLJKKIKIGHIIJIKJMNNOVSQUWWVWXkxyxutuvutsu�����|~���}�����|pfcb`bbbabbbba``aa`_db[_a_]^d`^^```a`][^`]^^[X]c\]`][]X^^ZZ^`ZY]\[Z[YYXXUTUVRUUWQVWSUWUSUWSRSUTUXTRVVRQTRNPQPOQTQRUTRUUOPSPOQRNNSSMMQPPQPLNN�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t_lu]z��������z\��ǳýǿ���ó��������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���~�����������������������������}����������������������������������������~���{�������������~���}��~t�~�~�~~~��|�~}z�x�|}v�||{zywzxw|zvx{vy{zrx|zsxxxvutuqwtxoqu{swqyquruyutwtrtromvsmmtqwmmpolxqmmpppmprojtrmipppkmlndpooikloiglqhkmoklmngillchgoidjqgfilfafmgdfiecgkchlofggmecej_`eeZbfg_bdd^^bd`_dfeddcdcbdclzeRUUQMM@#"5IOOROJI<,$$%&').8FJLJJKIJILKIIIJHHHGGHJIJJKMKMQRRQUUUU[Znxywvuvvwutv�����|~~~~�~~~|��zle_ae_`db`addabcb__bab_c`^^_ca\^_a`b`]]\_ZZ]\[\a[^_]Z`\`\\]_[[Y]][XYYVUVUUUSRTUTRSTSVVUVUUSSUVOSVQPSTOQSOMPSRONOKPUUSQOPTSQRPSOPTQLNRPOOMKNN�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʻ���������������������������������������ѿ�����������������������������������r]kwby��������{Z~�ϸ��Ͼ���ɭ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�������������������������������������~��������~�������}��������{���~����������|���~���~���|���|���}}��z~��|{~��}��{}�{~~~u�xz}|xy{wvxzz|{||uxvzvyzvvwxurvxvzuvvxuytvtuqrxvqtqxsousumquuoutrkmqzmmprlnqrlonuoljvmnnpnmjnlnmmmmkkipjnimilljhlnkbkjjjfgoidiogfhmihhiecgidhjleghg_fggcbce[afe__de__bebbcdeedcdecablxeTSUPMO?#"5IMNSOLH<+%%%&&(+.7BKIHHIJIIHHIIJIGGGIIIIGHJKKLRRQTVSRVWWpxzyxwwxvsqv�����{~�~~{~~~}����}lb[bc`acb``ba^`a`_`b_`aa]`bba``a]ac\]^_^_a_]____`^\[[b`_[[_]Z\\X[]ZVXZVUTVVTSUT
UXTSTVVVUXUTUTRUUSUSSTRQPPQQSSRPRRTTUURORUSQTQPOQQROPRQNMLLOM������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������³�����������������������p]jv[x��������{[~�̻��ý���ï�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������}���}����}������|���|���z���|~���~��~z~��{~�}{{�w}�uv��|~��v~�~w|�vz�xw{xv|�yuw{wtw�|su{{uvzwpuvuputzluuusuuzortsosuxqrtwpnrsonsrqotrmkoqqloqpjpppgmrvgkprjnolgmnqgknpkjkmiimnejkofjjkhifjcfikgijhghghghgheeeeehelhiiiffeledbfaaaebaee_a_a^`cc_edddedcbaaj{]RRRNLL<!#5GOOSKKH:'###$%')*,4CGHKJLJJIGGIIHFEGIIHIHIIGJMOOQORSTUTYryywwwyxwspv�����{����~|}}}~���~ykc^`cc`bda`baabbaaa^]bab[`a_]``^]^a\^^]Z\^^]_^\\]\[[[`_][\bYV]\WY^ZPYZSTUVSTSTSUXTSTVVTUSRUSRSXWSVUQSRPLRTSQSRORSSRTWONPRPPOMLPRNOOONRPLLLMH��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ӿ��������������������������Ŵ�����������������������raou^x��������\�ɿ�����²ŷ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������������������������������������������������������~���~��������~�z�����}����������������}|��|{|}z�v�~�z|�{�}}z|~x|~zyx�ww{yrvyyww||svw{urwuoxuuo{rymyvtrxp{lsosrsutosqtmrtrrrrmttsqprqoqmopqlrkoiqrthpmqinmninkogilniiiljkkocjhjblklbfiibdglceghcfhidfgibdggbfbjffhj`aggaddf_add\_bbZ``_\^ba[bddeeccda^h{\RRROJL9#8JNOQMKF8(#!!"$%'(*-4@GHHJKIJJHFHGHEFIJHHGHJJMMMORQRRTUV[pyzwvwxxvsqx�����}~���~~~~����}vkbaabcd`bcca``a`bb_`_a_a`baaaa_a___]b^^_a^__^`^^\[^\\_^\]^\XY]ZYX[WSZWQTUTSRSRSTXOTNUWVUSSTUUTVRSTUPRPNLSSOOQSPRRSRRWQPPPPPNOMPROORNLLNMLMMH������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ÿ�����������������������r\juay��������\}���ļ���ů�����±Ŵ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���|���|������������~�����~���|�����~~��}���z���|���x���|~yy�|~~�}{~zx|}z{z~}yy{uyx|yywwuxyywxxuvsyxutzx{uvu|sxszsst{i|mupruuqqrwpqqtqorspnstqlqsolosqnpnpkpnqnpqnhqlpjkjmimklknjiklkjhnjpijinkhgighfegjcgghchiiafdgcfehchajfhggadfe_ecd[aeb[a]a\_^]\[^a\bddcddcba_iyXQQRPKN8 
"8IMNQKHD7'"!"""$&(*+,/;DIIHGIKIEEGGBEHHEFFHGJKLJLMPQQSSU\py{zxwxwurpw�����{�}~~~�����}vjcd^]cd^ac`\\`_^bb[^ab^aba_`a`]`a_^a`^]a^\_`]]]`[Z]^\ZZ]^^[[][[\YY[ZXVTWUUWTRUUTUTUQTWUSTVUTTRQSWSSRURSSSRPPORQQSSSQSPRPNQQOPONPQPNONLNLKLMI������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ź�����������������������t`juew���������\|���ž���Ŵ��ż���ư������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������������������������������������������������������������~���~����������}���������~�����������{���{����}���~���{�~�|��}��}���{|��x~��t}��r~�t||�}x}�zw~v}�~uzxzty~uxz}ttxzttw{trtyuq{yupxzwnvzxmuvzoruxlsuvkqvwlqqukmqskhuvqnprphnpogqsokmppinolfoqndlnmginngklmgjjkillkekkncjknefhidehjggfhefgiffehceefcgfj`hhjfefh`bcha``baaab\_`][\^_^d]bcdcedcebakyYQQROLK7##8HKMNKGE6&!!#"!$'(*),/1<FGGJMIHFHGFFGHGHIHIFJKIIMNONOSSW]syyyxvxyutsz�����|~}~�~�~~~�����~xjdc_`bbaba`^]__`a_Zbb_Zaaa\a`][`_^``_Z^`^]_^Y\]^\Z]]]YY[\ZY^^XYZVVYVSUVVUUVRQTURTTUPTWTQSVTSTSOQWSSTSRRTOOQRLNQPNPRRRTRPPRNMORPQQOMONMLLLKJI������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ù�����������������������v`ktZv��������~_}�˻����������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������������������������������|��������������������}�}�����������������������~~��z}���|{��y�y�~�s��}v}{zz|�zu�~t}{|t�x}xuwyr|w}suwzusvzxwtxso|xuqvvxnuvtnsszmrtuivrrmrutirqsnqpqlnrtqpnornonkgorpmkloklmkinllekllhhlmellhcikkbjjmekijahkl]fgh`ffhbaeg^cgg^efd`cdfbbdg[dfldbgh\]ce\]`a\^a^W^_[Y\a[Y`adbbdcbdea`kzTNPSPKH4#%8IMNMIGC7'"#$$##&(*+,..3<CCIJGEHGDFJGDDFIGIIHHJKLKMOOOQVZryyxwvxywrrz�����|}~~~���}vgbaada`ab_aa__`a___d`^^`_b]_]^_b___]`\^\_`_^^^[\[Y[\]\[]\Z[ZZWYYVVWTSTSSPUTSSSSPSTRQUVTSRTSRRURTSRSSPLPQMNSQLLMOOOQPRSQOPQNOPPOQQNLLLKLLLKKH������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ú�����������������������v\itdz���������\}�ҵ��ͽ���̮�������°��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������������������}���~��������������v���~������}����|���|���|���w��|�~�}{������|���}�~�}z|�w|{�|~}}x�y}x|z�s}yywyv|vyx{vutwvwz{uvtxxtuwwtyvtstuwpsruostuopqtrqqupoqvpnorpkpsomrqnlknnlpqnjmnmkjmphllohkmmjkiolhikjngkhigihhgihifgdgagegddehbefe_eea`cacbcdf`dcm\dfe^_aa[]]`\\_]W]]\ZZ_ZV]_a^^ccabcabmzVPPPLKI5!$:IMNMJIC6&###$#$%'(+,,.148@HGDEGEDFFDBCEEDFFGHJJGILKLLPUXrzzywwxxurs{�����z}~~}~~�����|vc`_`b^_bb[aa]^`a]^ba^`b`_^`^[^]\\a`Y]_]\__\\`_\]][Y\]^^[Y[]YXXYXYXWTVVTVTURTSR
SUTRQSSVSSSSRRQUTTORUQQQROPONOQNNQRQPRSOPPQPMONPPOLMMMLLNLLMLJ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�����������������������wZis`o���������]{�ʹ��˿�þų��Ŀ���Į��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����z�����������������������������������������������������������������������}���~����~�������������}���}�������|~|��x���|��y���y�{�}~��z{���y��x��yy~wy|�tz|{y~|v{~~vy|�ryz{tx{~tvx~vquysvy~uqt{tor{unwwrouvwkpuuipsrmoqwlpotkmrsmlnqnhmrljnpnjjnljnpmeinljjnmjjmmeimkegkkhgkkekgiffjhegijdfhg`effbefebihfcdcf`c`fbcae`daebeaj`a]`^d^_^c\\\\Y]Z\ZXXZ[]\``aad_`ba_l{UNNOMKK4
$;IKKMKHD7&"""#$&%%'*++,0126BDEFDDEFFEDCEDEEFHJJJGKMLKQQT]qwxuutwxuts|�����{~�~~�����~}t\\\a`^_a`^a`\_``^_ba\__^]\^][_^\\_\Z]_]]_`Z[[^[\[YYZ[\]ZVZ[ZWYXVYZVTXWTSRVORTQQRRNRTQPSROQSPORUQNUSOORQKMNMMPMMOSOOQRJOQNMNOMNQNHLLJJKNIKNLK������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ź�����������������������w[etcr���������`z�ŷ¾�¸����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������}��~������}��|�x~{�}�yz�~}|�x�}�}{|~w}~~uy{}xy}}{x||}wxz~txxxsvt}svt|ttvwsst{rrwuooqxtrzspmtuvprstprrqkopqksmqnqpqknoqnlnpnllnlhjlnjnqmekllljijgikjgijlggjjgfgaajbgcefjehghaeeeYcge[bbc^dcd_bbc\ccc_abd_bcd^a`g\_ad\_]^Z_]\Z[]\V\\XWZ[XXabaacc_```n{QNNPMKL0 
$=IIKNLHD7(%$#$%&%%')++-/1137?DEABDDBBDDDDHHFFHJHIMLKKPPO]rwyvvsvxtuvz����|{~~�~���~~~}u``aa`bb^^_`^]a`_a`_]^^]`]]_^]]^\_\[[]]_]__]^[][ZWZ][[][ZXYZYYXWVXVUUWVRSSWTTSQQRSPRPOPTRMOQOMSTQPUPMMSPLNOLKNNNNQRPPPNNOMMNNMNMLKMJILMLHHKJJ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������rafqco���������`y�ͽɼ���ű��ļ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���}����~���~������z���~���~��~}���w������~|������{���|���{~�z}�{}{��|~��~|�x}w{{}{wzxz{~|{{y}zzx{x}uzww{{t{u|szy{otvwtwtuvrsrvuvyxopqxnuusnpqroqpvoooulkqtmnnrlinunknpljllillllllljklihilkkhghgikjhfgfgdcglfleiehefcebedcaebb`fcebgcbbd``^ca`\abb^__`]b`cX_`bY\Z\X`^[ZZ[[VXZSXYXUW^``_aa__]`luSLLNMIF1
&=HIMLIHC4%#$$$%&%%()+,-./1339?BBCBAAAABBBDEEFFGFGJGIKJKNZqwxvutvvurtz����||~~����~~|u^`_\^ba\]^[]_`^^^_Z[`]Y_`ZY^_^]\^X[][V]^][^_[[]ZW[_\[ZXZ]WYYYWWVUTUWVURTSUVSQRRPPPQOPRTQQSPPSRPTRRPOMPOMMOPRONNNOOPMNQNMMMKNPOKMMLKKKJJHIIII������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ�����������������������s]drar���������cw���˹���ȱ���®��²���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���������|���}���y{���~���y{��y��y~}�yz~�w{z�z��y}�r~�ty�yxy~twz�zxz~{tz}{szzytuyztzx{rtwypqwyptxxtpvtumtzulrtujrsrknqskprtkmorgknpfknqjdnqgjnmifjmggkmjfllhfijfdkjhdfjifhhgbgkiddiheghiceihbdeg`aeg^begaabd`ccd^abc_acd`bcc]`^c^b_d^`\^X]^a\[[ZY[XYYX]VXYY`a_\^^_a_^irPKLNNKF3
&;HIKJJID3%!#$%&%%'++,,-..01138<BDBABDBBCCCEFEEEHHIGIJIJMXrwxttstttst}����|z~������������~u_^_^_`a_]]]^``^a]`\_a\Z_`XZ^_[[\][]\YX^\Y\__ZY\YVZ^XWVYZZPZZWUVVTUUWUTRROSTOPPPMNPPPPQQPPONPRPOSQMOQOMMMMMOQJKNLMNLJLOMJLNLKNOILMMKKLJIJHGJI������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž�����������������������uXcqdq���������_w�ǽż���į��Ⱦ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���~���|����~���}�������~�|�~���}��zz��}�x�z��{}{�z|z�|w�zw{|}z~}{v{z~swy{txv~yww{qy|ztyyxruyzrwuzstpxrstwosuvnmtrnntuolqrrjsqqgmopltqqhmmqfmmlepkkilnlgommkhihhikmiikjfcgjddihhaehidggc^hjebchgcbgf`bde`bdf]`cd]abc]]`c]b_`Y`b_\^abZ``]Ycad[`_^Z_^\V^^]Y][XVZYWSY\WTX\___^`_^]]YhnNHLLMIG0!'=HHJJHGB8.$!"&'&'(***----/12358?BAACA@ADBBCEEFGJGIIJIJMQXuwvrutttutu}����|{�������������~|rd\_a`\aa^]_`___a````\^_][\^]\ZZY\]\ZZ[\[[\][YYYXZ[YVYY[XXV\YVVVVUUVVVSQOORPQROQNNOQRPQPPPRQSQMQRPLNOMJLMNOMMKMONNMNLLNNLLMNLLMKMMLKJKIIJGGGH�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������v^dqdn���������[u�Ƿ��ƿ���¸��������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������|�������z�������}��������z���~���{�����}���t���z���{~��}{���|���|���z{��}}���{�~{{���v~|�}~�z�}�x~~}zz�}x|}}y{zzx}xwzy}xzzz|}x{uywzvwtwvyvvrzwxostwuunvusnstontutppsmrltnrnpnoqxnnnslopskmmtihmslllqljnkjilmjgjmkghhihghhhjegghiiedfif`cdegbdedcdbecaac^aad^``abaZe^`ZbZ_]]]__`\_`_\d]_Yc\[Ya\XUV[XY^\VTZYWQTVSRTZ]_^\^_^\ZWdpNILMIGF0$:GGKIGD@??0%##%'()))),-///012458;=@A??@ACBCCCEEGFFGIGJMNVsxvtttuuttt~����z{~���������~{te^]`a[cb\[^_]_`^\^a`Z]^]Y\_\\\ZWZ]ZYZ[[[[]ZZZXSX[YXX\\\XWXZUUWWSTWTSVTPPSQPRRNMPSOQUQRQRQSTSOOSQONPNNOLJNOMONLMNRMMNQNOOMMNNJIIKKKJHJLIIHLII������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½�����������������������tXftej���������`t�̻���ŷ��ĸ��¿���°�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���}����������������|����������������������������~���}�������{���x���}���z|���z���x���w|��u|��y��xv���r�w|�x|��z{�{xz�|t|~|tz{yrzxrwz{xyz�|w|zzrvzxtwxzqvz{mwzznowwooryqoqwpmrwqlptokqsrlpqqhlrrgopsemoqfimpignqhigoihnmgemojclqgdhjgefjgegkdbfkg`bhe_ahc`cdfbcccadbc]`ba\b^`_`__Yc`aYa`_\```\a`a_\[c]a_f\`\_\Z[WXZ\ZXVUXXWUUTSSTX\]^\\\][YYamIHJKGFC-
#:FHHHEDB@EB5%"$%()*)*-01./1024457:?@@AA@AAACBEGEEEFHFIJNXqwvtsstuttr}����z{~~��~������}ve`^__^b_X^^_Z_`]]_^\Z\\ZZ]^YY\[ZZ\YXXYWYZZXZYWTZ[YVV[ZYUWYXTWVUQRUPTTTQQTPNNPONPQMRROPQROQPMLMPNMOOLNOLMNKKLMJLORLJLMKOOJKNKJKKLIFIGKKGGJLHH������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������û�����������������������u^drgm���������\r�н���ó��ù��¾���ǯ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������~�����|����}���x�����x�~�~z�|}��y~|�w|}y~~{}{�wyy}{u~{zu}{xs}w~qwxyuvx�xrx|vpvxzsxyworzyixxwlpsxnoowpqmwpkovlkrsniqnpmqoifmonitmohnligkjmhjkljkimghllhgikgelqcafhgfffeddfbaegd^bea^cec`bfc^be`^dcd^_a^Y_baX```[cb_Y^^\U```X_\[[^^^V_`_V]\ZW[ZTTX[VRVYWTUVSQSRQU\]^^\ZYYYZdkJHIKFDF*
%;EHHGGDB?FFC4'!"&()),.0.-/12454467;?CBABCECDDEECFGEGHHHO_vzxtsssttsq|����xz�����~�����zpd__]_aa_\_\^^`^___]]\[[Z]\\ZZZZ]]YXYYZZZZYYYXWXYXWWXYYWWWUWVXVTTTTSSRQPRSPONPOOOOOROPPOONOOPONONOQNOPNNOLMKKJHLNMLMLMOOMJMOKJJKKJIHHKIHIKLIH�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܽ���û�����������������������ubcshg���������]o������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���{���|�����}��v���}���|���~������{���v����}�}|���z���w���z{}�{��{z|�{}}~y|~~y||�{~{zy{}{{{y~xzz~ywwvxx{vyw}u|wwwvsw{yzuxutrvwwqyvroxqocnksqoMOW]dhjurnnpqlplpljmnlmpviknqjiloikjnfgkljjgjiijlhhfmkjefffghedegfccfgebccbaeebbbcdbabc^_b^a`b_^\`_\W___Zb][Y_ZZZbZ_X\[[W]\]Wa\_S[[WUXUTWYZSQUYVSSTRQPLDP\\[Z[YY[Z[ddGEHKGDF*
&;DFFFCB@=EFD@6(##%))+././1233445466:>@?BBAABBDDCDDBEFGGO`vywuusststs|����zz���������~~yoc^]\_a^\\^X\b_]]_^[[[YYZ]YY[YWW\ZVX[YX[[XXZYUVYWVXYWXTTWWQSUVTUVSRTSPQRPOPQPPMNOORRNPRMORPOOOMNOQPORNKNOMNMKKKNMOMOJMPMKJLLLKKIIIIHHJFHHHHJH�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wbeseb���������]jo~��ƻ��Ʒ��ƽ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���|���}���y���{��������z���������|�����~������������|���|���~�����~��y���{���~�����������y���w���}}~�z|���y���y���v}}�x~}�{w��{y~�t{~v||v{��wn~�zt{|zrz|}tu{{qwz{svxvuz~vpw|wp{yyosyyruvvnprxlQ>?@NXW.),---3CNU^rpinpqjkmpgmonfkpqdjmogjlmcfjmhfgjgfijdbhmidcgb`hjeaakeacfb`be`^dgd^adb]acc\\abY___]^`_Z[\_[]]_ZYZaZ^]bW`^][_Y\[b[`Y^[\WWXXVXYYWSUVXXWTSSQI?:KXYXY[[\[YWdcGEFIFBB)
';BCGFDB@>GGFE?7*##%)+,./1001354654588>?BA?AABCDDCDCCEFHL_wxvutqstpot~����yz~��~����������zob_]\__\^^]Y^^^[]]]ZZYYYYZYYZZW\]YVW\YVZYX[ZXVXYVTUWUVSTUVPRUSSUUPQRRRQPNOQPNNMNMMPNLMQNNONNOOMMPOLLPJIMMLJMJILMHJLLJMOLIIJKJIJKHDEHHHEGHFGFF�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������w_crfg���������aeW@r�����Ʒ��Ƿ���õ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���}����x��������������������������������������������������{����������~������������������~t���~�{}��}�~���~�{�w}y�y�}~�w}�{}q~}}y~{�o~��wv�~su{|wr}x{txz}uzwwpst|tuvzqnzxsqzwxmtxuoutsppmvlK962042+)('%%+>Naruriljmfmnlemomcllmiikhfhhkffekehhicejjedbebbff`^alaaaaa`bc]\cc_^aa_[_b^Z\__X^__ZZb^W^^]X\_]WX[^W`^^U]]ZUZ]XV]\ZWZ^ZPWZVOTYRSUUSRUWSRNG>87JYYWVWZZWVXbaCBEGEBB(
(:BDHGDAA>DDEEB?<,%#'*,//0./12435454669=A@ACCCDEDCCEDDFHJ_vwvtsprsrqu�����qx}~~����������ync]\]`^^_\\^_]]^`[[]\ZX]VYZ[Z[\]\YXY[XZXXY\VWWWVVVUWUWVVVWUUSRVTUQTQRQQOQUQNNOOOONNMNMOONNMNOONNOPKLMKLMLLKMKJLMMOMKLMMKKJKLKGINJGIJIKHIIHJFG�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ҿ���������������������������������������������������þ�����������������������vacufh���������b^K3?]��ƽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���~~��������������������������|����}������v~���x���|��z��}~������|���z}}�}�{�yy�|y~��x���z}�u{�w{��}zw��u{|z{~~v}x�}}u|zxv{{ywx|vzyzu{{|ruswutwyvwqvxqsvzuxrwvtrvrrptntnH631//.,*)*)('$!
 (7IXfpnvnqlkjnkifngiimgfhiehihejhmeffjfdehcdddbc]`dg`fb_bdcb^ab_`b`^`b`^_][[[\]_\^\][[\ZYWY\]XY[]X]\YW[]VSXYYV[YXUXZXQVWVPPVPPRUSQSSOLE=:89KWWTRUVVWVZa^DBEDB@A();CBGFDA?=CCDB?@A:-%$(+/11.12233555344589<=BB@@CDAACDDGGH`xxvsrptutrt�����tv~~~~~���������ym\X\^_[]^Z[]\\[[_Z[`ZSZZVX[\XY[ZVVXXVWZWWZZRTWVSSTTTSVWYUTVUSSUQQQSPRRQPQSPLNONNPNLMPMLOOLNQNMOPLNNOOLPLKLMLJJKKMOKKMLKIJIIIGFGJGHKKGGGGFHHGH��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ��������������������������������������������������������������������������������������������ÿ�����������������������xbesbh���������_[B498R�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���~��������������������}���������������}���{}���}���{�������}�������|���}}��}z���~���{���{���}~��r���x���|���x���u~��{~{�v|��xz~��w���x{}�uz�xv��|vy�}qy}|uv}~r}{~wyw|vry|wtx{ztwxzty}{pstyqrv{usszunsxxqvxyprtvnpsujmkF5200/-)**,,.0/,($#" ,;MYompwpehkk]chjcehicfiicegjabfk]_dlbadh`_aecbbhc]ad`^ad_^`f^\^d_[``^]^a]]^`^Y[]ZX[ZZZ^\ZZ^\\YYYWYWXZXXZYWYZ[S[UWVWURRUSSTRPRQLC>;:78IVWQKPVVVWYf]CACCA@@&
);ABEBBA=;BDC@>AA?:.$#(,1111/1203334423446=BC@DCB@CEDDGGF\wwwsrpstttt�����vu}~~�~������~~|l^[^`]\]^\[[_][Z\]\\YXZXUWYZWVXVVYZWVVYTVZXTUUTSVVTRRUSUORTTSRRQRRRPPONNNNNNNMMNOMJLOLJKLJNOLLMNJLNMLKLJIKJHHJJIKJHKLJJIGHKJHHIKEGHIFFGFEGGDG��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ��������������������������������������������������������������������������������������������¿�����������������������z`cteg���������[T?158;Kq�ǿ¶�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���~����������}���������������������{���~������������}�������~���}������������������������|�������������~����}�}�~��}�}���|�|�y��{�{�{~~�}�|�~}y}x}|�{w|�|x{|vz{}wxy}r�}~u|||vu}|urwzyouwyszz{nssvrrs{trqwspu{wosuulpsrgnrsemeE62000-++++,./0,*))'(('%&$ &(4GQ_lkgekkhaefhafffbegg_dbfY`dda`df`\ac`^be][aa\\`cZ]__\\_]Z[__YX__\Y\`[WX[YW^]\Z\[VS\^YTYZXWWYYUTYVQY[ZTWYVSUURQUTQQPONKA;<;:67JTUNEMSVUTXd^?@DEA><&
*<AACBAA=8BDBAAA?=?=0'%'+.10.21234235432337>ABA@CDEEEDFEMcwxxsslrsrtw�����vs~~����������{i`]]\]_]\]^\]^^[^^]WZ\[XX[Y[XXYYZ[XWWW[XXXWWXUUUVUUUSVSTTUUTSQSRPRTQPNNNNPPPMLMNNOLMMMMJJLNMLMMMJMNJJJJIHJJHIJKJJJJKIIJIHJKKJIJIFHIGHFFEFIGEG�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yb`ric���������]R?/047?Fd����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���������������������~����������|���~����{���|���~���||��~x���z���|{�~|�x��x���v��v~�{y}�~|}�x|�~xz|�v}�{~}}|yx�}vxz|w}w{{v�y{qogqsy{uyvuuyx{wxtwquruqwtuqttttztputsqqoqkpouhvc@3/.//.--,,-./0.,)))(+**)(&%&$"! 
#(9I]npnknffbiecbhdeejacdg`acddc`^_cbbac_^a`\ab_]b`\_`_^\``^[^^Y[Z[\ZZ[YYW\ZYW[YVS\\VSX[TUXVWTTYTMYWUTSTPRQOLLQRQPQQH?:::9868IQUPAISUTTVcY@ABC@?;&);??BCAA<9@BBBA?=>>?:4*&')-//102232332132126<@>@DCBBCEEEIfywvsspqsspv�����ss}~~�����������zi]^ZY^_\Z\^ZY\\V[^\VZXVTWZWWXWVYYXVXXUXXXUUXVSUVURUUSRQSUTRTSORQPQRSOLNPLMPMJLMLLPMJLLLJJLKJKMLMMMJJKKKJIHHHJKKKJJJIGIKHGIIFHJIGHKGGHDFGGFGHG�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xabpf`���������\N=.0247@A\��������ư���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������|����������������~���������������������~�������}������������~���v���~����v���|}��|����}���y���}|~�}{��zz���{���w���}x���{���y�~�w~��{v��~{|�{|�|~�~z��w|��v~��s|�~uzz�{}}�y{{}uvz�uxz~xz}~xrf``_gwzqxw~wx{�srstWFJ[egrzsowy{ruwylot{qotunptusqrsvqqtpmosrjopta<2/--//.+,+++-/,+*)()*),)''&&&(''(%##"#&,9L\ejqsbdde`aeh^`df``ac_^`_^`bc^``a^\]b_]_b^\`b^Z\_]\[b\WX\\YY][YXYZVXXYWXZ\YXYWVTVWUXVVVTUWUUXVRRTSPMOQRROMG>:9:86646GQRK9FPSTTTfU>>?@<;8$
):?>AB?A>=@AABA??@??=<6*&&*-.0221223323100225;?BDCCCCEEFMgwwtsroqrsrv�����qp~~}~�����������{k_\WZ^]\Z[]Z[\[W\[[YZZVSVWUUTVXYVVUWVRVWWTUXURTVSQUURROQTRQRRNPPOMQQOLKMMNMLJKNMJLLJLLKIJKHHMMKKMLKKMJIKJGGIKIIIGJIIGHIIIGHHIIGIIIFHHEFFFEGGF�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ø����������������������xbbld\���������YK;22113:?GR��ϻ���ɯ���������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������������������������������������������������������������������������������~����������������~���|������������������������z���|���}|���y�~�{�������}�~�}�|���������|�}���|�������}�z�}���}~�~}}�{~�|y�~}~~}~||z�{��u�|v}y}x~|{zzyxzvyzx{xz|{ypQ;@@@IOThmvtw|}prv���yl_NKV[dz|yrstthnnuooqskoqsroqsrlnqojlppkmqp\:1/-.-.-++,,+,-+*))*+)*+''&&%&'((('%%%$%&&%(..=MUfjjccedX_^a]`^_]^_`Z\a`Y]^a[W[^[]_`Y\``\UX\XY[\XXY[WWY]YUY\WSWYXUVZXVZ[WTX[WQZXRQUXVSUXSPRRPOPPOQSKC<998898405HRPF5HOPSPSdS=>@>9<8!
(9>>@@>?<;??@@>>A@==>=;5,''+-/1111234455201233<CDBBCBBCFMfwursrjprrsy�����pm}}�����������zk`\\\][\]Z[]]Z[]]XXZYYZVVWWWTXWXXXXVVUWWVVVUTTSSSUUTTUVSTRROPQQPPOSQQNKLOOMMLNMKJKKLMMLIJKKMOMIKLKKLLLKIHGHKJGHIIKJJIHIJIGIJJGGIHIGFFFGEGHGED��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͽ���������������������������������������ycbmbb���������XK>334236;ADLk����������������������������������������������������������������������������������������������������������������������������������������������������������������}������������}��������������������������������������������������������������������������������������~���������������������������������������������z���������}��{���|��������|���|���|}�����}���t��������|���|���u~�zx|��}���y}~�v|�zv~�yu~�}w���x{{�yzx~xxx{w{}}uyzqw||xyy~xwyvM@=:::==DEQU]juoqpvyx|��{uhWI>KZjmlspomrprpsospuqqmlqopoqllknknmmZ80-+-,,++,++-,,+,+))))**'(&'%&(*)(()(%%'**++*)++,76AJQTNGOU`_c``_b_b^`aa__^_]]\^]`]^^_]_Z[Z[Z[[YX[Z[XXXYWSYXTUXWSSUXTTUVUPWWSPVUROPSSPVUSQPQMLONNNIB;9787799407HONKCLNOQLQ_Q=<?=9;6 
)8<<>?;<;:<>??<>@?<>?=;<5/))+-...0/123443210//39CBDBBBBEIctutsrnqspry�����ok~�~��}��������zfYX\]ZY\^XX[]XY[[WZ\WY[XSUXWTVVXWUVSUWURSTTRQTRPQTLRRTURPQQMMQPMPPPOSNKLNLKLMLKKKIHKLIIIJJLMKKJLHJMKJJMGIJGIKJHHKKIHHFGIGEGHFCDEEGGFDFFDFGECD�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ß����������������������{e`jcb���������^K=5544338<BGKd��ͩ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������������������������������������}���}�������~�����������������������������������������}���w���z���~}��x���x~��|z��}y���t|��y|��}{���y���u����������}x��tz|�{yz��z���~�w|��zz{�yv��{t}~v{{�pww|vvxyr~~nty�suywuz�ruzyI=:8:99;>;:<=?FMRQY`fkmvx|��~qdOHDI[enlnqwooqypqqwoltrrjppmhnsqdmW60-++,+)(**+++,,***)(')*(&'''()+,+,+,+*+*(*++')+-_F.+  #%%8^ad_]`dY\_aZ]aa\]``[]_`ZY\a[WY^Z[]\[XZ]YXYYWVXZXTWXUTVUWXVXVSWXXRSUTQRRPSSVVTRPMRROPPKF>;97588775514INOONMNNPPS`P>=?=7;4 
'8;<??<=::>A@==?>>>@@>>><:2+(*,,-.-/22/131/..-018@FEEDEGJeuvttsoqsrsy�����ll}����}~��|cWW\\YZZ]XY[\[]ZZZ[\YYWXUVVTRTVVUVSQUUSRUTTQPQOQRSNRPTSQORPMOQPKPPMMOLJLLJHLKKJJJHHKLJHJJJLMKLKJGKKHIKIFHHGJKIFGHHGGFEGJFDFGFDDFCDFDDFECEFBBC�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѿ����������������������������������������������������������������������â����������������������ye`kc^���������YJ;34666469<BJHX�����®����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{������������~������������|���|���z���|���zy�������~����}���~��|���}~}��w���|���z���z}���y}��x}~�|��~}}�{����������������{~{�~~wywprw{}�������{~z}}�|{x�z|vzzn|w|xxy{{y{{{pwx�vvv|xux�nvvoC<:97798:776657:97;@GLOWckpuxuy~|qhJ/:CYenklqvmnrvleqsoempkhnongmS7-*+,++()*++**+-*))))()(''((()+,--.,--+,,-,.,)'('zd  4]`bZY``X]]bVZ^`XY^]UZ]\UXZ^[YXZWW\ZVVXVXUWZVTWZUQTYPPUUSRUVSOTYRPTTROPSQOTXSNQQMNPONLE<89755897454/2FNMNMMLMNOT\L=<?=8:2 
)8;;><;<67>?;<==<==?><>?=?90))**--..0001210//...08ADEDDDIjwwtuunqstsy�����fg}��~~�����|gZ[[Y[ZVV[^YY]]WX[YZZZUVUWTSTUTVUVRSVTRVXSTTRPQTPPPQRURQPQPMNNPLPNMNMKKLJJIJJKJIHHKJJJJHIMLKKJIJILHIKKGJLGHIGEGGGHHFEGIHEEFDFGFDCCDFFEBCFEBCC�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĥ����������������������xc]haZ���������\H<46768667:=DILQ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������~���������������������������������������������������������������������������~�����������}�����������������������|~���z�}|�~�|�~������{~��{}}~|{~��}{~��v~��{����������������{}vmdcbcdilqwy������}|��~u{�xz}�{wt}www}wu�|yu{wxuzyyuyywtzr{yhC:8876578764453443114339IVZ^dgktxx�a"#"#)1<FUginqpljqnmfkkljlnokmO4-,-,****((**))*)((''&'('&(()*-/-+,--,---//0.)$&,�e"=g^`_^_\Z__^Z]^_[]]\X]][X]\[[_\\Z[XYVXYY[XUXUWYWRSVURTUUPRUTRNSURQRSQPQSOPRROMOQMMOLIB=976655876871/2EMJLMLLMONQ[L<:<<68/
)79:==;:56==;<<=:9;><;<=;=<81,(&)+-.-.2220121/..--4=DDBBIiuwtuxmoqssw�����mi{~��~������zk^\WVXXQUWZUY[YWYZUUYZRUUVRSTSOTSSSUUQORURRRONRRPPPRQNMQPMMNOLLKLLLNMJKIFIIIGIIGGGJHIIHGIJGJJIGIIIFIKEIKGEFFEDEEEGGECEGFDEECEFCCECBEECADDBCCA�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǣ����������������������wb]cb_��d~�����_J;457798789=@GF]|w������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������{������������|���|���������������~���{��������~������������~���{���������������������}�����������|�������z��tmx�y}�}s���x|~�qu|�ur~�~u}~~x||�z{��zu}��u���qw|����������������nbTQW\]aehjkrw|���������v}{yqx|�sxt|rrw{so|zwrxwxotvzqvt{qwlah[92556645422333100.-,+&&')2?CEO]cfkkkWP9)%&(,=AJZelqqomnmjelmlhiotL1-++****+++*+++))*('&'&('&')**-.,+--,,...101/' ",�d"@g\]`^ZY]`ZZ]eZY^_WWYaWV[^YX\cYXV]VUU[WVY\VTUYWTTWUTUWUUSYVQSTTPQRQQQPOQOQPPOPOPNMOIA;:756666667741..DLKLLKKMMLPWI:9;:57-
)499:;;<67<=;=<<;;==;<<:9===;5)%&&),,-1001100....-04=EDELivvsssmpsssy�����kgz~~����yj\YWVYVTXWYW[ZXXXZYWTTTSUTTTVSPSQRTSRNRUUSSQOQQQNOLLNMNOMLLMNMMLMLLLJKKHGIIIGFGGGHIFGFGGHHFHIGGFGGHGGDHIEEGFFFFEFFEEEEEFGDBEGDEFGGEEDBCEB@@A<�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ä����������������������y`\c``��Tv�����`K<568688:;;:9BWpvs}���������������������������������������������������������������������������������������������������������������������������������������}������������~����}����������������������������������|�����������������~�����������������ux~�{��������|������������z���wq{������w�����������z���|~���{���{�������|y���gP\gjz|�v~�~w��{MKT^SHTfpw�zx|z�{||�wx~��v�w�SMVgk~����������������}id`Ycabhjhmsu}���������iYbmpoupsvwztv|xwr{qypwtxqws{qyX3:C834211122202121-*''&%%'&&&.00148<>LZ`ikc>+),28=DLMLVjkmmjghllhkmoI1+**(**))+,*+*,+,++*'(&')'*+*++,,+,,,-./1112.#*�n!@e\Z[\XY[ZTWZ^VZ\]VWY^UU\ZVW[^WWTXUUUXRTZ]OSXXSRVWSQVZTSTVPMRTPMNRNMOPJLOPNNPOMNOLG<787545565574120+/DKKJKMLKKMPWG779857-
)478999956;;;=;;;<=<<><<;<<>><3,(%%)*+..14:9:9997557;ENPUluwtrnmnqrsy����lf}}������xj\\^]]^__Z]][[\^`__`YX[Z[ZYZ[ZZZVVX\WVX^\[ZZZ[VVRUOMMLLMLLLLMONMLNLIIMKJJJHHIHGGIHGHIEFHGFHKGGGGGFHFFFHHEFFGFHGGGFEFFEEHGDFHFGMRQOMKIEHDD9,(*������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ż�����������������������}_Xc_a�nPgw����V@:69:78:<;8GX{�zrwwQw���������������������������������������������������������������������������������������������������������������������������������������������������}����z���}��������������������������}���������������{�����������������������qDJ\l���������������������~����{gft����w��������|���}���{�������������|z��cLQPXblx{{�|~�yo:00IPNMGAXst{{x~{w{||�|�s�SEIJKXaiw����������������������������������������xXQadsw{xvt{{uyy|oxvxszvywyp~T6;:9;864//0../133/,(%%&)+13210-..,'CP+!$!$%%'''&%()*("+gjjnkfikihlkhB0)('(()')*))+)+,....+)((('*+*+,,*+,+,-./22/0-!*�l Ee\\[[Z]\ZY[Z\X][ZX[XYUYZZXZYYUWVYSVUXUYYXRWWURSTTQPTUTQORNPQOMNOQMNNPKMQNJNNOKMLB:655544553456321*(1CJIHKLJLKJNZE57:744.
(4898898357:889;;::;:<9<<:9<<;;7/'%%'(+-3Jhd^`bdbe`eghfcgpsvulgkpqsuy����|jc��~}}}~��~zeRSVWTWTWTVXTMTVXZVVWVSRSQPTQTTRONPSSTRKMOLLHIFCDBB@@>>ACEEDGKIKKLMKJIHEFBBEFFEFGFGIHDEGEFHJDFGGFGFDFHGFFHEDCEDFFDCEECDFFEFDBH[cgihjhf_[TC0&#������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ļ�����������������������yaU\bw~YNbjpylXE@AD<@GOi|����qq{yQWm����������������������������������������������������������������������������������������������������������������������������������������������������������z����������������������������������z���}����}�����������|�������~�sFGCQ`lz���}���~���������~����xg^o~���������������{����������~���~~��{}jSRQRRS]fv|v���ppHD@GMONNLKFP_s�tv{{stxzxsyxzqqYJC@?DA>LYhu��������������������������������������a3?HNVfljszspw|wlsuvpvvyqqs~UGU;8886645420-,-.//.,/13478420),30/g�8!"""!$&&$#"#$$# &onkjkhjhiilgb=.)('(()())()+*+,.--//0127=;20-.,**,+,,/011/.+!,�X!AY[^YZ]aYY\`Z\^^ZY]^VVXZUVX\WUW\TXTXUVVYUUXYTSSUTPNTSRVTPPRUQNPSQPOQOPPOKLKONNMI?7664355554455524/)*5DGJKLIHKKIKWA649622*
'367:98616898::;<::;;<<==;==;:=>8.(%%&(*1DRIIIILJHCE@;<DGittsgijpoqty����~hg���|}}}~���}}y]@;<?=;=?@><61+18989;<>>=<;;<;::<;98867789:98:87:9:::9799::88<<=?CDFEFA>:416;?@CDCDEEDDFCFGHDEGGGHECDFEEEHDCBCCFFDDEEEFEEDDCEL[cfhhnnqmnlmmkf�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������à����������������������ycOOOc]RWdlqtuyw�{y|~��������umtzLWXf�������������������������������������������������������������������������������������������������������������������������������������������|��������������������������������������������������������{{������������}���~���}pGGDAFTbt��|���x��}���{���}|�����e]h�|�������~{}zxwz��}{���{���y��~z{�~tjTRQQOOPTXaisuv_UOPPPQROLKNNLDEKQX`gfgkjlloqppnhfYM?<=98<CEQ`w���������������������ÿ������������g5:;<:>CP\fioyvrnpsvosrznqs}RcvXH?94756556761-,),+/478851,,%%54)o�/#"$"!#$$#$##%&" "mmgeiijbhgg_`:-*(((((')))**+)+,-.0/338@TWU`<6/+.,+,+.01/0.+ 2�F   >ZYZUU[\QZ[YSX[\XX[[WTUWOTYXRTXWPTSUSTVXOSUTOPTUPOPQMPSSNLPQNKNRKLNPKILLHJKNMMG>75564454334544431..6=EGJKJJHJIHLW?537411)
(266988716999;:9:9::::;:;<>>==?@<91)&$%&-9CFFGFGHFEHHEJKQlrqkYbkoqqs|����|]g��~}|{{|~�~~xWB>>=;:;;:;;:5668978899987866444446778889:89:;<:;<99:::;;:;99::;::;<;=;98889:;:=??>@@ABC@ACDDEEGIHDFHEFECECFCCDEBDECCDEBDEBBCJZacefhjnmllijlj�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ť����������������������|bC8L`XW`gknmlqpuvx}{{�������tks|qIV[`f~����������������������������������������������������������������������������������������������������������������������������������������|���|�����������������������������������������������}�����}�������������{��������mDGEA?DOZix����}���~������~~�������vR`qu��~~��~topqrtx~{|~��~���|���}}}y}gSQPPPOMONPW]`[MKOOOQTUUSMJMKLKJC@DO]_fhhjllnpppnljgZI@:88990Bk����������������������������������e99:867779=EL[fozrnvsulstzrwSfroke\QD=:556799574/('*0595-"#/6&��(! $# """$%$$$&"#pjfgfjjhfdibc6+('()))()+**)*,----/.37;AUV\a>>;><6/*+-/1//.)
1�J  J[\]VWYXQ\ZYWYVZXZZZWTUVUYXWUWWXRUSUSWRTQTSRPQRSNPQPQSQRMMOPLLNOKKLNJJKIHJNNLE<6443355323355232/.2<AAEFGIHEFIHHLT@444220'%135677724555885578:9999::<===>?>=92+'')-8@BGHHCFECEEFEKWksqfP[gprrt}����yZe�~}{|y~}|vV?><;;;;:9:9:9:799:8:::9:9:875321/156878:;::;:;;:=<:;;9<==?>>=<=><<?>>><9;=>>====>?>>A@>===?@ABDFDDFDCDEDBCD@BCB@BDAADA?BDAACGV]acegikmjkggih�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ť����������������������y`2*��rqonkgdab]^^\aa^^U^lhWE5S}xucBPW]aew�������������������������������������������������������������������������������������������������������������������������������������}����{�����������������������������~�����������~���x���~���{���z���x���|�������hEFCA<=AGL^pvv���~����~���|�}}��������JQX_nptpnnhfe^`dgmpsu{|����~���~��}{~�|bOPNMOMMONLNRRVXUQONPRSTWXVSNIIJKME;7AR\cgjjlossrrqpoogZH>:5((*0Mk�������������������������������Z9;97666456656;BS]_jqthkope_Ramihjklk_WC>:878677765./230)'}�& $$  !!"##$%#"$iggjhghihcije4(%((()&%(**))(++,,+..38<FUV_QB><?<92,+,+..,*(7�D "P_Z\XYVXX]XUXZUWY[XWXWTVW[VVUXTTX[UWR]STUUQSTROOMRRPRTMPNPNMMQOMLNJLMLMLKKMK?96543424310442331..3;?AABCDHGEEHGFKN=346332(
%0035655085676676888899;<<=>@?=<?>=<6,(*.7ABBEGHGFFGEEFITmvsjCPhqrrs~����uYd~}}�}|{{|��~}tT;=<<<<::9:8:=;;<;<<==<=@?>==>?><::9;=<<;<=;=>>>=>?>@@@A@@@ABB?=?@@@@?@A@AAA@@@@@AA@>@@A@A?@BCABCCDFDDEFCCFCAEBDEHFDCCBABCABCHTZ^abceggggfffe�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƥ����������������������x`,-��~mhgfc[Z[XXXZXWWVSOQLJC2R�po]EKS[`_^n��������������������������������������������������������������������������������������������������������������������������������~��������~�����������������������������������������������{���{����|������|���z���}����hAEE@<<;>@CMZcpwumtvxqntyseffjnsv}����F?HOSTWWZ[]]ZY[]_cemqvvy���w���|{��wv��|`LOONPMMNMMPOMOSTXXYVRQRTVWWXVNKFDFFIB54<KY[roy{yojopprwvn[C-$%'(,5No����������������������������W<:986565532120+.2?LV`WUWUTORTX[]`dijhosgVG=531120027983*��1#"!""###"""$")feijdbhld_hh]0'&''((%%%%(())(),+,//49=GVW_FB?><893*++*.-+*'D�8 '-, 'O\VWWUPVYYRXYYTVYXSTWUQTWXNRVVRQTVMTSVMPPQORSRMOQSNLPTLLKOJILOGHKMGILLILMLH>5332133331135333.-.6@B??AABDEDEEFDEKJ=355331'
&/124553.3476457779::8:;;;=@@><<>@=>=4-+/;CAAFEECDEFFFFJSmwslEGkqssu�����u``�|||~|z|~�~}�zW;>@@?A@@AA?ABBBCCBBEEDFEECBFEGHEGFGGHDADA@?><:=>>>@BCDFGEDEFFECBDDEFCEEDFFEDCBEDDBDCCEDEFEEGHGFFGGGHFGHFHHHDIEBHCEECAAB@?BB?BT\[]^`addedcded�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǩ����������������������w^*2���mlie`ZWVUUXVTTSPNKKJKH<OwgaWEINU\`ede|��������������������������������~���������������������������������������������������������������������������������������������~��������|�������������������{����z���z���|���|�����|��{���~�~��~�������}��~~�����}���|kDFC@?=;;:=@CFTki`WPMGGD?C:=>@A?B@BCB=657640:KUV[\Z]^_``achqv{|�}t���~{~~xu}~yUINNMMLMMNNNPNMOPVY`]MHIKHIKG?CEB<:;@D8-'�����eOIA7./98/(#"&&&'%*1Qq�������������������������S897755775430.*$#$(,0YG@b\OG?:99<<:?CCDEFJTWA/*((')((.056."��. "! 
!""!"$$%(,.ecfdefgg`bihV1'&&&('('('('&''(+,+-/3<AK[YYF@<<9880))**,,))&K�*ED{o%#PZXWUVUUTWVZXZUVWVNRWVTSVWSUSURSQSNSPUNUPQHSOOJNQOKMOOILKLIJIKEIIJFFJLJLLE=44320132443431131,-5=@?<?ACCDCCCCDDEIJ</2332/&'//03543.12454475469:98;=:;;<<<;=>=>=94./;ABDCCA@BDBDDEGOrwtqjfnptuu�����o\^|~{{~{z{{�~|tP1CPSQVQOQWSQQSPMLOOMOMLQONOPOMSRSTUVVPNRROPTPMMGB?:8?RVNQLPOMMKIIHIHKNIHONKPNLLJKFJJLOLOPJQPOVPOQOQPPSSZ[Y[[WQOTOSO>>??>?A?<AOUXY[^```_`ca``�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǫ����������������������x\*4��}qnkc^YXVUVWUVWSNJIIIKL8Rlb\QFGJQY]bfhit����������������������������������������������������������������������������������������������������������������������������������������~�������������������~�~������{���|���|��������w���w���}|�������~����|���}����{pBDB?=::;:<=?BK{�p]NE;6742./0/*-/-+'%%'&'''&.DT^_]aa`a`cffghluz~����~yy}y{`QMLLMLKLNPMNNOOQRTVOVIDG>==6143/1/-/1.&(�w���eI>/#%# !&',6W�������������������������S386554443320.*$!!$$(^M;�tNGA511132141664520.,(&$$'$%)))()!
+
+��+%$##$&$$&%&/B&3fhhdfkhdcegcY.'%&%&''''%&(''(()--,.1<AMZWNF=99866/+*)**,+)'V�'C�h*(!QVXZSVXXSWWYTXVZVUUXUUUWVWUVQSRXURPUPRPWRRRVNOOTONKQNNLMMLKLIKKMJJGJJLJKC94432232330255101.,/8?A>@?A@ACDCACBEDDHH;/153/.&&./02432.1455558746:989;;:=;==>=<;@A@?=95<@CB?>>?@A@ADDFOrwwrnmprsvy�����lVa��~�~}|||}�}|sJ9u���qrruqmsskrjronntnpiiiiljnmqulinlomjomjlqjor~vnRWwmOQLQSRLMIHIJGJMGK`WNPLOOLPNQNKNLKOJNQLQNOKNOOJOQLOMOKLNRfdgpFAAAABA?@ENSVWXZ]]]]__]]\����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�������������������Ǩ����������������������yb'9��yrljd[VVTSSVXUUTMJGFGLK:EeYXRHHFLTY^cgjll���������������������������������������������������������������������������������������������������������������������������������������~��������|���������������������w���������~�z�~�x���|z������z~�x���{~��~~xmBBD@=;:;9:<>@Iy{raQI<53420-..,+)('$#$%%%$#'/=Wovqrtspjglqtpnqsx�����~�|vz�yrxbPNIIJHJJMOLMMNNPRQUPxuZOC;765641010-.0(
+)�t���`L</#&" "$%+2S�������������������������P687453332221,)%"#!!#dD?rcMHB810011454223320.,)#  ! "&*)(( 	��,'300/0/.10.5I+5lhb]bje^`db]W,'&'%&'&&'('''&'*)-/,/2;BOXSJB<63564-*++))**(%Y�!#)^X"(MTVXTTXTLXWYOWVWTRSVLRUTLOTUMPSVMOS\LNQQKPRTLLQSJLMOHKKOIJKMFHKMHHGKIGH;6521/011242233102/-.5>@@=@?>=?BDECBCDBBFH8.152/-$&,-/2330,14334554378557779<;==><;=>=>>?@=>?A>6:=07<98=CFSuxuqmprsvy{�����oZ_��}}�~�}{}~�~}|r@1yyne^`\]]YZTWXYYY\]ZYZZXZZXW[ZXZ][]\][]]^^\]^dt�}qY\[H<==:::;897:999;<=><>==???>?>?>?A@??B@<@A@@>>??@??BA?@DGSifSPC@>=?A><@FNQRTVXZZZWZ[YZZ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̪����������������������w`'8���smkg]USTSTVXUSSNJJKJIN<B]RNIHKHJNRYbdhlnp��������������������������������������������������������������������������������������������������������������������������������������{���}����|����~���}���y��������~���xoz����}����}�{��zq|�����z���}z���{���}|ymBBA><;9:89;=>K|qbUI=74420--,+)''&%$""#%%$(.*P�����¿�����������������yuu~ws|`MMJKJFFIMNMMMLMORT_U��XQA8676654002/00)*�z���]K;0# &# "&)-P�������������������������O995443321210-)$ !"#*p=FuWFFE<70./06;854320/-+(  !!"%&'(' 	��(,XXRRVRRRMMPN(5dgb]`jd`_ccZQ2*('&&&&&&(%&'')*)+../3>ESZQH?:63452+'*)()*((#`�!EjB#!*QVVYUVVURYTVUXXWSSQUOUSSQRVUOUTTPUSSNROPNRSSIJLNJLLOKMLKGFGJGGJJGEFHFB<42210/1310/121011/.2<=>??=??=<ABBBABABBCII:+.310."&/..0120+0110343137746768::9;;<<===:<<>>=?@A=,7>+4?3%.2?Buztpnqtux{|�����hX`~}}~~}|~��~|s9/w�<:FABKDHHCJGGKHHIIKINMJMJLUYQWRYTRVXWZRQSV\ex�}sNbZ;'$ "$"%#! !""  "&$!"$#%')+*'%$""! 
!%)''*+,/.0/-//1137;FicPJA>=>?>>@?AIMPPSVVWXDOTSSQ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȫ����������������������w_(:��qokg^VUTSUUVTTSOJHGIHN;EVF66?DKLLMQZcfklmmz��������u�������������������������������������������������������������������������������������������������������������������������}����~�~��~����~��x������������gds�u��|~���}}�����{ghy��{}��}z���|���}gBB>>;87:869<:Fx|sfWH>8551.,+)))(&&#$$$%%&%(-!V�����������������������{z}vwwzz_KMJIHFGIMLLJLLLNQU`L��RL?789:942/15642*
+.�����YM<1$ (& !#&+1V�������������������������H13130140/00-*($! #%}6BvODEIA7/,,-/224200./,)$  !  "%&%()!��&,dd_`a^\\YZZL&/OUX]^cbda`e`XQI:2,*('''&#&&&(***/215AETZOD=721230)&(('''&&"y�!=N@!!)VXTWXXQRVWSSXYRTUWRTTYSTTRPUWUNPSTONPSMMNQNONPJLMLJKILJIHHIJGJIGGGGGB94200/11010./2/012116<@<>@>?@=;<A@@@AA?@ABHH5+-10..$'.../141*013355456458899;<;:=<:;>>?=@>@?BA@@>-3>0:?92?@EKtxurootvvz{�����lT\}�{|}~~}}}}~��}{r02w�F					
+	$*)-29=GL`x��wEQF#	
+				
+
+	
+ !$)/:Lp^NI@?>??=?@=@EJMLORRTT>CFIMI�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȫ����������������������zc%8���tpmh_XVUUTTVVTPMKJGIJI=@N=/6=BOTSOOSZbfloopy�������^Y����������������������������������������������������������������������������������������������������������������������������z����x���{��������~������������|���pXnu��w}���{~��}����mgpux|~�|y���v~��w~���nYG;98669868<9Dx{tgXJ=6351..,**)''%$$#"%&%&)/#\������¾��������������yx~orv|v\KPOLIEEJKJIJLJKOSVbG��SQE>>>><74126886-
+3�����\M<9, #" $,($&#'*+V�������������������������=&*+.0131/0/-*&!&�7FxEDDC?6+()*+-11221..-*&"!!!%''(* (��#)E=99:;<<:;CH#&<ACIPW]b^[a`\WSPKHC80-*''''''*)*0548ELXULA;3/022/)''$"!"""�*�pK *[WRTVTJQUTPUWSLSRUQLTUPQPRMSSRLPQOJORQLLOPIKMMGILJIIIJGIIHFHIKFDEGG?72010/.//11-/21031.09=<>>==@?>;;=>@ABBB@A@@EC4*+./,*#%+,-//00,0/2643254477688::;;?==>@=>?@?@@DCAA;21:/7=60<AERvuuplosuvxz~����lKc��|||~}~~}~}zs0@��G	"#,6=KZk���yNN@		
+
+
+ '1?RzZOG?>>==<>=;?EHJLMNQQPHC@FLD��������������������������Ͽ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǭ����������������������z^)9��vplg_WYTRSUWVRPKGHEFIG<9C3.87?MQQQOTW]bgjlmkq������V\��������������������������������������������������������������������������������������������������������������y���}��������z����w���|���������������������z���cXq��r~���z~zxuuz�mU[j{�|���v���w����pXN@524666788D|�vhXI=544//.,*+)(''%"#$%%&)*7^����������������������~xr{jwwwqXMTTRJGDEGHFGIIKNSWjE�{ZUIAAEEA?<96989;2
+2�������|zpfcddVONRW\TL;)'*)T�������������������������E,(%%&)-....-*% #�5JuGCD@<3+&(&'*/.000.-,*'# "&&%')!0�� !"+/*''))()+,3A %7:<=?=DLPTY\YWUSSONJJH?94-**(**+078;DLUSI<3-,-/1.&$#"��#??J/YRRTTRQXTUOQSTMWTQMPTSOQQSRTNONPMOHROMMOOPIMKKJIIIJJJJEEGHDEFHCDGC=60/0/.-..//./00/12.26;=<=<<;=<:9;>>??BBA@@??A>2((.,+*!&+,-.-.-*-/122//223435668789=<<=>;;=><<?BA?>7/,<-6?4*5@BOpuvroqrttyz}����eXa~}|{}~|~��~zr,?��B			 (2;I^n���uIH:	
+
+		
+	
+
+
+
+
+
+	
+
+
+
+"'0?PoYKF=;==<=<<=@CFGHJJLNNJKKKKJ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϼ�������������������ȫ����������������������~]+4���xqnh`ZXUSSUWUTPKHHGFHL8490588AIMNOQVY[`cfghgeehcfdZSRv�����������{�������������}��������������������������������������������������������������������������������}~��w����������{���{�s~�������~���������~{����w]Peklzzxrpogkjmpvx{oSb`v|����{�{�}|~������zaSK93332545Dx�ykYK>746412/.+++*(''(&')))+4i������¿��������������yt{u|{ssS?FEAEIFIKJFGHHJMQWmN�z`ZPGBEEDDA:8::8=82���ũ����{zxvpnggjlc\L+&()W�������˲����������������E2.*&$$$%')*,)%!�;JmGBA?:0)%%&&*-/01.,,+)%" $&'((*8�z!':?5/000-/0..58 &,08<?ACFKORYYYUVUTSRQKIHE=94--..8==DLTPA3+*()--+%�x&tyU#9DPYNOSURTUSPTVYPNORPQRRQRSTNPSRMOPVLKLNNNNOLJMNHIKMHKJFFGDGFHGGC;531/,,-.../,-./-11.29;;=<::;==<<<;=<=BB??@?=>A=0'(-,)) &)*+---*+/232202544335776699<=>>==>>?==>AA?<64*;,9=2-3@ESvusrqtvssyz}����j]hvz}}|}}}}~~��{q/C��=
+$' "(017AXr���zFR=# "$#$!#%$&#"$$#$$%'()'&# "**((((#"&#)+)-.000.524;;=7+,DXpWLE>===<==<=@BCCDFGIJHCE@HLM�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȭ����������������������}^+6��}xrnib\XVTUYWVTNKHIGGHI944196<EIKMPSVX]abbcffdehfb_XSMNNk����������������~���torku�����������������������������~p�h����������������qu�wq������������������������toqmu����������}����|���rklsev���������������~bgnw��|qk_W[^aagfiehlmopqtvmK`fflv}�}t��w|��}�~���nZP?3./122Fw��~riggdUNJMJORRHG@=CGIHKS\X5p������þ��������������~}puxwns`LD861=DIMJGFFGHKNRZI�yf`VMHFDBDD<<>>>?;2�����z~yvlcege`[SXYZUP>&%++Y��������n %(%! & %����D631-+*('&%%&$!%�2RiICA?7.(&&&(*-000,,*)(%"#''()(
+	<�f#@^XPMMKIKKJIH>(!#'+05;BIPQWWZ^___YUSSSJEAEDD?=<=A@HMSM<,%$##)*&�f-O`I!07EOLHPQNKSVSLOPOLRSPLPSSIQQPLMJOBJPNIMNKGJKICHJJDGGFCDDGEHIB92100.,,...-,..,.0202:=:9:=;89==;;<:;==??>>>><<@=/&)-+'( ')+,/.,*+03211234464767757:9<>>;==>=?>>=???<9<-4+::/0??DRrsrnosvtsxz|����h_hrvwy}}}~}{}~zn/B��<!;32.-0,-*++*/(($(#,W}mcRMWIJOMHEEIGC0Gq���w?PE=[lkpqrwpmlffbeb^\\\\^`_O8R����~wwulmknrqxzvwtlqsgg^7&C`tWOE>=;;=>;:;>@?@ABEFHB9:8>FF�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȱ����������������������e,2��~|upjc\ZYVWXWUUNLLKFGIG7/2488AEIMNRVX[]_bcdeeghgfd_WP?=Sb��������������������nbabn�����������������������������|ssqgu���������������ls{pj������������������������rocVn���������x��������sgigSg������������~Y]_ffifcjt}z}yzslmllnnnpqsrjRYZaccq|}x��~y{~�vssu{�~nN\R<1/1.C|����{z�{rlflnvxxopb[Zbpxy{yy>y������¾���lO^g��������~nsw}tkze]XOD=45;GMIKGHJLOUWZ�xg_SDLOOIHD@??CDF;3���ĳSF9U;2,%$'.32#'$&+*_��������h!&%" !'����C86421121.-*%!%�/ZfHCC>7/(&&&))-/./,-+(&#!!$&&))
+	H�^!(Ygb\]`bd^X]YO,  "'(,.7>HVhuvqqluw{wmaTJBFHGHIJKMNNRTM5-'%%%%"�`)DV)$2>GMKSQQOOMOOURQMPQPIUMNMOKNGLLJJJLJFKKLFIIIDDCCBGCGHG@82///----/1.,..,-13346;=99=;:9:;99:;9:=>?>==??;=@;.%(-)'&&*+*-/-++010011123425566569:<<<<>=;<=:9<?A>;4<+*(90/1<?DTuvsonsutswwu���~ddfnvyz|||}}}�����{n+F�}7+k2$%'!! !!# =��T;62*/9;599+%)?n��u<NFCdrx{{~�|��wurqprkmkigfaY<Nsvyyywuuqllonstututropnke]5#D`xYOE>=:;==:::<@@A@BDBD@867;B9���������������������������������������������������������������������������������������������������������������Ͽ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɮ����������������������|b,7��|w��g]ggd\WXYcUSLHGGJL5(/:8:FJLNPTTY[^abcffigfgef]TK:9P[����������������}��{dPRUar����������������������������sokip|��������������srrgfq����������������������vqkSYou�������w���|���vhhf\Qq��{���x�������snf^SKGK\p��������}||~usumornqorrklpprvpqmolikkkoty|~xM`aWH61,?x����{y|smifhmzyqmn\Y\blv|sf=�������þ���zh~��������xt}uqjaaZYXXSOB>8:=DHKMNNPRU\����~qtzeLH;3.1<CH9;���ŰTEQԾq׎��������##$&,-a��������d"&&  #!����B89888779875/(')'#" (�)[gJEB985*&$%*+./..,+*('$!$%'(*	G�\ 8NDAGGT]ODEIB+!!$%(+07@Ph}�������������}ODGHLOPRQTVSC;0)&#" �U!U 
+ .<EPPPJOQRONQQMPOULLMOHNMOJILNIILMIMFIGHFGCCDIGGF>41/..-+...0/---+-/158;==<:>>:::<<:<;;9:>?==>=??;=?:.&(,'%$%))(*--,.22002113312556689:;=<>=??;<=;=>?>?>>==6//*(1???Svvrolttssuuu���s__dn{~{|}}|~}����zr(J�{7,o/#&%  !""D��YEA;78865410-)Bo��qEPGBcrtuz{|||{yvsspnlhjiecaX7Nqtvxywuupnnqptrqtrtpppoke[.%EgtYNF><:;9:<;::<=AAABBBA>>==@:�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȭ����������������������~`+9���~w��k\ol~uVXbw]fOHHHLI4$AACJLLMKLPRORYZWX[[WVUQKD>;3ASW_j{�������������~~vTLKQ[ams�������������������������zroi\k`hu}���������|rlo`Xehz��������������������{snfQ_gr{|��z�|�}{x�}wzywlfgW\u{z�}|�������uqtk`TKFBLd������������ľô���������[`f_eecddghkkmprty|KOY\]VD44g���xuwrmjfgkyvono]Z]enu~�tl?�������ÿ�������������rhedZTVPSSRVWVUVQI>98>EINOLMWd�������׏OG+""&*<L;?�����RFm��v岻������|$$'*-c��������^&)& #"%����H=KIA?@B@=??:*).5;92,BVbLF@::6+&$&(+0/--,*(('$"%(*++
+P�W2L?AABQZMCBCA+  "#&)/6=Ne��������Ⱥ����Q@AFILNONPTVX\WQC5-(!�O'X�M#7D%  -7@KPPLOQNJNOMFJNNHKLLDIMJCIKHDHIHEGGFBDFHHC:2///.++--.11..-**.159;;;;::?<:::;99<:8:;<>=><;=<;==8,%&)&%%&))'**,+,00022124334545688:;>:=<@=;<=;=@@><>?==<=<;9;>>8Wtsqmlqpqqqtu���n[_bm~~~}}}�~���zi+M�x3$c1%%"   #H��j]A74791-,++*'Fm��~n@RAFfuuvz{}}{{xurqmmljiihdd]7Nnvxxwvutpnnqrsuvwrtpookkga.)FiuWNC;:;979<:9:;=@?@AAB>;>@A@@�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȯ����������������������}`-6���t��`_e[kjSY`oX\MHFHHH3LKKJHD?::;764321.1*)($$$"&+1GTYZUaknw|����{{x{}u=DIR]\aaks�~�����������������}z{{zxtqgQcb[^aeimqstrqpttrlkcK_]_euqz�������~z�}�y{zz{|xppZO`_cijssq{yzsrtsqsvuwviie[aqqxntrpt�����ofgb\UMHGFTv������������������������|��mkfgkfebfejlmmpsbZcbhjk^<>i�|vqsrmighlxuolj_Z^inpvysp@�������¾���hpy��������fNLKQOPOOPQRRSWZ\\L48<@@FMECQh�������ҌV@-,+/17I@D���ĩL@��̎о�������L#"&**a�������˩����11rsT"'����CA��`GIWL=ELSUPF@?FH9B8RdLHC=;6+&%'*+000/-*)**$! "%'*.-	e�K7k_[ZZf`^\Z]L,!#&),3<Mi��������������T>=?CFJKLNSUUY^ceee]J5&(�IBc@I/i�D+&(.2-"8416<GE>GMMJRPOONNPKNNLJMLIGIIHAHIFDIFFCEEFBDFFB8..0/-,,,-/-.10-(),05989:877:;88::999;989;<>=<:9;<<<<9("&'$$$%('()*,)+-.1210032354245779;<9<;<:;;<9:=?@=>==?<=?==><=A^ssqnprpoopsrx��kUW^dpx}|{|~��~���~{b,N�r/-f6&#!
  2�pOQ3.+(!$%%%%&#Dj��~r=P=Aassuxz}|zywtrpnokkiiifbY7Npuwxyusqnkprrttuvrsqonjiea)(GjtXMB:;:878;99;;<>?>>?@;569;9<�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʯ�����������������������f.7���u��`d_W[WY\]qTQJNIHIL6KNMJFA;6543/+-*'&$"!#0MUY]_begjlpvwwwz{{~���^BK]g^`^fjkpsquxx{vyzvzxxvxyxwyyxyzysoZ[f^aegmoppqqtstttqmmVY]\`ddjclnknqoonqruvvwx{{{vrpW\aaccddekkmmjoonsssuzunke[hi[]dcfiu����bZZVUUNGGJOc������������ÿ�������������������ý����������������E#Np}~xrroljhhkqrmjf_[_jlmrsql=��������»��qb{��������`IKMRSSSSRSTUUY[\^U=8?DEJME>Hh��������}[OKMEEIBMD@���țMD��Щķ͒���z�+"##(,+d�������������DF�Ŕ%/����6)��vF;60%)2BMNB544/#<6XkLHIJ=8+*'),.35220-,77(%"! #%&*0-
+o�J3\RMNQTPPOPQD)#%(,2<Ni���������¼���dZ\WKFAEBDPSQX^cc`]hryqcR>-.�A?::0g�<+-+DLNQUOORNMOMNJLLMHGLJIJGIFFFIFFDDCFDFG>5.,-.,,,----,+00+),06;::;:8889989:;::;;8:;:<=<;;:;<=<:6("$%$$"$&'%')*)+.1342113446447777:;<<<:9:=<;:=>>>=?=<?<>>=<:<>D^ssqmnrrplorqv�|hXPZ^ry{||~~}�~d"R�k*4n3!""  A�d23)-8>0,,,+)% Dl��sAQBHhtuvxyz{|ywtprppmkigfdd^6Psvvywvrqpmoonorrsrqqomljg[')IjnZPA;;88:::;:9:;<>>??@;30/116�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ͱ����������������������~d04���}u��o{`W\\]f_bMNM]RHIG:GKLLF@;6230,+*(%" /MS\[^ahfikstuxwz~�����HHU`Y]`_ejknspqttwxwwyxxuuwxvzz{{{{ut_Tg\^bimoustsstvyyvnjZQ_[\dhekkmnprrqqqwtvw{|z{}}vqfQY_bghijlnlompopuxuux~wkkcXhnghimopv���ZPSSSUSMHGSb������������ÿ������������������������������¿�����7Lt|}xrqomkhgjmnkhe`\`knonqpc7��������»����{��������W;ER\__Z]\^]ZYZ[[YWWJ@GGNLE1>i�vl~�ĿÂZNAIIIH:QB@�|�ōRJ������ߔô�s�#!"#',*l�������������D&��Z 4����0!��l?7( %$% ""%$0&Z_MK[~LB/.'./-EP071--Ji,&$(""&(/2,	
+h�D4A-230//322?6& $'-2;Ni���������������������~rimlnjkkkhbnqrsrx}~wdC%0�8*u�\&e�6!@HKQOHLOMHIIKGJIJFHKGEGIGAEFGCDCEDGED?4.,-,+*+-,.-+,-.,)*07;=<:;99:<::::99;<:98;:9:=<:9:<<::93& "$#$!#%%%'()(+01023212553357668:9:;999:<:;:<=<?>=;;=;9:=<:;<Ebusqnorqmgmpot|zbMJU_kz||}~~�}}c$V�j%;u/ "    !! #C�f2-,155/-./*('"El���n?NBJjutw{zyzzxtqlnadkjigfbb]1PotvwwwspopmfeipddpqpppljcV)+KrqUNB:;88866889;=<;=@>?=:61126�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɮ����������������������}d3/�ʪ����krc\[^]aZTNMHQJHHD6JLJHA>;51/-+(($#!&R?@GJKPMKLNMNJJIEB?EF?M<(.ROIMLPPPPTONOPKPPNMMOPROQPMIIIED:0&.IPR[^[WYWWY^\USKGC6.4RVQRW\[_bc`bc__]]YZZ\[ZYWYXTNEQZV^^a`b\\]\\^\][WUWZXO@11Dhilggkd_n��WIHJHKMIB<EWf��������������������������������������������������7Lu}}vtrnmkjhhljgdd`\[klnjrrh9������������u{��������U2Ih|y}}ystrljhjfddcde`Y`ZR%3j�zsy��׿iTR@CKI=8VEM�yȽ}NE�}}��o����_Y�#!#$)-.n�������������Q(#;����0'��d>3""(%"#&$#!!\fRHd����������|5h[WTVv)%(OaRPTU]F/		q�FAdSPRSTSPKRL>&#'-3>Pk���������½���������������ʾ����������������I?�5$=]j['
+f�0 %AJLOLLOMIEGHKJJHIGGIHCEGFADDC@CCGEFD<2/.-,*+,-,,-++./-*+08<<<;8:999;889887:;787:89:;=;:::98990%!##"#!%%#(((&,0/-/11/044234635796889888979;:;;===;:<;899;:9;@autrooqrpklnmo}zZJINYgs{|}}��~y\%X�g$@w1$"  ##!"!   )<}`+171.2///.''*$Co��}q8M@Hlvtwxyyxxta``nV_kjjiec_Y4Ptsvwvtsrprmc_amWjrrqooljbS)+IomTM@9:9:75898:;==<<>=>?@=69=>��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Խ���������������������������������������������������������������������������������������������������������������ǯ����������������������~f11�ǣ~��yfc^WVVUVROMLHIGEGE7MLKHC=94/,+*(&"! cE(/,32*'&%# (�x:552,('%!""";N583*)%$#%'$#" KI.+/1./*/**(%*'"#""!!   !$.Ti;+)()*'+%&!(!!## !! ! RakM3;6../4'%')*,--,-1?Ck����������þ�������������������������������������2 Mr|}xwuqolighnmigf^[^kllpvtr>�����������������������UK����������þ�������������.0l�y�����tUXOC814>LUFQ�nȶtpe4E0#9"9/:9!#',,n�������������l"&\����*1��W>0#$'&#$)($!Z`SIc����������x<������"%(r������M/	w�B Co^^\^`_\][ZA%#'+2=Oi����������ü���������������������������������CK�)!@�hD'p�, &CQMNOOMKKIKIJKKHJJIHHFFGGEFCDCCCFEA8/--.-,*+,-.-,,./-*+17;<;<;8:::;;:::999997<:97:<=;99999777.$!$" !"&&$''&%.20/11//353234557::9:889:89:;<=<:88:9767658788;>`wqlnqqonmmmmm|}T?@DO`sy|~������~yW'Y�b Cx0$"!!###!! ! 
,L�i;@<.,,*+*)&*0'Hr��}o:KALnuvwxwxwuqc]Xd[glkkhfd`Y5SstwwwsrqpplfabhaqrsqnmjheT&-JkmWJ?;99866978<<;;;:<=>@?=88<=�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ű�����������������������l3.���uutogca_]\STSNKIHEEDIF7KNOJC>940-**)&" Y��rK4-($0��P983.*'% !N��S) "!RU-)'%%$!##!!"c�`-" "" Oh�s1/)#!   !"$%*5;W����������¿�������������������������������������0!Js~{uwwplifdfmpmiha\\knnuxuy;���������������v�������QQ�������������������������+1r������ʕXZC)"!&.CTEU�g͸}�؁׷!D����}�i��R#'++n��������ȷ��ɧ" ,�����%0��S<1!"(('$1TQ2,$!ddVJi����������u3fchalt$'+Vus�tv�I/	{�8"FhIBCDW[IEEXG%"&,2;Ql����������»����������������¿��������������z7U�"%BQVo�&'DMJLMLIHJKIGHKIEGIGDEEDDEDF@DEDDE?40-,,,,+,,,-,,.0-,*,16:<;:;<99;<<::::;;9878<:87:<;85699:776-#!%# !"'&$$%%&-0001/234443444576798888779::9;;9767:8443233669Basrnnnmlljjjjh{{J.3:E^s|~~������~}V%`�h ?q/&#!    !"#"#@�kEG>0*'$$%&&*0'"Lp���j=MALotvvyvwxxvoljlhlnlihgebY2UuttxwtsqorpppqssspqqmmjgfS$.OmjUJ>887567757:;<=<;===>>;6224�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ů�����������������������m4%���̥��mggq�VV]UKIIGGFIG6EPMIC>94/-,((&"\���c2-&#6��_771.*(#  Q��U# #!
FkC%![��8%$Fk��7-$""&/4N����������¿�������������������������������������0!Nt~yvvxqlhdcekqnkhb[^hmlux{|1�������������cu��������JR�������������������������+3v��������vZA2/.449RHW�cƻq����s��Ѻ����w��d'+/s�������ΣE0a��)#7p�����%3��F:gD+-,)-�ǽ{1%"nbTN^j>;8J�8ANv2<6/,Jq()+."/a$-kM.��2#"Mg6%&+EY6'0N@##%+1=Tm����������������������������ÿ��������������q/Y�#H\N 
+u�# &EGFILKIFIKEDGJGDFFECCDECCCC>DEFD;1-,--,++*++,,*,0/,*-387:;999;88;:78988:98668:7889:9403897665-##$"!&####%%).-./.20111113634357777777997689866455442123436;aorpmkgedcdcfjxn5")6Yr|}}~������}W'`�e?r,$#!      !"" E�dCAC=2,)()**,+%!Ks��~h?I@LjsvwxxwwxurrrsonoljhdbaV2RptuvsqpolnonqrrqqnnnnijfgT$0IlhTH<76656876899;;;;=<>==:542/�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ů�����������������������o7'�����~�rlcmgr\X`WMLILVHDD9KTKHD<73.-,(('#!_�U.,%"8��i55/-+(# !L��D#!%!Ci=# X��. &$?j�z.+" "$-1I���������������������������������γ��������������2 Qs�}xzzvpjfefkvrjha[]emiuwxx6$�������������kr��������HS�������������������������)1v����}sx�wXE@HJNEIWG^�e��r�ϴ���͵Ź�������Y(,3{��������W"t�Y5�������!5��D;�ѪF).n���>$#wgTMMH@<1R�+68umB`WPGZ^)+/ME8����D/��."$CkA"&,=L6+3L?
 "%*0?So����������������������������þ��������������p/i�  Nxc'{�#)HGHKLHILJGFGHFDFFCFECCEECBCCED@90.,,+,,,**+,++-//,,17;;:=<9;;::;<89:9998999:::8998882-3787676-%#$ "&"!"#$$*/0/13401243337565789:8679:8899985300.1.,./+.237coomke^YWRPS[hvZ".;4+Os~����������{R&c�aAn+$## "$$$%##$$$%O�WA<67<644651.+$ Mu���i@J?RnqmsomporolnmlhkhgecaaaV4Wsturghddgdg`bedgighfd]`ddQ$/OojUK<6578963698::88:<<>=<;:><9������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ������������������������l7$�Ɨ��q�~}eZYZd^tZPPRSzeFF9FPIJG>83.,+*(&#"!V���O0+&! 6���=70-,(# "L��4%!%!;\7"N��-!%$9i�q,+"  "-1E���������������������������������ҹ��������������2#Ps~z{|vrkgfhmuypmcZ\ajhturl5��������������]t�������CW��������������¿���������,2y����ž��tUA'%'*/?SF_�_��q��ν�ā|��Ě�����^',1z��������2'!:��,�������!?��<1����u),20`;&#}kXNLIC=L��c=L�lT�����g/-0lz_i��l@4"��)"!5I8$$(+.*)1@1!$*1>To����������������������������¾��������������o%u� #FWxF"��!)EFILKDGJEBFGEDADD@BCA?DB@?CED@6.-..+**,,*,+*+-/.,,1:;:<>=:9<:8;<969<99<;::9;88::77970,3688664,%###'!
!$%$)/10133022332254678699757866:97885312/-++**)*.17]nljcUI?;637GcoG&?HC&Jr����������~yL%f�]Er.%"# !"%$#$$$$&%-V|P:;88965543/,)#"Nt���`?H>Smq_l_\i`]gZc`b`e\\]UX_`S2\tvvrY`]^eZ_[X^aVd\W_VWWb`L"1RxrWI:54568324579999:;;<8558<=>�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������û�������������������������j?"�����q��vXWXg_uYNNSSwqGJ9ELLKH@:40-**(%##!P���G2-' !4���>80-,(! !G��1%"&!5V4" O��+!%!5f�q**" ###-.G���������������������������������ӻ��������������/(Rs~}z{~vqmiggjotpqcWZ_jjptrg4%��������»��}I���������CT�������������������������*9~������٪cYJ6'"'8OP>b�]��nߺ���ðőҮ�����c'.3{��������06�[ ��9!������~"A��B1����]#(F=Cp:&"�jYSQLG:GnubM��kHtvvv|`212@Iygo]<:0+��)#'+'#$%&&&(,/) #)1@Vo����������������������������½��������������k"zw!��$-DGHGDDEGFDFEFGBDCBBAA??AAADC=4.,+*+,**+,.-**-.,,-17:78;>=::;:9:8789:89:9887967:964881.4887774*$#!%&! !%%%+/0//10/121133336623457875257568842///+(()))(,.8]hgdWB*(+$"+C^nH+CJ>'Jw������~����}}M'i�`Fr-&%#!"" !  "&%5[�M<?A930/-,*)*)$Tv��`BH9Xmtcj`YhjfbU]c\XcYT\WY^_S3Zquwthdceghec\bh^eeadcd^cbL!3PniTI<66566545788:989;<;2.//399�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ž�������������������������j:"�����x��l[Y\g`kWONQUf]KL;@IKKG>:40+)))&"O���?2,& "8���>61,*(# (,!B��-%$'!
0U0! K��$$%"6g�d*+$$!$*,I���������������������������������έ��������������3'Sr~}x}�|uoiggjmqorfZYallptto3%�������������Wet�������BY�������������������������*@��������i\[NK?AEOQWCj�[��l�~�s�����a��{j��w<"(/1}��������NLݺ*j�C!y�����zH��=+��ɪG%;��}p4%!ngZQP[WK@@@J]qYDPB><?C57558`[�~=9-,��& %*$%&)(&*,,-( &/?Vp��������������������������������������������h �s#>nZ&�} .DFGFGIHHHHFDHJEBBCEBCB@ABCC94.,++*+++++,+++---,/37897;<<;<::<;:8:;::8:::898978884589426775463)!" %%! $#%-//02110113;D417Rcgknq��ysrlh[PQ>/1-.-)%%&&'().5ZfbVF3,.62&2N^pW(+/*6Wv����������zK*k�YLy-'&#!"!!  #%$-d�RDIA0/-+'&&'(*""Ux��_CE8Ynsnrplononlklkgfefgfaa_S4\pvvurppnnnnnnqqnoonlijkebJ4QraTI:5556422346699979:;51.,.3.������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ������������������������k<"�����urqk^ZYXY\]ULJLLKMIF9AJIID=83.,*)($!M���63,( !5���A80-))%!!!%3R! ?��-(#1/!.[2"H��%%9'$<j�c/+"#$!"!!+,L���������������������������������ͫ��������������2+Sy�~y~��{pjhgjqxzyk[Y`jlorpl1,��������¹����z��������?]��������������þ���������'F���������z^@2/149?TAi�a��^<00""$&"$#&/3���������shm*-T##�����uF��8#D��|1*8��ê9("�rrdPy���������b^�����`CG8Sn���m6;/5��,"*6599:998896' %.?Yr��������������������������������������������e�p,xae9�u!/CEBDGGDEFFCBEFC>AB??@DCBB?7.-+,+*)*,+,++**-.-,/58::9:=>=;=;9:;;::;9;;;::8889::8856:9766556341)"! "$! ##%-../1/0113:VG,/8s����������������7/,,)'%$$$%%$)2R_UE=:::<74HYcpa2$(3Khz����������uF-r�VKv0,'#!#!! 
"!#%$*h�`NE?950,)(')+)"$Qx���]DF:[uuuwvuutrrstspnlkkijfd_R0]qvvutrprroqsrttssspnjkihcE 5SwfUH9455333225787887:::99:6895������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ������������������������nA!���{yrnkcYVUTVWUOIGGGCHJG<BHFJ@<73-+*((%!K��h@3+&#"4��{:7/.)(# !1/12979:FX$ #:��+* )SG (^6$!f1J��!!1u5#@i�b0+"88$$ "*,O���������������������������������б��������������3-S|��|}��{qjghiqx{{j^Z\ehljhe34������������{����������A\������������������������~'E��������ׂZ3$*7R<k�d��q|ol^PKOLJD??DHGB?-%'/4���������j%������qJ��.6��eG+)2yw��4(#�srhl�ý�������\\�����`DI;���[G81<,6��%"!B\^^\ZXWYVTG,#(/@Yq���������¾���������������������������������]�c1J$2�p!1FCACDGCBCD@ACCAA??>>>CCC>4--+++**)+,-,**+,-,.26789679:;:9<9::::89:88::79987799664579865566331)# #$!""$,-/..-0115Pa3,2J�����������������L*+(%$$$#$$"#&1LRB=<;?@<4D]ahwjI97N[q}�����������~xE/w�RPt4-)$"#$$"!!##$&#.r�QA739?852/-,+& "Uv��WDD;^uuxzvvxwvttttononnmjeb`P4^quwvvuqrrrpqrtttsrqpokijaE!4Vx`TF:77655444578777788856878:9������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ������������������������mD!�â~wrpibWSQQTTRNHHHFFGIE?DKDG>843,*+)*& J��gD5-'$PX!1��t=80.,(%!*Ylmigbab__#"#9��)-,;;=:659=`B'S1%#,'# " )x4D��,+.489<G�5!
Kl�k3+"(,--,+*,C@$""$++I���������ļ����������������������ѳ��������������0+Wz��|~�~urjhgkqvt{jbW[^gmmj^/1������������aT���������E\�������ԟ|����q����������(E��������΂ZB6728=BZBk�j��|����wwyytodcgih[W9&(.0���������7 #"G������oN��/;����I)A����.(#�vtm�����������^[b>B=YN@D5fH Dc1=/?��%!%`m__\ZWWYXXU.!&+2BZt��������ɶ����������������������������������\ �V)U *,�n2FCEGFGEDCC?CDCADA@?ABDD?4-+*+*))+++-,))+-.-058::9878::<;:<;<:;;<;9::::8::98:87774258855556340(""$"! ""$,./010322>hM,-8h�����������������a')&$$"  !!!$/AC@BA>A=8>VegoynTFFWav������������}wF4v�LUs3+($%'&&" ""#%!7w�P?8986430.-./) %Y}���XFD8^rwwxvvvwvtttspnomomigb_L4^suxwvtrrtutrstsssponnnjfaB 6Zw^SI?=====;:89:<:88:9871//./44������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ������������������������lF �Ġvroh`VTTQSTRNKIHDFHJD>BKFD?822-,,*(%!G��sC60*,.#!'$#(!RQ !+��d?<3.,(%#%:>?AWWSDE[#"#:�|,.=����{uuq{E!!J.#N���������/@�v?���������8 Rl�i2+"(M`b^WVTR[A$! #,,N���������ø����������������������ϲ��������������/*U{��~��unjidiqwqwmdXZ]fllkb-3������������}x���������C^�������ԧ����������������%F����wtjmo[=ASSQVRX>l�müs�xojfonke^Z\_]UN3%),7�������ѷ ,(&#D�������hP��-/j���K&;~���7' %�swp��mVHNEAKZx[^�RDIzH>?1ZFCCNp-<-
+J�v"".ma<6<>7:9:HY/"',4FZu�}������Ʊ�������������ɰ������������������~W�Q9wK/ �j2B@CFDBBCBA@BA?BC@>@ADC;2-+++**)***++)(),.0/588:;9999:;<><<;;9<>=:89;<<89:9:;7788423775455513/(""" "$,..010214So5+-@~�����������������k$'##" "*5=BFC@=5?T`gkx�v[KNZjx~~����������|u>5z�LZp-)(#"'(%! ##$$!8o�L=DH:21,*)++/,"%Wx��TKD:_twxwwuuwwwvrpqpqmnligc`K3buuywurpoqqonpqotpllkijif_@"8[w^TKEBCDFEDDCBCDDBBB?<;1.**(+,������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ������������������������nE �ơ~wtphaXTSRSURNJHFCDGGD>>GEF@812.,))&#!F��v>4-*Q���������W*��\C<30+)% %Ac[bN0I!""?�p+,.>?>BPZE7\B&L0!9]VU^ddDE�- <�s,ZSLUnhM@�4 Jg�j.+$&=AILOQC;K<$")-O���������ĺ����������������������б��������������0+V���~��ynkifjs}}wg[]bhklmk+:�����������������������?_����������ͼ��§��������&R���˒qa_bfaA5KPRVU^<p�q��^H?7(!%+--&#'+-3,*&$(-7�������Ͽb.D�������������jU��)C�s|s8&>U.=Q1(!-�xyr��YTBC<<CIQKL�����4>;7�����~,B,	U�w!,ljTRO<$1IQVW."',3F^u��������Ƹ�������������ƘĿ����������������{Q%�D;�p7�b!7@@ACBAA@?@@A@?@@<@BBA7/-+,+****)),,+(),..247579978::99;<;:;99::8689:9:8977887787224555555412-&"! 
$-/.-/-01;eX,,1Y������������������Z('#"   (5?BB?;3,E^chu��y[SR_q{}���������{s;9~�Hcf,*)$$&(&###$%%$"/[�WICB85520./01.%%\w��PC@>buuwwpnnrollojpkkhheeedcJ5fvvxvsmkihlhikgfmiehbciji^A"9Yr_RKFEGHIIIIIHIJKLKJIKJGEB?:;5������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¼������������������������oG�Ú}wrnh`WSRSTVQKHIFCCFFD?<FFEA922-+)*($!?��wD5-*8SKA@a`>)\U)��gF=40*'$"ZX*Gg-&!"@�h,*".bmdo8@69[/)l�~_c.:�z"v��x1\1 Fg�m.+" *ATQM283"!'(G���������ú�q��������������������ΰ��������������1-W}������}nigfhr||yscZ\djmnpg$?������������ul_u�������?`����������ɹ����þ������}%T�����qgjigbG?BDEJPX=o�q��\C:/%(##"#'.8���������ë��������������cX��'-dHb�@(6MVZ6))#,�yzi�}RI<;7;BEJHAPz��l,=71_WXYdh*C*Y�o"$Vi`a_F*4S\WH)!&-4E^v��������Ʒ�������������ʱ������������������zK&�;4pa$�W6BEB@BD@>@B@?AAA??CD?4-+++*)))++*(*+)(+,/37877:988:;:9:<==<:;;<::8;989;;87::998870/2544444332,&" %.0000/26Lq>*-8v������������������F)&$!%4?C=9434E[el|���dRYgw|}����������|v==��Hik/,*(%')'$#&&%%%#/Z�SF>:9;;964420,#%]{��NG@=fuuyukljlmfhmaggbdc_ceaaI6ftwxvre_hcjgbkhadedcZ^ihe\9 >Zt`SKGHJJJKMMNMNNNMNPQQRSRQQRUQ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oI����xsmhaXTTSTUQLJKGEBDIE==GFED;42.+*,*&!
5��zJ9.*%g���,;O,��sL<52-(& !ZW)Pf'"!=�k+* !@�8"gP%#Kb/N�96�8!=�y;�:2�F&$ Ed�g0* 1ZG/NF+#!! )(K���������������������������������ϲ��������¿����1.Vz���~��|migfiosqriaX[fgijop+A��������������{��������;^���������ʿ��o����������}%Q��ƹ��{hcee2"&$*+;W9t�lĻ\A4+! '#""$(-7�������μMHDMJGCAHMJMV���`W�{+"$0K/(W��ŏ/*#0�wyb��B:533<ABQF?3@d��?A2'% #Dg,D)	U�j"9JIJHA46=HJ?#!'.5Hav}������ǵ���������������������������������|D)�3 )Mn+(�R   6AE?=@C<=@A>>@?>@BC>4.+++*()*+**,)(**---28:868:978;;::;=>>=;<=>::;<;89<;988889885.*1433322232,$$-.01/.1<^g-)0F�������������������5*'%%4>@:88AGS`hs����iVZf��|}}~�����zt3>��Dlj*+*%$%'%%$$###" 1c~OICA><;84320.*"(]|��}LJA:gvuxukjlhohhlhdeeaaZbea`G4hrxxurlbiejibjfgbhfb_\hldZ7$=^u\SKIKMMMORRRRQSUTVVXXXYWVYZZ[������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž������������������������rH�Ş�urngaYTTTUUQMIIFDDFHGA>DDCA9410-+,(%"9��yI9.*#+�Evi+��hG<64-(%!;^[gO  " <�c*, !9�=-mK  N_+R�2�9!?�s<�7/�D #Yd�g2+-_@%LA! "*(M������һ�������������������������ɶ��������¿����0-Yx���{|~jihiinonng_[Vdehikm'B������������zu��������<b��������ĉty������������z&S���������rh6((#*1AZ:w�n�����{jb`a^PEEBGORFB'%)-:�������κ& $9���X`�l%#bzU*&)V���q-)!1�mtZ}��WMECKP\JS{���~5C.###'.GQ/F*
+a�j#%GSWXTUSQOQQD%!&.5I_x}�������ȴ����������Դ�Ư������������������}@.�4 ;^j[+3�`;90 !:@B?>?@<=>=<>?@BD@90++**))()**)**''),/027996579779:888;;::;;99:78;<:97:7967688766.*1323223311,##+,/./-2=kO%(/Y�������������������))%"$4::9<FOS\cjv����gV[j}{{}~�����~|t0B��=cc*+("#%#"   !#! 5e�WOKI=76.))**-) ,Z~��}DIABcwxzuolpjrjikjhbga`^adc_H6hrwwwtpoqpqnkmimkkkjfgije[3?]xWSLJQTURRZZZZ]`b_aaeceehgjhhj������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʺ������������������������rH�Ǧ}xsngbYURRTTROIGFFFFHFB=FEF@9520.-,)'# <��~E:/*%)�:wj$��oG<63,)&$#3636M^V>;?" 7�b-.% '`rpj1#& B_* 5�}��!B�q&{���(&& %mb�s2*!!-L]QT2  ""#+(N������д�������������������������͵��������������0+Ux���~�rlkjntxvvla\Yelili`$E�������ƿ���vaU��������9d�������Ӵ�Ŀ������������{)T���������gRQTRTRQ[;~�t����������¸��������?%*.<�������ϼ*"### !&+:���Vj�Z%I���j''Ga1!'+)!6�gnVW����������Ix��Z0*%>*"#&+//22J)
+n�l !7gjhb`c`]X]_H%!(/8Hb{��������ƺ����������ע�Š�����������������yw;:�,Cbm>(>����}lf`[j�D#>@>@A@@?@<<=@?ACA7/+***)))')***)''*)+/47::97899;;:98;9;==:<;;<><<<<;=99:9678999892-1434444410,"
+$-/0./07Do@%(2k�������������������#(#!#18>EKSZ_dhm|����paaoy{z|~������|q/D��8d[(*'#%$!  "$%"!2`�aUIC;41,))((+*.Z���zJN@Afuwzyvsususqqpoooljhgfc]A9guxyxttssssstrpqopommmlid\0$E_u]Y]adgkosvvx{{}~||~���~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɷ������������������������pI�ͥ~wrkgcZTSTURQMIHGFEFHH?9GGDA940/,,,*&#?���P;/)#n���9
+#��rD<61,)&'-^gaeaebbfU!!!6�b/03?GHJYbE=O:!:W+!-30*Cgb5"? <�e 042DnqRGf5 (|R�g2+# +34=PVF19.%#"+)M���������������������������������п���������¿���.*S{��~��~rkihmtxx{paZX`olhd^#O�������ž����kq��������8d�������Ѽ��ʻ���¾������w&U���������ybKQVTaSPi8~�yÞ���þ�������������>%*,;�������ϸ3$!$ %(6���Rk�\!P����3%aȬ�[)' 8�glPLGM[X\cns��HOfmR.-&A-$%9UV=.,G$p�`!!GpY;?@><D@CF:"!(/9Hbz��������°����������ըĽ������������������}u6@�("I�Y 	L�e�kjm~}u{�=$<9;>=:<<>9:>A@B=6.))**)('(())))((*,,/3578569977;:79:<99;=;;==9;;:99:<98::89;;98792/3543353310,"	#-031237Gl1#&4p������������������y &#"1=FKOU]cgjp�����qb`mw{|}~������|yp2I��7iU+*'!#%$!""#&%#"1b�U>:.;94320.0-'-]���|KN?Ajuxz|yuuwtrqrrnoponliec_@;jwwxuvtqpporromllmlknlkgd\.&E_v]]hou{|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������µ������������������������pI�ˠ}vpljcZVUUUSRKHGFEEFIH@;HIEA941/-*('&# <���L:/)-8&!0^lH:?$��xD?840+))/]G/268<9:2"!=�^14K���zqilkj7>R.!_���������(!=�k:���������61�F�Y3,',V`c]WSOTW9%"#+)R������г��������������������������ɻ�������������.+W}��}|��yrkjimpusxk[WW`lkca`%J�������ļ����y{��������@g�������й���������������uV����θ��zka;CMC?5Dj7�|ã�Ϸ���������������;%+.@��������˷���������������Ph�[aҍCk1'K��ƍ-(!:�eoRxL;2%.3BlDYIEZru1F-'R����`1I%
+q�^  HpXFAEHGDEGK>#")0:Kd}��������ô��������������������������������u3F�!#FeJ!S�4?;;Go96�5$9:=<99<<<8:>AA>3+***+**)(()**('')*.1676665798778868::89;<;;;<8;:9999:8789459:7688224445545320+$ "'0=<=?@[n'!$2x������������������c!%%$5CIKMW_dfmw�����sa[k}~}~~���~xm/L��;hR*,(##'($"#%&('&#/e�YBAA@@=932562'-_���}=E=Dlvuvutnpqpmknlkmoijggc`^=>ktusgpkjilkjbdhgcdccffh`bZ-(GatYUgzz�������������uwtuqrops����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������տ���������������������п������������ɼ�������������������������qHyɢzwqmjcZTTSTTRNHGGEGHHEB<HEGA84/.*)))&"6���L;/'G���������^#��HA;73,*'+Q4 $&&'* %";�^14Os635.(--0'FZ*"!n�>CA?==:0"?�q H�A?=<9;67$4�H�c2,'/^E9822/02)"!#*'N������Ҿ����y������������������������������������.*Vw��}��vrmjjmltntk\XW`jjiia+L�������Ž����jz��������<e��������Ďnr������������z!Z�����xi`lmibfW<+.Kj;��y¬�˧qh]X_���^MQUb��<%+-?�������еrw~��~wt~������Fx�[!^��k�.&Q���h))!;�emS����phka[e�@z�g98W<K+8��~}��JH#
+y�Z" 7mofjd^_]]Y]F##'/:Oe~�������̿������������������½��������������m/P� R�L!
+_�y���j^~��r,%;==;:<?=<<>A?;1,(***))**())**'')+.4589878698788888::89<;><<9::><=;:::778:78::9:;:44999:2764662,,/17:@??<+""%"$/;CQT5*&7������������������T)+*'"*<ELMOV^ciny�����saTq~}~�}|~}���zl-L�}9qM'*)%%%&%$"$$%#"!1o}\JDFEB?940354(1b���}DG<Bkvquqnfhjnmgjjjge_cccZ^_ECluxtflgjgfgncfchhbY`fabWa[-(E`w\N658889:66452+-&#!"����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ծ���������������������ɿ������������Ⱥ�������������������������sJtĚ}wrnic[VSSTTQNIFDCDHKE=;HFD@94.-(')+&"6���I8.+O�44;:0-+%��tIB;64.,()DB=?BGGGNV"!&$;�e23E[&!"1+>^.!_`!8�p By'8'1zV�b1-',I1""('#! +'R������ӯ�����������������������������������������.(V~������}wojmqtzzzq`ZY`kqun\'M�������Ŀ����{���������>i���������ƻ��ǿ���������x#[����ž���zmbH-"+Hdl;��}������þ������������7&+,A�������Ѧ/P������A��]YmN4K*,wʳ�p() >�fpX���ý������9AR{lX6#K/TB00;r^D 	|�Q! )MZNPTOKMMMO> "(/;Ke���������������������������ÿ��������������k*\�!V�Dq�Ia." 
A@*!';><99;<:=BA@91----.0,00**+*)(&'*/6979:;769<97788:<>>?BEEGHFEIHJJGEFGHJKKKKNQROOQSVSZXWUQNNLMMOSSTUWWYXX[[ZSROJIHA8,! &5ET`|������������������R4332.*"*?GLLMQ\cjny�����tdYv|~~~~~���}|n+S��6 vR,+("!"%%""#"""" 2T}QGAB?@<51100/(2_���yCK=Bruospmlkejpjmild`^bcbZ_\>Anvxtfmcicielgjegee__f^`[^Z+*DdxXO+$'*/3584122�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�������������������������pHp~xrnhc\UQQSTPLKGDCABED@9IFC?70-+)*((%!6���I9/)E�
+��~I?840-+'*FWXYZefWVO  $"6�e20;]]][\YV[n:Aa+!GQGLJGIISt( ;�p/VHKY\bcp�>2v]�V//&(ABCB<95;G9%#  '&O������Ҵ�������������������������Ϳý�������¿���.'X������{qmkpvyzwmbZV^jtsl]%K�������þ���oq���������>j�������Бtz}wun~��������x$`���������zf>.L^\j7��{��������������������,&,+B�������ө""#"@c������B~�UHgnc7%$KhDxF%( ?�hqU�����������.5;ik��6M3WX.44-Q\D
+��F! !BKHFKIJJKHH2 #(/;Pf��������������������������������������������f%e�%J<>��-e����|T/#(;>;8:;<<@CDA:7<<ABBCGFCA;520-+./4:<:;>@>=@C??A@BEIKLOONPQQQRSRSTUSTTUUUUWXWXZYZ[[^\]]]^__^]YWVWVXWXXXY[\Z\\[YZZYVVUPD9*"*6Lbv���������������N773430-'+>ELMMRZcjly�����vd^y}|}}|}~~~���}zg*Q�|. xN,*& "$"!!##"!#!0J{�IC?A>=702321.'6b���yBJ;Epurvrniljlpiqlkhf`a^b]\\<Fntvvpojlkmoqnnnklkkgggea_[()GgtWQGENUZa^_db`OFEFHHHINJPUXdhq������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ų������������������������qIoĚ�yslie\URQSTNKJFDCCDGD:7GFD>62/++*)'%"3���I</(2e6AHLJGHfU��tNB;42-*('421*,N[?74 % 6�_01<dgiecvWPS/Ec-"U������y�r" ":�i2~�����opj.-o_�\5.&&JW][U_PLI/#""'&P������԰�������������������������Ƽ��������������0,X|�������|vposvzxuobZX`lpsob)P�������ÿ���e`���������Dl�������ε���������������v_���������uj@2>KQ@Ee3�����Vw������{4b�����@ '-,A�������Ҡ!I������?��G!���ߏ*&}��o7''G�ioY�nf]��abh{�/aw���k7Q6_d466;h\H!
+��7!5mpeliac[Z^Y6#)1;Nj��������������������������������������������c#p~&`=��i_XYl��O$)9>==@@ABDDEFGDEEGIIIGJKKKKIGEFHIHHHLJKIIKMNMLMORPPSTSRUVSSTVUUWXY[[\[Z[Y[\[^`aaba`bbbbbcaaedbcaa^[[\]^]]]^_^_ababaa_\ZXUK@-%.D\s�����������T?><955420$/=BHJHNX`gky�����vddx|}}}~~~~�}yh*U�u+$nI+)%! #&####$$!!9U{�VLDFA>602311/*2e���xCI>Koxyzwtsrptrrwqomlkhedeb]<EovvxutrrstsqsrrqoonnqojfaU&*Jow[\hv���������vkz|wwvtzsnsyxpr����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ������Ŵ������������������������oIgƞ~yslhe\VQQSTNJFECBAEGF?4GFFA84/*))'&#2���I;0*;�������|F��hPD;40-)''M[X]^d^VfX%!&!4�[2223-+,6fK:<,4]+#)!:q$"!"9�g'6N:>&-q_�U6+%$.80+,PC05'"!#(&R������ӣ����c��������������������ɮ��������������-/X|������~{vqrqs{{unbZYamnokb"V������������{q���������Co�������л��ïʻ���������w#b�����ç��ynfcc_^Z[d2 ��������vpb]_\QMPRQV\SC&&+-E�������Ѻ�����o'4�������:��?z���i)&p���n''K�dnLZMTq��$-6T�>���_O?;U9Rv]VN^XN"!��/! JqVFLNKKHGPW<""   (-<Ok���������������������������ÿ���������������`|y'a�E���@v.)89:<?@ACEEFHFGIHJMJKMNLMMOOQPQPQQONQOOPOQRRSRSVVWVWTWWYZYYYY[YYZ]^__^^`_^``bbabfdcfcfedefhhfghihgdecaa___```cedfggheefdgfdc`QG5(2Ib}������fUXURJGEAB;4-$,:=BB?IW[bkx�����ybgvz~~~}~|�}yg&[�{*(pF+*%""&'&&&$$"!!"
>W|}MQLKA:5034331+5h���sBJ>Iqxy|{xsvuusruqopolhhgfb[=GnvvvwuursssuspopoononmigaQ'*KqpYYn{���������ow�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ�������������������������tHe΢~zvpid]TSRTXRLHFHEDGHDE<KHF>72.*'))(% 4���L8/)'"		qL��vRE:40,)%&=?<=9:25BF!!$1�]65Gx���{jVsv>7^1%\���������'  :�m3������v��;+uV�T6-'*O_]\W[MPZ6$ ")%U������ұ���������������������������ľ������������-/X|������{xtspr�zqe_Xcnpnhe Y��������Ľ���l���������Es�������̑}ws�����������z%l�������ۼ|qigdbabab2"�����������ú��Ķ������4&*,F���������������e1�������;��=j��Y5&%`EO�U&'K�ckKUd����R9G��>sZ?0HwDW>A~���mOES!%��/# WqI9>=108>DU4#'((('&#')<Mf��������������������������������������������_�o'^]B��fq}dPQR^eT+!/@@><;::=DCGGHJKLPOMPQQPQRSUSVVTUWWUVWWVVVWXYZZWXYZ\\]_`^]_`b_`^__a``adccdeeddddfgfgiiiihgijijikkillkjkhfb`^ccdfhiilklmmomnnoppolZM:+ $4F`w�zxspslqqmfc\REAABCC>CNWdgz�����waju{�~~}||{~~��|zc&_�u).{G**'$$'(&'(('##%&!
"Cj��d]PG?952451//,$7g���s?I<Lmwz{zywuuutsspqonkijieb^9JqwyxwuusrrttsgggonoolljibS&*NrhWVo|���������m��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȼ�������������������������qG]Ǫ�|x�nb^VRTVTOJII\NDGFH=5FGE>72.*(()'$3���N8.)@������|��_��xWC;60-,'%795;6;99CP#!$0�b63365/-+$0A/Aa-$7E?=:7&:F "7�o'L@<;9,!;H(+pU�\7,%%47893+"0?-"!*(S������Ԫ�������������������������Ϻ��������������,-Y}������|xpqwz�}ytfaWgomjfb"U������������|o���������Fr������������������������t#l���ֻ����|snjeddbad0"�����������»����������0%+*I���������������h\������4��@"���،('�e!'$$U�ioVk����������?|skdbx:V8W��{��GGR!$��)"Nuh`_P/)Da_P)$//.//120//-;0Lu�������μ�������������������iBHEIMPMk������\�\)Y`2=��!;h��f6!"3CDFDCB>9648?FJKNPQQSSSUVWVWXZY[[]^^]^]WOU[^__]\^_```abcbccedcdefdcc_`fhgghhhiihhijiijmpqqpnnonoooppoqsnolic_XW]fiklmnonpoprsrrsvvssvfYB2% 7XTO7//.036210-9LTSQRNLLQZ`t~����vgks|��~~|}~}}~}`$_�k'8~@)(&$$'(%(((&%#%&$$Ho��}x_G8631331--)!8f���vAG<Npw{yzytuuusq]fnkkkjhgdb\5Lmvzywutttrrrq[W_pommkljg_Q&.KrhUZq}���������o��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǽ�������������������������nG]è�||��f`ZTY[QQNLKsUDDEH?4IDC?72.**+*($ 2��G8-*-R@<95/"5&~�m[G?720,'%Hdc_X^\Y^W#!$0�b129PVVSNMPTg=Hh.!6G?<8;45Fi"!!
4�z&BA:EIPSf�=,pY�W;-&&6A=:7239H5$ (*O������ӯ���������������������������ɱ������������-.Wv�������xnow{}|ug^Zgkiig]W���������¼��t���������Cs�������ѽ�������¾������v&n�����z�|tyoZb\[YZbk2(�����������������������.&+-J�������З7<BHB>K������5��C`c���&)��T7)(#\�kn`��Qw����hJ������;T9aX;;?kSHQ	,�� !8\^Y]U4-<MK=$/<@??AADEGCGH?EJ;������ͽ�����������������ÿ��������x�����T�C!3�ۺW������}PRvkgc`WHFGHIIJJIJKD<527EJSSVWXYZYZ^]_`abbbfeeed_XLDMZdddcdcdhghfhhjjghgjkikkhjkmorsqqponlmmmlmnorvxzyutuqstuwxtvuvtsrnbZSSWbjnrustststtvsuuwx|{qbQB."!( BZ^dfjnnjillx����vgmv�~�~~��}xX%f�i(6y>,)'#"&%%''&&%"%%#"Gq���|vX=<543110.&=i���tDF8Rsw|{zzxvtsrqXjllkkjiigc]8Onxzxxwttsrrrn[WdnnmmmmiecP#,PwiV]p}���������j��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǽ�������������������������kC\â�|~ǻ��mXXz�tXLGfSDGFF</CCC?720*)*((%3��lB9.*)7.-,/--*QM}�jZKA:41,)$01(*'.2/21 #,�]13;jtvpga]]Z4Bg,#!!^��������w&"!5�s2�������xw1-|a�P9-%(E_^XPKJLL2" '+O��������ĺ�����������������������ε��������������,-Zu}������yqpvxz{zti_[fjgika!_���������½�n����������Bt�������Ї���c���þ������u(n�������ŕxm#"(*Rn/%�����|�}ta^ag���XHNOi��*%,/I�������֎ X������5&��;AB3:F&1��кP(#Z�jjp���J.?���pEFa\TPXn7P3\C7:;fQHO0��"!DWJGJG@:>@7.
*J^_cacfejmkjkf^M������ʴ�����������������¼���Ǿ���������~Q�<  "6�޽?9DMD8A?.b����tWLKMONQNPQTQTQQJD67;MTY]_cefgggghhikkjjkllhaO=>BUakqijjnkoklrnnrnmpnnprqqrptvwr^_hsrpronmmrpt}�����������~}~|xsh\LHKG@DJSW_]bbedhfb_Y^fekkaA& Bz�mcedfgcdgnkj{�vflu}~~��~~��~yW(d�f!.n<,*&##'('%%%#%$&'#!Go��}ypZFPQ:53550%=j���n?I9UpvyyxyvsqpqnVflkkiijhec_7QrvxwvuuttrqoiQXellnnpljecN#2RmcQZr}���x�����l���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������į������������������������kC\����ʸ�e9<���\HGZRBEFG?1IEF>810-,,)'% 0��pA;0):���������Ut�ZWK?;31-'$865:68;=CJ&!#0�a52.#%%Am0$$$!#1�l'  ,%%n\�Q:-'#)***&$"#/'!!&'N������Գy������������������������̫��������������,+[x}~||��|}zvuxyyxuqjc]emkih[d������������{nj��������Fs�������ϳ��Ԥ���¾������r(q�������̷o(#%&(,Th.(�u�������¼������������''-+M�������؋$#"0�������++��;%hf@ON)*Wq��U&#[�oqq��eC600A;:CCC@<5Us9L*]NJi�EGK0��# %`pb]``\[[ZO1Rxmzidtrt|~��������������������������������{cs����������{J�>&#%%##9�������qm��fs����zvVPNORTUTSVXVX]]^^_^XNA?I[jnhnnoprrsrmlooopoqrpiV6*@m��������������}||~{~|uusurTBKkaZ^`]URYXUTS[]����������������������#!!!
 -^UINNONLPNOMFHszv]hs|}��~�~�~zwW+i�b4n@-)%!"')'''%####%"!Kk��yxpTKZkW<9<:3* ?n���uEF<Wqwz~zwussoqqhillljhgeea[7RsxzyvtusrpqledjlnppommgebL%3VyeVYv~��������j���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������а�����������������������|h@W����wǱ�td[`zn]SPMf_BBGEA3FCB>930-+*))$ ,���H=/)('j�~`K?94/+)+^nj`hpba_h+ #*�X47<V^`aVSY^e;<t+"">RLKEADJRh"/�s#>GIKZX^gz:(p^�V<-');GE?88:?D5##'"N������ӫ|������������������������˾��������������(*Tt~{|z}�|z}vswwwsqpke^eppjfZf������������|m���������Gs�������ν��б�����������n%o�����޷��|q]cWN`[^l/&�s��������������������g#%++Q�������Ӎ)/+*'!x�������)+��4&�ἝQ'4�kwc;(%[zpij���U7[���_B?=E:3Yu+F%5����e5AB4��!! ScXUY`_]ZPC)
+0 "'1:No����Ľ������������������so}an���|�����}F"�:0PRB+%D�ٸoj��]Z^]X�����zq=c���������z}}������~V}������������������������7p���������������������������?7$$'*.(#/4.(/35r���������������������'1RURPRQRSSSTTPEvzv[ds}}~}~~~~}wR(l�a!4j:++'$$''&&$"""$$$$Hj�wmpeUPNKHDBCA:.!@m���lAD=Yvxz}{xustqnonlolmkhhfdbZ5Tqxyywvuqspokefoppppomjec^J%3Wx[PYs���n�����l��������s�������������������������������������������������������������������������������������������������������������������������������������������������������������Ҿ���������������������������������������������������������������������������������������������������������������������������������������������������������Ư���������������������yc?U����{ԣZc[Z_s^RLGLvi>BEF@6JF@<950-+++*%!-���S9,*-=--366:<QCm��hM@;60--0]G/.Ch;37V( #+�Y67Q�vwx�n]`f=;m+#!q���������# 2l3��������?"la�_=/)1[]YabZLNV:% ''O������ֳ�������������������������ͽ��������������(*Rv�zyzy����toxvsqsupcZevxuujn��������ƿ��tn���������Et�������̗������¿������q'q�����ԟ��}rmfO@TFJi+2�q�����������l/Y�����k2#&,)R���������������6Y�������$0��<$����s#)qPA�E*'_ysgf��S,E�����YEpcWEQe1G$2:W_LE3@<	=��  580>Va[\YH-	.K���ʾ�����������������®��������������xB%�5!#(>0*"I�ٺ�o[k��\Y{�����{q-o����������������������qŮ����������������������Tv���������������������������G9)$%//##.0% %./p���������������������$6VWQRQQRRRRRQQGy|sbds�}~~}|~~~~|wP/q�^3f6,,'%%''%&%$###&$ 
"Cg�~ki\URPLFGFE@;0!Bl���i>B;Ytzz}zvyusqmlmnoomjjifd`T2VoxywxxuqssqlfjpqpppnmjkfcI#5Ws_U_r~���������h��������w�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ��������{~�znkhet|~��za?N���y͘]^XV^ueNMIO�mACEHD3EEB<7421.*))$!  .���?7-(A���������jx��hPD=5.,,0O7 ->"&,P& #.�S66Rs"&/iI%O=5a,%!t[#J�/#d"7�n7�(!,~Y!(`>$Vd�a;/*1P5(/JB $<5$ '%K������ֱ�������������������������м��������������* @`kgee^_efaVV]`]\XWPI?BNNGH@y��������ƾ���l���������>t�������Ϫ����İ�ÿ������q!t����ղ���~sn^NGLCVn,-�x�����t�mXN<-!*#"$&,,R���������������`$�������%2��:%|BDwR'-|yvp&('`}rgu�ME1k������J�����s,>#evig~w3A9@�x !CPV_d[MLST?		1 R���ɳ�����������������¯��������������u=+�+ "<N4'"N�߰W}������`�����|l)k���������������������|	�ȳ����������������������P|���������������������������<8($#$$  &*' %02v���������������������'  $%!6WUSSRRSQSTSRMHuzt\fs~}~~|}�|zvR0o�X6d4+*($%'&%%%#%#$$$ (Kq��e^SFEHJFEEDB<0 Hq���gDC>^xz{|zzvsqqoomnnnnlkkgcaW2XowyxxwtrsrtpgjnqomoolihdbF!5\v[S[s���}�����f��������j�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ�������zxzn]VPUV`gwz{~~yb@Q����}Ϡ`\VV`x{^QJO�VADEFA2CCB?8300,)**&"+��fG9/';�||,Tk��UGA:4/+)+;6,03037=R(!#,�P56IS  *2"'M87l0%"fP&A"m"1�n0t?,")`=$Sb�X:-)*@,"&.' :5$ 
)"S������ק�������������������������Ҿ��������������!2KVLD@941--/.,+)'(&%&((&&&++"y��������ļ��r_���������?t�������ϻ����ǻ���������o#t�����~�²�yK(Bem*.�q���������������|ynYH8"$*,T�������ѱ������8!j������#3��4)�h!#)),L~̳:*'[{rd��DC7���Z>j�;f^PL\_2=!=83l�|3>5D�p",lphYN;.,7BE%		'$Q���ʹ����������������������t����������w71�*#.?:3,$W�ޭBNZQ+*;1(�����|i+s���������������������t
+�ô����������������������O���������������������������54'&"!   "&,|���������������������u���������������E e����.:UVRRPSTUSTRQPNxzy_ex~}~�|}~~|ytO2t�V;a5**'%%&$&&%$&$#$$",Y|��vg_QOPQNKJJJB6#Ju���]ED9^ux{||yvtromoqonnoomifedX3Xqx{xxwustsstpooqonmlligcbG:axb[_w���������g��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ�����~��zsaH7@BFN\erx}~vaAO����y��n^VVay}�[DMwGBDFEE5JC@=51//-*))&$  %��VH;.)9�:;.Yc�vDH?72.-))Ee]aghcYe[(!#.�S76>NLOLLCGUc<.e.# <>9<:425>n.�m&B6:>CFIZs?(ke�R8.((;:<=<56:I7#"'&T������ӻ�������������������������Я��������������&0OSND=731///*)(''%%#$"$%%(*,!u�������ƿ����t��������?w�������ːurvqsh���������n%r���������~m4(+--0\f&/�i��������������������z$%,*V��������l!%&#h������9�y2+��lI,(4���y2)&evoa��=>a���6<W�5800.Ya1= #5o��<,?2O�[ !+c^G70/)(-30	$'W���Ȼ����������������ÿ���������������s35�""+%".,$[�ܪz���Df��Q�����wc/{���������������������o�������������������������H����������������������������./H62/,(#('!&3��������������������������������������M#'(H%<USQQQRRQPRRSNJxxt_du}~~~}{{}}~~}}}tI1y�MAb6,*%#%'$%%#"%%$%$!)Wu��{pjaZXXSNKLLI9Ju���ZHC9btxy|{xutqoonnnpoponkheaU3Xswxwvwvtvtssqppqoommljga`@:d~q`av����n�����k��������}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ����|z{r\KGIJGEI^jt�~}zaBF����z�ӮhPIlo[rbJSQBCDFFB!2FEB=62/.,))'&$!
#�e9B8/**G""(*?Gf�xMIA81.)(*11.EdO55// !"0�M67Du|��thhc7,n2#!^��������z!.�s %~�������x1-tc�P9,'(EX\afUHIQ2!!'$R���������������������������������̯��������������'1Q[OE<51-..-)'%$$%#"! #$$&*,x�������ƾ�������������}7w�������ͼ�������ÿ������k#q�������Ҵyl]rqll[^c!-�p��sVOVT�������������~!&+([��������s!%$""�������A��5.����W$-�֣{2($owncu�mp���W7=z�:F67*GM1:"6��i2,1=,	V�S )PTDFDFGD@</ (e���ƶ��������������������OFn����������m1<�! *KE3'$`�ߣ:x����~uH����oU/~���������������������e	�����������;}�^����������=������������NH�cm�����������@t�rk����wyqdhkfN�������������������������������ſ�¾��F#S�" ?YSRQLIIJNLQSQOzzx_gu|~~{{~~~~|yqD4}�NJd3-)%"$)&%$#$&%%##!-Vv��}tmc\Z[TMHHJD6 Nw���[AB<cux{~|ytstspnnopqqmnkhfaR6\uz{yxwvtsrrrpooponnmljhca8!;cwcWby����y�����n���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˼����{vt\CHSVKFHTdp|�~~}eDH���~xx��x7HoWQVQNJEDBCEGA 2EGD=83.-*(('&$ %�\GD:/)6���������]i�kNMB92-)'&'&5dM#'*)8%!"/�K544,9s[&"$/#(h3&!+)!C�O  /�n,%)wr-%%, -wb�X6-%!&,<PM-&)+$ !(%Q�������°������������������������ɪ���������ÿ���)1KVMC<62/,+*)(&"#""   "#$%()r��������ǻ������������|8v�������Ҷ�������ÿ������h%s�������ָye3,,//2Za6�p��dH;1-���3?OWi�����s#(+(\��������q" `�������G��0-yg��X""/K7C,*$tuoec�ļ���5?m�cJ�{o{�L18'{�zJHU-<*	f�I!
']pbfcbebXOA	$b���ɼ����������������ÿ��������������m.E�#-?@2%&n�ܧ~y�����ng����|qN2���������ov�����������^�����������T�},����������3 ������������u��)f�����������H������~s��p���d���������������������r+FIMVYWbfjsvwvx|5:8:7 AUSTUbidegMGSQQ|{y]ft{}~~{z||{|}|yp@4}�OG`2,(# "%%$%$$%$%$# *[u��}tl_XVUQLIJJD4$Mu���ZBB?ewy}~|zvssqspppnpnkmjff_P:\sx{|{xwusrrrmnonomnkijhb_7%?bm\Rey���~r�����n��������h������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʺ����|vfK@ISUDEPdqz���~zhFD���}xqw�nZZXPPOIHFFEACFGF-HCC?940-+('''#!"�mSF<.*-;# ^�@"h�`HOA92-+*+A_ivZXXY`f/ !-�T5224;vo/&09R5*h3%"!#J�]#M 0�q (g�2$4=Y7+zj�Q8/'#*7T\6%*/;.  *(R������ة�������������������������̬��������������&2NTIB<720.*))&%"""!""$$')* �������Ǟ����}xrng���~9q�������΍��ұ�����������h'v���������qkA#>c^5�n�������������������w%'-._�������՘jkhec^d���������F�y/1�J!;/$ &*$yvofSy�ÿ�S+_��K8>3V��?13#LaV[ik,8(
+f�> 7mhRSUQNMKB:"
+"#n����ú���������������þ���������������j*P�%8SK8$(u�܎R������~_����}x<.���������2n�5���������T�������������&%����������-#��������������(!j�����������E�������yp}qm�b���������������������g+`S4 BYVSZ����ȕJQNZ~{v^hu|}|}|~}}}~}zlC8��LFX0)'""$%%%&%%&&&%2ay��~vhQQSRMLKIGD4&Sy���XC?=fwy}~|wuttqqponnnljjhed`P8_wy||yxxutsrqpnsoomlkjjfb_6"Cfl[Tey����x�����r���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź��{�yuWDABD=APct~������{gG@��~xymh[WUUSSOIEFKICEGEA(D?@?942.,*''&#! ��cL=,*&M�6
+ 4e�^<PC93/++,<MKEGFGFD@" "+�K36E}���}vvxz<7�4%!^���������!*�j(x��������>(hc�P:.&(FcnbUORWV5*(M��������Ĳ����������������������Ī��������������%2RUKD?820-*(''%#"! !!$%&)+��������c&""+p����:s�������ж���������������i$x�����|���tnqcLO]fd\8�q��{��Ϳ�������������_"',-_����������Զ�}����������|A�k,-�f,$("#(#��q_LL`uiB,3K��H/2e��I51!3% ';-:$
+a�6@t_@7:>=<<72
+"p���ǿ����������������þ���������������h%]�$1JF0$*z�׏GXSG1/=1:����}�L3���������{�[���������G�����������}d>����������%'������������r|�sr�����������M~�����vtowsiqy�e���������������������cNun?AYTSKIm���oIRMV|zt^huz�~}|{}}~~~|yo7<��DQ[,%# !#%&$&$$#"#!9cz��zscLJNI@BGHB=/"R|���ZD?Akx{�|wtuuqoonnmnmljhgedN5^uy{{zxyvtsrpomnoollikifc[6$GinTVew���|�����o��������o�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʿ���}}tsYLDECJan{���������nI:����y��hZVQSVVQKFI_WAEDD>!*D@A@:42/++)('#  "���M8.)3����r����q]�dMSG;62+)*)$  !# "!(�P449B8:<4,...#<�2%":QIEF=?91) *�f#PWOFC@@@4 *ma�_9-$ .6544/+.-#)'Q������կ������gq����������������ʲ��������������&2SSLD?940-*(''%#!!"!#%$%(,#��������`0DHF22A6+n���|5q�������Р{��������������h#v��Ĺ�����sfAMLJLD^a@�q��y�����ò�����}`R:0'"'-.^��������зf."&]�������yH�c0:�ӷ�2$#(#��q^OKD?-,16:P�_8z��3!$801�����p+A%	s�5 3pl`c[XUTQK9#|���Ĳ����������������ÿ��������������d!cy%413*#*��Ы����v���Z����{xG8����������{'���������>�������������������������#-����������������������������S��}j��wms�zdix�f���������������������R=0')"!FZUUb���[8=NSMT~wr_ivx}}|}}{||~~}|zu5D��Ed[($ "!!"!"!!!!
7by��zkM30/*)()).5,%Ry���ZH?An|||ywwvsqqoqmnmlljhdbM8dtxxy{yvsrrpnnokmnnkjjhgd[8'GmpUVfy����������r�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ���|zwrfdZ[_q}����������jH3����x��aTQUgi_TNGhjBEFDE!*EA><941/-+*'&" ��iE<.(,kl`XZUNEE)^�nVVH;52-'&"# .�c01+=�0%!! +�u "!'oh�W4+$'&Q������ջ����x��������������������Ȱ��������������$3WUKD?940+('&%&#  " !!#%"#(,*��������[P���m%}�w/p���;q�������˅�Թ����ÿ������c'v���������t_%(Z`B�x��o�v`H@@2-(!",CSWcE#'./c���������X"$ ]������vH�e22����E $&!��t_xiL@/)-/54�NN��aXv=8/3v��|�n)C#
+w�4# Tj_ZYZVXSH/%&����õ���������������������������������bmt$04?,$/���~8MWh\Q]Pe����{h.E���������g{�j���������4�������������������������1����������������������������;���k�������zy}p[���������������������]r���~zspolfe^[VL >^cB%+##DWUVOX���yWOQKY~{rbgvy|||}|||}}||{o2G��FbR+$  %$#"!""## 6_s�oC7&#+*&$#$$$&!!U}���QE?>nx{|~|zxtsrqpopnlmlkigd`K2cswzyzxvtsqponopnmmmjhgfeZ3(GhiXVgx����������r�������v������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ���~}sdfms|������������qL3����w�Թq3*9��eNAgs?FFDH#'C@?<8320-,+*&#!�^J;.(%"
+f�pVWE940,)%!" +�Z+0)&o5%!!!+�s &pk�U4,$%#P��������´�����������������������ƺ��������������$4QVLD@82/+)('%$#!!"""##$$&(-5�������Α�����l��{a����w<t�������ͥ�͟������������e&t�������ڸtb7>=@DCa\E�~��n�yxyy}�����������Z#'-/e�������֏$%!.������uL�a/9{p��6#& ��xZ������}zyw�H-9759U:B5(2c�=AV*@ 
+
+}�8!#E_UTRTQPLE*,(����į�������������������TPGSQZUZ������\tp&CL5*%1��χf����yds�����}z8E����������������������0"�������������������������5���������������������������}7���������������c���������������������k�������������¿�$&hFF6#&#FUUV^�����|HRNZ~|rdixz}|||~~||||}{l2F��>$hU-$"$*,)'('('$ 0S^}Q**&"(,+(''$""!X~���QE@Eoxz}}|yvtsopppomnnkighf_L5evyyyyxxvusqoopmmmlkiggge[0'KpiXUiy��������}r����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������º�����xr|t{�������������oO2���w�ɡnICLo}q\QQmu=DGE?''CA?<840-,+)(&#!}�6E8-'$ f�\MVF:21-*'&!"-�R*0+ !b2&"!!+�p!&rm�S7.%&%O�������ȼ������������������������ȳ��������������!4QTJB?82/-+'%%$#! !"##$$$$)*,��������c4++*(" %.w���s;t�������ο�ϯ����¿������^#v������ɹ�sjZVGFRDebI�t��������������������@"&,3h��������VZt!R`v�����rR�a+?�no�:$& ��sU���ž������AB`SD?L,E5.q����]+=	��3 9emc_]`ZZTK50
+3����������������������ÿ��������������~Y{g*4)'&3��ˤ������i_~����|}2E����������������������+#�������������������������9���������������������������z;y��z����y}ts��`���������������������h��������ý������"P$%#$'"HWTUX{����eOTL\~zu^hvz}|}|}~}}~||~{l5K��@,jN*$ "'+'(,,-+($ *=N|U<3)"&+&$$%"! #U��~UF>Jlw{}~{xtsqqppmmnmonjdgf`K9ftyyyywvuutrnpnomlkjiggge[.*NtiVYl{���������|x����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ļ�����������������������mN,����y��p^][YYYVPKNjr?CDFB'*FDB<630.,+(&$# u}XI<-'$ `�oQRE81/,*)/KW5"4F;  -�Q/0,$$P0Z2%""*+ #*"  *�k%=?&"dr�X8.&"1K:-DF-&'P�������ɘ������������������������Ͻ��������������%5XWKC=831.+(&$%$"  !$#$%$&(-'��������T"(,+%  $*9u���q:t�������̣�������þ������`'v���Ǵ����q]*#)$-a_M�k�����¿�����������xK0#&,-n��������@9��"��6l�����jX�\,!9/Hy5 %&��oS�����������Bf�����AF.A�a9ip:0;��- MscQONKFKD<0:1��������������������������w�����������}U~Z(=:3+$>���`�������@a����{v(P���������|������������!*�������������������������>���������������������������v:��������������sg���������������������Iy���������������,0;.)*"#MVTURTv�|T;QTJ[�zvejw{||}|~|}~~|~yk0N�><sI($ $'*3=;71*% %6RW481##&#!!    
&X���PD;Jow|~{wvusrqqmnpnmlkhfe`K9gvz{{xvvvttrrpppoljjjihfd]-*MngWZn}���������x{��������������{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������»�����������������������qO"*����w��dZXTSRTQLDHVXBDDCA**JDA=642.**)'%"!i|hI9.*%#\�nIOE93.++.\���/J���l ,�J12=�zy|wz{~�;Z6#";v~L?iul>!!*�hOwm*0bxrYVn�S2,)9���9G���t)&$Q�������å������������������������Ƴ����������¹��"4VVLC;731/,(&%%%#"!"###$##'-$��������H>�������������o?v�������̕�������þ������_(z�����z�Ťw^".KX=KaXK�f�����¾������pYMD9')'!&+,o��������4Sа i�%|�����ca�T,!(*(*%!#&
+��tPvjRH>DIPPV�?W_JvLi7B,9fl}x=+3<��(RqT:7?<9887.C1����µ����������������ÿ�t������������|P�R+?=8+%A���B:JII<?H-i���zv&U����������������������2ù�����������������������?���������������������������yR��������������tl���������������������' $%%**^US+(* #QVTY^�����\PSH]�{uhlwyz||}~}}~}~}wn/T�z?M�O(&"$&(2;<:3-& 6[zI37.&&&%$%%%$!'Y���LD=Irx}~{yxvussqponnnmjihec`F6gsx||zyvtsqqqrponkjjkihgd`**Psh[\m{���������t~��������������x������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ������������������������qN!)���~vzof_XRPQSOIFGHBBDEE>'*JDC=852/++()& p~aH9.**Hlq+2U]>Z�rKOF:4/*+3��y�`���}�' /�K03=o`o�h><:m5 d7%$e�}yP{ki^m " )�e*~ejU_VYLe# Vp�S4-+K���w�joa�;&$N���������ž����������������������Ĵ����������¹��#4VVMB:620-+(&&$$"! ""###$#'+������¾f\�������������n<w�����������ϭ�����������[&z��̻�����xnTcilh`_WR�c�����������������mGI3$&+-r��������>VҬ!2������ai�Q,"'*)%" $&��tRNF@5'*,2/9�@bU8t/^5B),����?2;��#Fqj]\]ZQVRD0G6����������������������¿��������������{O�F)1*(%&G�׻������|;e���ys$X����������������������8ɻ�����������������������F����������o����������������rt������������|�~r���������������������,!
,[MR/-,!$RZVZh�e?S�tGRKb�}xhixy|}|}~~~~~~~|vk1W�vBI�_.+$  &+5?<:64.*#.V�D78.&()%$&''#!)Y���|QG=Mrx{~}zxtssqqqnnnnkigfdbA8gty{|yxwurppqronmlllkihge](-QrfT^m|��������p���������������}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ľ������������������������uO!'����w�vd_XQOSRMKFFZ]>BEFA')LCA>:51/,,*)&! o�b@;-*1��wl:v�{z7U�pFMG940++3l����^`�v�&!-}J220*P�|mJ3Pl*Z3%&  a�y{|Q:rdj $�f'yoblTWgaNj�J6.+@����Y'p��1##P���������������������������������ó����������¹��"6ZULD<951-+(%'%%! "#!"""#$(* �����ǿ�T4-/2*'%'+6w���m9x�������˭������ÿ�������\&�������۫xY1937::\UT�d��r��������������dS\A%&*+s��������T Iͪ #f������`o�N, '+)%#$%#��rQFEB7*)-4Hu�:Qd/@>s4F)?}K;CsW1:!��%!
-\cZZUSLNH;'B>����������������������½��������������wJ �A&4=1""K�ܻ�������fWy���~zq]����������������������=̵����������������������}N������rsrrZ����������������eg�~bn����������un���������������������"  @ +*!&RZX\i�h7[�jEUHh~znmvz|}|}~}~~~}|yf*W�sBV�o>0'$"$(-6>B92/,)".Y�C59,$&'#!!"$#(Y���xMG;Nrw{�|{wsqqqpmmmnnjfhfbaC<ixz{yxwvwtpppqonmmmlkkihf\'.RugT]n{���������v���������������v������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ������������������������wP$)���{��db^VRW\TMKJju9BDEB(+MA?<973/--+)'# vxD>:,*<�vqztkkneWQ�uEIG;40,+2Is�vE8lrG '�L23/M�T:mzgN(V2&%!B��O#)Yb9 $�c LfaK2E.Qe�G4-*7r�}P<T>##J���������ľ����������������������̾����������¸��#6VTIA=:3/.+'%&%#"!"#"#"""$')!�����ü�B"$$$  $*v���i9z�������ˊ������¾�������V-{�������ɢrW!-ZSW�h��SB73,-7>GOW_dZKNJH5#%*/y��������{+��$]�������Vt�F/!(*)%# $#
+(kTSAB5)(,6N��:6/()/v<L*IU(01NX6?	&��&"+31120*),,%<;����������������������¹iZ]YY���������uB$�9,FJ55�Ӱ��������������|h!f����������������������
+?ë���`������������������vS���������������������������[N�kYLa~�k_gk��Us���������������������T�����{{wwrmjgfa5&bmH&&'W[WU_�����ZOSDo}zpqwy{~}}}~~}}}yd([�sDZ�jI7.(('(,2345-()("+PxM<9,&'&"   ! (\���xKE?Vrx|}|{xutqpqmmljiiegfbZE:kyzzyxwvtqrpqpmmmnnljihgcT&0UycZ_p{���j�����r���������������v�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������÷�����������������������tP'%���~v�Ù�hK@e|w\OE_s;CEFC&&IC?<8621--+)'$ xz>?8+,5����o/mvIZ�w]UI;60,,1[e3!"$ %�H447`N.:717Eo?
+"h0'$!T~tV*"d"%bgZ@-  Ke�I5.+@���Z=)  A�������Ǿ������������������������������������¸��!7TVH@;81.-*'%%%""""""!!"$%'*#��������E;���Nl�V$o���i=x�������̰�ξ���¾�������Q/}���������ubJMDHVG[UX�g�r^���������~rkb[gT:)!',/w�������Һ����t����������Oz�G/$(*(&$ &'*ÓPQ�n\@543?9E�8l�slj~>C)Gn72@hS4@-��% "$"$&!(1,9
+?����������������������º��������������t@&�7(1)C�PD0ECBCHJOQJKLIF8j����������������������HǬ���hE{����������������kX���������������������������VTxchkj[_\\`VQRUG{���������������������v���������������e0Q6B%&%)WVTSNFl�oKHSTDq�}woryz}�~}|||~}~|yc-d�s:O�X<;81,(-5A<32.'))%0TsB39,$%&#   *^���vFD;Wry|~~~{wuuqlpoklliihifb[B?lwxyxwvvsqrpqpmllonliihfcT&4WwbW`q{���������p��������v������s�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ʒ�����������������������uQ$"���y�̹�j70o��cK:Vt;CDDD&%JDA<8411.,*)&#! qzIG<.*,?fkQ(:UGa�~nbN>72.+/Af��yd<*!%{C56I�������v�:#|6'&!Cl��yiU>)$�c 9[pYZhbJ-Ab�I5/,0g��r{|tV4 :�����������������~}�}������������������������¹��"7VZKB;71/+('&$$"""! !!!#%&),"�����º��}����Y���\����i?w�������ɭŢ|���¾�������N1����������wZAN;9C<\Ve�Z�s���������¶�����pQ1#'-0y�������έ���������������G�O/%(*)'#!$'-�~XT�����������4�����7,)8�����F430��#-/+12(!)66&	=I���̽�����������������¼��������������p:*�0!!
H�b�* B3"p���������������������~Lȫ����_?ez��������������e^��������}�����������������OOylopnqg]gn_[a^S���������������������y���������������^,19&%&*XWVY^o���_JTRGqzxoty{}~||}}}{yd/h�m3CuQEJH:/*1@IA632**,&._s:0.+$&(# ._���xDE:[wz~�~zyvusqppmmljhigec[:DnwzyxwvusqqqoonnlnmljjigeU 5ZvaV^r~���y�����j��������}������t�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƹ�����������������������tP("���y�ydZ^eiaSKLcs@EDDH&$HEA<841/-+)(&$"!mw>H=/),A1+,5;:6IA^�zgZM@73.,/@l�hRn��a6#%�R646G41/+)&3a8 e9&%=}�P?`��b.$�d CuZ&Ch}a8Ab�N41,.i�xPi��vC"'j��������ymnvsnfc[[a\����������»������������º��!8WZLD;720+(&'$$##" !!"#&)-"�����ú�yb]W\ULOPLL}���h?{�������ȶ���������������M1������¾Ýt]&,"4`Si�Z�v��������Ƚ������j^A#',1x��������D$'+.,'%(6������A��H.$()(%#!$&
+0�mUW���ý������4QI=8;j/(,,L���q@20.�|%4Y[QSO90HLN42*9N����¸����������������ø��������������r50�. 2<8.K�g��~rs{�Cx���������������������tRƣ��.YefL|��������������_^�������mc������������������KUyopolmfcrodZa^O���������������������}Gz~�������������J)Z�f !".YWTZj�����MRTIp�}yquyz�~~~~�}}z_(c�l5O|SU[UL7*0EWH5.0**+"2dw>10,'&'$" !!/`���tHE=Xxx��}xxxtroonmmjkjihdb[9EovyxxxutpqqsonomllnmjjlhcU#3\vaWar}���������n��������~������q�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������÷�����������������������wO&���{��`^[UTVVQHDH`bCDEEH("EC?=:50.,,*'&$!  n�SG=.+4�������x}VR�p]UNB73.,.H��*/Z���M %�P64G���������<c:(%!:i��xrbE)  "�`>exkZD/%"@c�G40+:u���dI, %! !Ko���kmklZ3565:1745CG}�������в���Ȱ�������������!9YWJC<63/+)''&%""# !!#"#&),%��������B!+x���d=|�������ˬ��ĩ��ÿ�������L0��������ۣwsM=XZGNaRq�]�q�ϰqnlz{xngXOVaZUpO#'-3z��������= #! .������A��H.#((('#"%&0�kVY�����������=GS93Lq0/1657As�S0,7�q$ !Xsc\_R9NZS[I#B��>Yp0^����������������������°\WV[p���������m2>�*#8HH6U�=\HSW�jRZ�3����������������������h[ʟ��u�a?&;��������������Xk���������������������������AV}mku|oa_mla]`iU���������������������q "!"
!0%/XXV\p�M?X�YNTLq�}}rvx{~~}}��~}|y`,k�n3H�d`^UN:,-3A?/&()*) 1htC3,)()+(&&&#/b���rFD;Yux~��}xxwsqooolllmkfgfaY9EoxyyxwutrprrnllnlkljijmgcS&6^u_Ves~���|�����o���������������h�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������µ�����������������������sP'���|��`_[SNRQRJEGLJ@BECB)%L@==;6/.,-*''$ o�nH?0*/A5-),.&#%&P�m^UPC82+*/=b���~a@%!'�Y533M;=|�:'#2'j9)%!R{kS5'%4  �\ "ZW2*D =a�E60,>u[=+"0V+0p��������xdcXTTOMTYki��������ɞ���˾���������¹��!:\VMB;52-,*('$$$$""!!"!""%)+"��������~l`bZ3FV8+|���i@|������������������������L4���ƽ�����yf[^]`_TdSt�`�l��Ģ�����|pekrqeh�Y!(/4~��������<% 2������=��A. '(''# #$5�lT[�K?8*�H6>Z�?qpH��v+..Mog���G0)J�f" )cg?:@?DWE=OJ"H��7 F19i���ɸ����������������ÿ���������������j/D�%%1I=.t�OW-&/q6$\)!����������������������^	ḑ������x1��������������Oo�������}������������������?a�tljedaluje[b_S���������������������n6-1WXX]r�gRr�UPQMt�~|sxy|~}~}}~��}}|wZ+l�d(C|^hi][=-*-46+%(**'4nsB4.((,-(%%'#!5c���p=B9_x{��}|xvtrpoomlllkhigaZ7GrwywwwutsqppnlklljmkkigebJ!:`v[Wbs}���������q��������z������o�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������º�����������������������sR(����|��]b]TRRRRJEECBA@DDB.%ICA@<60--+(&%"  q�ND?0+2XXQUZZZWgXN�qe[SE:4-+/\�uW5(!
'�i22350i�K (Bt@d8($"7[ZNQURVhf  !�W!0WQ[afepx#>c�G6.*&_��������/:����˾���������|zprd��������đ���Ƚ�̾������ù��8XVMC:50.-*(&%%##""!!"!##%'($�������������i<��o?y���l@}������������������������L5����������zR"-kXw�_�l�������«���������H"(.4��������q[SN:&;h������8��?,?nQ-&$#4#(&C�q^`�PLB<�S9?Z�9vBf�vw.*(d���W203*H�a (ehFDJMZK<IVH!L��9*6 q���ɺ�����������������¾��r�����������i+K�##1O?'z�l���������0%����������������������Rmɰ��{|�����������������Jt�����qc[OX|����������������:h�zqg]`eourn[ZSS���������������������eIl* CXB!'/'6YXVX]�����OTQIt~~|tx{}}~~~~�}~~yY-n�](Cq[nkXO3*&'2:-'(('#7pj:10*+/.(&%''$ 4f���s@D;^vz�~}}xvustrommlljiie`Z8Gpxzxwwvtrpnojlkkjjmjjjhe`J!:fwVXav���������q���������������o�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ż�����������������������sT'����}��_c]URQPPJDDAABACF@.$DDA>851-+**(%!   l�VB=0,9���}{tuOK�}mbVE82.+.=U3,(079Cf* #�f54B���������AV9&&"+ryqq~oj�a $�]!/dFFN_?Uh@f�H5.)(isKPjjHvu0�����ȿ������qebUO\U��������ɇ����¿�{pngf~�ļ�� 8XXMC:50.,*&&&'$""""  !##%%)'��������<*/CSHUs�������i?������������������������N=�������ҳ�~Z,)(07BoZ��c�cv������¨��������|) (,5�����������̿�!>�������2��B.e�ɟB%(iI513+&P�vjn�zW\���QN��8�[��0e0''>QJ`ZV;1'L�`Wpd]cdU4>Z[>S��63,' w���ɽ�������������������lehX����������e(S�%%4>-(��n�0,-*-6Pg& *����������������������Gpñ����j:^���������������<z��������������AH?DMLQ\k����;p��rhmhmvxupb^aZ���������������������[ <I^#J�w}Z"-(8ZXUWIC\bSDJUTGz�}|vx{}�~~}}~~}}vX-v�^,DlPUSD7-(""+:*%"$#!
4ee;>@@<50+*+,-*#6i���pAD=_uy~�}}{zwtsrqpmmmmliie`X7GoxzzwvvtqpppmmmkjkliiihebE"<ky]W`v���������k��������z������q�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȼ�����������������������tU&����z��d]\WRQPPJFCCBCDFEA.#FC@:73.,()(&$!  c�VC@3*2�G%!K��haTF:41..8���������+ �h644=&!# -)O<)&#D�PB@f87hW \!$Wm?BY_$dm"?n}J6.+F��vw�X<��&"o������vi[\^]ZRNSMPWT��������ψ���������ȼ�~l���� 9ZXKA:50-+)&&$#""!  !!""$$%("��������W@!&"(;x���kA�����������ʮ������������FA��������͟�rtxJ\yZm]��h�hSZqvnnvrgXSX_WUXK."(+9���������������CK������/$��@))IRj?$*�����J&^�wls��oVW^]Ou��;}��SPm+&%&+*2P�E.'O�U3JHEFD5$2A:,[��+)nt7$|���ɷ��������������������������������~c$Z~"$/'%&����v=(%-����������������������A|������s�����������������/��������������f�������r����<u�tmijikkwrmd`ZU���������������������O%h��v*QC%P'4*8WXX\~�����GTQN{~}}wx{|�}~|~}||}wR/v�[*PwUOG=31-'"%,(%$$#$ #>qjP_bI?930//00.+4g���f?C;bu{�|{yxuspmmlkkllihd`Y6Hpxz{yvutsppnmkklkkkiijhd_D ?hu]Tax���������r��������{������m�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ�����������������������xX&���~�ʉtbWQRRMIFFGGDFDC@.$KC@<62.,))&&$    \{>:>0+2�K K�{FCIC:62.1F�P+/YT9_~ $�`122Rz|{{zw{�J@8%&$b�����=W{N |[!&fg`^kY'_F=m}I5.)Fyfbe}IErM"
#K����wke]WTVYYUOQPSYT��������ӯp��ɽ��������iy���$:\WKA;50/,*'%$"!""  !!"!#$%(�������ƹ�F0uuI$ &3����hG�������������������������I>�����������[IuRY_Dn[��]�aMC8,  $!!%!*+7�������������ŭ.!3������.#�~E'[q?.#$+�����R(d�pe_k{jRA;9@W\M8E\P,01#"%)(+D5+$	O�O'$")(!%1,%a��%"L4(����ǹ�����������������¥�������������~bc~#1G@$�|��j��aA+%&5����������������������8�����sb������������������-������[nMGOc���f�������o����:u�rpyyrnovrihgbV���������������������J!=;>[.N$$N+=.9ZXWV\t���@PTN{~~|vxzz}}~}~~}~~|}|wT,v�W"VuUJ<830.*%#$%###$% $B�lZK96951.//1/-!7j���gDA=cv}��}{xvsrollmllljifdaY8Ntx{yxwqqrnmlllikkkkjiihd\B"=jr]Vcx����~�����v��������y������s�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ�����������������������wW)����~��|�gVQSQNKFFOTDBFCD.!HE@>640.+*('&" !W}15;.(,gTHIGUPQm`K�u@HHC:5..4f�ws�PD|�B $�b22.c~SPMKFiv,
+=;'&"Q\:'(< &$!�S  X@(( >m�E3/+F�kG.%#I{���pb_YVWYXSQSQTZX���������Сjv���������ѳi���"9UTJA:51/--(&%$#"" !!!!""$&$!�Ϳ��Ľ�bG%"-$*>����gG��������ǚz��������������JB��������ȣ�oSv��nex[��d�XJ>6,"(+5��������ҫfe��u!?������/!�~>%v�Ѱ8$ c�}�:&d�sbZWSMB602575302.% ""#$&&&)&&]�M!""!*+%j��$&W2+����ɼ����������������������w���������}afs!%7A;$�s�]$p�_���j 8����������������������1�����HFXYr���������������'#������r�p��m���`�������}����8y��uyyv{{xxskgmc���������������������=!N�`,%)Q% :XVVSFBu�\�HRQNz~|{x|{}�|}~}}{vP1~�X(ajH>4200/-)#"###$&&%GwrRL@5352/-.02/-"9o���hMB?dv}��~{yvtrqommlkkkigebY1Rsxzzyvsrqoqnkljkkjkkjhfc]?#?hk[Vfw����|�����u���������������w��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¨����������������������xU*���}��_g^XRTTQKGG_kGCECE/!GA@:441.,+)&%" `�D==.+2x��������QC�lBTLD<5--0W{af`r;Owf# %�]336�w>L^CG�?
+<8(&$Jv�y[@(!�XB_voWL5,=n�B4/+0b���v]?*"$Jz���~qb_XXY[YWTRNPZV����Ǹ����Ѽ�mq��������Ξ~��!:WTJB;62/,*'&'$#"!"!"""!$(&$��������<$(:AO\j������h@���������ɻ��������������FI�������׾��kDijONlsW��g�XM=2+'&$'-;���������J L��!p������)%��?#p���5"*��mI6$%XsZQQOHD:0.12100/.+$!#%%'&'&j�K#2/(&,-&),6:)p��"?41����ǹ��������������������������������|\nb%FZ?%�b������pl^AG����������������������,�������������������������#(������z�v��m���Ni�����p����8q��vglv��ty�v`W_���������������������6d�bO�q*<WTW^x�����ISOP~}z|{~}}��}}~}~}}}vP3}�S(c\;7531451+%!!""%'$#H{uTN=85731224544#;n���aG?<fx~��~}{xusqqpomlljiigfcZ1Rqxyxwvuurpqommlkmmkjjkhc^8"Enq\Uhx����s�����s��������x������x��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������æ����������������������yX.���|~�Ďx_VQbmpXIGWyACEGD2!EA@<543.,,*(%" k�UB:.+-6+A].@yc#>�yaZQJ;3/,-.%%D:# !#�S22:�����^g�5=7(%$6i�sTlvrJ% �_ :ij95c�vA?t�I7/*/w�w?Y���A#Ht��|{pd`YWYZZUQQPOYQ��ݥzfas���������|�����˸}��":XUJB;61--)''&$#"! 
!"!!""%(&#��������fiy�����wpj����e?�����������ȳ���Ŀ�������BF�����������^Pm4!,FrW�od�TI:2*!.bt+'*8�������ت& $��M l������)-�w6$p�~.#"0���ū?%`�xMKHGA2,,0111.,+)""$%&&%%#	p�H 7aUNNPSLLMUR1|,X34����ƺ��������������������������������x[sU*ED-!$�e�zC5& _����������������������$�����~iev����mprrtxz����(���������������SJC>Og�����|F����xtl{uvswicdc���������������������2%_M6!G04? ?ZWWRVwb_nWGUQQy}z|{|{{~}||}~~}|{tO1~�M,e_?;;76:96.)#  %'#(O�y[UJ?9:8457547>%@r���]E??iy}��}{wsrqnpnmlkjihfe`S2Sqy|{xxwtqppnmnllnmllkjgda6#Fpr\Vgy����������x��������|�����w��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{X+���}��îa-,c��kJ?Sx?DFDB0!DB?;751-,,('%" i�H7;-)(%D�w�R>��hYLM;3.,+' 3~��t$�T/0+%'!3)V6%&% 4o�lTm}qK! �a-[sg`eV<(Af�N5/,0^����pU:$#Ku���sf`[XYYXRQRPN]V�����ģrbm����ί�������ɾ��!:YUMC:61/-)('$#$!!"""!"#"&&&$������ù6 0{���a>~�������Ê�±������������GI���λ������o~W 0eauP�fo�Zl`PF0.965.(!&--5y�2!(*9�������۠@P(R̕J������&.�n5&u�w!& $j����=#{��JKGD=.&(/01/-+)& !$%&''%$	p�<?nia`b^ZVUaX7�ć#D7����ƺ�������������������SVWRl��������{U�O(8%%!'�]�B8:c�������������������������������u���)+;7763+���+���������������������������uC������le^skbjosk���������������������*Oz�) D0@-0$!@[XXYh����gJTQTx|{||{y{��}}}~~~}|zwI9��H-a[FIMK>C@?40(!  
"$#,R�rYVME==<985548:&Cp���[ID>ky~��}zxurqmnnmmlkjhfd_S5Wsy|}ywwvrrpmmomllllmjffc\7#CjpYUk|����������u��������������}|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ħ����������������������Z(�����ם�mURp�lYMM]DBDDC0CA>;640.,+)(&"   e�>9=0*(% ?���M8��k_OK<3/,(%!R�5?�2 '�Z22:yxx}ur{v�Ts<&(% Ix��bJ0�i$YniL1,D`�O4--X�{[>"#8#"Iu����of`YWXYWSRQPR]Z�������Ȯ��}��­���ͷ��ȿ���;VRJB;720.*''%"#"!!!  "#!%&)$������»~q`O3*<bit}V?{�������Ø��è�����������BN��������՝�aN+LoTyK�Zt�]�μ�����������vhx�/"'):�������֫!��i(��  5������&2�r=(�)% 5'Im8$h�{LHD@7*%&,./.,*(%#%%''&&#		u�<!@lK59=HU>7GN3�Ć3p*;����ð����������������ſ��������������{NH3JJ8!1�_�����yyx�kc��^OUT]���wcdW]`^fl�������~����r���<�������4���0��������������PN|���������nN���z\ld_orbOX`Ur���������������������%(ckJA�o8#'! @UWY`������ERSY|{{|y|y|��|~~}}}}xvG:��H3gbWXa_SPLG?;/'! !"$!/T}n^XPH?A<94468;9&Er���[EB@ky}��}zxutqoonnmmlkifd^R2Tsxz|yvtupppnnnomlklnmihd\6'FnqYXl}���������~y��������������}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƨ����������������������}Y*�����ndaY[qxVMHO_�DACD@/FB<:751-,*&&%!  !!
e�SD<0))0$<s)8t[*'1��m]SJ;30-)'"S�*B�:!$�X32>�|ss�xHLyUg8('$!]oR;0.-0KU�f&aP0,6AL[t(@_|G2-+S�{onngo��&$Hw���~jb`YXXWVRSQQT\Y����������ɾ���{�����Ů�����=WTME<630-*(&$#$#    !""$&&#����������[@)*Hehhfh^><z����������������������@S��������Ǘ�g+3`LGrJ�^w�Z��������������vsut."(*@���������9f�|es#5������#7�r=:l��$$!.��|J:'"e}eMFB@8)"#(,/.*'$%#&&&'&&$
+x�6Er5")@T,&<L/ �ʀ>����ʺ����������������Ŀ��������������yL!�> -9D/ 5�SmTOShj\X�dt��k���`���jQjb[[SWKs�������cg{����3�������.���0����������������OF<R�������mX������xv���ghzdx���������������������#'{hI#224#.1"AUXY^��p�t�<SMS�zz{{zz}��~|~}}}|{zqC>��E0lk`agc\[QLHG9/(%"#%#5N�iXWRGAE=:569;;5$Eu���ZG>Epz~��~||zvrqponolkjjjgd`T1Wrx{zxvvvqpoopommmijljkie]2*Ty}acw����������~}��������������|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������é����������������������{Z*w���|��db_VWtPLHId�>CCE>.HC=:72.,+)('%" [�HD<0)1p���|z���i.�{LTNJ;4.,*'"3���u xX54=�;ibRQH6''$_�|���{wwUqV"bri��g^\LEd�D3-(Ajl���ca_H$Ju���|i`_VWXVSRQQQT]Z���������Ʈ����w������ǜ����:YUMB:52.+*(&$$$""!!! !##%)((��������6!$ /16ejmmK>{�������������Į���������@S������г����thpvnouI$�_v�Y��ɩ�������kt�y~rf-"(*>���������M$!A������?�k;�Ƭl#$!0���ƫ6kt[LFB=5)"#(--,*($#$'&*,*(%	��5"!
Cp@!+7>-';J-#��x>����î����������������ÿ��������������{G"�;!)++';�ONSZaxk'/s[���s���w���kr������^{��	 �������������,�Ÿ��Ü8���4����������������`��z`f�����iY��������_io|�ux���������������������&P_2-s�pA),!EXVW\��u�|�FUNT�|{{{{z}�}~|yr@>��E4rqjimja\TOMLF:3+$%'%#&CX�lWZ]VQUKDACEGB9%Ev���bQBNn{���~{zxtsprpoolligggb_P0Yvy|{xuvurqpoqnlmllllkjjeb54]zqa������{|����{~��������������v���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĩ����������������������~Y-w}|��b_^WXo�`JLMz�?DEGE4FC>;53.,+)('&# Rr/@@1,/ikT[ge_XT31�mA^ID:2-*+,(#=<3~_225f-&0.*TMI@$(%-1E�l4(#$tZ Cc2Cb�G4+( "S�H1!Iu���yfb_XWWXVRPRSR\]��������϶�u|�˰i{��Ŀ������  ;ZUMC:72.,+(%%%# "! "!###&((+��������=&#&$ !+7noomL>y�������ǧ�Ĳ������������CV�����ʷ�����~yxwvv{J$�g��Nc\WL?AENSP=539=C`Z*"(*A�������ת$# `������E�l6O��y!")v����5wmYKEB=6+$%+.-,((&"%()),.+'
+��)!,RN839:75=J<$*��pE`QA�������������������������g\`l���������xB%�1"/038D�b���qH^��x4���uLUWn���ck������Z���$�������������=#k���s<\���;�����|]rpo_�����d���io����ad��������qju���u����������������������+\~;%?A;8/0#B[TVXpbOZ]hMTOT�{z}{|{}��}}}~~~~}ys?I��R@wwolvvka\VRTVFA5(''%$.Rp�wge_YWZUMJKMJE='Ah��nTU_ky�����{zwvsqolonmkkjgea_K3\txxywusssrqoomnmmnmkijhe^AGijZu������������x��������}w�����t���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ŭ����������������������Z,l��|{�ą\ZUVku~ZIJ�y@CEHF1ED>841.,*)&%&#    O}/:<0*0u�M(0�_GSA>82/*,Lxrolejoo�3�`016���������J<=)($ 3I~p*'**;FfR .OwH(8@Pg)Ub{B3,*K���ebdfz}%"Et��}xia]ZVYYVSPSVWge��������ͻ�ȹ��¶�rfgv������<XUJB;82/-+(&$$#!!!! 
"!"$$'&/�����ļ����vournlHBlonlM={������̾�Ɗ~��ÿ��������>W�������������{yyyw|E+�k��GK96*(( $(4OK&!(*>�������؈!E�������H�a2*��u""4k4�]j1�qYJDD?5)##-.-+))("!')+-120)	��' 2b_V\^[RQUdG"2��l+X>M���������������������������ý���������u=/�11B6X�DfA"(5#�����������jY������F���*����XU@?Gl����\6+!'4g���{A�����y����������VfVUr������]y��������wW}��|e���������������������y/u~7$0(&-- =WVUSNPQRORSROV{{{}|}|}��~}|}~z{zo:<a`BW}zokpunhc[U_eVM=,&''&.Ro�s\bWNJUQHHJHHG?/9_gRAI^q�������{zuurqnoprnkjiihb^K5`swxyxwwurpooolnlmmlkihgf_JMXRi�������������s���������������u������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͽ�Ĭ�����������������������X,n���~�����TKygrkJXoKBDHGE2DE>941.+))&%%#"!!O�(8:-)/Oj��{S23�dRKAF82.,.Q�����{xw�.#|e21/;1.v�7'18(&%!b������~�bcL$#o�zksnni[!bkxC3,)N��~wztnmS #Ku��zma\WVZY\WT\dhri�������������Ѿ��������̻��� <ZVLC<71.-*'&$$#"#!  
!"""$(%-�����Ŀ����qqrlc\-5^higD@}�������İ���������������:Z������������~|zzxvwE.�g��I]NI>.2:CO]VGTkkeUK'#*?�������ڕ.Lsga_{�������~P�c>���T""9�nkTe1 yy\JFEB7)##,/.,)('%!()+1574+!��$ PoWPTTRNJNVM%5��nV{eT���λ�����������������ü����|���������u:,�- *I; ]�M�zj[IEBS�A���bdcdw����KNcjl`I\��{.����s����|���n����������wC�����|x���p���|]Mg���������Sw������~�������d���������������������v5QJ&:���/!*(AWUTSUUUTVVSRI]}z{|{z{}��~}}~}}}z{zlA)$3Fe�}omtuqib\XgaH5,)*'&%+Up�zfdTLRTNFHLKJJB5<PHCJVgw�������~zyuqoooopnklhhfc]K3\tvwwyxvtspmpomnmopmlkjheaQJHSu�������������t��������������o���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƪ�����������������������[/g���}v��İQCmVXUVTLGBDGFC5BH@;652,)(''%#"""H�9<:.)+8jzSOx�vJ,�dSPDE;3.+,0/Dz�tE:S�  �d312A<]�`9BJpW
+4@''&!2?;930/,' ^E#"Vop@1+&$Kv���yla[XWYUUZ_uqbn���������������žČ�����ʿ���!<ZUJB<83.,*'&$$!"##!"""!#$('8�����ý�7"%'"3^ba\>?~�������ʫ��ű�����������9Y������������}|xxuxD5�k��[�ǫ�fchs���ow�vi_\)!,C���ǻ���б_�����ñ������{L�Y>���u&" O>*q�6 �fVKHDB;+$%*/.,*)'%  #(+-0462+ �}#Xg707MK4.<MO%>��]%''S����������������������»���^����������q7.�'!.D0'
+g�P���������9!!���wqoo|�����oWPL[z���p8����v����v���6>��������kF������XMJWr���z������������T{������{���d��\���������������������m/hxG;.4"-* !E\USSTUUUUUSUK^}{{}}z{}~��~~~~}|ymB&&Ics��{�~xplkbU:*)''*(%#(Rs�yg]O@KRKBHOMLI@59;<=L\ix������{|xtrpnnnonkjijic\H3^sw{yxvurrqopmpnlmolllkie_UGF^~�������p�����t���������������k���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ì����������������������Y2d���|xw���aWTUROLFGDBEGEB8EC>7771-+('&&$#"!J�D@;0+,K�x55g��` +�aURCH;2-,*+8�t63���L�a636������}}i6CK)(%#$"!WI$AisA1,$"$($Mq���yha[YWW_kz~hWo����������Ͼ������κ���������! >ZRIB<73.+)(&$##! !"!!"#$$($2��������7! (($$#/`a[_HC�������ѵ�y����¾�������:[�������������~{xusv>6�s��W�å�yu�}���x{|va[U%!*E��ͳRUSKGB*0A;6225e�����uP�W:<��Z!2Z+*%�mYLHC@:,$$+//.,+*)$%#! #(+,-1330,&�v! !!bq\XR]Y17X_G%D��VihV����������������������ú��^�����������r1:|! !
t�<+1>Z,,z0"$�����������ao���������f	4��������������h&F�����^Q���������������������������My������������s]���������������������g@ya!#$0!*'"H[TSM=>:CAISSEa�zz~}|{}~}}||{xq9 ,Pi~�������yxzY7-*$#$&%"(Ln�xh]PHQRJFLMKJF>69:;=M_jz������~|yvsqopommmljhgfa^J4ewyzzwvutssoommonmkklkihgaQ@Of��������|�����s��������u}�����q���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĭ����������������������|[/b��y|tmjibYSTXSQJFHDCDFEA6CD<7552.+*(%%#"!  I�08=/)(9Z���xO&,�^SJ4E92-)')7X"&+u]42+T�y+6dxe(FB**&##8my["_T"6[V):mr@3*! <v�}5!Ks��vhaZPZl��uHMx�����������͵��sz�ѽ~������ú�} >[SLC<72.,('&%##!!" !"#$'(3���������|���wc_^51]a^eMB��������}���������������:]����ʶ�������}zwxu{=9�c��\Ӽ��{{�~pdOJC:2&$& !,H��̴PRIC5' %[�����nO�[:#��_#!=����w(�pZLID@9+%'-.-/.,,+&%$"!#)../2511.+�q$ !Xg^WTXY17RT:G��V(:\���̾�����������������½�o������������m,Dv !7/j�n����p���{*$���cTLIj���uQ=[y������V<ð������������rgϠX ,u��XX�����k�������UNM=L\][l����=d�|z{xvw{yt{xsn^���������������������Z:�q'L��v%#1,"KXVZq�����UQSIa{zy~|{z|}~}~}}|{{|xp0/Kd����������uD*-+%"""$!
&Hk�sf_QIPTMJMNKJG>6464<L]k{������|yxvsqqpoomljhgcd]F4cuyz{xwstrqonllmnmmljkkigeN7Lj��������}�����p��������}������h���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ű����������������������~]1`��wxsqmfbWXnvgQLHEDCEEGA6BG:841.,**(&#"##!  ;�,5>/(*s�sJ*'r`YP<A92,+)(('q[53>���mj�vx�NBE+(&$,jpEjXaU%,q`_i<pG1*!(o@ju $Gt���ye\[g���e@c�������������з��Ѱ��ͽ������¹��=VTKC=84/,(%#$$"! !!###'%2�����ý�p{yttbVUP:>aaaiGD��������Ê�Ȳ���¿�������:Z��½����Е���~|{zx{;:�]��Z�²������wqy�|m\N "+C��̮KLG=7) d�����mT�\=9��F $ T���ѭ-�pUJEBB:+%).0/.--.+(&%##$(++.2674/1�k#!8GHGAI@;AKK5!Q��OR1f����ǻ����������������ÿ��������������l+Fq!&:I:(s�A�= &QG))���}���c�����Q[8;h����GCï��W][[i�����mpϱ�1!g��P_������eRd�����������pz�����J��������|xqttqj]���������������������K. 5"Q[UWTcw�p]PSSDg}{z~||y{~~}}~~}}{|xl- 2Mg����������jG:90$# !'Ko�wjgZNQRIHLNKLF@6264;J]kz�������~|zutsqomoplkiicb_G6ftz{zxywvspoponnommlkjigfhG@To��������~�����s��������|������i���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������İ����������������������|\3^��uyuqm}�F9���dUNGDBCDFB6BD>541,,+)''$""!  6q4=>/(-J<%&%*C*xqbZA?83-+(($rZ415�����2F��K
+=?+)'$1|<CiYQ!Jg
+!z&<i�A.)!+�V=�#,Zk��qwq~���{HEw��������������ʹ����ȯ��������º��<VRME=630-)&%%#"#!"  !"$%&%3�����Ŀ�##5acfpGF��������У�ǅ������������8]�����ѱ�Β����}{wx8:�^��P������������������"!(-C��ɵnri\<&"2))"p�����l`�W:)X�o"# 6�����* �]RID@?9)$)/0/.,-.+)'$%$$),-.1221.
+=�e Kg]\`^VUWXY>T��HfHl����ǽ����������������þ��������������f%Ll"*-#%��o�@7/(#!Ce/���Qlwsl�����[���M;[��AGƳ��w���������T"3=~����Ib������|��M`�������qi�������K������hfvw{{yzxf���������������������A:1 CdoG%TZXWRW���KGTSCl|{z|~y{~��~~~~~}|zl/$;b~����������gO?;8)" )Pn�qefYKHHADB@?BE@6341:M`lz�������~zuvtspnnnnlkigea\E5cvzzzyxxvsrrpoopmjkkiihegcB@Pm��������������p��������|������a���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ʊ����������������������}]3[��{zuo���7Nq��oRQSEBCGDA5EG;62/,,,((''$"! 6r=::0*,e���������%-��toPE:5.+)&% qZ520F��j*#TL'<>.(%$-niAgYTN&9o$8h'=c�K2+"$i�Psk.�������ý�T8^����������������˽�ǟ�����������¹��?XTLD<74/+**(&#""    ""#$%($7��������#/-,(  )%2`bccDJ�����������һ������������6g�����ӟ����~~zxy6>�]��>{\WN8E]ago�������y"!(,L��������f#fɾ�Q-������gi�[9*^�c"G�dB)#�ROHECA5'$(1110/11-)&&&%'+--.011.+J�Z! %aeQQPTPIKPUDZ��9MMp����ǽ����������������ý��������������dSa &@C9&������������!?���rUuw������[��eK\p��6Pʶ��w�������u,H��������?j������]Ws��������Yl��������G������dfoaXox{zf���������������������;%a��Z)/FCJ
+$QXV\r��e��QRQDp�}{~x|{|}~~|~~}}}|wk,#7[w����������dM?74+%$#!'Is�td[K?<A?B?;:;<<522.;M`l~������~|zyvsqomnmmljhecb^F9lvzz{ywvtrrpqpnnmjjjkjihfeDAUr��������������n��������w������m���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������į����������������������}^4Y��|{r�Żcb\ZVSQWgLCCGFC<C@952/-,*('(%###" 4y@1:1+(ObUx9739f$$}�pjNG85/+(',_ifnlnO
+jX81,$
+;I/'$"##7lr]!"5[F"Rng>4$<h�G3+$!!.a�b'(K$+������ße1?p�����������������ΰ���Ğ���������ù�!B]UKB:62/*)(&#!!!    !""$)&6��������(^�ue),^_04caffAJ������������~���¿�������6b��������������~{zw{6>�U��Y������lVPF2&10+,24!)-L��������P"vؿ�j u�����`p�Y9K�ъ*% _��¯x%'}MQKGEC8+',44564695/.*('*-/./01/+'H�]!*g]/6;Q@%5DPAb��400q����û����������������¶�½�����������` \\%-B3�iGF&A]?������������sEFNb�����1RǴ��������������ý������<o�����{s��������uk����������Q������ruqhuxz|yf���������������������20]ss5Cxx2'U[XX^v>39`ORRDl�z{}z{z||}}|}|}}|z{yl'%1Qr����������cP@70+*,(#(Iw�lWQF=:<<=<<:999331.:K^m������|xwvsrnmonmligfda\C@jxz|{wwvuqoopoonlkjijjjhh^BAZs��������{�����l��������������t���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ű����������������������~]2X��~||�͌g[TUSQPOV{dCCFGE:F@:6411.)&&&%%"!" 4w<+;2,*(0_�|P--Pp  ykeOE82.,**/z�~��i�bi\50+ *3@O.'$$GyqhcdYUosbG%00.4EF=Pz*;kC/*(H��tkcj��(&t���ͳxA=[�������������������ͤ��z�ɿ�~w�����¹�|!A\UJA;73/.+'%$"#"   !"#&)&7������±\s�uqLHgeGMhdlpFM����������������¿�������3c�������������}~zww6E�N��Z��½�����������wnS"*,J��������S("A�ƫA"^�����_u�W74|��0'#L���ՙ#'wQSNJGF:-,.589<=?A<51/-,.100010+'!M�]! 
-gcWTY[K*M\Y;n��6"^`8!s���˵�����������������ðgqqtsqgq������^k[!(3<E%�gr���Ph��o9P��{UOOX����JIk��������+W̲��}��Yv���������������3v��������������[TSehlsx�����P�����ycssdz~�~tk���������������������-#ju{y+!;- =0eV6(QXXWS|���GDRSFlxz||{y}}~~{}{{~{zyj%&3Tw����������fUC90+)+*$/Js�_MJ?97887973289221.<Obo�������|zyuuronollkjieea\??fw{||zxwurnnppomonkkijiheaA;Yu��������������u��������y������p���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ò����������������������^3W��}~��ddYTRSQMKIo�HBEGF<A@<842/,*(&&%%##"-s;2;2)'*m�K9y��{7!{�mlLD:3/-*),%PcybbX2/(6p[VO*(#"L�}�rr��nU`R%!|���vn��a%;l�C0+)E���yu���W"$P����mGT���������������������ͳ�������Ȣ�����¹�{"?VQHA:622.)''%$##" ""#%'"<�����ÿ�4+(,*'#!',9cbpq@K������������Ƨ��ÿ�������4j��������������|wv7F�D��P���������������~��F9--J��������e!% 5H,k�����Y��N/D��F"%##F]}o",sFOJHFB7---58>CEHFA:532156554210+"T�Y"+ef\ZXYO.AWL/t��+$nW%w���˷�����������������®`diadkgu�����XuH)EJ6!�k�p&X�q0^rQ����������������������"_ϲ��t�s�r���`$(&*#p��*z�����������������zxyy|�����R�����wn~xm����tp���������������������'!J#"<=1:H-��_%RZW\o�pb��KRRDo�zz~{zx}}}~{{{{}}z{vc'$2Wy����������fQA;62-')%#
.Mn{ZLD<::::9963677.22.>Qcq��������{zxutsoonlmlkieeb[;;iuz|yyxwvtpoponmmlkjiiigc`EA[v��������k�����s��������s������q���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������²����������������������~^5T��|}���dd\VTURMIFZ�PGEHE;?A;:62.*))&''$!#"!']?4>4*(.[5'2.'uvhg=G:2.*++/J@==@Cpcjk8/)!5Yy�j*QK*(&$!&6s>4^LfU.&>5?X5<k�J1+$ 5{Z;hX&/de��s������������������������͢���ն���������¹�z!AWQHD:530-+(&%%#"#!!!!!!#%&"9��������3"(% %'7damb;M��������ĝ���������������1s����������������{yx4N�5��BM713���Y`knx���}��tK+/Q�������ک1!".������P��S7M��K''"+���q',tCLGEC?1'(+48BGFHG?92/.04677632.)#	Z�U 242040+$&/-'}��!$���ʷ��������������������ȵ�»�������|VzE&61%'�b|zEb��NR�Q[�����������jihhkqvz���_ɰ��f�t�x�������ͦ@?����#}��������������������������~U�����xw�}q���}ru���������������������#2"4�x8(AE'*RYW_|w=M]�KPOHl�{y||||||~~{z||~}|yve(&4^y����������eG=;954023+ ,KdrI@@9===?=>@AA80+151ARcq�������~|ywusqqonllkhfdc_<Bpz|}{yvstsoppqpnlkmkhikhh`EA]y��������������r��������s������u���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ñ����������������������`4L��{|��hfc\UTVQIIFS�VGEDD::@:751.+**)(&%!!!! !
*iL7@6*+@ZC>DKGDH^.#rxg`:G;1.,+,6�������lXT8/-+,L|�~Q"MH*('&#A���IdV#Cg^&=h}E/+"9���\6�����ļ���������������������Ƨ��������������ù�y!DWRIB<630.+'&###"!!! ! "$&(#>������¿��ne2"3KA*9eamX2M����������������¾�������6u����������������~|{2T�/��@ZKLM���ims{���{���a>).Q���������z! )q������Hy�K8&WJ(%&#V����V 2oLJFDC?3)'*15:?CDB;2-,('+-.00..,)#	_�Q " !  "(*$���&$����ɸ�����������������½��eo���������{Uz? 4LKB0�UCgrf2*RX3`��q_bhUw���\[ZX\O=Be��lƮ��XV����������X.V�����!}��������������������������zS�����{y�ql���{sz���������������������!L��d+VZW\|�DSx�CPPKm|zyz|{{||~{z|}}}|ysa&'4[t����������cB:8773222)(@]oJJG87<?BDBDCC>3,273?Sdr�������~zwwtssqommjigieb\8Blv}|yxzvsqqonpplljljihjhhaA?[z��������t�����u��������}������r���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ų����������������������~b5J��xz��cgbZUSTQLFHY�MDDEC@HD:740-+*))(''#"#"" -xV3;4,-c�������zp*{�k_7@=4.+**,Et��fXK
+dV700[���P	N@*('% "A�pjc8!YT#D�]k8 &>cq@3+"3�t[qD&1<������Ž���������������������δ�й�����������¸�y"EURJC=620-)('#$$!!!! !###&(!7������ñSM2<& %),(6\`uoAO�������������������������;w���������������|{}3[�*��d�����������������u!&.P���������֛U5)0L��������Fz�G9C�g3"%$[pVi�o#3eOIFDD@5,))0457:;80*)(&()++./,,)("d�G"))$���%-����Ȳ��������������������`{�lm^������}Sv9#*'$2�Uo���nklW{��������ZX���qƬ�������������6:�������%���������������������������yY�����|y�z����t}���������������������3uS0$'.%*UXVUc����bKTPIs~zx|{{z}|~||}~~}}zw`)'=`}����������V<41/,('&&*@\j]ZK:7BKKHAFIHC90394CTcr��������zyvttsponnkjhhfcZ;Bjv{}{yywurrpnoollkkljhkhe]GD]x��������������y��������w�����}z���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĵ����������������������b6J��|{��mc_WRRROJCKd�GACHB;A@8741-,*('&$$""##!.W4;3,,::(!"#%"+~�eZ7B?5/,,+4l�K2cn_\Z82/1:\x��rMA8M:-)&$!4CId50^ziUMV"<K(g`^)CfyA3)&6SZpJ0]��o!! E������������������������������ʭ�������������¹�t DWSJB:511,(('##!!! !  !"#$( 4������Ĩ&)4$%@\`ohAN�������������������������;{���������������}{}1Y�6��f���������������}g?
$3P��Ȱ��������������������C��G6T�Ԓ('&bƴ�tW#9aGKHFFB6,),4545785-)*)))*/.-/..(#j�E!*(#�ē"/����Ǻ�������������������S���{at������{N u5	4�Nw�������������dIk�����t�������������\.l�ʽ�����+������{sx������������������m\�����un�������t����������������������,k��x##a��a,PWVXJCOSK9OSPIw}{y|{{{}|}~~~}~}|{{xZ'+Fi���������h@1-)&""  '@`xcXMFJVXNMKNPMF=78:6CScr��������zxvttqpomliihgeaY5Dkvy{zywvtrqoommmkllkkiggg`QKVu��������������u��������������x}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������f7H��}|{��e[VQSSNIGK]oCBCGC=>C:852.-+(&%$$#$#"0�[<;2,+T|wux}{zz�2yaY;CC61,*,2t��sT;\4
+W`9325$<Xv�eYB/*(&!_��������\QT"ayqmosz��.Jp�>0*)U��������q"J���������������������������������������������¸�v!CYTI?9722/)&###!!"!!! "#&*!8������Ƹ����fyRKTUgccme@M�������������������������<�����������������}z|*\�<��d��������so\JA1"!/S��ʐMSN?3+!'( !������E��K3I��O)'&=����X#@kINNJIE:0-09;::;982.-+)'.30--/1-% 	w�C"+%!(�Ǉ-����İ��������������������������������zI#s5 "'B�P60&!#)#}��iNw��������TX��������������������A*2014?���.�����]5IHJR^��r^\VWZcid����hk������v�������s���������������������~(FI!@G3-TXW_������OSOEz~zw|}{y{}~}{}~}{{vY"&Ck}�������sR4)##"!)9^~dZQQW][SPPSQNJ@>>B<FUet��������~{ywusqonmmkjhfdb]5Hsy{{}|zwurqppppmmmmlliihgbWIGr�������yv�����v��������r�����y����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ó�����������������������d:D��u{v���o[WVYXUGF[ODCBFG;:@;972/-+'%$$%#!!! 0�X=;4-,f�snqkf_VP!svbaMHC60+),-h����}�Z
+]b;46��Y37AXH-*(%"6I?=C<:52%WS VeWQNE95.LdwC1*$-D>:74100'" L����������¿�������������������ż������������¸�vATQG@;731.(%!"#"!  !! !"%(*#<������ŨTQPS73./52C]dkc>M���������������¿��������C�����������������|wz)^�E��Pj[N:.(,"!,V��͎LSK>1(!������={�J4S��3$&&Wl|�])!IzaXUROM?66;@DDD@>:51,('&03210//,'	}�?+31(5#!FN[I-�ƅ2,2����Ż����������������ÿ��������������tE%x-")?E&M�IIRK>77<>C.����RUOo����gQd�������}�����j������������{zy}���2�����z[SN`m���x�������v����eu������{�ph|���t���������������������v#o}wPL�~:.SZURQY|uo�DSONw}yy}|zxz|~}|}~}||uN4`{�����oK.*&! ! +<m{e_\\\ZWRLMPONIB@>C@FYfr��������}yyvusronkljhffe`V5Gmz{z{yyvspppoppmlkjkkhhgf_Z\>i��������������~��������z�����q����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŷ����������������������d9E��~~yu���nPXt�vKQWBCDEFD<<=;:830+*('&&&#""! )kS>;2,,e�pycjJCF6/,*.2q�|U1;5[Z751Ii���j=&@TUB-('(# lW@\pF2)#"O���������������������������������˾�������������rDVPHC=630,('%###!!!"! #%("C������Ĥ5.//("":^glg@T��������������ù���������F�����������������{wz"`�B�yFC4-!%1Z��ɊFNC70#������6��H1,d�<'&%A:0#""Smc^ZYYJBACGJJHCA>83.*)*25431.-*&
+��=!,�����XS����!,�ʇ/}}09�������������������������~������������sA'y&"(( 
+T�?E?46?>7:=*����]iSb���kIHE@8:<Gy�{�����n=ay����������������8��������l{����p���x���y����c�������}�sy����u���������������������o1euP( 05%2TVTRJS�pg�KVLKy}y{~}}z|}}}~~~|~}vO&By���N,&%&%! "':tzhfba\WSOJLMOOJDBBEIIWgr��������}xwwurqonjkihefe`V2Nqy|||zwvrqqponmklgjkkhgfeZYoY]�������������~���������������r�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ҿ���������������������������������������������������������������������������������������ĵ����������������������~h:>��z}wrpz�~EI��tXUJCDEHFD=9==;61.**)(((&"   &bT=65+*S_/|�ioA8A71++.8�wPGQTz\
+Pc:508el@;c��n0^E,,((%m\ @S\A1) "#O�������ϻ�������������������Ϳ���������������·�sBVOGB>83/+)'%$#!!!!  !  "%' D������ý���l;!!-=GR`hleBR�����Һ������������������K�Ľ��������������|yx"]�=�kGA4-  "(0U��Ǳ���u:$D^XL 5������5}F0W��*%'&Det��@ _�jb__^_PIGJQQNKJHA952/-.313333/)&
+
+��6!?�����lh�Bn|8��}*e"K�������������������������qlik���������t=0q!",4
+a�9H/%(21,-8$���\Sq����������������o	������~�BW���������������:����������_{��i���z���y����b������{n�yn����w���������������������c9{l% #h��;6UVUa�����vISNNw|zz}||z}~�~}}|~}||zuP#+d�T9)('&%$! (=uznlf`[TMLPTQROKCEQKPMTjt��������|yvvssqomkjjjffdaT/Ot{}|{{wuttqnnomlkjllkihigFDpzOr������������x���������������l����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ź�����������������������h:8��w}wqkijkabytcSMGDEGHFC>7A=;73.++)'''$   ){Z=85,)Psghlpmnx�6wxfi>6@80+*,7}������W	
+K_;6/A�p->czneG[F-,('!)ADFECBDJO"UP#,'=XuC2*"'3./,)*1GO#"P�������̞����o������������������}���������������t"DSOGB=730-*&%#$#      !%'H������ŬO8**'"+E_aV`ikeHY�����Ͻ������������������H�ſ�������������|wv"d�B�hF>.* *0W��������?%����? <������2 �uG3Y��.'),�����i ]jebb_^PMILTRPNMHB61.+),121121.&!
+��6!1���u�R[�Tzm@��p/QA����Ƚ����������������ÿ���Ŀ���������p:2i!"-;'"
+e�@LJ5!,69 ����������������������f
+!�´��qOnz������½�������v?���������k@|�pk��kj��x����a���������x�����t���������������������Z/ln5!-QR'8SVVUWdKVaBPTMP}}yz~|~}~�~}|}}yvS.5^R/,(#"%$" +Bwyljfaa[VPPUSQPICFbhRRPct��������|{xutsqooljiiifeeP0Moy{zyxvvusnb^Yahjkmjlkjhb>6a�yc������������{���������������f����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ÿ�����������������������f=6��vzvrnga\YX^UNKHDDCAGHD?7@==72/+**''%#  *�[@64-,_�kllgdWTQ%
+xta]B>B6/-+*.5?�i1#;ESa<72;U���c-<QYC/+'$ >����|yutp#VSNbcjlkwD6[�G1($F��������w%#P�������̰�����������������������ɠ��������������r"HUOF@:410-*'%$!     !"$)A������Ȥ!!7bjkeL^�������������������������I�Ŀ�������������~{wt!e�9�kT}pk_QRZXD).3?CCNR2!(1\��������3&����?&������-#�}B2;n�)$&&y����I"PcX[X[[]NKHKNNLKJB<2,)&%)------(" 
+��6 <9/`Hn���qD��e6_$G����ʿ��������������������������������k86i"#5E?,
+m�LA5+%03 "���ONNQq��������������T!�����c�������|~��������l@�������{ts�����nat��o~�����[������s��������n���������������������Q5fZ& # 'fvq'!<UWWc�����eMQKR}|yz}}~{���~}|~||yqN;(*TH/(#!#  *L|�lg_UUWVSNNI@@?:Db�eVZ[n�������{{yutqqnnlkihggdbQ.Nqz}{yxxwtpj]XS[enkmiacicb12Up�}m}����|�����u���������~�����g����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������h>2��||vqmh_XTTUSOKGCCB@FGD:6?=;621,+*)'%#"!""&lS?75-(`�knWM<:B70-)*3{������u^d@84o�d4"4XQ/+'%!(86Z�C7T"NS#l|D?>A`?3Y�B/(#8hQUPl�mT6%M��������½�������������������ʴ��������������¶�s#DZOF@<60--+''&"""    "#$(!D������ͩZektojhaSEN_mmhL^�����������ľ������������H�Ŀ��������������|zu"l�H�ng���������p|y�����V",5_������׾5$J��n������,&��G6�Ȳt-%"NC #$!]re_ZWRLA?CJRQNLJ>;2,(&&*-//.-+'$ !�s5B�tps�S}ga^IM��W"A$O����°����������������¿~]PW[VVP������i0<f!!0@:*v�OZA6)#+7A@!)��zf��~���������������L!�������������.-"&*+9��fF���������������������������X������g}�����~|j���������������������D9sv%&)#"!
 Sjh+#AWWVSZw��\SORMR|zy{}~}~~}}�}{zsE@J*-LE)% " +P|�obG=GOQPQOF.(--@Z{�odaYq�������}|xvtrqnllljjhfc_Q2Qv|~{zyxvrlcWNKaimTSNDYhda.0RngWav���������r���������������g����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ø�����������������������h>.��|~xqlhaXSTRPOIFCCBCGEB;5<:950.++*'$#"! !!ZQ@65-)MX1!
+_l`VB5A91,)*-ORHDNLK6IZ@82VJ:JSNV^~oGF*))'#Kx�o|lfqPHP$-YVJJTW-4^�D.'"E�����rr�r""R�����ҽ����������������������˳�Ƹ�����������·�p$BZOE@;81/,(&&$"""    !"$) G������ϣ@Vm������o_ZjkhJa�������������������������D�ÿ��������������~{ut�9�ff������������{����S$*2_��������Q!,+&������-(�}@3~���@%&���aC' r~qjec\RB59DGQUTREC61,*)-000-+)&"$�{4M�����Or&&'W��NBa)T����Ĭ����������������½��u�����������f,Ga#//)$n�<?1-,.AOE2.��vs�������aZ]]`Zfkp��F$�����FHG9j���A�áA���I��XK������ihmlr���{������������Ln�~���|}�|�����g���������������������93wp$#'""%""IZWWXUu��m@PSLU~{{{}~{}~~|~~|{{w>9`\;%H@'#'O�}_8+7ARVTSRJ0)('Ap���flSPy�����}|xvtpqomnliggfd^O3Us|}{{zxvriXH?BE@@35:;Zdc^-/Tn^S[r���������r��������}������g����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ż�����������������������h>+��z}wnkgbZSUVRMHDCGEDHFB<4><94.,-*('&&$!! !dV>73-+L�wzyzyw}�;Tok`M>D9/+*))%	
+G]B83c����zc`cjCB.(''#-|k@"C[`C=I#.6Salab?4`�=.)!-LH?`|h/FD$"P�����̮����������������������̷���Ǳ������������p"EXPFB<830-((&%$!! !    !##)F������Н)!).0HkgjdJb�������������������������D�ǿ��������������}|v��4�^PcZ^[fc`|����v����J&*6a�������ړ##Y������))�w@2w�vB*&+���̺E!mi`^``\ZF9:>BJQLJD=3-'((--.-,(%#!!*��/.'$&]:riq~L^��^^���;��������������������y�����������g+Kb!#:;4"|�+*8=@<72.*0�����������O]cYEfgXF��7)�������oV����C���9���E��UN��������pPj���xrlggssns����Kru{��}��w�����h���������������������1"Hwb# !!"!'LZXY^��sd�_OSJY}|{z~|}|}}}}~||p<Dw�b#=:&" %M�zQ+*.6PZ[XUQ8)'">y���OP?Fu~����~|wwunqomnkhiggc]M0Yvz}||zwusrlO:@IH65<IVbebX*.WrcX]p���������p���������������f����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������f=*��{~xoohbZUSUWLHJKHFDFED91>=:50.,*)&%$#"!! aY=73,+H{nhcf_VRN#aqllP@A9/*+)&
+Rm@82,"a�`&!QYB=0)('$-Q!#!#<!9C#jsfrjQdN2^�?1(  1�amf&!N�����з����������������������̾�����������������p"JRMGA;630.+'''#! ! ""%&"F����������"F}Y2!"!O�qjeH`�������������û����������>�������������������}��+�cU]]nw�mes����|vnV=#$)3a���������y'R�������#+�?.g��)*&#M{���K h}mca_]ZI>@MJKJGC?8.)'%&,.--+*'%$$.��.R�����;}���Gh��Yf���ͼ�������������������z�������������a%R[ "460!x�)8PRH6+.758�����������O���P���L��/*������}~�����4���2��}V��NP��������p�������������|����O��y}��~�������{d���������������������+"!MiE   'b�l""KXUZ^�U4B^YPQLW}|{z~}{}~~}~}~~}||l8E��M7;$  J�xH*)*0R^ZXVO7(*$@u��|BJBIv~����~{wuoabmllkiieeb\J0Zuz|{yyvtqhVKABGB34VQfifbS*1[p_TYt���t�����t��������{������g����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¹�����������������������i=*��{~yr}~j]\TWgj\VbNGEFGC>3>=:72-*()(&%##!"!eeC97*))'!Mh[\gxvK=E:1,,1H@945;A?ZKCd?81,U��;Lt|r,WF/+*&$={wxyysswu(<D#i9L9@C2c}91'!5�N`o'"M��������Ŀ���������������������¾���������������n#KUOG@;641/.('&"!       "&&"J������ɬwO5D-$g�rkg?`���ǹ���ʼ���������������?�������������������y��2�im������hiz�l\OD8<J-#+6^���������ԲxQGb���������"+�{;6h��&((#b�~kn1p�xeb`^\PAFOLHIGB?7/,--,023322.+,.5�o*�{xu�CPU_n:j��N>^Xm���ν��������������������������������}`#UP%;B9"{u1+*)$!"'45@�����������K���L���H��*0����|D\������Yl@=\Ey��HU������ci��������������t����]��������������{o���������������������'!&Y~Y ! 
'1. IVWZd��rL>EOPIU}{z{~}~}�~}}~~{|zp6G��L<8$ Fs3(,15I^[ZYM4)-$Ew�zBI>Hv~����~|zxuQLJgnliijgec_I0Xow{{{zxspVEJHMK<5<dRghecW)2^n\Uaw����������s���������������e����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ù�����������������������g?-��~xpx��ksfYhbVRgKDCEFG=3@>:62-+)*)'&%$"!jdB46+'$"R�[wfWit_B9G:1/,8���������W7U;60)jbQK++(%$.TTWSQIB@C@F#- "#
+2f�A2' !#\�x9-9'%P������������������������������������������������m$GSPFA;62/,+'&$$!!     "%' I�����ƾz'(4>Vhl��mjd=c���Ƕ���Ǻ���������������;�Ŀ����������������p��5�do�����{ddhko������L#)4`����������ÙbPp���������"(�v;1(y�-'%-�ݶ��8{saa`^^`RHKORQPJFA9545558886784115;�q'l�����>����Hn��DY`<s���μ��������������������gIK~��������{aWG"-4"�pLL;1,(,192M�����������J��|=��sS��(0����|Ygk`����t::{�eQ|���=Q�����tAWWZw�����hcr���v����h��������������p���������������������  #HfO !!  8��rIVVUQ^����MPQJ[z|y~}|}}�}|||{{xl3M��F>5"GuR*&-669HSWM@<50%Dt�wEG?Mv����~|zve<D3Ykkhhggfc\I1Zrz}zyxxsoY^XMIP6:@\Jjgc_U(4dw_Wav����z�����o��������y������e����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĺ�����������������������hA(��}}wr���pqdZ_RNO^DCCEFEA1><:72-*(((*)&%" cY:.9*&#!qsK~MdleI9H<2/.3J?W�e{s5,
+A\762=I04AABN`j	<E-,(&%$,())'((4U%?I"%$4n{61)#@}wqw|s�s(%P����������¿�����������������ʰ���ȿ���������¶�g#ERNEA;62/+)'%$$#   ! "" :����ü��������������hia:c�������������������������4��½�������������{n��8�Zk�����vb]ady��v���K$*8d��������ԠH&-z�������!)�qC4{�&($+m�i��/!���xqnnp\RRRW\ZOJID?;:99<>?>>=<765A�o(n�����#1=sVAy��C49*{���ͼ��������������������������������}]dE"029�rDF>:349=@2W���YLTP}���OFZNPFRFr��#8˴�����������������¿���/X��������������ht��hqv�����i���������������t���������������������#U|L  JSGFWWWWX�}��XQSI\zzz||{}~��~}|}|~}|zk2L��@=1 !BcF&(-39@AJOE?==3&H{��wIO>Ry~�����|xwL9ALmmlkihfcb]F0\oy|xzyxrq_V[WAD8EP[Lhff_R'7cs_Xax����������u��������������e����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ļ�����������������������jC&��~|yv��m^eVZ`VMJZEBDEGC=.>;850,)'(+0;=>W. ^[?.6*'$V�d|c
+=]�uO=H=2.,,*#:���F
+
+Ec541e������|x\	;@.+)(#B������~|r(DF!IG
+4rx=2*%?z|��r[WcL &#O����������ÿ�����������������̿¹���������������c#HQMFA:320-)'%$#"    2���ͽ��������������fjkb:d���������¿����¿�������~7��½�������������~zld6tLOY\bWFFB>F;>JM;AJB(%+7b�������װ@" -������}.�l>(�Ń#'%'`:".3tongdijdOEHSZ[\[_[NBD:9:?FHIFFC@<;G�c$������)%XP7���L bs= ���̼��������������������������������}Xg?'>=3)�a*&4753-(-'cî��kys}����gb���{����=̲��qennn����SXW\Z[bd���+a�������{{����������||������k���������~�����~��������������������� HM?7}F!LYVYe���sVIRODa{{yz~~}~�~}|{}~||yj2U��A=0 +?hJ&%')2FGB<?=9;7!I{�~s@J=Vw~�����zxq>AILnijkiffab_E3\oxzwxwwrqi\ZaTUSZRXCefe^M'9enYVb{����u�����q��������t������o����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ļ�����������������������lC"���}yuvoj}�TYg]QP[GDCEGE:2@<730-)(*-A��s�Z! 
+HRG/5,)*%MbX%3'>e�wH<F<3--5OGQ�gy�]]T=L160fd*$
+<B/-)'#+EBk�]:2,4LL#GbZXZfjN
+2j�P0+$0Kb�s==@g` '#O�����������������������������ʡ�����������������d$ESNG?853.-*'%$##   :�������Ž����������gmke:f�������Ŭ���������������|<�����������������}zf~]D�TSey�cYb[LPLZhsz~pL$+6f��������h!J�����1�g;.��x"&%+�Ʀ�Z%zuollkh`SLKPWVTQOJA9<<?@EOOLKF><AEV�a#�]#+.->����5���5F3#����η�����������������¶�������������|Sd:#$%#,�R:BLF>>>=-h����|�}�����������²��IѲ��^~��tj���X[W)=SV4n��(f�����xdmiTp����������������o������������������������������������� \tR;$W]@!IYVUYw]88>NSQDb}}zz�~}~~�}{~�}|yh+U��<912A`@)&#")>D=066:@;&Iz�}pCI<Sy����{wfZWWYilkkhgfebZC3auy{zyxvtrogdSUZlo`_Bbgg`N&9jpZXez����������u��������w�����|n����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĺ�����������������������jE ���{u�����RTWUPNLACBFEC:+D=820.,)).G�L1A% 
+DRF57,)C�~upsnqt�BAY��U<B<3/-8���������`3H38.W`6>-.((' )E�d&"*:W%FL H^NIBBUM
+/YiE/+(Q��������b&%N����������ÿ�����������������˿�����������������b$EXNH@:62-,,)'%#"  8������������������wlmib7c������������������������<�����������������}xf�e>�Yg~���lpibdi�������5$*4f��������=#U��O)�����y/�f9-h�f&%#)���Ŵ6 ��xrokkj\ROUQIMHE>7359@EDCIGB>=;87]�U!�T$*1.1pt�n1!���2V+)����ɺ����������������������{��������yQa4#$$(3�YJ/(% (.5)p����������HIGHJCJZ���MҰ��p����y������[�ſfv�� k�����p����f����lb\fhv�����y������������������������������������x(]e@+>uu<"L\WXk�����NQQKh|{|y}{z}}|}~||}�~{yp/V��75.(Af9,*$ &4DC/0-;B;%L~�}oFF:Zy�����zj_f]X\illlhfdea[=6duy{zzyvsrqlWPLI`akaGcff^L%;in[Udx����~�����x��������|�����yr����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¼�����������������������pC"���}|x���pwQTXhYOlEFBGFD;+B<620.,(),6�mUtL GXK25-+?ud\��eHA@$PP��T<C<2-,2YL863;;6HG
+
+2U<7/5+')(/8IP
+1=0-(&& B����x|zwo):J"+aO.Z^?0+'3C5620/4OM&#O����������ÿ������������������������������������a$EUPG@:40-*+(&$$"  .���������|��������xpmid=e���������������¿�������~?�ľ��������������|yi�wA�Wt���hkgjou�������E$*<j������ٱ,$-��ǰ%!�����y4�g=&!+-'&"%TK���4*���~z{{vlaXUVRTPKFB?<<BMNRTPLEFB>8^�P������8]V-)���%!\E/����ɴ��������������������������������{M h/!%273�N@701%%3+}���`^bz����{}}Ob}�Qq��Rӳ��[}��t�������A���Zw��"r�����e����j���o������p����x��������������}���������������������q(e|?!?��P#NZW[j�cem�LPRBi|y{{}~{||||}|}}|}{wg*Z��88,!6]8,/+&&/IH($$5G8%O��}qHG=\w����z]gme_[fkllfeddaZ=6dt{~z{zvtrrpVbN@PQhZHceeaJ!?jn^Sdx����������v��������{�����xx����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ü�����������������������mB ��}|uumlx�WVWbUSrF@CGGD7 )<;:30-*(*-=�fd< 
+BOC02.+,%>�PJJ~�R9B<1.-7���������[
+3]?3.Z��������x	-=/,+(& ,A6741.('@#3?!!3HZddU&
+0dqB/*(T��������j'#P����������ÿ�����������������ɵ�����������������_%HUNGA:40,*)&%$#"!  (���������~w�����x�qmlhc9c���������������¿�������}A��¿�������������ywb�_T�WzzmUI>IIGA=LQUIJ�K&)9k������ض+%*��Ĭ% #�����w3�c<)!')&#!-�E�UA%'���}|{zufVSS^`]]SI<:9;;CGDCBC?<327_�M �o_b_O.pz�z)+�â#+6*2����ɸ��������������������������������vG"f/08&
+9�A0CCF9%25#���o}�\�������_y��hr
+Uѳ���jgby�����F+${æB���#s�����z���z�����������z����o|��������������{���������������������h 5)!*+./-*'0B='&RXVZn�;@Y�OPQDh|xz{}~|}}~~~}|{{~}{we*Y�u5?/ 
+/N1+1,"!)>>-J8(R��~kAF:`z}����xbmifZEIhkjcbed`V:7euz{{zxvurroWh\JHQgRHaee`H%Bln_Wgx����}�����v��������|�����{|����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������û�����������������������kC���z|x�����OPSPHHLFEHHFB8!(A;941.,,+-7w�[d4 
+EOD*6/*.BG�{5:=HiB:Gv}U8@:2./1JD+*%%)(;>
+Gd?1.\�%,,&*]	/>.+*(&?STQSSQcv-4="%W��{O@3
+.bxF0*)`�CFB<6/E>($S�������ϯ��ľ������������������ǻ�����������ſ��`%KULF@:52.*)&%$#! -�����������{x�����koli_>d������������������������~F��Ŀ�������������zya�QO�Rvzpi^OZ^^ipw��|��F&)7n��������2U��A!4�����p:�b>(&'&%#(�wP]�/0���y���~qrtxwqolcYQTNHDEKJKKLD;>FM
+m�E"$�?`�vy�)+�ǝ$fyL9����Ʋ��������������������������������vF$f,+E)	A�I<.,*)!%, ���m���q������|T��_u��
+[ײ����������V(W�y>:S���*r�������������}�������{����ez�����{~�������v���������������������X$".;?BCC@=X��R&RVV[o��n��ORNGhyz{{||||{{~~|{{~~zwd/^�q4;- 
++L3+11$#/@( :7(R��{pHJ9b}����urkaXLLM``ZMXde_W<>ev{zzxuttrpiah_QF\i^WdebZC#EljZVhy����������z��������~�����w�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ý�����������������������nE��y{y�����MTYNHIOGDEFFE<$=;841,,++-5�n$gn 
+COK26.,;�������{x><BfhR9=:10/2o��������t
+OW93-6�h)%_V
++;-+(($ h|rv{hy�\!4;".+(@WfB	-_}C/+)d�jmgjos�b&#P�������ͪ�����y��������������ʜ�������������Ŀ��\'LSMA>;52.*)%%#"! *������������~tt���jqmme=c������������������������}G�����������������~g �RJ�Lo|urjdomw���������7%,9l��������f"'" `�����i;�b>&$)&$! / Gt*E���������������xutrkbXVUVY\chikmon#r�CH) g����#0�Õ"Q38����Ʒ�����������������½�������������wB'd(#
+	E{MR@2)$!'7:"�¤qVdbs����uKEE`��L���	dβ����������������u{����*t�����tnssx����u������x����]{�����vt�������p���������������������N'7:DKNRQW[a^RUYE(4CIJKJJC#0/ 'OWVWI\��yJMSOEm|zzz|}||}|}~}{z{|yu_+h�s24)
++K3'-3)$#'56 $+$T��|gIG:d}�����zfsf\RIWQJDKGDbe`Y6>fvz{zwvuusogfgb\N_ijfgeb]A%Fni[Tgx���rh�����y��������m�����r�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƽ�����������������������nE���zzv�~vr�PWny`RhICCEGG>%@;:40+-,-./Vif�b FSJ<7-+-@1(()'#D:jyY;;90007��VSQPOOvh
+sS65.%5y�����a"	'6,+*)& (oL%2T$+TV7?"U�c@ "D.*ev@2**H���|tkbgS'"S�������ϱuYVX\^��������������Ƨ������������ƿ��`,KTLB?;62/+'%%%$"!    +�������������}q��unspng=g������������������������yM���¾��������������k!}JCrAUioiagpm|��������W"&-<u�������ׯ8#!?������j9�X>#%(&% 4��i<&!J�������������������{}wonjkqtwwwsso	y�E R|{�o;�ǔ.:����Ƹ��������������������������������w<*]%
+]t6678358CE5!�����������kGr��<DNf��|
+rб����������������������/z�����}wwxy����rajplop�����]{�����ux�����yh���������������������A"<NTWXWSO &k��F+SYVZguwtscOSNEo{zyz|{|||~~}|||~}{w_'f�n/9).PI3.42*"#+, %V��}bJEAb{�����xMo``TP_PHPhe_ed`Y5@fx{|{ywvusngfhf`I_jkigeb\A%Frj\Xiz����������z��������������o�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǽ�����������������������oF���|zu�woi�LW]_]LWEDEEHD<':;;52.//:C+G_gt>CVH>5+'%!`Pt�]::91-//N�oJECQcl.
+fT:3*$8RS?.
+)50-+(&!B�����*7yp%7C!0rwt�f{A
+([j<.)(@eUQMVU\rV'!S�����������������������������ͼ�η����������ž��X&KTKEA<72/,('&$$"!!   ,�������������xt}twvsqa>h������������������������uP���ÿ��������������h!~IC�CPHGK99H?=EA<GD31*+%,:w��������֚>F�������j;�W;#$(%##;��̿�( H���������������������wssttsrpopnnj{v9!t���|?�ɖ4>����Ʋ��������������������������������p51_"h�35;=@:32. !���ucfm���������������ptϲ�����������SC���v2|��������������������������V~����}q|�����zqh���������������������>=TQ`lqqrppnmjbZO.K]adffbZ!+'+U_X[{�����ENODnz{wx{{{||}}~{|{{|{{wY*k�k(<**GQN==4%#'  'X��y^JC=gz����uSpdaCO;Hdmd[^gb^U;Aix{{zxwvttndUif_I[jkheecZ>%Ith]Xiz���������}��������������o�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ľ�����������������������oG ���}|x�����WUUTZLRAEFGGC?")=<:850-3b�Bz�hya!!>XM:3-)$hWdpUF@:0..-.AYy��zP1/@f=5+#
+,:31-*'"&=@>Ju$"7/=D :~}Y8(='*Xb=/*(R�����|vtO%S�������ѿ��������������������˜h~�ּ��������ž��V'JTKFB=72--+'&%#"!! ,�������������~vroqyvogX>i������������������������vJ���ſ��������������i!y>E~A�ug\RVVRm����{x~z+%.@{����������Ξur���������`<�S9%#&&&&*i����*Q�������������������}vnooqstsqmkgf\	�v4ih��rB�ȑ/Psi';����ƭ��������������������������������p37X!
+c}=A<3/%+92$%����e�q��������������a{Ͳ��PgfaX���`L�����RO��o9����������������������z����Kv����u[m����wmh���������������������8Kmf�����������~o.I`gjmnjb!.|��6,VYW\}�`�[�BTQBrzyy{|}|}|||~|{|{|}{vX-k�m*=+ )CEG>:+ #'"#'$&[��z\FC?k|�����jYhc_UePZ^WURO]a`Z7Aku|}{xwwtsndTmfTHQijigeb`9'Mrd[Wjy���|�����|��������}�����o�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ�����������������������mG ���}t}���aUVdy]QnFCEIGB:#'@<;83//1Lm@[`=Q8!
+CoQ44/'#Uoyb@?;1-+,8mdihmos�~
+
+9aB5-$
+*910.+% 8)E&:@ES&
+'Yf:.)'7S��b::>YN&%W��������ǽ�����������������������}��������������Y$IRKEB>82.,*('$"!+�������������|vwsnvrje]?j������������������������oK���¿��������������dyHKrH��vmklhk����������-%,<v������زZTMQPMJNHe�����X3�S9'#%%$!"vwvib%i�����������������zimrmnpnqomkigc`S�w4 8����ZH�Ĕ(K~i&*����ɿ��������������������������������k/@W!
+ewEVJF>;@IB4.����~z������Y7:897L���V!ʰ��~�������E�������;��g:������������������wfkp{����M{���~vr|����|shg���������������������3Oql���������}~n(7CM[efdX(AA .VWW^��^�b�HQQExyxy{}}~}|||}|}{z}}{qR*o�g*6));6197) $+*032**_��tZED@m|�����cceaB1>LX_\^YQX_^Y4Eiu}|zxwvtrkecobSITmljhgc_3(KxhWWhz���������z���������������e������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mG ���|}t~��ppVUY\RPTDCCFEE9 #;;863/.5{��~vpwK BtL43/'$Dx��h8@<1.-.G��{y�o��T	3L?6-%	':0/-+%":vy~z}ou�18D!(*#Vl=/('W��������b'&R���������Ƽ���r������������������Ѥ���������ľ��W&GSKE?=72/+(&%%#  -�������������|xwvlnnkja@k������������������������lS�������������������d#�EJqF��vkldeh����������1&+:y������۬&%R�����W3cU7&$(%#">�̲�{( wǯ��������������}xsptopsuspmige__Qt2.sckxUM��o$I5+��������������������������������������n-A`
+knDH*$!&('7���z{dv����ob�����^T��K!�Ȱ��l���t���<�������D��]?�������ylz����vomw���������I|�����������~tkf�¾������������������,4@=T_``a`]ZYMNPC&3;1:9<@B.{�y(2VXUR^eDUJdMRQDuzwx{|~|||||||{y|}}zvT/p�d' 6) 1C-(+.% ")0144()Z��v[KFHp}����|_ghZIOMKWeUQQMVb^Z5Goy}}|yxwupf^X\QI<SkjihfcZ6-NxdXSg|���������t���������������g�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž�����������������������oH ���{|u{�ys�TW]cSSU@DFGFD< "9:8520/-c�ZXVOfE
+9gF54-&#	
+Au��S6=<4./.g�K9Bq*J�[		6G<6-%	0@.00-'"M�aaa�k@Bf,8F!DH
+%R[?.($.ZQDA?:>]O("U���������ɲ�_Ov��������������ɡ�p�˿�˴�����ľ��R*KULFA<51-+)'%%#  .�������������}yxwojhile;j������������������������gQ�������������������e'�8IsR��cF;7>@LVWYXNCB=3&+@������ۨ(' # R�����Q3jQ7($'&$#<����y%~̰�������������}�~{wwtsvvspmifb`aV	 ~m2���aT��b=V+����ɿ��������������������������������g'EV	{kJZA,;�����������S�������B��<�ï��dORX|���4W�����Z]��W>������^gcSa���sfa{���������O�������������wsi��������������������� " 8]x��}[Yw&3TWTQOPQQLJOQOIyzww{|}}zy{|}||yz{}zvM3u�e)#;)#@A("!$#'/352#*[��uVGDDr}����xdmkheUSOKFD@JGUa]S3Hky}{zyvvuo]HKGG\XbighiecW/*UwaXUk{���������t���������������h�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɺ�����������������������mI y��}|vtkim�UVjhNO�LFDIIC<#$:8862125l�sdXdV! 
+/YH75.'"As��U@<;4/06{�����2b�O	5G<6/&	3<00/+'$!P�$KA,q25@*1/2<FPR\L	&JU</($1��������Z&"R�������йuS_�����������������ϿͶ�}��Ƭ�����Ŀ��T*LTLFC<53.+)(&$#!!:�������������yxxrlginf<o������������������������eR���¼��������������c$|+IsR��S $&"&*?�������٢,)[�����K4lK5$#('# !'"-M'   �ŭ������������������}{zyyvpkhfgfgW	$�e0"����Z]��bKd7��������������������������������������f#HMsYEK9D?-+%D�����������R�������F��5 ��������������'!O���Q?�����w����q|�����wjjs������G������������zsp���������������������!5RZ]iowe0N-4TWUUTRKMUNLQOIx{wx|~}~|{|||{{{{|{ysH1u�b(#<*'SA.)$),'!%(.46.%*`��sQFFBt����qds]efQRXOGK@GRab`T1Joy{|{xxvtpm[UPVlnlkjihfe^0,V�^WUl}���������q���������������f�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǹ�����������������������qK s��y|y�����STbbIMjDDDEDB?!%98;82211Ov2I[LfR" 
+.B@67/'!
+9h��Y@=:2--3t�a45$%	
+>Q74-%
+*91..+(#Ea"[,5>`��wgc[fL	#DQ=.'%N�aVc\0k�K$#N�������еn��»���������������ϧ������ʰ�����ľ��N*HNKCA;52/+)(&#$""  0�������������~zxyrjflpb:n������������������������eT�������������������[$|7PpH��hD828:<=ACKQOX`U %*=��������������=%c������GAfD2!"''% !$$ "|���������������������~}|}{xvrrqnn[	"�b2gym_6h��^/8����ɿ�������������������������������e!9C
+}X?>(=I8DJ9(H���{{�{|���TQ�����ak��.#�����uwtw���������������LG�����vl���j~�������zk`q����J���������}zvvsv���������������������" !,342346/>MK4USSSK[~��xKQLKyzxz{|}~}|{}}}{|||}zsH2x�a&*A*)Y>,-312.!!37/55/%*c��xTMCEp�����ripRiT>TeYEGIKSab_T,Inx{|zwywsocZZX_mnljkjhfa].0Uw_SZp~���������s���������������i�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǽ�����������������������rKo��{}y�����NVX\UQSDEDDEF; ">;:843/0O�Pt�XdT#!	/LB:6/($.QssWD<<3//1Q�����RJuW
+6G40+"	(?1.-+(#>bWacfilt�4/;!($XgR7K
+	"CW?.(&Y����[2tsE&%Q��������¿�������������������˺��sx��Ĭ�����ž��L+JPKD@;51.*)(%$"!! !!0������������{yzxqifbab?p������������������������cU���½��������������]$�;TxP����k]d�����������'(+<�������������ɚ ,������EBqH4" #%&$ " !(�����������������������������|yvv_	(�^-$,(h��J3^q9����ɿ�������������������������������|aA;
+�XGYQJE776. R���~~�v�����\;B<6@a���%!����l[_GjZ���������qI]��CI������[jy[S������sjlw�����}?�������zxy�urwq~���������������������$!@P@DS< 6wvlQ.245785%.KJ41TWX`���dNCKRLJyyxz|z}|}||}~~||~}{zpE5v�^")B, 'V@,'*''("".4489:'/e��vQJCIu�����onrRqf]Xkb;@EFUbd_R3Qqz}~{yywrmW^\\ckmljkjhfaX-0\|bVWp����������t���������������j���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿĽ�����������������������mJ r��y|ytoef}QWbj[LgKFHGGE=#!<99852/.162wiB4" 
+._G<4/)$
+	.LhiMB<=4./0I�sU���izl	5G5.)#
+.B./-*%# :ol��{ZNG:-<!V_.k?&
+!De@/("I[;6AC8>]T&O�����������������������������̽�ͽ����������ƾ��I*KRLGB<61-((&%%""%2������������~xvwulgdgmc<o������������������������\X���¼��������������W#y8FsR���zkbh�����������$).=���������������O�����C>mH3"$$$$ !%������������������������������|{xy]	0�X-$)'n��F.>����ɿ����������������ÿ��������������YK9 
+�E=E0&X��������������������������u��v�|�����nHAa���:I��������������fcu���������y/~uhej`[a^ZZ[^^\|��������������������~"'*Bl�L$@//7 1689:75"6�2UWVX���YEEPOMPzw{z{}}{z|}|{{{}}{zs@7|�S,E+ 1g>*&%"%'"!.359::(2f�oJG@Kw����~]loVlocVmo@:9@Rab^N0Qpzz|yyxsqe]\VXXjlkkjhgfcQ+/\vaWTn����������q���������������n�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿�����������������������pJ!c��y|xrkiv�TTaiZVrLEGHHD@& >=<:51.-/.3r�h�t$ 	0kL;40($ 	*Ju}M95@5/-/_���c:#4*	
+=G6.("
+*:/.,)&$6}c"?(/="n>	RJ
+	"Pp>-('e��������m&"O��������������rx�����������������н���������ƿ��E,KTMHB:64/)('%$#"   (0�������������yuvulidgka<p������������������������\Z��¾���������������T!m2DhV���d]X`o���������b$'+=������������ý{U�����??dF3$%&%# 0�����������������������������|wxz|Z3�T'%)&z��GOX?����Ⱦ����������������ſ�������������}[W; 
+~I(%_�����������������gL_��'����k��{�o���4#?~�������2N������r\S^����uhdp��������uC��wwzqrkfd]\_]\���������������������u!7jtd7"(J)6 /78:887 2+&5WVSPA@h��aMOMS{yu{{||y{z{}}|||{|{zt<=~�T.E+BpF*$%'+&!%4:7:95 2j�~vNI>Jw���tfjoafgZOk_D;8DJYa`O1St{}}zyxui_cE>IQjmkklhhfgW)0[t`VZp����������o���������������n�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¼�����������������������rM#i��y}z����|QT_d\TYGFFHHD;$!??=;72.+,+,^xLfQ 
+0hS82/("
+	*AYlN94<2--.ek4%36?^	8A70*%
+#50-.*)#-X��WIU[h}7.6OrVe+
+	%OZ0+&$f�:+Y�4O]&"P�������������t]��������������Ü�������������ž��A(JRNG?;4/+(&$#""!  '2������������~zvvvkneboh6m������������������������WV�������������������O%t4Kg[ùb% %++("$'$+A�������������¿�9}�����<:fH4$#&$! *������������������������������}}}~R=�W($("���64%B����ƽ����������������ÿ�������������yW]3
+'y<n���hUca������fHC?\v����������������F%$P�������(L�����ni��w_������~m^ac����o:����zqxuvuvomeb���������������������l &FMYy_/"1797893'^pH6UXVYb^ZP\OLPAP|yx}z{|xzxy~~}}}{}}yq9=��T1D+<jH'##**& %6<9<80!7h�|uMH=Nu���{ijf_g}lSN[DD:;D>J][M0Uux}}{xtrXjjIA>Vllokighf`V'1^r^U]o����������n��������������r�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ž�����������������������tM#`��z}w���vuSUafXJ[IEHHHD;(%B??;63/+,,/j�k}j 
+/eM740(!
+):FQC74=2/.2q���������	
+3851*%	&62..+("8��{{tjaT'.7" ?S;.@>
+ IO0+&$DI,U_&!P����������Įha��������������Ѷ�Ǣv��ݮ������ľ��A)OUNGB<5-,('&%#!  +>����������~|xyxoqfln9q������������������������QT�ʿ��������������~�I.v5FhU��W!%+F���������ι�����:T�����<6dG3$#%$"! .��������������������������������~�N
+5vV(#&!���0#+S����Ƽ����������������ľ�������������xQ`0
+0�<u�����������;<I��������)����������������a7$Ba���#(Q�����e����^����������z����db|mC?S]YZ7QYTMP���������������������^ #/&6QS!&(+,&"2686881;dU68RUV^�����ULPHW{yy}zy|{zz{}}|~}|}}yq9>��R.@* DrN*! "! %==6:6+ 9o�}tFGBRz���sge[tVVFDGQX`_\Z[X^YJ0Tv{}|zzwskmV3,/@Vklkjiid`R$4dp^V\s����������n��������������ws�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱥ�����������������������vN!_��z|vrf\_�[UemZQzQEGIIB<)"A@;8510,+,7��WfV 
+-SB31/&!
+(=HRC7282/-,ATk��zMHej		0952,&
+-B40,*&"&##!*5#V���tm{quZ
+
+@P;,%!U��������b!(#R���������Ǫjq�������w�������Ѿ��Թzg��������Ž��B*KPNFA;51-,((&$"  /C������������~}{ztprbbxe=s���������������¿�������N[�ɿ���������������G.q2GbDNH1%*=��������х7$($ `����68^F2"%%" ! .����������������������������������J
+?zQ'$% ���4X����ƻ����������������þ�������������zL].
+	6z9���sJ}�����sR:Lv������(����oKS2 f��������Q%"t��"U�����k`��we���������������f5��m*=[^We^[VROD���������������������R#'g����; Bwn`;#.25465-?��X=QSSRD==EeRNQJU~xuzzz{||{{{||||}~}wn6E��O1?)!QmS("!&<=760':p�}qFL@Qw���sfdMZVSJJL>XONLLM^aVI3Xpz||{ywvsoXOMOVYhjiiihc`S%7dl_WZt����������r��������������xw�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǻ�����������������������qO#e��v{x���~SSgk[XnKEFIHC;'>>:811/---9nq`yl  
+'F>3//'"%?HWM?.950-.6S�P.c�|i.		1C;4,%	*@30-(&!/@#4=8gu3E
+
+@I7.%!>id��jGAV= '$R��������ȗZy������~ga��������Ƽ���śjg������Ƽ��>*LRNGB<61-*'''%"    !""6 C������������~z{}ujigacS8n���������������ÿ�������HZ�Ľ���������������B1r1K^C=3('.@�������ݞ7"$&"%y���59WC2# %&#! &����������������������������������I
+G�W"$%""���1a����Ż����������������¾�������������wII*
+;u7������q���������WDOj���
+,������qt�������L.8c�����%_������\NPa���Y]befdbY|���aE��}Xdhha@3HY]Z[�½������������������K"( !0)).81*#*+./.,'0K9%@SUUVSLST�dJOAR|x||{}|{zz{|||{||ym2G��Q7B.$MbU)#0E@71*#;s�|rKI?Wx�������}{}ttononlhdif`YI2Zt|}{{ywvsssvtqpjklhghhe`R";ep^TYw����������v��������������wz�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƽ�����������������������rN'^��w|vz��x�[P[aWR\EBFFFD?+;<973/-,+-2Uo]t\! (Q?4-0'# #8C]Y@2?63/.S�zZ\dlnp~	
+4K:3,&	
+*E0.,*&"-:#!]v^hJLf2
+	=C5,$!Dhp��\Se�Z )&R����������������oZi��������Ͻ�����do����Ļ��3)IQMFC:40+)'%&$#"  !" "3F���������|zwqsxqihqomY;p���������������¿�������H\�¹����������������?:k5EY>=3(%-I��������f(%"!4���6<V@1##&#}���������������������������������H	IyV"$$""���9(ZAm����ź�������������������������������vA F&	Kl7����������������uDBE��y1�����Bh�����n#V��������&_�������������������sot����UK��{hnrwY5)7Zi`[���������������������="-WLFJL*59#/&!(**)*'"1UI,CSTWh�����`FH8\�zv{z{||zyy}|{{||}}xk4K��K4@,% Qo[!!!=NF7)$">v�}oDJ;Uy������~{zwspnmlkjigfe`\F1Xr{}|yxwwtsrromlllljhhhf^K$9qt[W]v����������s��������������v}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tN(^��x�w���~�[Ubs[RmWAHHFE=-=>;7401/,,;}�LZV! 'Q@50/'$	$7AWN<0;71.2w�{�zymq�	
+3G74,$	'B0/.*&",8$P�7:_f<
+	MM8+$"P�����sn;!)!N�������������������fg���������ɽ����ڝ��{���ļ��/-NRLHC:5/+)'%$##" !3J�������}{qmlec_d_esqnV8p���������������ÿ�������F]�ø����������������?:f8EN<<0&%,H��������T' $KMEG$:RV���1@UA1#"$$! &:����spuodiw�|��������������{zuro0	P�S&#&'���=*WV4#u����ĺ����������������½�������������sA'P#
+Ws4���uibcr�����nUH[u����k-������nO}����Z@'Au������*]�����gTQS]q�������yt������HH��{mw|a@+$.Icib�½������������������6!",k���},-;3="")(**)(NwW- EWUTH<:EX�]JE6`�{xz{{|{zzz||{{|}~|wn2K��F4;*!,^zc.MSNB% !Ax�zoFLA]}������~{zvtrnmmlihihdc_F4^tz}|{yxvssrrolklmkiiiie^L'=lh[Vay����������w��������������v�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tN&]��z|z��oldNU[aSQhO@IFEC<-;<<7531/-,2fj;QE!	%E@6//(#	%=PeL;3<70.1W�a*)<jh	/?/.+&	!82.,)'"*5!J.:?	OQ;,$ P��C5dre# '#P��������������������a����������̲���Ӡ������Ļ��1,LRKDB:6/*'%&%#!!    !#5F���������������yrcjuroO7t���������������þ�������B\�������������������95c.EP>;0#'/F��������['[��²3������-ASA4 !$$!",R�վ�������������zmcaMDK[TRQI>;:>D
+LzL%&&3���$"&$}����ź������������������������������q;,O#
+
+Tk-!���~�}�|���i2=f�������^1����eUQ5-x�������U+5���)a�����XyxrpY������q~�������@O��hbflA00)-2Q`]���������������������/  ")( 37O<'"'((()'Azl6!BXTWj��Q:kRE>4^zzxy{zy||{y||}||}||zm2R��E.6(,\rf#ETWWJ#"$C{�}nDI>`|�������{zvtsnnmlkiifb`\C3]qz}||yvurqppolknmmkkige_L%?okZP]w����������w��������������n������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ�����������������������wQ'[��z|{wrfg\SUXZJKnQEIGEC?-;<><52/.-.6p�_vo% 	'NA9-4'#
+!6CRD;8D90,,/U�����]
+*:1-+&$85/.+)%+; G�~}}~�R	EM6*''u��gF����J"+#Q��������������������a������������º�Ь������Ż��3.LUOF@:51,*'&%$"     !'5H������������������|zvqU9t���������������ý�������A_�����������������~�9;g/HW?;/&&/E��������~#_ʽ��$|�����.<SB3"#$#!  'B���������������������������{xx~���5MtG%'&9���&U3&�����ź�������������������������������p8,I$
+K_,#������������uY@>Ou����R8��������������������~��|-f�����c����c����uy���������=R��8MY]6KhTP9GR[���������������������)"=:^{X4M?)!#())''#!DZH""GYTUOd���Y:?80bxz{z{|z|||||~~|}}}yvl/S��=25&@dmo"3][VVI+(*%D}�jDH<d}������~}zwvtqpnlkihfca[;6`r{}|yutqnnnolkmkjhjihe]H#DohZT`}����������s��������������l������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������;�����������������������xS&R��y|ztmjdZRS[eUQgOCGEFD:*:<>>720...3nl&<8 'KEJ23(#
+$77HB<3@8.+)+,$(46-#
+$61/*$	#82.-*'#.<!5R>N}e2)
+DI7,'&h����f_��A"*"M��������˵sVGKw�����p�����������������������û��1,QTOF?;51-,)&%"#"  !"'4J�������������������|xu];t���������������þ�������?e�������������������9>t1DX>8/$&/H�������۶R9Q�û�O=������*?U@0##$"!!5����������������������������������=
+QvH%'%9���<*.�����Ĺ����������������½�������������l4.G"
+R]((����������������jL3F��G>�����{{���������������n-h�����Zp�whk��qejqzrrnp����7\�s1UfSEjjXmHET_���������������������!&$/9vBB$)NJ*$')*''$ c��: IVSTSU�g{u<874d~yyz{{|}z{{}~}z|~|zwl-S��829%Dm��9*EIGGF;-*#D~�yhGI?e}�������}zwtrpppmjhhgdaZ?<_sz~}zywwtppqpnlmkjiiihc_F CkdZRc|���������~z��������������d������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�����������������������xT(Q�y|yrmjdYTTXYQRhVEGFHE9+<<>>630000.\�jzu( %PFJ+0("	3:PPD496.)'&#
+	&<2-)%	"2,--,&!-<!(bR.3
+	=C7-%,dskG'OD *!N��������ĕ����������~�����������������������Ľ��/-UTLF@:50,*'%%$$"!!#(5c������������������}|xv[:s���������������þ�������>f�������������������7=e3K[<;."&/E����������˘�xy���������)AOB1% #$"  !v�������zwsuwxyuttc`gcblqiegUTYTWR	XvH#('>�ʎ%D3:�����ĸ����������������½�������������l33C$
+_Z&/����������������������<I����cX`SD���P! '3��e2p������f]^p���~|wsttso����2Z�c+R]:F}vavL3Ng���������������������*(Hhi3L% AF?3%(*(((%IWTV^���iD7566f}yzz}|~}|}|~}||{}|zwd%V��774#":[_4%.464-*'&!E|�zcBH<g������|zwutqpomjkifc`Z:;hz|~~{zwuutqppnmnkkigghc^?$GwiZO_{���������}y��������������b�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vR(N�xxyrmid[RPVWTTaWCJFFF;+==>=71/2@YQiwXeb 
+ =DE,/(#	1:KRE7850+(&!		&81-*%
+
+2+-,+&!.? =p��fauu�`	8A6-"$+"M��������������������������������������������ú��.-OTME@941-*('%$!  "#)< pƿ����������������}yzsW;u������������������������;d������~������������3Bc1G\:;*'.I��������ո_( )Y�������&AI?0# #$"! &nvljjgb`MGMROSY\VRKEEBBGDDIIID<DMKXjA &&E�Ă2X9�����Ĺ�������������������������������j.9A#	fR"6�����~x�����z��������6G����i���vu��������@J���^0t��������������������������0T�i#0A?Mwm^zO78a���������������������',8.&' :*&)()))$ CURVe�yM630/17h|{{{{{||z}z|}}{}~~zvb)Z��562"!1D<! &+04::93.--%L�{bFG>i������~}ywtrspniikjhe`X9;gv}~|zxxtsspopolmjmjjigd_9)GsgXQez���������{y��������������d�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yQ)K�{zwpkhc[SQ\`aphZAEEFA<.8=>:6202U��~wpqs"  	;A=/.)# 
+27BED:<7/*&$ 		&:1-*%
+2,-+(%#/B"Euh]SNNA1*
+	<D6+"% !+"O��������������������������������������������¹��--OTLE@;4/-)(&%" !"'9#o�������������������t{tT9t���������������¾�������8d�������������sm|���2D^.=P<8*'.H���������X# #&S������!@G?." ##$!#N��������|pie^WWVRE638:ANSY^`^_`eh[eo@#&%H��y;>�����ù�������������������������������g)7;	pT?¶���������^EIHKI?=O��+N����X���m}������c@g����U6t�����\]NZ]~���������������.P�Z(Lj^LncWpKVQh���������������������%)85dm?0!1((,1)%$(niC GURSLG;:71/,-5e{z{zz}{zz|{||||~~}yu\*[�/61 2Q{|oXEHW_`bcWPNJA(L��w]FD@l������|ywvsqoolkkhdd_U4?hw|~|zywurrpooomlllkhihc`6'JohXPf}���������w~��������������d������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¾�����������������������vS+K��xxyrlhd[TT[ajmm]DFFDA;+7==:62/1A�NF`Q\h  893--(# 
+	
+01?E?7?6/)&#		$:2.+%
+ 2-/-)'#.I!*6
+>C2(!"^�Y4"#+$Q�������Ͻ���������������������������������������,0PTMEB<3/,('%$"  ""(.q����������������������U5t���������������¾�������7d�������������ekxuqt*
+C`.?I;8+&.I�������ڌ(&%������%DM@0 #$$!>������ysaimmnlfozmjmhmjkprqilifeY	hi<"&&U��m2e"E����������������������¿�������������~e&;<]LHŶ�vy�����������cFf��� P����f8ABi�����z>K������R2t����~_yUocf��������������~+X�`?_S9Aud`o2?Yr���������������������# /.Fr9B6&')1Wl:'5GW$!GSSSOI@8520-.6j}x{z{}{z||}}|||}}yr\&\��,3,@_svqld`cb_\[XTOKF&P�w[DFAl�������{yvsoopnljghec_W4@ex}�}{zzusrooopllkjljggd\1+NufXTg���������t��������������_������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¾�����������������������vU,I��zy|slhbWTSQVXNPHFIGDA:+<=>:42/1D�VP�ohj( 130,-(# 
+
+
+.1=F;3=6/*(%	%<1.+$
+51/.+($+F<osqqtxz`
+EN5' B|��t]9Ei9","O���������vc�������������������������������������+0PTMEA:3.-*'%$!    !#+E"w��������������������z�\;x������������������������4c�·���z���y�����h]_ 
+Ab1@I75)%.K��������X,^y[o����{"DH@.! %%$!:��������}���~~�yvxvqlqiheWSYX\O
+ff:#&%Z��y&$9K����������������������ÿ�������������|b";<	eN T˹�������������RP�����T�������������a@n�������L6{����xu�d�}g��������������x(f�ihzUIe{cftLBTx��������������������|!.KkQ:@-82$'-2l�F'-rwR!GVRTRM=730-,,7n{wzx}}|z}}|}|{|{~}zuY#c��-2+;[rxtsmfhf`[XYWQMB$V��}W@ECr�������|{wutppkjjggge_Y6Agx|�|{ywutrrpoplmlljjjhd]4+NsdXQl~���������u���������������e�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������­����������������������wT/G��}z{rlgaVPSRQPJGFEHFFA<+<>=;31/09eE9xhK(140-.(#
+
+.5>C:/:8/*&$ 	&;.+'#	!2..-+($'7[�]Sa�>:ER
+	GP4'!M��in��y�J"*!Q���������÷¿�����������������������������������)0STME?94/.+'%$"!    !#*Cw�������������������|p�c;w���������������¿�������1e�־������qeo~��jjk%	Ab1@H32(
+&/K��������B/;³��-|����v;B<-  %&#7������������������|}yvneh_aa\YWSS@d[6#&%a�ȃ,PWT����������������������ÿ�������������}c!=2!hG^ȶ���~z������mPm������V����dagag���886,.0L��C=x����wu�t�yk��������������s)l�`b��~zwS_vpkf���������������������s!,'G;!4EO>)..D1%&1:?$DSQRQJ=70)'(*9n|wxy||{{||z~{z{{|}ztZ(k�x'0, 6Skkhcb__^[XVVTPJ?+U�zXGEFp��������|{yvtpmbjiifdb_Y4Dkw}~{zxvvurrpnnkkkkjhhgbZ2*PmaULn~���������q���������������c�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������«����������������������xV.C��zxyrlg`XSQTRLJFCCFGEA;.;<<;61/0A�fI�[^d .52-/(#!
+.7CC<352-)'$"	%8/)'#	!0//0-(#%1_�5�#/N	FQ1%!9r��uR/9p=")&M��������ɠqiYQX���������������������������������*3PRLE?:3/.*'%$##!!   !"$'+{�������������������ys�d=z������������������������0g�г���t�l_gwz���pnb	Dc-DJ72(%/L��������<(Wի`p�����t >C<-!$%"5��������||~}����|vzurotqlhe_[STO9iY3$'$h��e69R����ʿ����������������ÿ�������������{_>0)q@cǶ����������H@z�������\����acM.I���yy|rqqtu��:>u������������������������e*p�`Ehq`QiS]YPec�¹������������������d#0E'I:' (,1psR'*rm\%GVSQKB;6,$!!';n}u{z{}}{{{|}{{|{||xtT)h�z'2.!9Vuqke]ZZ\ZYXXYM@2'X��uSECGs��������}yvvspjPjifY`b_Y5Fiw}~}{yvsrpqqomllmligjgcY1-Vz`VSn����������p���������������g�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yY/A��{w{smf_XTPUSMIECDHIFB;-9:996201I�LU�1S�'
+171+.(# 	
+.6A?=143/*&#
+#6-&%$
+1.//.($%3CX3)	R\2'!!^�a? $vO )'R�������Ϸqt��������������������������������Ŀ���'3PQLD?930,)(%$"" ! !!# {�������������������wq~Z;x���������������Ŀ�������-g�Ŭ��wu�flx�����zhX
+L`/?J82(%/L��������B'YԨ"7�����s!FK=/"!$%">��������vurqrtxwwqkkihefhfe`Z[WVXA	k^2&(#q��L7K9U����ʿ�������������������������������|^@.	
+,f<nɸ�~�������A3?FB@BFj��e������Xe���������������,<m����������}��~�~������~yX*w�X(8@35aokG,>I�ü������������������\$!I[TXV<$2(&!!'-4OB6%+17#KVSPH@;91(# "<s~v}|z||{z{|}||z{|{zqR)i�w(1. 9[prjdTRZYZ\XWW=-%%^��wRDCFu��������}zwwtofMkjeQfbaW3Fkx}~}|zvsrpppjhlllkjigheS1/Vu^UVn����������p���������������n�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{X/=��~u{spg_YTRTTMGFEFHHDA;06=<:7412>op��.e�(	5G7+.*$!	
+-7=@>26<4*&$
+!3+%$#	5/2//*%&0#"(6EQ
+	NQ3'$"NU;=EKQWR!*!P�������б����������������������������������ÿ���$2NOKF?840)'(%"!!   !" {ĺ�����������������yqxT:w���������������Ŀ�������+g�ŭ�u��bo�����~ydRET2<F60''1Q��������` CΧ*s�����s"GfE/# $$"E��������{toosrwywrjgfeddeghc_a_^[A	(kY2&'#w��H9B4_����Ƚ�������������������������������x[<+
+
+8Z:wʸ��������������������l����fKi����������������# :mzz�������������~}���}vrU%~�R,,@46_ocB3:B�ƾ������������������L!!B\Q_c?)ESK7")/@�|F$,tx\$KSRQJ>97/)#!At|y{z{{|xx{}}|{|}}|xqR-m�n'80"6Wqqh_VTZYZ[WUM,'$&Z�wRICDy��������|zvvtrWHkfUKcb`V4Hmx~~}|yussqppb`fkkkjigdcU.2Ys^TNq����������s��������������yr�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{V.:���wzsmh`YRRTPLGDDEGHEB:05>:8522/19Vc=#Iq 
+5G8-1+%!
+3>HF@43@6*&$	!0+'$"72300*$%1A������zsQ	MO3'#%g������{�S"(O�������Ͼ�¾�������������������������������¿���)4SQJE@83/)((%#""! !" |�������������������zlrO9x������������������������,c�Ů��q��`f����ylbS@
+JZ.<A4/&'1N�������ڊ$'��7{������o CY?/"!$$"F���������xsx}|{�~�tolkgjlllgc`^\V=	-t_.$&%��A/0b����Ǽ����������������ÿ�������������yW<(
+Da9�˸��������������������	m���j%*67T��������������(M��qt|}{|�����������sxzxuq�e+��N,5E4'>]J/1@J�ý������������������C  ?6%:L>/#(+/Q5,$ 32)$JTQPM8.,(&#"%Dt|xzz{z|{wz{|{|{{{}yoQ+q�m%80#+Ound]XXYYZXXUI'$$$\�sJFCH{��������{ywuspNEjkGId^ZS/Hoy�~|xtqqqqmSSNXekjhhc_Y+4Zp[VQu����������p��������������xt�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������­����������������������zX-;���z|tnhbYRSRPNHDCEEGGD9.8=;8621,+.+(!'+)  
+184*/+% 
+
+1@SP@758/'$#	"2+)($1120-*$&3)9*LxJ
+	QM4)# +7.*$kP!)"O������������������������������������������������'3QSIC?830,('&#""! !"|�������������������}tfK@w���������������ÿ�������'e�ɭ�����rpz�mYTTPOA
+Q_5:=4/%%0N�������پ���Č���������j!DJ;0"$$!L���������y{�����yuqpnqstsld^[VN4
+3yZ-&&%���<GlDl����Ƚ����������������ÿ�������������wR <(
+Kp8�ʹ�������������������{
+p����mssrw�������������})P���mtuv{�����������|nqomy�k,��G)>QA(4G9(7FO�ñ������������������8" (!F/$:5@("(-@kA*#:��Q'HSQLC72//0-,-Gs{xz{{z{zz|{{{zy|{|xqM.p�o :-"&>h`XZUWWRSTTPD0(%'b��vHEBM|������~v{ywtrSNgeDNbI[W-Koz|zxusqm_H=BAK_kjggd^S)3ap]VVt����������l��������������zv�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������®����������������������yX0<���z|wngaZUSTPLFDEFFEGD<.9>=;60/,*+)&$%$"! 	063-/+% 
+
+	/9UcB954-'%# 
+"2+))' 
+!2/./,'!$2 9{X'0AF	IE3)$,2-/,.1<xN",%R��������Ǻ���������������������������������ý��� 3PTMD>851+''%%%"! !"!{�������������������xnBAx���������������ÿ�������&^�ƫ����rxg[cMJWY\mn	PU/9<3/$&0M��������dQUYVLKJAG�����d#CO:1! $"V�������{nhnz��|�z�xnnsvxwvtne]^ZS3
+	0w\)&&!#���5.+o����ǽ����������������þ�������������vK!>'	;U4�Ƿ�������������������p	v���yu�lVt�������������x,U���rfnp�������������eejo��f'��A8GMK50,35AGT�­������������������1 #HG2'&%',7iL(!&45#+LTRI>7333420.Cy|yz|z{zzz|{z{ywyz{voJ1p�l#;+!">iaXXUWWOMOPMIA<.'a�yBHEPz������{fJ]rwrpXZm\KXbL`Q-Lpy~}}{yvtqmf\=<JikjijjdaU+4dmYTSs����������m��������������yz�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yX09���vxuog`\URSPJBCEHGFFB;06=<:60.-*)(%%$"" 
+	0<8--*&!
+	
+.5BN@3/1-(&#
+"3,,*' 
+0,*+(%!%28���w|�~yV
+C=3'$$o��������S!+ M�������������������������������������������½��� 7QVMF>;61,(%%$"" !!!}�������������������}yuL@z�����������������������{$Y�ϫ����_m^Zhdr����r
+QU1=?5.# (.P��������A$!",�����`!?L=1!%"]�������zqlnvuihlfd`eiknkddf\WSMSQ1
+	2n^)&&(���+t����Ǽ����������������¾�������������tJ%?%2C/�Ĵ�������������������cp�|yR_q<4E������������{k.Z����^gu�������������s^c}��b&|�XDJORNB3AEEHU���������������������+!(6SI3(529+$*.6F3%!W��U,OVSK>33222/.+Eyzyz|||{|y|}|{zw{|zvnF7u�k 9, 6Ufd`YWVQPNNMNJD2.g�xBJCV|������j;KYSjroV[lLHSKO^M.Ln|~}{xvtqnrmK=Ndjkghhd_N&9gqYTYv����������l��������������v��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}[23���vvtohcZUSROKDDFHIGEC:/8>:860.,**('&%##  	+><0.+% 
+	
+/6?E;0,/,)'#		"4-.-( .*)+'$ &2&C;.()$ 
+	<;2)$'}�HEh�96|T")#R��������ĸ��������������������������������������6VTMF?:4/*'%%%#  |�������������������yvsJ;y�����������������������y)b�ӵ������������fX\>
+QS7?A5-#!(.T��������9) !3�����^"?G?1#$! ]��������zxuz|wppje`cdfc`^]d\ZUQRO.
+	6kX$&&!'���"y����Ƽ�������������������������������qB&;$	EU/����������������������Zs|wqInbPqH}����������}|f1\����[^���������������R^���U-��O1@TY[^RTPKFT���������������������"#"!*?huc$.IFK(%*3S�L%)<8*-NSRNB34211/+,Iy{yy{{{}{z|}}zzy{{{vmA=z�g 5) 2MGZ`]YUSRPNOOOJ42m�~tCI?Yx������uVK_KSbgTYlPOOAP[M-Ln|�|xwtsroiaRHM`ipjihe^O&<emXS\u����������h��������������q��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������°����������������������}\31���xwsnjc[USSPIGDDGHEDA9.7?964//.,+(('%#  	+970.-& 		04>H>1/1.,'#		#6/.-( 0**,*%#(4
+	6;5'$&\G1EZF$*$O�������ЬYVPJKT���������������������������¿����7RSKG@94/*(&$%$!    ~������������������}vtrO<z���������������ÿ������u%`��Ķ��������sRO@O]E
+KQ7AG2+" '2T��������B/!! ?�����]#AIB1"$#"!S��������x}��{wroif]]YX\bjjg`WSRLG'		@rS%&%+���!%����Ļ�������������������������������q=(:"
+@Q/!����������������������I!oypm5;DXfJx|������~{xtY-X����XQ���������������IZ���M1�s3?2H]]^YYSH;X���������������������*"-kxkE,0$'*3Q�F%I}i3,OUUUcR875431/L{|{wx{{}{y|}|yzyyz|uk8<w�]!0'
+/S:Qi]]][YWTRSRO73l�|nHHA[{�����s`nTkmV]MUT_ZVOOX_J/Pr{}{ywutrm^P^XKZ`mljfd_P">iiYMXx����������p��������������r��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ò����������������������|X2/���xztoid]USROJGFFHHEE?805;77610.-+('&$#	'55.-+%!		-2=D>1/40+&"	":0.,&	1--.+&$'2		/4/&"$O`[`Yadi�L$+#P�������ϸ���������������������������������¿����6PSKE?740)'&%$#! !z�����������������}ztqoL=z���������������ľ������xX���������[SgdRUW][QOK2>D/) &6U�������Խ�����C'������Z#>FA1!"##!U�}���{xtgjfnskbhe_XSJICGOPND9:>?:		FnJ$)&4���!'�����ź�������������������������������p8-?!BO/#����������������������=injjR3Fdfcswz|~|xvqlL+Z����[V���������������E]���>/�_0W4)O^WT\R41`���������������������*&=!+..5#(*-7Z5$2B8.PVUThU9:74452R~{|x{{{{{zz|{yyzyzyuk4A��\"2)	/V8.<Ja``^\[XWTQ91k�|mEG>^}�����z[J@YM:NEPUXZQOZT]G1To{~~|ywvrk`KCR\G_afjhec]L"@mjZR^y����������m��������������l��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yX21��}wzvohe^URSOIGEFGHFG@74398630/.,)'%&$" !
+(<:.'&#!
+	,4FG=007/+($ 	720-& 
+
+3210.)%'5
++2.&""f����zru�L$*$P������������������������������������������¾���|6SSKE?71.*'&&#"!  x�����������������}ytqoK<z���������������ÿ������x`����þ��\KYjddd__ZQPR/=;.'
+'6V�������������Ռj�����S&=HB0"#" d�����}zpdb`cggXTNJJ?;;BB?@>67;<>< OvI"'%9���)�����ĺ�������������������������������n5+7 FC.(����������������������9jefffbdghkoqtxzxzyurojf@+_����WO���������������=\���4=�?1d_VV7&$3MIMj���������������������)&@axP$<MPB"(*-KgI'/bT)1LUTW}�M988763S�zxy|}}yzz{|{z{yyzuh0?��Q3'/Z;;dbabb^YVQP83p�zjBF@`}�����wZa;IH72HDFTTHX[W]H1Tpz~}{zytpdJJGMTCY^_ghec]J'>ocXN[z����������r��������������h��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z^5)��yvwunie_WSRPIFEFIIFE?95299631-,,*'%%#"!! '==0&'$!
+	)0DB73/1-*&#!		42/,'!11110)$)8		+5/&#*U���n[l�1$(%P�������Ϲ����tn��������������������������������{8OQKD>60.+'&&#!   x�����������������{ysppJ9z���������������¿������x&r����¼��aY`dZ^cix�|MF1;8,%'/W�������̡�����[>�����R(=LA- !!"!f�������|rmkpmnkg^WRNNNV[ZWMFAEGB@!	QvG#$&<�ß.�����Ļ�������������������������������n3-0	D@,,����������������������5$rgccb^befjnrvy}{{wspidd<(c����IP���������v�����0X���4Ot!7ktxpZTHHZd`k���������������������&!*Z_]h;0*63(*.>H7'(^y]%0NSTNBBQC9:974S~{{{}}|}zz|z|{{zyxsi3G��I"/&6J9KYXa]]XUUSOJE39t�xi@G;a}�����jXT>\\N@>HKWN<^[Z^I0Uq{~||zwsqkTJMTTFY_[jhdd^M'?pdTQ\|����������v��������������e��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~\5&��yuxwoif`VSSRLCEFHIFFA8.189840,*+)(&$#$"!%570(($!
+)-:H?911.*%# 	54-*& ,/...(#'7
+
++5/'!$gvG:_xfC#+!P�������άYZ^YYd��������������������������������v8QQLG?70-+''%#""  ~����������������~yxrooG9}���������������ÿ������{+����Ŀ����m~��������YM584*" )3T������ؽ4,$%$;�����O)=S@) "##!Puvvzzwvp^`_]heffe[NOTYXVRNLC@GK==	NwF"%&F�Ț.�����ù�������������������������������l-.-	HE(9����������������������))xpiea[^bekt}�����xqhfl>.h����N[��������������|>d���5XfOSivqxturlnb_q��������������������'*7%81 %-'*1[o0& i��,1QSX^q�p<78761Yx}z{~|xx{{}}|{zyxuj0K��J#1$*AIBnjhg]VOKKLJHB3;u�xkBG;a�����vxejdLC>ADCSC9UWZ\F-Xr{�~{zvsqmYTZWXPX^^ffeb]I$FtbWR_}����������z��������������i��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������°����������������������|[7%��{xzwnif`WSSPJEEEHIDCC;02;974/,++*)&#"!!!$13,(&$ 
+,-2A?:10-)%" 	41+&#	+++-,(% &:
+091&!4L=558<FnC"+$O�������ξ��������������������������������ÿ����u8VTKGA82-)&&#"!!   u����������������{wqpqG>}���������������¿������}*�������������������SF575+$
+'6[������ټ4+!! X�����L,?N;(###!MxxuyupnbNYXMRSVNHDAGKFBDBIF;>77<7	Ld> '&O�ɑ6�����¸���������������Ŀ��������������f*.+
+F=(C·�������������������� $�xrldccho{���������yspv<+k����Ta���������ok����=s���+WrV=+-3Rerr_U;Ku��������������������x'//$$530<-'*-AR/&!<@45QUWe���rL:872Z|y|{|||{z}z~|{z{yytg*L��F$0$,JXgtnkld\XSOMJHA7>u�yjFG?g�����p^eopUgPKLE@HDA;BWZF1[q{~~}{wtoTYktXLVXX[agfa\D'HjaWKc~���������|{��������������d��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ð����������������������{]5(��}wwrnidaXRSQKFCEHHDDA810:963.,+*('&#" !!#24+*+'!
+
+	(*/574,-+($!
+0+&$"
+**,.-)$$@
+.62%"d��������@#- M����������ÿ�����������������������������þ����p9SRKG@93-+(&$#!  t���������������}~zvqprE>}���������������ÿ������v*���¾��������������}NB377,& '6[������ݳ$ ?������J,?G;( ### Lnoouqnhfed]_e```ZY^]XV]\UQPJKMKJB
+MV9!'%V�ʄ;�������������������������������������c&/*
+MB(D����������������������-�|yqlmu}�����������y|=2m����Oj��������xFH����C}���\����~~���pe[]|��������������������n)8{o/C9 />H4 ')3b|G&,���*8SUTFU^UO>7656\|y~|{{|}{z}z}|{|{z{vg-Q��D$2%2gwkihhkhbXSMMJIF6A�xhFI@h����eNd{fSrYWNHTPE=4@YYA2Yq{~}}{vsjQXjoW<RUXV\ggaY?)JraXLf����������z���������������f��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������±����������������������|^6&��{xuqnkfbXRUSKFCEFFFD>75296530/-(&()'%$#"! #/2,+,&!	(),470,.,)&"
+-,&$#	++,-+%"%= -51#ISGDO��tv?$-"N�������͸��������������������������������ÿ����p9PRMG?82-+)&$$" r��������������~|wunke@=|�����������������������q �������������������vNB285+$!'7_������ؿqfXTOENw�������C*EP9)"###>VZ`cbb_YZ^VZ\afeZ[fe^`bdd^WSTVUUI	Qd< &%X��|<����������������������ÿ�������������|b2) NA#N����������������������1���}zw{��������������z|76m����@f��������n3;w��>���v/ozmtkrvktu�~sl_�Ŀ������������������_&9G"-#3>,,"(+1I=-$$!#(ESTRQQNMIA8448a|w{}yzz}}{||}{z{{|zta)U��D$2%#Bfpcdfgihd_TPPMLL3={�u^>J<j����ySHxT9YRLM?bTKELN]ZA1\r|�|yvrhYKUVXQPW]YOdlbZ<$Ms`WKd}���������y���������������j���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������b7!}�zwwqomh`WPSOJFDEEGGC>933966410,('(*(&$"! -40)(#
+&-/793/.+)&#!
+--)&$	
++)*,+'"!$8(2." `����}ZbA%,"N�������ΰ��������������������������������¾����p8QRKE?93-*('#""o��������������~{wsmd^7?~�����������������������io�ľ���������������rQA242)!"&7c������������ǻ���������C)IP8'!#""6PMMQMPOJFF@HLNPQJMPKONWYXOPXYXYZO
+bd9!&&c��x@����������������������¿�������������}f4)	%K8!G������������{��������0���yy{~��������������x~23t����Ji��������b*<x��z>���u3='(!#!#3�Ŀ������������������T$%*,3xt-",]|XSRRQQQNG>616d|s|zz|}~|}{|}|z{z{wb,V��<&2#*i}�rjhgfc]XQNOOLJ1?�y_EF<l����yxs|oX6<GDKUdJWX^W^Z>6as}�}zxupbTU_`UT[e_XHUebY9)Lq_SRj}���������w���������������e��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������²�����������������������_9!��xtusqke^VQQNJGDDHGEA<:5188851/,)(()'&$#! +3.'%# 
+(+08=3..+*(#!
+	0,('# 	
+,+++,*$!"6	'.)!)/*'d�:%,#N��������þ�ƿ����������������������������¾����l<TOJE?73.)((%"  o���������������~{vqld]9@}���������������þ������f_�ļ���������������nSA030&"&7_�����������������������B*II7& !#$E[fmg[NLD>BGOMBADBCDHTSX[ZYYXYSRVNl^2"'$i��jF����������������������¾�������������z`1(	
+/R;!O�~�������tt����������x2���xsz��������������y|+1u����Qs�������uH)H���wE���u2>./$!   %;���������������������@#'*+/8D'"0\�PQPRPQOOKC936c{t{{{}~~|||{yx{|zzv_+U�z6,5$ !!Xxwj^YWQPPHJRNIH0C�y^HD<m���fot|phle\T@I]dBR\_U`[?;^s~�~yxwqSFT`\PNVZ]XCJ_aW7*Op_SKm~���������w���������������g������������������������������������������������������������������������������������μ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ò����������������������|^7 ~wsstqlf`VQPOJGFFHDEB<73199631-+*())'%"#"!!  -3/((#
+	%+0370,1-)&$	/.)&#+-+,+)'""5 
+
+&/+$,�JiR&/!I�������ϱwunbLO��������������������������þ����j<POKE@82.*('%#" m��������������}zvqmhc6A���������������ľ������`X������������������gR>250%$(7a�����������������������;,KG6( #$" "I����reQEE8=ENYQFC>?@A@DFHEDB@C<;KEf^3"'$q��gI����������������������þ�������������{`1'	1M9J�g]dmle_UCK����foi��@8�|�~px��������������|wp7w����Dq������v@*+S���pO���o/GH52    >�ſ������������������<+C9<B?<521346:78$)*.8wd*$2]QROQRQOOMJ>6:e{v{�|z}|{yyzz{|{z{xs`([��6(.$#$" BlxsgXYXOKI>=C?<:)E��uX@EAr���XamuGKR?BGRYC7>OQQT^Y;:bw��}{xumdi^VLAUWW\cFQh^V2+Sl^SNm���������x���������������p��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������õ����������������������}_7!��yusuqmg`XTTUJECEIJFD;851;::40.+*)(&%%###"" $2E:0-%	&,7E@8./+(%"
+,.+&#	
++++,+(%"".
+
+)3.& "u�|A75(,!L�������ϯqlw|����������������������������¾����e;RPKE?91.+(%#!!    n��������������~{ywrqpj-B����������������þ������_Z����������������}~cD9140&#'7c�����������»����������8+FA6( $$# X����������~j[XKIE<<7:A@<?<>ACFEDD3`U4!($y��`J����������������������¿�������������z[7%	8E5
+0J7@K\bemk`_���~[�����6@�u}�xs}�������������wyp!#:~����R�������a-8h���jU���j<ymdezrE',:2,>ZU�ǿ������������������61l�����m]NN]iokc1)*,9wg+#2`}TURTRPLONKD76i~x{}~}{{zyyz||{|{zt\*b�v0)," $#"6ah^]RTSHFI=78997*H�{[HE?s���dVMXA566<=j^F<STKVX_Z:>`w�~zyvoOSccQ8BS]cfOPb`V1)ToZQOo����������p��������������yn��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĸ����������������������~a8��{|uuplg`XTTRLHDGHIGB<72,:9972.-*(('&&%#""!!"1N>4.$"
+%,7I@822+($!
+,+*'%	
+()(**&"#!.
+)9/'!!E\WRb�sk�U%,#N�����������������������������������������������c:SUNF@:3.+'%#!!!  q��������������yyxtpnb3A����������������ÿ������]\����������������ysSS;26/&"&5e�����������ü����������9-D=/#!%$#H�����������������kfeVSKJMJIKHGEC?0YP2#'#}��LT����ȿ����������������½�������������xT3"
+8E3	<jcsg^UU^hkX<JDRYp�po�=C�sqy{pu�������������rtj$:���}M��������o94i���iQ���^#C'._s,.Z\�¿������������������/0q����w^ONR]lnj],)*()1/'4bwQSSSU_j_GIG<8l|yy~{|}||{zy|||{}zyr[+b�v0),!!  (TWB21/577>A<97;8+N�|wUGF@u���V0<i_8-7D4aQL^cXY\]^X8:dt}�zzyteVieJ:=G_`bY_baW.-UmZQNp����������p��������������yo��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ŷ�����������������������`8x�|zuuqlgaXTTPLIEEFGEA;7/+:8982/,+))))($#"""#"1>:3.&"
+"*0:>914+)&# 
+--+(%		)*(*-(%$ -
+
+*3-&"#n����x���<$-"N�����������������������������������������������Y ;WRMF@:5/,($#"    q���������������|xvsolg<@����������������ÿ������V\���������������|toM]<24,&#&7k�����������ý����������8-E<0%"$$$L�������������������wbZWTSPMKGGGA:0'bO0%&$���C#Z����Ǿ����������������þ�������������{S0!:?29dZ]VIDHKV^[cK4auz�}vx1 I�xty~vw�������������utf%:z���yM������{_2(Is��jV||vc�Ġv+#B]mvc]RE9X�Ŀ������������������,.]y���tWEDJP[]UG()()(((&7 5fyUTX]{k_��ELCBm|zz{|z}z{zy}{{{||zpV*f�s0+,4^a@2.8>:897;=975*Q�}zWGDAv���xqfpaAQTUGLT>W[VTMca\7?jx~�|zxsm`e^TP?F]_`Vcd`Z,-[iZQRr����������s��������������vr��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ų�����������������������`<y�{wswukg`WRRPLHFHIHEA:71'8:862-,-**//&%%())5.!/=80.)"
+")/<A7.4-+'#	,/**$	
+))')+)&#-
+	%,*% '-ciCkG!%, P�������α�Ŀ�����������������������������������];UPMC>94.+*&!!p���������������|wtplie;@������������������������R[�������������|qfer\J8.2,%"%6j�����������½����������6+E?2$ #$$!X����������������sqkf^ZUQXVRIB=951-+je4&'"���@W����ǽ����������������ÿ�������������zQ2!	@?0	+MLLD;89<BOUahKLemsk_S"!N�|xuz}��������������yrc$B����rQ~�����~lI)$2\v~U[rvr@bδt3(#4l���]2,Y�ž������������������"'#(((*)(%"^�,7jrSTZi�LL`�JLHGp}yx}|||~{zxxz{||zzxqR%e�f()'Dgh<=9<B?:85<A811(R~uRHCBt�����uj`Q]Fb>;E7C\VDXb_W6@ix~}}{xtphme??EVW[^\bd`S.,ZgWORr����������s��������������up��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������õ�����������������������b;u�{xuxsmkbWRSRLGGGGGGB;:5%797752),((R�/'*Fdk}|* .@8.-)#
+	&009;3-/)&&"
+	,.+*#	
+++((*((%!0!
+
+&/-&!#v��u',!M�������ϱ��������������������������������¾����Y ?YTME@93.))'%! h���������������~vqlcai<C����������������¾������Q^�����������xtrjcm}[O606-###:h�����������ü����������2,KC2$ #$#""_����������������zxnc^YVTTUSJ@;82/*0km,%(  ���>^����ǽ����������������ý�������������wI!3C;."=B=620127<FMQMEMMPNG8M�}{qrz��������}|���vq`'Bz���nS|�����|f@(%Ho�SYywr>L��**7>(/m�]+);Z�ľ������������������2Qbf`]THEGGELPO9 )*()*'$;��8lqVT[q�DVc�OOMIu}{y}{zy|yzyzz{||zyxqV-j�i1*(GxrFE?;>?;;9<A=77,T��uTHDDx�����{e\Zz]ZaK7>chLP_e_T3Ahv~�}{ywvq^ltcNVdNU]Yac]O.0\jWQTs����������n��������������mu��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ö����������������������~c;w�}{wtrojbYSSSMHFGGGE?986);:9954?3#$Lh#',_s1/& -64,,&!
+	-9126/++*(%  
+	,-))#	
+)**,))(%, 	(/+$##"^i@mnE'+ N��������ɸwL=Gx��������������������������¿����R!<XULF@94/*)($#!!m��������������}whcS`cg7E������������������������O[���������}vv}�yz�TL504*!#&7o�����������ü����������3-I@3##"#" `����������������|sof^[UVYXXQA6;:6,1uX&''���Ej����ż����������������½�������������xD$0	NF0+@F<2+*,-/5CNHOICC?94'Q�zsYg��������~}��topZ)F|���eW�����u\=&"9_�Qb~y;U�&1`m+1o�Q)C_Z�ľ������������������#" ))+*+%%'R��(9mnRTW`�kVksOPNPx{||||zzz|{y}{yzyyywoR,k�h1/* QwnDB>;<?<99>DB>;*V��rKEADu������f`O�tJYVG7LG@_cd_W4Ejw~�}{xvup_blqbR`NS_Uef^S*/^iZRUu����������h��������������k|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŷ����������������������{a<s�|{ttqpibYTSQLGEGHGFA:83(;<;<>{��]S��0)+QoL^W  ,42*'$!
+%.,.50+,,+("	))%# 
+
+/-),+,($3		%.*"#l��������G$) O�������ο�|���~��������������������������¾����K ?URKF@93,('&%#! m���������sq{plmnn���yd2F����������������½������LZ���������{}���|xuEK731(!#'9p�����������»����������//F>1"### U�����������������{ofceca]ZRGAE@=.
+*dL')%!���8q����ļ����������������½�������������v@%/	VI.1AQB0'&'(,3?D5DC8=00*"\�oqVq��}x|����wv|��bgsW(D����bXz�����nU8#*QoL]tvm5d�u!1tz)9x�K(Jo_�þ������������������'))+(('%0���)!;nlQUTK\mc[VQOPUvwz{}~|z}|zy{{zyyz{vpK/o�c-1("Pud@>:87:;99<@@>5&Y�~rJGDD{������[Q88BHKUJ=76C_dd_V2Cky|{xwtn\W`riFOMM[Vge^S*3\eWKYy����������k��������������gz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ķ�����������������������b=o�yyssqmd_WSOOJEDFHGE>:7/(;<==L��]��r:"(+PwYYO'54*'$#	!+,/41+.,)(#,,% 	
+-0*+,,%!1 		&.)! X��R8N{�x!&*N�����������ľ����������������������������������I BURKD@82-)'&%!!n��������������������un5H������������������������H\�������������zvmm>D91/(!'<r�����������»���������{--K:-!#$#I���������������|{tr|sfkf_\VRVaR@1%	)fQ%!8$'%'���/um3m7v����ż�������������������������������r=#(
+;4%
+!)33)%&'(-4?91=A1:3.,#	 ]�ieV��}mp|���zus{��`cmM(:`kd^]QYZWX\VHC.".LOPPRM/s�g$=N*<s�H'6No���������������������+***(()%e��$>oiTVSRQPRQRQQQQxwx{}{z|zzz{z{{yyyvn@1s�^**&!Bq_<0/0-358<B@0$)(Z~yoHHBRy������ihc><48N\IHCObcc^U2Fmz~~}|zwwqbXLd`QTKGSVfdbR#5^gVOUw����������q��������������f��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������·�����������������������f?p�|{usplhaXQPPLFDGJEC>:82$8==@U�t,�Q1R#(+NrL:H)54**(#
+	 /..1.*-*(&!
+0/'# 
+
+.3-,-'"4"		'.("!"d��eM����B%,"Q�����������������������������������������������KAVUJD?93-'%&%"!n��������������������tw7G���������������¾�������J\����yy�������~wqll:F91.&!(7q�����������ɿ���������z+ ,H;,%%"J����������z{|{tri]ROIETMHP\]ZQ;?#	.gJ$+d7# &(-���+he9�N!����ú����������������½�������������q<$(
+A9#"'((&%&&)38;3/:?77@:5'
+!i�rrx��suv����otv��^_kH*@junnihrquwwn^G/!#G^^Y\^R�ћJ("2X}��vF'6s�¾�����������������z'$.$-++**)$"Aj��J$?ufYUSSRRPPQRSKWywu|{~{y|zzy{zy{yxwvm<5s�](-&! GqaJ=333369<EB,$%)g�}tGGCN�����l__FH93HRQLCVded_V0Koy~|{zxum^\`i^PJJNY\gf`N$5aeUNYw����������t��������������\|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ø����������������������~f?g�}{wtrohbZPPQLHGFGGD=872#9<<?N����wv�:%*ZuV`Y&-1+)% 
+++.30+*'&"
+-,(&#	
++0,-*'"1 
+	',*%"!_����iL��5$)!O�������θ��ǿ����������������������������������H CRPHD?83-)&%$"   o������������ƻ������{v7G������������������������G_����}qz�����|wolm;
+C5//&! )8z��ղ�������������ug���u' -D9+ $##F�}~}}|�xstvyyrnlfWGINHHDFF85=;14-eH"-e2!'(3���!;#?�F$����������}���������������������������m6%'
+	@6"('((&%'').20)(4CB>H@4"	#g�yu��zrw����}rp���\^nG-?ei`^[\cgf_]RC6,!)EPJA<*�ͥf:#)H^e_7"7|��������������������n&!C���L+****($'}�w9<ziWPSRRRQPQNNHTyxpzzz||{|{{yxx{ywvvj99t�X(1& $^}bJA6323568HD-"%*b�{tJHAM~�����xd`KWKPBLOkTXWcec`U,Moz��~{ywulbXYqeYJDG[]gc\J":bdME`x����������s�������������x`���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~d>c�}{vwspicYTSRMGDCEGE>893$:==>A��j[QMm5&+;Z_d<#).('$"
+ /205/)(&&"
+
+*,)%!		,0--)&" . 		*/+#.i�o63~@$+&L��������Ń[a\Rr��������������������������������A!@TPHD@70-*'$    t������������Ž��{���}t6D������������������������@]����y{�����}xrnk8 R3.-& (8}��ܛ.SY0" #!d��s)!0C9+!"#"C����������|��vqmcZLB=?=><;>7-*-,-	-jM!;X""(&6���)�_C�:,�����ù������������������������������l4&%J< 
+)$&*'%&&(-1.&&5GC@@O?#
+$o���|khf{���~�ii��}^Wj=,<MMGBMTZWPLG;1-)"!09;4*H�F-=#&8�Ŀ�����������������c#4xx\by6"0/---.)#N`WSRQSRRPONPIU�}rxz{|{{{{{zz{yyxxvm;:x�X%-$$VxjK</(*0136MG.#'+b~xqDFAX������zraHTKLJDRnHRXgfd`Q.Mnz�~|yxuf_`Ub^RKAATZgd^G>fgQL]�����������fu���������{xnb\z����y���������������������������������������Ͽ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������jAe�|zxxsoicWTRQNEEEFHE?964#:??>=A9+.+@o:=7.U[ZH#(,')%!
+	"15350()&&$ 
+),'% 
+(/.)+&!!. 
+)1,$"_�rooxy|�C$-N���������Ė{z����������������������������������=$DSPIC>92-)%#!  ~������������Ž��{���{/A���������������ÿ�������=^�����yvy|���~yupkj4"M01-%!%:~��ߛ0���sYENVVV6eP"n��n'/A5) """@}}��~vx�xqpceed]MGA>90),023-$%11:$	
+1bA P^("(%=���*�Y+j$*���������������������ÿ��������������j.&#	
+F8!
+,#(1.'&(*,/+$&1>799\\!&n����ogb\����maZg��s\Xi8+7><;@JKHB=?7.*(&#%-0/((=L),R��f]_RQ^w^}���������k��k�������U$ B~\R\gG'CRhIDjL+"8UVRRRQQRPOPOBO{|uyx{{{{{|{{yz{zxvtl6<y�W!*##QreL=2''8@FPJ1'./i�yn>E@T��tuvWbmo_VGDFUxiFB[gb_N2Us|��~{ywueYZKNHGNCBI]ff_@<eaQAEU]__hddb^][Vaafiijhikcgtrqjlorpur���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʸ�����������������������h@e�zzwvsojcZSQRNJDFGGC=962#6<=>=84,)*Ax:hn=t^UE"(.'&&"
+	"/6461++''%	))'$		.4+(*'! 
+/ 
+
+,1+%!]�xqnfe�E%) O��������˰��{re��������������������������������:!ERPLF?71-+'$#"!�������������ž���~r~�~5B���������������¿�������;^�����ztsuv{}{wrojo8
+%I1.+%!)>~��ޘ(o�q�f�����?Z}*m��m-0<3)#$#:vptwtikokce_]YQIB>>B==?KVj�}~}�mj7
+8mC DL=&#''F�ɧ2�REx'1����������}���������������������������l*)!		A2	*"%./,(%%%&#!#&/.15C.'t���nbljo���xbOVh��p\[j2)226>GHA:764,*(&$!#*,)!)=,@x���������k���������������������I&N��k��L%5;B:<C2";XVRTTQRRQPQSGW�~wwx}{z||{z{x{{yyxyj7A{�S# )"'a{oXUOK>AELPPSLE>3j�znBK<N���`cdJGctfdU?;=EF>;Kcb`O2Vu{��}}yvyvlqURT`gre^daTQ28gbO<<KPW[dhks{{|zzz|{z{z{{z}��|~�xzmro���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɸ�����������������������gC`�{ytsspja\RPQNJDFGGB;870%7>>?A_��LBi�C135^@DP"!(0%%$!!/5051+-)'#	''$!	/0(')&! 0 
+
+
+(.*$J0%+M�������Ͳi[cbis��������������������������������7$FTRKE?60-)%%$! {������ù۩��ƻ�������n-D���������������¾�������:\�����zvtstx{wtqlgoB
+%H1,,%!(9�����5k�e�[ftrFXBJH#p��i%3@4)$$#:srsvvwti]][YWSHA;;865:9B^k~�{vgb4=f?D.+;P7$*&L�ǟ7�P%J!6����ſ����}�����������Ŀ��������������h&&!		C.	1%/654/)&$$%&)1650%#''z��uW]lj���gRCKc��o`]j0%25:DC<401.*&'$%" )( !#'(&.Q9%,Xa9h�3%=]N�Ŀ������������������?&/sQOi$&,,**+*$$>UTSTSRQQPQPODV|~ywy|zy{|z|{xyzyxwxf8C~�R%" QwhSXYUSPNRVTP;1+-k�zoFOLQ��������qg[c[^Wabged`]I7czvqqmiged`b\UXXSONNKFDDB:JhgP8;\jotz{zzzrmlfeb]^YTPLHE?=8750/))*���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ÿ�����������������������jD^�{yutspib]URROHFFFGA9781$;>?>C��l���Z.%'V}`eF!*.'&&"	#22/3/)+(% 
+
+))%!		)-()*(#"!/
+	(-*$$+J�������ϯ��������������������������������������6$ESPLF@82,(%"!  ! ������ù�զ��Ƴ��������3H���������������¿�������7[�����}wuusx~zwtoo|<	*F0/,")8�����9IgzjZZRHJJ='!w��g(6=2( $&#Avtx}wzsr_RQNKKHC@C?66;=CJTSMRUWah2	:^8BVqbN,#)'W�ȘQ';_8���������������������ÿ��������������}g#) 	 D2
+'M&084<AHNMLF7:ORB>FU: 
+$���]Ualm���|eXQat��qk^n-$39;>81+,-)&$$$ !+*!! #%$#.r��0 0Eb��8.%J�ſ�����������������6$Cmub:(+**+(&"%>XVUURSTROQPNCW�~zwy{{{{|{{yyyy{xxve1F|�P '!!08+  6]ZYXQ5*(-i�|m@Q_Q}������������}vynoe`ZVN>BXWKHECGC@AA;>BBBDGEHLNPSX[Y\bP:3<@?:<975333/./02203.)+//210330235���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������´�����������������������iEZ�}{vstphc_WTSQIFEGFA:592";<<>F���l/B"03QjcS9!*/*''!
+"4/,1.('%# 
+	*+'$"	)/*(+)%"!4!
+	!-)!%,"O�������ζ�ÿ��������������������������¼�������3%EVPKE@82,'%$" !!""�Ǻ�����}����ǩ��������2D������������������������5c�����yttsu~�|xuty2	)B0/*#'8��ߕ?A[����bO�Dx��e)8=3) #%#CpppsleRHH?;ABC<581=3467<DKFMTZekp5	<X4F�r1$(&Y�̎8w:c�8����ƿ�����������ǟ���������������}�sxd!'$E4	0X7>EDDKOQUUODA`gW?7YU!
+���AScjv���qf]o~���siZh&"0551,('*)$"""!" !#"!##$E�÷�-2t��:&'R�ƾ������������������1%"'++))('"'?WTRSRTSSSQOL?W~{z{}|z{zz{zyxz|xxtg.A{�O ")""#%4UZP2!&();\a[VN:5<`w�|qBTH=Pdeiklkiidf`c[ZRQPLBDFEDOSSWRJFEEFEHFFIIJLLNOPSRRRSTTWY_PB6:>ABEGGHHKOMLMMLMNNNSRTXX[YZ\VXaY���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������µ�����������������������iDc��yuvpid`XRPNIEEHFA:887!8<>@H��j�~w�Zlkb_]ZU$#(.+($!"1-+10*'#" 
+%*%" *1.,-*#!!8"
+
+!-*"&/ L����������¾������������߼������;�ƿ����������2%CTPJF?72-(%$! " "$y�����Ӻw���ý��������|,@������������������������5 r�����|xvx����~}s*	*=0+&#  %<���ߎ17Ayyr��rtFF6y��c&;=4*!#%#=hkjmid]SMF=?BBEA?7=FR]x�{�������w0	>]8!"%*8*h�Ή$K ".@����ƾ�����������ǡ��«�����������t�hog'	1P7	<ec~xoZUSSVRNGGbqbF8??	&��t=Ydt���gcPgx���i`Zi#!01-&%%&'&#!   !$#!! !""C���@&+%3���)$*W�ſ������������������*"'-/-*'' +DWTSSSRTSRPNN@`z|~}z{|yzz{{{yxyzxwug/C}�T+8=1/43*&(*:Z@-7;579;=A;DMGF?:;<8<BDEJMNWa��~{iFUJNLD;8422.26205-52449>EHJMRRSVSRPPPOMQSRTSTRRNNLPQONLLHIHGLSRVX]`bbegfegijlihkomlojjkknmpmmlke���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¶�����������������������jCX���{wrnid]WRQOIDFHF?;862?>>@B��tpjjzJXQGGDOT #'+)%#!	!1/-11*($!
+%,$# '.*),(!!%F"
+"-)"&,L��������Ļ���������������¿���ɽ���ŷ����������.$DTPKG?72.*%"! " !#m������؊fpvvmXck~���}p)C������������������������5(t����������~qpy�{\"	+>/)'!#(9���٥\PF=)"'*%#%%y��^%9>5,$#'$9hhdgkkb[XXhka\\^_bakou����������x+
+?X7D?2?8HR>LPXe1p��u1=5 F����ž�����������ŭ�����������������xb(7H/5X]stwnghcd^BA6He[GQ_[ 
+-�wm5Ygy���nbaG\n��tOEQ_ +*'$$#$%"  "##!" "H��f&<;q��"58T�Ŀ������������������ !*.0.+))!.FXUTSSSTRQQON;`{z}}zz{zz|{zzyy{ywwse/I��`K^\TOJKHA@?EIJHIMOQY]fjkldfe^^_]_^[_dbhinnr��yyUZVZYWRPSPLKFHEEFACGFIGGLLMLKIFJIJGJFDAFJFHHFHKLIMMDPUQTUY]bhjotussusqtwwrqrxywvusruxwsqusttxuqs���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ķ�����������������������mFZ���}ytolg^WRQMFFHGF@;;81@A?ABKD'')4g>dq``a\L#%)+)%$"#30-21,*'# 	%.&%!	-,*)*&!"$<!	",*#$+I�����������������������������������������������,'DSPLF>72.)#"! ! !# �������ĎjoiaS`z�~zttt&G������������������������4(y��������}xmcbfq�oR 	,9.,(# "*9���������ľ������������X#7=6-# "&$5ad`dj}yZ_gx�������zzvvw|��������/
+G^4evY^MVfHRQge+s��eOiU_bI����ſ�������������������������������}Z&6>(	=Z^muztmkeha6B>FULUgff(.�_f5Vk~��]^cGPm��mKDOZ&##$$"$# "$$"# !$L��k&LS;��S=;Y���������������������$,-.-+))"'&7I[USSUUTRQPQM>c|{}~zz|y|}{zzyzzxxwte6]��lgnohaa_]_\\b]]`hedhnilqloonnosnpsorwrpnjljkn^SR^_LKJLNKLBAFLRNQND?GPTSOLSXWXVWTNTYabiggliomqvtuwtqmkmknsxyyyxvw{yw{{ttx}vrswxvuvsquwvrikorsrqpp���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɶ�����������������������lE Y���|vtnij_TTVSFHLIE@:;93>@?AIkO786Aj2=;,W`S3&,-)%&#
+"0-.52++(&$ 
+%,'%!
+-/)))($ !:#"-*"%-!G����������¿�����������������������������������)&ATOLD?70,(#""   !���������̡��������~qv�%F������������������������3%x�������{ysfb]_^][P,7.-'" ")<������������¼���������P&4;4-$#%#9[[S^isxudhp{����{v{|�������������,HY0]b`RHSeKKBE@'{��Z'{c_Pu2Q����ƽ�������������������������������xU)/0"	\}lcgomhc[`cJNJRVVqob`%
+0~be4Ru���kQYY8Ef��S:=HN %##""!"! !"!!!!#W��n#Y���a# <6`�þ������������������(,..-,*)#7T.""#).7BOZUTSSTSSUSRM>i~x}y{z{}~{zyyyyvxut['@��mmpqrmqqkpoeieikffhab[W\XRVMPMKLLGABKCFHICEDEABJ[_WWVY_cgcXY^cktngdbgmrurruxzwyxtrtssuvvwwxwuwvxyvvuuttsuvutuuuwuusrrmpsqmprrpnonlopomkiklnliihj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ŷ�����������������������lH O����~��|��rfqjQ]iUE<::95?A;>D������|J$*@r98Y!&+3+'&# 
+),163,+(('!	%-%# 	
+*.**+)% #@%
+".+#&,K�����������������������������������������������''EUQLD=71,(%$#!  ���������ſ��������oclr&G������������������������++y������|xpcccc^YXP)5,,'-
+#*:�����������Ŀ����������Q'"481,& #$"CkoojcdVeLSqw�����������ewcvu��}{%ET-.))&)$#"%#(+#���SH�f�^xF]����Ž�������������������������������zT$*2-e�lbcggb^YafYUVWWbqnee#!4vcY>Z~���MANODPp��J07?E %"###   !  "#"!"""f��y#$INBL(#',i�ſ������������������,1/.+*)&"!#$$')-0;Oc_WTQQRQPRRPNVw}uyy|x{{}{{zz{yxxuua92FUXXYUNGDLJNNHJCLQLKLJEKSZYVYYYZ]^^_cflgmlmmsw{�|}ztszytutryvqommqswxurrsrtrsvxxxzyvwtvvutvutsqstsrsrtsrrrqsupqrrqnlonqrnljloqoonkglokkikmljheb_a`���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¶�����������������������kF N����|�����{owoe\PLE=8::4:=:;?drROMKZt9&4]HYp-#'70+'%!')-64+)&%$!	&.&!
+	),(+-*%"
+;"$.+$$'/!I����������������������������������������¿�����&(GUQKB:61+('%#"! "}�������ֺ���ü����yvoe!G������������������������-+y���������}yuqoic`]
+'3--(*��S��A�I_H[ltE&+<�����������Ŀ����������M("7:2-& $%#A[W]hic^f7OR9]8k>Fq{\���Ml[�xuzxvtDS,'("���IWp�giKb����ü�������������������������������wR$'6-]z^dghfc_]_``YTNThnkfe#2cNaa�����aQEGNn���cB3/*!&###"    "# !!?���n.,Y~H" .o�½�����������������z)(9143./.*'%*//35489::CDDHJJOVZ\VUQUUSY[\Z@[��uu~|zv|z~||{zyxwvvsaI[]a`\[XZkkgkhbebhjhghiipmkjkolonnqtooszxxuvursuux{}}zwuutqoprrsrsomnjkkswwtpijkqrrssrstvutststrrstssrrtsrssrsromlljlnoolhkoqponmmlmonnjklmnica\ZWXZ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mF J����z}���rsjphaMFGD;99:3:?<88:4+*,)-FXN%:SV7	$(10*&$ 
+'),35,)'%"%-(&$&	(.(),)$")<""
+#2/%7Q*8.'0#G�����������������������������������������������%(KVQKB:50,'&$""#" "z�������ո���ľ����~z~�$I������������������������*.s������~�{wuqpohac_	(1./)0ҩ���2d�y��v+��[%+@�����������������������M'"8:5.' #&$LpcdhjhfsO��R�W�LUeuG��~[eZzyvqrriGP-)(!���EV�ƀq8h����ü�������������������������������wN#&+*UgaijigfcbdRHCCFXggagn$6I=3TyxwpiQ6!&5g���Z2"&#"!!$"!!! F:.?NA#!"#0Fu��������������������z7.%&! !"$$%"%-3;CLNUWXV^\`bdabcegdigmlnqttutlmjiflkjklnqsOQ��so|{z{||~||{{zyywwu`I`dkkjmnnqqpqqmnopspononolkmmnnqppkmpporqpnpqpllijmoorsrtuvrpqpprvttrrokonrtuwyusqonmmpomlnllnonkkkmlkmnmlnnnmikmlonqqnmlnprqponlmpppmnmnmjgb]Y[[[XV���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������÷�����������������������nI#N����wss���jejh[HAFB<::;25?;;730/,*'+2mr#%$/5$$)'*)%
+'-021,)''"&		$-))% !_X8/#		(0,+/+&#XU;=+1 hC <)	";:(W�bX!'1$G����������¿�����������������������������������"'HXQJD;40-'%$#!"!!��������Ը���¾���}r��&L������������������������')k���~vnnnaae^_TWWe`
+(1//,*�Hw��T�k��R*xc9$+=�����������ÿ����������F%$==51'"#%$Elipmltoeg��������|yhy���zt�}zwwynHL) ('!���A;�X{ik' k����¼�������������������������������wK%&1+	MVYfgece`^\B/.6@M^]]kk'88Qj7,3:GWft|{y����}rn_8%&#!!  !$$$"!"!)K?..Bjh3"",HX?E��������������������pBSQOPTQUVWW[\`afmnkorqpovuxututwspoqrqpopqrsqoomnnnmmoopqqWP��yowzyz|zy{|{yzyyyvudVbhlmnnomnpqqrrrpproomoponpqppppqspnpqoqrqqrttuqppmomknnqsrsqqrroqqtrpppnoononnpqtttutwwutoooqoommkknljkmkgfhkmouppnljmprronppnlnmonlmonlg]]`ddfba]X���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȶ�����������������������sK#H�~||}�y��tkk_i\UMMDACC;5 4<9971.-,)(+4icBNM[n.
+I]W0..%!
+,45<5,)+)$!!!b>CE)	
+#1**)#��_&a	'0,-0+$" ��`r0��>-P=B(K��~x%/%D�����������������������������������������������*'HYQKC;3.,(%$$"#"  "�͹��������������y{r��&J������������������������%)l���}mqkbfibf`Waf_
+(2/-*"*8+%$#.$*<�����������þ����������E&$9>51(##$# ?SNbfgo���x���������|���}yw|xx{|�iDI' &&&���<plaa>$q����»���������������¾��������������tD%$	2+)�cBR\\]\WN?2+'/;CYSC_[6.V�GA`p}�����������}wrG%1.'%&&&&'())'(+++*-.-**'**17BVWA7<=:1.07<GKVUPKNT_^\ech_fc`dabhgljliieikkjmnkklnpqsssrrstutusttvwtsprtsrprtssrqsppppprroqppVX��vuyzz{{|{|{yzy{ywrbXcjmpoqrrtutsssrqpnqqsutrtssrpppopqoqqonpnopnpssssssvvtrnlmnqrrstrqkolrurorrqrpmnnmoqnrptrqututqsrqruwsqsrooonqolklkoooppnlnnmpoqnmopokheb_cgiijjhif���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ζ�����������������������nK#F�|}�����tsle`\YXTFEDC94 2<;974.,+*()0jn]f]^p2	L��i:,%$#W$-0!
+12/:71.+(%$% ��=!s
+"1--,&n���n
+	%.,,,*%#q���|. vz��K
+./#;I>��&0%C����������¿�����������������������������������6!GUQIB;4/,(&$#" !"�Դ���������ĳ��nmpt}��&J������������������������%$l�����}msuwtnrrigbW	'0.-(#!%(?�����������ý����������@% 9>5/'""#"Mg^_dffhnw����������wqmoononpz~\>>#!&#*���6BC3%s���������|�����������þ��������������u@'%
+2*Q��dKKMMNIB4.005?END;@C"',0DPjzquwsx��}}}����yqkiQ=CKJIIIJMMNQMPUPTRWYVY\[Z\_baaeeeO;9<49MYhlcdklmnnqpqsqqrrqrrssrsrsrqqppopqrqrrprutuuuuuutuvvwusswvttqsttsturstutrrqqqpqponnjTZ���{uyyz}{{zzzzzyyyxtc]eloqrststsrqqrrrstwwtrssrsqoopnnmmmoomlllmmkkknoprrtpqqsvuljlosrrstrnonssrsopqtursrollghikmniijlljjiijikihgjihhimonqnllqmlklorqnmnoojebdghjmlmmmjkj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʵ�����������������������nN%G����}����wqhj_]ZUKFCA>93.:88752/+((*0h^$,#&1#	%&*,(%##)��,4]		,,,660.)%#"#���x�2.+)# U@L��	$,,,*)$"dKV��-!X%R�^/-">B"<m%1%F����������¾�����������������������������������>#IUQIA<5.+(&#$"!!#�ɴ���}���������npbp��}#F�������������Ŀ���������%*s����������|xwupjaT(2.,&"$%<�����������ü����������>&!7;3+&! 
$$Reikoigd[Z��������������xlfbc\fnuT93 !'& 5���5+|���Ľ����z�����������¾��������������v@0'=6#,LVTSODCEGCBEDLPRTZZZa[]ab[^^``][ZX[_`edgcdhhffhjillnpoonqpksrkjjkmmqmonssptnmnrtspqp]8/95ChloqllmnqrrrutuuusvttvxwvvwwvttuvtuvxxuvvvwvwvwwwvwxvvwuuuvssttvtttuurttvurtrsrqppqoomX^��|xzxz~|||{zyzyyxxsb^fnqrsqqqqqrrsstuwwvusqspqqljhigba`_aa]__dijmqokimmkntvwutuuuokhortrrsspomkprupponporsuttspqqstnqprmmnnmijlgjllnloomkknpplkmmoonlnolh`beilnmmmlljjkk���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ų�����������������������nN%F����~vodZRORPIIFGA<;701?;9630,)()+/_x=&!""
+
+$((%" !!���ud		
+&)+33-,*&!!k<tҡ	0-&%-YIJY
+%-**)&"!#Q\(Tn-"#L4R<&* *'%2&H�����������������������������������������������D$FTOH@;4-)'%###!"#����������������|y{���}$H�������������þ���������$1{����������~{ywumfY	*30-' $(?�����������ý����������@$"582+%!!$#bg]digjfiUc��������������uhdgiokqJ%7/#*,(5���1'��Ͽ������W�����������½��������������xF%4&)@akZ`J1'1:)""&#/27LQWY[]cedgiihjliiklnqrvstswttsrqpqrqsqqoqsststsstrstssstutppprsqnopqrsrrsuttvtsstutuuvtd5'-HiknnooortvttutuvwvvwwwxxywwxvtttuwxvuyxwwwvvwxwwwvwxusvwwuuwvtuwvttuvtuuxvuuuttsrsqponX[��|yxyz|{|{{yxzyyxws_Wdjnomossttuutwwutqspqpnkid_]YVVRPOQSRNNRQWZ]afkmnlgaifinursqptrnhjoqpqrssqomkjpqqqnmknommnnnomknookknkkklojmklmmkllmlkichlpnkkmomje``ehkonklligkkjk���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������õ�����������������������nL'A��}yutske]VRORKFDDFD98602=:763/+)((*+5>1$!##"&&% p:z�	
+%**44,*(&"&VU  q�	.*%#$/
+	$,(('&"!:-+#$
+$)&/$H�����������������������������������������������F#EWOF>83.+'&$#!!!   "�Ͽ�������������~��xz�y$E������������������������$/r������������{yuneN,62.&"%)C�����������ü����������7$!GO62) %"Jh]bklml}xnz�������������zwsrt{{uK-GK?D3,)$&'%#(,/,6BOXSw�r<00)+,10/009976t���������B:d������������������������ueREIE4+,----=LMV^cmhbbiobRTWO]eghgikklossrrssuuusuxvxyyzyyyxxvuttwxuvuutuwwvvuwxxwxvvwuvwwvtsttuvvuuvvuvxuvuvtttwxxxxvuuc;Iamoqprrqsrqttutwwywxzyz{yzxyxwxxwyyyxwyyxxxwwyywxxwwwuuvuwvvxvwvtutuvwuutvuutsrsrpqppooY`��|}{xzy}{|||zy{{xxxt^Rdkppruwvwvtssuvw|z{tqje]YTRSQQSTWWTSQRSSPRRPNT[aehjlnkjegksrrpqvoleennppqssrpjjjgmnoponnllnnmmonnnlkollkhghjiimllnkjiggkpllkkookfaaegimnlmjiea^cjhf���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������µ�����������������������uN&:|}zxtqmhb\WQPNJEBBEC<891-::9630+(&'**)'&"!""
+!%&%! *PK"qd
+	
+$*)31*&$# !B')
+,+%!
+#,&&''$# *#
+$)(0'I����������¾�����������������������������½����M#!BUME?83/*(%$#!! !!���ƻ�����������|yux��qC������������������������"(q������������|wrj^@+330&#!7�����������ü����������430OrM<. #&$##8dTPSitnhhqx�������������|}|wxymL-/1697B?ADCADJLSXemy~|�}siqlkjcggedejklmnkhe^bjqpy�xgbbamqvuv�������|~|uklrjii[^db_a__``aded`bdffigehiihigijilmqqsuutuwxxwxxxyxxyxzy{{{|{{|{|{xxy{{zzyyxyxyzzyzyyyzxyxwwwwvutvwxxvtuwxxywxxwuvvwyzy{z{zwvqmqqrttwztsrrsuuvxyzxxyxzzwxwwxwuuxywvwwxxvwwwwyzyxvuvvvvvvxvvxsuuuvuuvvvutvusstrsqonmkooSb�|y}|vzz||{{zyyyzyyxrZZknrqstuuvsoqrtvstmke]Z\ZYTSSSVSUSQRRQNNPQOPOOOKMN\cgjkmnmieitomgotrjefosnlnpppqnllieffgihihihhikjlkmmmmnlhjjkjgidcacjmkljceecc_[XY_^`_\ZWVQOQQOLLLP���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƶ�����������������������qR)?��{zvsog`ZTQRMKFCDEB?991!);;7432.)''(('&$"!%" %&& #=
+
+&++.-'&$"
+	,)" 
+%-)''&%"!'"
+	#*&5&L�����������������������������������������ý����J(DTLE>850+)'%%#$! ����������������������r-s��������������ĺ����}\D")e����~wsmaZTLGDMID1*-3<,$ #(4t�����������¼���������S?>;RGA71111,*(+-,4@>CDBGEKEOS[]flisz~����{}�}����wsrh_\dghhhedgnkmrwy}z�~~����~zzwvttxxvvwzxuwxxvttqqqsvwwvwvssqpjjmmnhnllmjhkmllmnoooonlkoplklllmmlmnoponooqpqoppqsuvuuyxz{{{{z{z||{{}{{|||{||{{}~}|}||~}~|{|||{|}|z{{y{||xxxvvttwxwuwwwwxwy|{xzzyyy{yz{z{||zyxuuvutux��xrssuxwwxywwxxyuyvwyyxwvwwwwwxxxxxwxxyyxwttuuwwwuwuvvtuuvwwyyvwvvwuttsrtrqrmkmnRk�ysx|y{|}|}|zyyz{{{yrTYjoqqqqstttsvuqnhc[XSSSVTZWWSQOKKONNJIGGGKKKJLOLLHHOZbfijjlkgakrmmjlsphcgnpplhkoosqpmjjgfcd^`]ZZZ\]XYZ\Z[VX[XSUSVSRWVSVPJONNNKFHJHHFHHHFGEABDA@@@@@E���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������з�����������������������oQ)<~�yzxtog_XSPROLEBDEB@:75!-;>9542-+*'''&##"$)$ 
+!%'% 
+(+),/('%#
+	+'"(4/)+('#  +$		!&%2 R�����������������������������������������¿����I5>WOG@961.,)''&&$""!!!"%/w���������������������r3.4BNSS^WYX\bgi[SF651-28@MV\TSXH0<01**)*-/.3/:AED>833+(&'"+GZgpurx�����������{tsslllrutqcUQNLLGNQJMPUY_ahrxxrhifdf`_Z``_fdcfgichibdhhkgkheknoqtwvyvxvvwvwwy{{}|}~}���~}}{yz}}}|||||||{{|{zzy{{||yzywwwvtuutqqtrsstqqrrrsrtuuvwsrstqpppopqppopqpqrrsrrrsruvvvvwxwzyz{zzz{{}|{}|}~~}|{}}}|}~~~|}}}~~|}~|}~{z{ywxxzxxxxxwwwxwvxvwxxux{xwxwzzyz{}}z{}}}{zyvvurppv��rtstwwwxxxwxxyuxvvvwwvtuyxxxyxvwyyxxxvwzwwvtuvvuwuwvutvyxxwxwvvvvutspprrnnpmmlDo�}qrzzz{|}}}{ywy|}yvqPZgijmortttrrqid^YTQQUUUPRQNMOONQNPOMHIHDCGFDDHKMMHEFLP\a`aafjc]Y_ea^]_d]XVUUWVVWXZZYXQVQPOQROONLMKJJJKJIIFGHGDCGHGFHDEEEBBDFDBDCBAAADD@ABA>>>===>=>@���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������͸�����������������������tQ(8~�zxtsng`YSPQNKEBCCD@;74#.;>=:60,,+'&')()$%0+!
+ ')&$"
+
+'*+/1*('$	'%!
+
+(=2,,+'# -'"	
+#&"0 R�������������������������������������������ŷ��DA9POG?8332--.*'((&#!!%'&13BP_i����������������������udlsuniZ\QKLLMPMQVZcgks|����~~}rjgecgkfdfinnqrpuw~yxqje]YSTONKKKNPIJLMRV]em}�������znhjjgjmpptvw}����������������~������������{{{yyz|zwvtrtrturqsrssttvwvz|}��~}~}~~|�~}~}��������}|}}{~~~~~~|~~}{{|}|zyyz{zyywxwvuvvuvxxwuvvvwvvxxyyyutwuttssrstrqssrsrstuttvvxwwxyz||{{{|{z{|{||{|}{z~}||~}}}{|{~|~}|}{{{z||{{zzzwvxxzzzwyzxxwywxuwwvwwx{yxwwxxyx|}}|~~}~~}zvwututszypqpqtstuvuvxxyvwvvwwwvwuwxxxyyxwzyxxxvzzwxwvwwwwuwzwyxy{wxwuusuvttttssqqqrrporEr��spy{y{{{|||zz{|}zxmTWfilikmonnfaZTRPRPONNLIGFGJMMNJMKJKIDBADCEDBBEHHHJHEFEIONOPOMPOMKKNJJHIKKIDDFGGIGDFIGEHHFEGGCGFCBCDDDCBBDCDCCEEFEDDEDDEFEAADB@@A@?@BCB?@@>=><==;>>==���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǹ�����������������������rQ(4{�{{uwogaXSQQOLFEDED@<93!0AF@=70-.,'%')+.&&0* ',,,'
+	'/112,($#	
+(&	'D4.+)%!%<($! (!&&(T�����������þ�����������������������������ƻ���NSRXYWXVVUWSRQQMOU^___ejqu}|||~}��������������������������������������������������������~��~��}�������������������{z���}|~��������������������������������������������������������������~}~}~~|||z}}yy{||||{|||~������������������~��������}~||}~~~}}}|~}}}~~|zyz{yyyzzswwxvvwxuvwwwxxxxxywxxxwuxwuuuvvuuutvvuuttvywxxxyyyzyyzx||||{z{z|}~{{|{z|||{|{{{}zz|{||{|{zyxz|zzzxxxyzx|{yxyzy{yzxwvxwxwwwywwwxyz{{}}}}��~��|zywwvw|zqrqsuttvvuvwxzyxwwzz|yyyyzz{{{{zyyyyyx{zxxwyyyxxwy}zyyzyvxxwuvvxyvuuutttsrrrqsJx��xnv|yz|}{}|zz|{{zvnQMQTXUQVSJHKJFHHGHIJIGGDDFIGEHFFHGGGGGFFGFGFGFGHGGJHIHHHHDHJHFIJGFGEBDE@CECACCDFFFDCFEDGGDBCCCDECBCCAFDCDDBCBBDGCEEFBDDEDCAABC@?AB>ACA?A?<>>;:<>:====���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¹�����������������������tT,/z���wzphaZRQUQPHFDEDA;;5-ANA>81//+'&&(+/(+,'!  %(58)'-/-0.)$$
+'' '9/*(%"$<*!%J�*$%/7M[q��������ĸ�����~qigq|����������������qpwulbYQ\pu|�������{vrvw}��������������������������}y}z������������������������������������������������������������������������������������������������������������������������������������������������������~~}}~~~~}~~~������������������������������~�}}}{}{}|~~{}~}||||{zyz{xyz{xxyxwwxxxyzzzzywxwxwxyzyyyxzzxyz{yzzzyy||}}{{{{~|}}}~~~~~}}}~~~z|||{|~}|}||{~|{}}{{}|||}}}||||||{{{}}|}|{|||}||z{zy{{zy{{z{{{|�������~}|{{yxz��yxxwzxxxxxxzz||zxy|{{|{{{|~|{||{zzzyy{}yzyzxxx{zyvyyyyy{}zz|xvwvuurqqsqooqrqqo]���~vs|y|}}z||z|{}{wlUPLOOPQOJILKLMMIKMLIILKKKKHKGIJIHHHHIHGGGEHHFGGGEFFEFFHFEEGIHGHGDFFCCDCBCEAAFFDFGEDDFECEDCCCEFBBDCDDACBDCCABBBEHADFFAADCCDC@AB?>@B>?BAA@<;<>;;<::<=;=���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¸�����������������������vS,0s���wtnfb\RQRQMHEDDDC;;7+:@=?=40.-*&&(,-()*&" $$&,+#/)+),+)%&$!'+$*3,$!"#!! 
 (7>KTUTQ0*:NXbmsw|WREBFH?KLIKNTZ`hv���������{z~zzskjikkmp����������~{}{wurpkiehmmty|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~|~{{{|�}~�������������}������������~~~~}~}~~~~}||zz|{|{}}{}~||||{z|||||||z{zxz{{wyz{{{|}}|z{||{|{{||}}|}{{|}|{}~}}~~~}~~}~~~�~����}~~}~��~}��~~~}~~~��~~}}~}||}~}~~~~~~~~|~~}}~}}}}}}~����������~|~||zyx��|yz{{y{{zzy{~}{{z|||{{||{z{zyzyyyxyxzxz}yuyvv}}wxvtvwronqpkpomojjkkjjhhgfeedgm|���|mwzzz||}}z|}}|zvhTNLMLNOMKILJLMJHJKIGHJIHIJFGIHKHGFHIHFGHGGFGFGGHGGGEFGFEFFFEHJJHGGCCECBCDBCDDDDEECEDEFDBCCCADEBACBBB@@ACA@?B@CCDBEEFCCEDCBBABBBBBB@@@@B?=;;;;=:;<;;:;���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɸ�����������������������uT/3{��tjca\RPOPLGBAB@>:<6".376852/++)&%&'&$&('&"!"*"7*"'56<4$%$,*(#$'"#1APWT?*2,!
'/6GWedY<85/-,))-1/.168CS]mrz���������kRJJE?CIIPOSX]iq{������������}y}����}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}~�����������������������}~~��~~~~~~~}|{}}}~~~�~~~�����~��~~}~�~}}~~}}~~}~�~}~~}|}|~~~}�}||}|}~~|||~}~~~~~~}~�����������������������~������~~}~����~~�}~~}~}~�~�����~|~�|xyxyzwvyzzzz�~~|{{xwxz}z}}{zvvwztvssstqmkmomjihf`_b`ZYTTVQPNPNLMMHOLGEGIEL]bbb`bbbbccdu���wz}styxz|z}|z{}|~~xkPNLJJLLJJIJHMMHHJLIIHIHHHIGGHGHGIHIGGGGGIHFGIHFIIGEGHIHCEEEDGHEEGECCDCBCB@@B@?BDCCCB@CCBADECEDDBCCBCABDCCABA@DCBEDCDCACEC?@BA@BDA?@??AB@>=;9;<;<<:9;<���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������й�����������������������uX3<q����wjd`^USNGGB@969<8@>7-5=;8;97/.)/,..0..1459ABGHJQQ]j]MLIKB==@<3699DCBMS]du{}�|paWLJB6..()+&-*+,+169?@CBOW]jx�����zmib[[\^fhjor{�����������~~z{{}z|}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~}������������������������������������������~���������������������~~}~}~��~}}{}~~}}�~|}||{{|}{{|{z{|{{||||zz|yxy{|}}{{{~}~~|}}}}~~~~~~~~}~��~��~|~}}~}{{}}}~|{|}~}~}|}|{{z|}{yy{yz{|}{{{yvuux~tprsqtttqqookjeiigfhb`]ZYV[WZVSSTTNPMKHDED@B>==>?:9998:<=;:;;:==?@AB@ABCAJ[db`aedcgijn|���pt}xoz{|{{}~{z{}}|wiROLKMLLKLJLLLJIIIIKJGHGGEGGGGGHFFHIFEEEEFFFEFFEGIECDGEDC
EEEGGFEEGDDCDBCCBAABCBCEEECCCCCBBCCDDCDDBB@@ACBB@A@?ACAADCAEBACC@>@B@@BC>=>>?@@?<;:::9;<9:99;���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ι�����������������������xZ7#k�~zhiiga^[STRKRRVZRU]_bclvrtsspokokmpolnqtu{{��������������������������������������������~���|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~}}}}~�~}~}||{z|{}{|}|||{|}}|}||y{{}||{|||}z|||{|~~}~}|}}|}|}||}|}||~}|{{}}~~||{}}{}||{zz{|}~yyzzzywvvtuvvvvuswturqpqonnppmmoolllkhec_`[]^\\^`YQLB=B?9(&7@C>><?<8DDIGDFFC>AB@=;>>;@<<:;<:;:989877::9:<>>A@@@?BAEL`hdddghhjjlt����zvz}swz|{|}|z{}}|uiQKHIKKJKLHKKHEGIGFJJFFGHEEFGEDHFFHGEFDFEEFFEEEDGGGCDGFDFHGEGGGEEFDCDDBDBBCBABB@CDEBBCCEEDAADDCCCABBA?BBA@@@AABACBCCCBDCA?@A?@BCC@>=>@?@><;:;::=>;::8:���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɸ�����������������������u\:)����}~{||�~}��~�~|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}|}{{|~}~}}}~|||}{|||}}~|{|{{|zzy{zz{{zzz{zzx||{{{|}��~}~}~|{|y{yyz{{zwzwvwxwyuusuurtsroqnnnkplnnmmlgfgkhihiegifbeff`ab_^fe_[df]Z`bXXWTRPTWVUUXRGC>6::3!4<>>>=>98DJEEEECA>==?<=<>>>;<<;9:98878876999<<=??=?@A??DNekghhiijijlr����vt|zty|}}}}}xzzvhTLJIKJHIIFIHGGHHGIJGDFEFFFFEDEFFGGDCFFFEEFEEECEHEFGEDCDEDECBCFDBCABAABCAABAA@BACDDBBCACDCBBDECBC@BBA?ABBA?ABAADC@CC@ACC@@A@?@A>??==??;>?=;::9:><:::7;���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ÿ�����������������������u_<%����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����~���~||~}}�~}�~��~~}||}}}}}}}}||{{|}}|}}}}���~~~}|~{|}yyw{}xwvvvwuvtpqtqqprrqnmpoklonkmoljlghdikfdehe^cffddf_bbd`\be^`_g_dfgabad``cc_^^^[ga_[_c\^__XWXTRSVVTSUYRFA:7771
39<<=;=;6BJDEEE@?>><<=<;==99:;87776766645778;::>>>>A@==DOfkhkjkmommnu�����|v||t||}~}}}zz~~zuePLKJJILJHGJHHIGGIIFHEFEEEEDDFEFFGECEDDDDDEDDBDFCADECBCEDCDBCCEEDEBCCCBCABBBCBCCDCCBACDCBBDCCECAC@AA@?>@A@?AB??DA@DB?BCB>?@@@@A@?>>?@>;=<;9;:79<98:::<���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¸�����������������������x`?)��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~~�����~���~~����~}}�}~�~|~~������}}��}~~}�}|xyzxzxwwvwxyy~�||||y{ywxtrqspnnlokooooooqooponomkjlhhjgadihgfdihfghhefhgdfcccagfe_f_b`cbbacadbaca_b`aadcebcddaadb`ba_[_bb_`__`_b\YUUURPPUTRSSXPE?95741
1::;;;<96AGCCDDA@@?<;<;:=;:9::6577777767867:;;==><=@><>COflknkllpppqy����}~xz|w~}{}}~}|{~~zt_MKKGIKJGFFFEFGDDFECCCDBCCDBCEDEDDDECABCCEFECCDFDCDCBEEECDGDEDCCDEA@AD@@BB@AAACCDB@A?BD@>@B@BCBBBA?@@>?AA><>CABA@BDAABB@AAAAB?@@?>>>><<<;:99:9::8888:9���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȼ�����������������������y^A)������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~���~���~~�������~}�������������~~������~|{z~wswvspqnqrvmrqqqqpttvupsvvvvvuuvtuvtrsrqrrqqprsokpppknnnnnllknkkjlkmkiikjjlojiljfggfghihfdeccded`cicaed_bcc\^`c^^ac`^_d\`dd]`bb^_dc^deb\[^][ab\Z_]]^]bZWQRRSRRSSTUTWND>76632
29;<9::75EHHGCDCB??><;<=<9<<:9888976677766688;<:<<<=<=@@Oknlollmqrqnv����ouxw|u||}~~|w~{u^LHFEHJHEFFFFEGEDDEDCDDCCDCDEFGFDEDFCABCCEGEDDDEEDBADEECDBFCCDDACEBAAB?@@>=?A?AAA@BB@CB??BBACCCCDBACC?@AA?@>AAB?ACBABB??@A>AA=>?<<<=;<=<8898::877867:7���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϻ�����������������������zbD&�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������������������|�������������������������������������������~|{~}~��{}{|{vyxvuuxuxuvotxwwwwuqmnonmoqpspnlprvwtptpwrtsttsssvtruvutttrssrjuvurqqomprqkoolgoqoimnmfmlkfhjlijmjfhiggjkjddigacffedhfcbebddib_\e__dfV[bc[_bc[]a_\^]f__bc\_`c`_ca]fbb\Z\^\^^^[^]\]\^^[SSUSQRTTRQQVOC>9731+
08;;<:744BEHICBC@>>><::><9;;76776644556436555897:;;:;<?BPmpnmlllqstsw����srwv{yz{{}�{y}}zs^OIEEHFEDGGCEEGFDBDEFEDDCBCECDFFDCEEDCCBBDECBBBBED@ABCDBBCCBBBDCDCBBA@@BA??>>@A?AAAA@CBAABAACBCCB@@B?>@?>@A???@?@B@@A@>?@<=?>;;>=<;;<<=:98889987676777���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Լ�����������������������|cG)z��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~�~~~{{yyz{zxxzvvyywu{zuzvwwzxwvyusrussssswwxvvwy{yzuzxzw|x{yzyyvzxyz{ux{|vux{xu{}usv|sus{tquwwstxuortpptwsjowsmprqjprognoojoqlgnokflhlhjilggjiihhhijjlfehdffffeceeecfcgggadaedccc^aab\bbb__\___^babaabc]bbb``ad^a^\Z]]]Y``_]_`[\_]TTURQQSUURSVNC>:942*
39=:;:765AGEDBCC@===<;;<<;<:899866233676557689:9;;;;=<?ASklnmmnqstsrw����|uvxx}z{{}~~�|z}}{p_JHFGHEDFEBCDEDEDCDDCBBDBACBACCDBDFCBDB@BCCDCBCCDCCDDCCBCBBCDBDDCBBABA@?=?@????>@A?=BCA@@>>AA?@CB<AA>=?>=??CA>@BA@>@@>?@>==>>><;;<:<><9797788677766754���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̼�����������������������wcE.���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~�~�~~|~yy|{{||zxzxvsuxxqsuswvuzzyy{xwx{ywz|}zz~�w~��{}��~z}�}{}yx}~{}}|xz{{||}yx{~yz{�{zz|xvwyutz|sw|ztwxysswxvuvzqqryqovwuprurlqsrnswqmotqlrqroonlilnqpnmkmonmkkioljfikjjilgdiifghgdcefefffcbbfaacdcbdf``bb`aaa^^^`]]abZ]_`Z\_a]_bbW`ba]b`aY^]][Z^\Y``ZZ`^Z\_]UTRQQSTUSSSSLE@;853*
29<=<:866DKECBCA>><<>?>=<<;;:;9887544687565787:<;9:;=:?DSnrrqorsusrrz����xuyx}y{{}}z}}|}{xo_IDFGGEFFDCFFDEDBDDCBCCCBBCDCDCC@DDA@BAABCABBA@BBBBCDD@@BAACCABC@BAA>@@???=>@A?=>CA@?@??A?@?@BAABAA@@@@>@@>?A>@A@==><==><<<;=>=:<<99<:7675555455456534���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƚ�����������������������ycD,��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z|}||}z|}}|{}}yzyvswsmmlljkkd`^XR_SRUV`hnongmloiillcegiejlokloromqtvpuxxuy{{z}{y|�z}~�y|~�~z�||}}y|}{|}}}{z{}zz}|xzz}x{zywx}xvx{vwz{t|{yvzz{y}yyyxuxvuruvvstvuuvusrrsstqopplnorlprokmmjlomljmpkillidikhcgljfgkd`fieceidbeebcdgbbdcbbacabac^_^b`^^_]_`]Z_]a]``b]]_a_a_a[a^`be^`]a]b^Z\]Y_`[^`\]_^ZVTRQSQUTSPTTMD@<973&
49=>;9974GIBDB@>?><:<><:::879:657775666654345799889;:9=BPrvttrpsuurt{����~ytx{{yz|~�}|~}}zp^JFFEDEGEEDDDCECBDCABA@?BCBCD??@??@AB?@A@>=>??@?@ABAAB?@A?ABBCBABDAAA@?@A?>>A@?ACAAA>>@BABA>@CA?A@>>@>;>?;;=>=>>=>=?====;<><=<::;;;:;98676653445588544���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȼ�����������������������{dH%�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~||{~|�~~{v||wwz{yy|�z}�{}{�zx}��{~�}x��z��w~��z}��yw�{rpmc\]UT\]_^YVQMKJILHJOWcdlljelhgahijeihkhllmimorrostvvyyxwyz~}{x}z}�{���~}|�~||�~~|�||��}{|}}}}}y|{{z}{}z{w{y{uyz{yvz~vuwzxvtvvy{ttxxssvwotuyurrtplrurmnqmhpqngmpljopjglmjhjnjjmkhgljfcikgdghdcdgeffhdhhdccbd_cdcbe`a`c`a`c`]_b_][a`_\b^]]b^_b_[^cd]^_a^bcc[`c_W^`YU\_\]`_[\__VZXTUTQSTUUORTMG@<;:5' 
289:99:75BECEC@=>>=<=<:;;:87985875445545655578889;::99<BTqtuuspstvtsx����wxwyzzx{{~���~}~|ymXCCDBBDECDBAAAC@ACA?@A@@AA@?@>??=>@BB@AAB@?@@@A@ABB@?AA>@@@@@CA@AA=?@=<?>>?A?==@@>?@=>>@?A@>?A>=?>>?@>;>><??>>=@>?>>=<<=<;=<<;9999;9557555655733575351���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƽ�����������������������zeH*{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}~{yxx}||zqrnrtwywzy}}}~}y|}}|t{{{rwvxmkrvllquoiorslloulmtwonw}soy}}tx~�vz|�vv~��|�|��z��y�~�y���|}�tokfc]VWZ^][UQLKKKJJMOR\kjiljiglehfkbijmigkommnsposvwsxzyv|~|v|�z�~�~���~}�����}|~�}|�|{}z{~|||~}zz}�xx|~xy{}vwy~zvy|wuy|vuwyvttzuyyxstuwrtuusstsnrrqomqtonqomoromlnnllmlklmjknlkkmnjjjmgfhfghjgbegfdfhdbdfd`cda]cfb_di__cf_]`c__ad\Z_`\]^`WY^_X[__X[``\\^_\___V_a\Y\\YT\_Y[`][[][S_[WWQPQTSQPSVKFA;:97)!28:998757BGHFBB@?<<<<::;:::998696545455556646686:99788:=Vtuusqrssuqoy����vwwy{y|z{}��}~}xlWIEDCCBBCDCCBCDBCA@@@@@CC?@B@??@>@BB@?@@@????AA?@@?>==?=>@?=?B@?>=;?@>=>?=??<<=??>A@>A?@AA@?@A>?@=>??>?=<=??>>=<>>;<=;;>=;;;:9887885478655445633464330���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƚ�����������������������zeK,v���������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������|�����k\Y\XUTYSMLX\\_]\ZZZQDI`�z���}��~|~��w~�~suuvkmlxokrunjmmqjlmsllptkquxqoxyyuyz~r~{�}y~��z���~��{����~�}��|~�yqnid`]ZXY]`\SQLIIHHJKJR`geikjefhhbgeicfimggkoijpsrqtxtru{zt|~}vz~�w}~�z}��{{}�}~|�}{}�||{z|xy|yw|~|{|{{uz}~yz{{yxy|zyyzxxx{x{xvwwwzxzxwtvstqvquwusppxtmloroqsnknrpilpokmljijlkgimfgjkjghhg_dhcaeieacdcaceb`ada_aea[^eb``b\``bXY^_Z^_cYZ^]Y]^^Y\^^Y[]^Z[]^[\^^Z[^_]c]\[YZ[YY\[\]Z[\YYY_YWURORSRQRUYKF@;9:6* 39;;78845AGGBAB@=:;=98::77::65573444233333545455776577:?Xyvurqqqtussy����vtwvyz}{z|||~}~~xlVHBCDBAABBBA@AAA??@@?>?@>=?B==A@:AA?>@@<??===??>@@>==??>?@??@A@A>>>?=>=>?>?==?>>?@?>@A>?@?@?@>>>>>==;:=;:<>>=<<=><;<=;9<=;98;::77874555554446643455220����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ziP;{��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mTSTRMMNOOPU[]__`^\^WQH\q���~{��}{���v��}t}t|mpnurowprmqiskpltlmoqkvsquyxryyyv|{�w{~�z|��~~���|���y���{~�{{zxrmfb^XY[\^]ZTNLKIJHGJNS`nhehjhfgghbggjiijlilmpnqqwuuuwxxwyz{}||y~|�z~w�z��z~�}��~}�}{~~~}yy|{}}~z{x�}}{|y{|{x{{�vwz{wxx|vux{uv{stuysrqwqptvrqrunlmsplnplknqihmnkimnhfgkhegighjjhfghgbcedeehfadfbabeccaccbabaccdbb`^_d]`]c\_`a_`[]\\[^W[]^X]]\Z\][Y\]^Z\_^]_ac\\\\V[[[Z]\YW[]ZZ^^WURRQRSSSSUVLDA:9:5)"6;=:99845AGEBABA?;;=::;879:966676643245344557656787898<BYzxwsrqrttrry����wqvuxz{zz{{|~}voQDADDBBA@?=?A>?@@=??><<==<?>;=AA>A?<=@A>@?==??@?@??A@>?=??>?>==?>>>>=<
=<;<<;>@>>=?==>>=>><>?>;?@@?>><==>=:<>>;<?=<><:;=;:97689867654533543442135235210���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿�����������������������~hSGp�����������������������������������������������������������������������������������������������������������������|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������|[NRNFJNMJORX^^__]^[WREMt~��~~�����|��~y|t}rwrsswtnntulnksjjkqjjjsojrwsiw{{rw{rw|�qy��|z���|��z��z�zxvsmf_ZVZY[^^WSLIHHHHGKJXiifeghggihdhfigikljlmppootrstyvuw|yxy}z~~�y���y�~��~�~{~�~�|}�~|z�|w}�{v{}w|}}yz|�yz}}uxxztxz}vuzyuvx{tvxztww|sqqwsqotosrtnpqsjkmphlnnllonmoljmoplkjikjggfejgffggfgdbacfcceicbbdac`ca`cd_^aa^\\a_\]b[_`bX\``Z\\^YZ]_VZ[]T\^]VY^\UZ__YZb`Y]_]W\ZYRZ\WU\\YXX]VW[\WVSRPORRNPTSLB?;973&"4:=:88639DFCDDA?A=;;;;;9896767668730023454456545667875:?\zywvsruutqpx����|uwyxxyxz{}}}}{wmOCBDBBAA???AA?@C@?>>==>=>==>????>?<==<<<=>=;<=>=;<==>>;<><>=<=====>?=<>>=<=;>==?==<>?=<=?>>???@@@?>==<<>=9;<<:;=9;<979;:::9767655457534642342122032/00���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƽ�����������������������~gSSb{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`OQPMLJHILOVZ\]]
^^\[VJRh�������}���x�qtv}mppvolpsrgqgseknmfghqnjvurn{zxtwx}t{}}y��||����}�~�~���w�zsqlc_YTWZ[_\TPLJIHGEHHTYbokaglhagkibgjldimlhkmpoopsqpswttw}wuz~}x~�y��{��}~~�|{|�|}{|{|�}yy~|z}|yy~|w}}}u|z~y}y}xzx|x|z|zxvyxywyt{uwvwwxvtpttsouqwrrqvrqmnomnoonkpnjnnjgkmjgilgeljgfgmfefie`fe`^bec`dd`_cb`aab^^bc\]bbY^ab]\\a\_^_X\\^[[Z[YZ[ZV\Y\V_]\WZ[[W[[_\W^_[\\\X[ZYZ[\XZ\ZYYX\[][[WVTSRSQQRQRQGDA;961""28::85639GD?BC==@=:9<;89;9687764563/002533454334456756:B^zzvutssusppx����{xvyxy{xz{|}}~|wjNCACA@B@=<?>>>=>>=;:<<<;=?=<<<;<;:9==;<<;<;;;<<;;=;:<<>>;;===><;<>>>=<><<;<;;:<==?===<==<>?<=?<<==;<<;;<;:9:;89:9:;9889;:89897755566334442101100111/.0���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������û�����������������������|eXagW^���������p{�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rVQPJIJKJJMRV]^^`_^^ZQJ[y��~��{~���v�{�sus}lqowrnsovhrfqfnkifnjonlqmosyrsvxuzyyx}�z|�~x���{���w��xuspjb]WV[[[]^XQNLKHEGIKQ\cfkjbghhafffbihhakjmklkqqsqspstxvwyywzz}~{y}�{�|�~�~��||{�z}|�~{}}~|~~{}|{y{||}}|~{�}~z}~}y|y{z}y}xw{|suw{vuvysruwpouwnorspopsoilpljoqljjnjgmnghijgfhjffikfgcibfedda``b]bbabcba``aab`a_`ac]`b`[ba_][[]Z`Z\]^[\Z]ZY[ZXYZ_XZ\\YZZ[WZ\]Z_]YZ]_ZZ[][VY\YYZ]YVYYVVZ]XYVUTRQQPPSPRRFC?:760#%187776628FD@C@>?>::<<;9:988:87543543223531455542466648<?]yxutsqrturpw����yztxvzzxzz{}|~~~{uhG?@@???>===;<>==<:9;;:;<==<=:<<=<;<<<<<;;<==;<<<==;<=<=<:;<<==;<<=<<;9<;;9::;<:<;==;<>===>=<<>==?=<==<=<;99:;9;99:98;:8:85667454235333322110/1//1./000�����������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ļ�����������������������|gWdmOF�������Wu�����ķ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������q\OLJILKJMLNSY_``]^][XRUx{���{����|�z�{yt|uyttuuuotrqjnkrghhqhinolgpuujqwxpsw|ru|�zx|��p��w}��y�~wrnha\UTYZ[^_YRMKLJIHJQYkjlhfiffefgfeghjhkkpinlomrsuqsswwwz|zvx~y|�{~��|���|~��~|~�|}��|}}yy{|y|~{z|}z|~}x{{}z{}~wwwzxyz|vtyzruwzuuuxstuvontuopqroppqmjlqkillhikjjjkkikijjjgffhgefhebehb`edcabba]aea_bc`_aa^^_`^_^``_\`\\^\Y\\`WW\\WXZ^VVZ[WZ[[UXZZTWZXTZ\XU_b[X]_XZZ][UY]UVZ]XWXXUVYYVZURPMNPPQRQSRGA>9860#%398556529DDDC???=;<<;:;<87:;76443234233233436642354226:;]yxwvtqrsttsy���{xywwxxzxyz{{|~}ytiICA>??<=??==<==>=;;<>=<<<;;;;===::<<;<<::<<<<::<=<;==;:9<<<==<=>=<;;<;;:;;;;<<:=<;<<>=<>A><=>>?@><<<;=>;:7689998:9778669966653531244431221121113100122���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ϳ�����������������������{iSblND���������\x��������������Ƴ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|dWQMLMJJLKKNTY]]]^^]_VS`��}��{���{}|{ptx}tmuxvgqrsfjmrgihoiikjkloookqrvqstxsz{||}{�|y�y}|�{uslea[UWXY\]_ZQLLJHGGNV^bjikgciif`dhfcfkjcjjldlqpknsvnsuxuu{}{xx~{v}��z}��z}��w{
�x~{|~�z{}zz{}yxy~|uz~|x{|y{|�x|yxyyyw{w{yyxywvvwuuuxvvswvxutrtqpnspopronorljjkhgjnhgjnifejgfhhdefhd`dgb`cd]]ca^^aa][bb^^bb^^_a\[]^]^^][][^Y[\]Y]\`SXZ\WZ[YTWXYXZZYVYYYXZXYWZYVY^\ZU[\YYX[[XV_WXYYWYXXWXYZX[UPNNMNOPRRUSIA>:760##499666638BCDD>??;;<:99897688554323111210212133225544659Bazyywutstttty���wswvwywyxxz{xz~�|yshKA==>>;<<<;<:::<:8:999;99::::;<:9;;:<=<<9:;<;::<;:;==:9;;:<<<<<<;;<99;;:99::::::;;:=>;;<>===<=>>;;:99;;997799:9899887777655444444324310212100/10//0.-1����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{fRdkHC���������Xw���ʿ������º�˻����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wi]RLKJKJKIIKLTZ\]^`^]WUct|���{���{zy}s}tyupvxwnqqskokmjkijimifnnmjknqmorvquwzuw{{y}{w||y}~tpj^^[SVYYZ_^[TQMKKJMOY`khiikgghiebceedhhfdlknippnmrtvsvvxvz|}zzz|y|��~��v~��y~~�y�~{�~�|~|}{~|}{{|zz}||}}|{|y}}�w}{~zyy}vxyzuwywtvxwstvwsuwvonqtmjqtkknrlimohgijefjkegglhfchgcdfcgeffbbfc`d``aa^_`a__`ba_ac_]__`^]]]]_]][^[b][X]Y^[\WZWYY[XZY[XZXZX[YXW\ZXWYZYXX_]WX][Y[ZXWXYVWVYVVY[TV[\WX[ZUTROPNNOPQTPF@=865/"&499865428BCCA?<><;<;:989888876444431321/002233225435659D`zzxxwtsvtrrx���|qvwwywzyxyzxz}}zyscE><<<=;;<<;;:;;;:9<99::9;<:8:;::;<99;<<;99;<;::<:::;:98:99;;:;:999:79;;99989:9:<;<;=?=><<>?>>?==;;::;:99<999::998886467555433233420120110000/00////../����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������«����������������������{hUdnLC���������]s���ȿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pYPPMLIJJIIHLRY]\````^YXq�~�z�z|z�vux{vstyslolthhlpjgkpa_lmddjpqhhpumpvyquvzvx|zw||~w|usng[YZXZYY]]^YUTRNLLOV[gilfkgfhlfdbgb`gijhijojmmrmnousuvzvuv|{|~zy~��z���}�~��z��{}��|}~�||~z}~�{z|~zy|~{y{}|w{~|vx{�xyzxrvxwsvxtqquwrsvuppssonoqkimnkjlqjhllfjhjiijhgjhjjheghiedfgccfebacda_be^\_f]\`b_^``]Y^_\\^^Z]\]Z[Z]Y[[[QXZ[YZY[UYXYTUZZUUXZUYZXSXYXTSWWUX^ZUW^ZUXZXUXZUTTVSUWUSWWWUYZYUSPMMMPPPPPNE>:544- &39:753429CBA@?<=<;;;::888878942331211//0101111112145438Bayzywvustsrrz����vuxwyyzwvwyxyzzzwqZ>==;:==;;==;:<:99::9::999:879989:8899989889979898:89::98::::9<<;;<::;:;:899998;=;;>><<>;<<<;=>;;;989:879:8888::8668534656544324333232111010001--////0.���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ�����������������������|iWdoKK���������^v�����­�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`PPNKIJLJGGLOSV[\_a_^\\gx���z���zxx�ony|umtuukpkshjkphfkndelhdejnnfjnrlrwuqxtyzxvxzyzx|yxrmd_ZVX\[Z\`^YSTSOMOSZ_fohihmddhkeacjfbimkgnmmhknqmpsxqux|suz�vvz~zx���w~��|���{~~�u��y|z{|�zz~}w|}�zz|ww|}zy{|{vz~{wyz~wzxxtwwwvwvtqttxruuursprpqnnmnlllmlmjllkhlhgfiefgjddijc`dgbacea^ce`^^`_]]^]]__Z[]b\\]^[X^\Z\\]Y\[[W[\ZYZ[ZTWWYY\UXTXWXUWWWTWWXR
XXWWZVYTUVVVYYXWWZXWWYXVXZUUTTTWUSTVVXZYVVUSPLLMNPOORMD?;765,&477433109BA@>?==<:::9:8975665211211/..00/./120011133239@gyxuswuqttrr{���ruzxxyyvyxzwxxxvuq\?<<98<=::;;9:;989979:97899778878:678889989<:89:9;;89;:::;<:;;;99<:8;<889869:879;99<98;=9999:<;89879978::88789:97767644577544324213310110230111.././0-+���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ�����������������������|jVdkQQ���������[s�����Į�����ý���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}oZOOKLMJKGHIJNPRVZ_a`]]_x��x��||v}v{zyvtvrunslqkmhlmjiifkjdijlgklmjnotqpt{suyzxswzytvtrld^]ZZZ\[]``YURRNQSY[`gjmgiikccejc]fifbimkfmlkjmoqnrsvtwyyvz{~wz|}z|}y���~�}~|x�~|�}}|}{|z}}|x{~z}{}y|}}{{{{}{|||z|x{y{vxwyvwxztrtwsuqtrrrsmnqqmjmnlklmghknhfiibbafcdggddfg_`bfcbcgdb`b_^]_``^]]`_]^^[\\][\[`^[[\\^[][[[][[Y\[YV\VYZZSVV[VXWWUXX[VWVXVVXVRUVUTWXXTWXVTVXWWWYVVVWSSSWTTWYVYZXTUTQLHINNNOMNKB==944+
&46542210<CAB???>;;:;:;;<98974222311///10/10011112200327Bi{ywturotstu}���utyxvyzyxyxvuwwuskY<;;:8;;;;;8::878998;;9899789897:998779:88;::99::;;9:;7678:88::9::8999787868788:;;<:9<<;:;;:;<;;;78:88:;9988799556554434532134210003//10./0.//0//.-/.++���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ�����������������������~jSemSS���������^s�ľſƲ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������n\URNLLKHGGHIHHJNY\a_b[Zp���z���wy{�ytz{ultstklmreilqgbkphahlkcinmfkoqjnszkovyuqtwyqrrlc_\ZZ[ZZ[a^WVUURRY\^bgmklhmkkeedidceffhkhhjljkmnlortruvxtux|{zz|z|��}~��~���|~�y}~y~�|}}�zz|�zy{�{{{}{yz~}yy{{y}|w{~|wyxzsxxztsv{sqtunppupnqrkknokjklklmjgjilhhgeegdfefceefdebdcddddccc^[_`]Z_b\[\aXW^`ZY^][Z[^YX[[VY[]XXY[UWWZVVXYTUXXQUVVQUXVRVXVRVWWSTVUQUVVTVWVRTWSPVXWRTWSTSUQWSVTTUXVUTUTTSQKBFOOONMPMC=;624*'3555331/=CBB?=><89:;899:779632320/0..//.-/0//1101001225Fl|xvsqqosqrt|���~yuwwwxyzwxvxvvwvpnY?<;9;<<;<99:9889999:9978867777768:75677678777779::99997877789:::89:9:9988888:<;;<;:<<;<<;::<:9:;8887677689669845655544552232232200322200//00-/10./.,*+�������������������������������������������������������������������������������������������������������������Ӽ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������­����������������������mTaiXR���������_o���ʾȷ�����������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������~|bZWSPMIGIHFECFHMNT[`^ZWi��y~��rux}usy{sfvutfilqdhmqddllhbfgifecifkkqloovovsvtrwuwqoh`]]][UX^^]_YRTRTVU]_`jmqfklkgfefdefgddikmdgjlilopjosvqsvynty}yyz~xyz��y{�x��x|�u{~}w~~w||�xy{ux{�y|yyyxz}zxwz|y{}{yz{{vwyzrvwxtttusrtqosopqqnonoikknjlmojfipikijfegicceg^adf`bac^_ac^\aa\[__YXZ`YYY^YX[\UYZZYXZ\YTXXUYZZUWYZUVWYUUSWSVUZPVWURTUUUUVTTVVUTUUUUVSTWTVVVUVUVVVVRSUTUSURXSTTUTUWTRVUURQJBFNMMKMPLD<:411)(1554422/9A@@?>><8998679966754331//0.././11..01112112339Kn{yurrrmsrss{���|xtuvvyyywwxwvwwvqnX?99:;:9898987789777656566664466678755766777767776789986866899:977897799768899:8::98998<:87899::8798777557756884456742433442122111/012100/-/.-/0.-/,+**�������������������������������������������������������������������������������������������������������������˼�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ư�����������������������lUbiZT�~�������`m���Ͼº�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������ze][VOMHIGEECEHKIKRXZ`_]i|}~z�uxx{vxzztlzsvklhojmnogkjhiffajifbigmemmsnoqwrqtvvspnd`ZY[[[Z\a[XVRQRQV\\^fcnmqbkmjefefdgefdcgkmdikkjoopmqsustvxqz{}|{z~{{z|}��}~��{|{�s|�x�t~|�{|y}y}z�z~yyz|}|{|yw}}{y}}{{{vzwxuwuuwvqrtsonqsinqpikmmeekoggjjfdejghhf``de]add]aac_b`bY^`a[^b][]^\YZU\YZX\[[YYY\ZXXYWXWXWXXYWYYXVVWVUVVVRWWWSWWWSVWVTUUTTSWURTUSQWUTQSVSRSUQRVVTQUVQPQUQRRSOQSTRSVVQSTVXRF@FNLMOOPJB<83351"
(2661201-<DB@?@=;::778:;966666531.../10..01/./1112101438Im~ztprpmsrst{���{xuuvvxyywvxwxxvtniW>99;;:97999:88977985566777755666765555555677467755677657567699877897886667898999999899<88879::97789996666467775446633324421122011/0121//..//.-.-,,)*)'����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȯ�����������������������lTcgZU�}�������^q���ʹ��ö�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|�������trb`XSKGGEEEDEGFGIOU]`^^Zv�x�{{wxx�wsvxynuntenmrhjipgcjld^kjhafhnaglrgjmvqlrqopkb^\ZYZY\]^^ZSPONOUY]`fpmpmlinjhdhdfgicfhhhikkljnmqorqrqwvvuwt{xz|}xz|�|~��}}�|~}|z�z~w�{�||�{}|yzy}x||~vyz{yyz~zu||{wz|zxyvxuwwwstwunpqqkppogmonfllpfeinehhhdeegcfdebcacadbcbdabac__\b\__a_\\^]Z[[WZ\_XZ\_VVZZUXWYVWWXWVWXVUWVTTTUSUVWSURUPTUSOVXURSUQNUVTPRTRQUUQPSUSQSVSRUTQQUULPRSNPQTORRSQTUYQQU\ivT7BMMMOOPJ@<735IbR0/474010.,=CA>>@::;:67877986364210.+-/0--.....-.///./0116Fp{wsprpmqqqty���}xtuvvwxvvuwuvttrmfV?9::9:989887888677655744556675545344355434643665466556655666998788989668868988988::8879888688778878985676455566566534445433221111221120./0/00/-+))(*&#����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������°�����������������������kT`i\R{��������ao���ķƹ����������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������rd]ZSJHEEGECCEFFILRZac[^kr�y}stx}souxvd}lzgmnrbhepdclmcaihe\gfkbfjlgjkwolppqkd][ZY[\]_a[WQOLLMOY]elhoqsfiklefilecjnfehmjejnkimppmsstpvwzruv{wxy}sx{�y~���x|��y{{}w}z}x�}}wz}�vzz~yzz|x||}yzy{xyz~{z{z|yy{ywxvxtvwvststopppmrmpkonmjplnjkililgfgieddj`edd`bbd`bce_^`a^\\a[[]`ZYX]\XYZUVY[VWZ[QUYXSVWWSVWYVTUVUTUVTTRSRUTVSSRVPUSOQUTSRURPPSTSSRSSRUTRPSUTSSUSUSSSTSTQSTUSUPUPRQWUUUWSSU^t��`HLONNLMG?<527Phs]A5630//..:A>>>?;;:9787669852631110////,..--./...-.../017Bmsqprromrppsu���yvttvvwwuuuuuvqrqmgU>8977877776687546644484354334443334334534454465545557875444477767666546755576797688799:8998786797688666867655555743543333112//21010.//--.....-+)((''%"���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ž������������������������lR_k\Qy��������an�ſ��͵���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z���~���{{}g_^VIGCFEDDDDEEGKOZ^`ZU^|z}sqwytsvvtj}kwlpmmgpglgkleghfcbbiageheghmjpmmroic^YZ[X\^``YSQMKJLQV^dpsfkpsdhjpfgiibcilgdhlhfklijmnmlsstptr{twuzv{y~z}z~z����|~}~{{z{z�y~x�z{x{{x{}~z|z|y}{|{}xyy|{{zyvz}yxy{xrwwwttutnrrpmnptjjloklnmfhijgggjdbfidbcf[bab`acb]``_]_``]_]_]^\[X[VZZ[YXVXXZWZXXVYYVTWUUUYVWUTTUVVWVUURRRUSTRTQUUWNOTSOPSUQPRQPRVRPSUROPSSQRUQQTXSQUTPRQXVRSVMQOROTUSRUTPPd}���jKKLKJJE?<317Qep{lE620.0/0<@>@??=<::9:878:7435300//..-.,/,++.....-../.004Ccnoqpnmlroosx���ywsvxvwwvxwwvutsomgW;:;9788987689736675555553655334423432443334445443325664343446555546544564567
777557988988899767875674575555224443321421211011/120/0../0.-///0.+())&%$""���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ������������������������nX_lZU|��������_l�ö��ǵ��ú�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���{��|���y�{kc\RJEEDCEDDFEFGFNV_`]W^n{uzvvtzxsst|issvgjovggesjahkgacehacch_cfjfglopga^YY\[\^_\XRNLJKLQX_jlnsljjqhijojjifdijmkhikmlkjjoomosvuvuwsyyxsyz~z}~�}}}�}���~{{~}yw|�w~z�y�{}{}}}{||�w{{|wy{|swy{zxy~ywv{wsy|vovwvnqsrlppnhkmnhglogjkjgggiffegbdeh^dbc\b`bbba^]a`__a`a^_]^__\\\]WXZ\WVX[VWV[WWX[TTSUSTVXSRSRPRVVOOTUPQSUQRRRNSTQKPURMQTRKNUQORVOMRRPLOSPOPTPORUQOSUNQOSQQQOMOOOMSTQPVRPTd�����wTEJJKG?:528M[covoL52/10/;@AA><>=:86766864333101..-,,.-.,-,,,,--,-/-.101=JLYplmlmport|���|xsvvuvvuuyxsttrqlfU<99::89:97786447664442542353332322222210122212233446652343364445556435645445854456898657866769954653554455244442233222111111233110/000/./.00,)'''$"#!!�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oW^l_V���������^k�ž��÷·���Ʋ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�~���z��~zr^WQJEBCD
DEEEFFGKNT^\[WVfr�rnr~wonx{irusdmmoajdpjghjfb`]iaa]f^efihkple\\[[Y]_]]]XQMLKLPU[aionjqsjfonnfjmjhefkimmlgflnlhlsqnnsurttvrvxzvwz|v|y�y~�z~�y{~|yz|y}}~u~w|x~{{v}{|w}y|xyz~uzxyxzx{{wwxxvxzwsxuvqrrsnplmilknklknllhkkhghghedcgdfagad`c_acd][]b]^_a\\]]XY[\YZ[YOVYZRUWWSUTVTTUWQRQUSURROSRRPRRQNPTSPQRROQSROQSPNOSPNQSPKOQQOPSOOQRPPQPQQPRRRQQONQSSTNSUSLNOQPPPSQRTTORXg�������`HJNH>9748BNW`lsqM8111.<??A<:==98885556543222.....,-,.//-+--,--//-01/3;E@A^dilmqqru~���xxtuvvwwtuvwsttqokdP:667767876663455442111322221200233223211112313233454223332243244334322234334655434766677877767445664765544441340221.121/22.121/12..../.-.//,(((&%#!! ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������į�����������������������nU^m_Q���������`o�������ͺ���Ȯ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������}~��~}��{~�ybWOFBBBAEGGFGGGGISX\[YX`yvqtyrqouxpxrpjrlngqlklifefeeZpdc]gci^kpnkc_Z[[ZY\^YYVSMLIILW]`ilrljrqkfmkl`hnmgghlgkmlfhlqkgmsqoutwrtuyvvx{xywzy�w}{�{�y��|}}{y}|~z{z~x�w||~z{w~z{y{wz{|y{xzxxyxz}{zuzzxwvwwttsqossrkllmjmmmdlmkgjjjdbehcabdZ`ab\_^b]^__\[]`[^[\[][\Y\Z[XXZYTWVWQVVWTTRSVTRXUVQUTVRSSURTPRQQRSPPQSPORRPMOOOPQQPOQQPONPNOONOOPNLQROLPPNNORPMOPPPTUMNQPKKOLMNONLSSNLQYk��������kPJB<8547:CKU`itvU820/?B>??<<=;;998767654431//.,-,-,-..-+--+-.-.//--29C?BOVeqnqrqr~���vxtvwwwxttwxttsroldQ;7888887688656434531442342112101310110/0011212013321./1232111322322432476444556666657656666654344456654212220231430.100010.12//02/..-0....,*))'""%"!
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǯ�����������������������lW_l_R���������^l���ƿ��ƹ���ì����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���}������}���z}��{}��|��phUIDB@ADHKIKGEDGJOW[\[VYjzupqwunsuvlnrviomulegngaegfZifa[bbggnqja]\[^\\\ZTRRRPMKKOV\dfkkknopmmklirinmmkjjnjmlkkomllnqpqrtsxvvtxvww{z|vz~�uz~�wz��xx�~y|}}z}�}x|}�w{�z{y|txz{vw|~vwy|xux~uvzzwrwzxsxvtqrrpmpomfklogjjjdkiighfj`befbc_`Za_`]_^`^`_^[`\^]`Z\^`[[[_[]Y[WVWXTUXWUUTXRSVYSVVWPRUVPQSRMSPSPOOONNQPLMQNLMNOLMQNMNQOKKNNLMROMPMKKOQMLPPKLMPLKOQONQSOMMNJMQKLQNLNROMLQYv���������zS53422358>IPYhpt^>11?B<>?<::;:79:76753232---,*+,,***,++,,++++-.+,-2?MEMRI_porsqs~���xzwwwxvwuuvxutrsqmdN:888797668645423443332001/0...0//,-..0../1/..100212000112321232233332345233344456755555334354443324434422331112343102002102200///./--/...-+)(&$!!#!
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ű�����������������������pZ_jcL���������Yl���þ���������û�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������}��}��}���|}z�hUC@@@CGJIKHGEEFHMV\][[Qktmp{xkrtygnr{footfeble_eeh[me`\eehnohaXZ\]^_\[WQPNNMJGJV[bjjqlfnroilnmhmoqlmllkmhokimqjfnrpoqppouvunvxyuzz|w{}vx��t|~�|w��v{}}x|�}vz{}t|}ztzxyr||}wxw�uvu{vvx{uuwwurvyuqwssrrptoqmmjmjnhjiiijfiijdgghbddd^_bb_a_b^]^_[\]_ZZ]^[[[]XX[\XZV[STVVQSWWSRSVNSUVNSUSNQTSPPQPMQPRLNNONONNKMNONMLNMKNNLMPNLLNNNLONNPMLMMMMNNMKMMNLNJPOPNOQMMOMMKNQRKMQNLQPMVy�����������\;0023468<EMWcox_B7>@<>?=;::789:88752231/0.,,,**++++,,,,-,+,/.,.03F`Z^fYbqoqsqq����xzxxxuuwsvvwutrrqneJ98655733455553011112/001..0../0/.-...0/001/./000210//10020/000011232/1430112334545334543102546534542133233213/242//0/.01./0/./0/...---,-*)((&#"""# 
"�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tY`khI���������Wl�ĵ��Ź�������ɶ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��}���}|~�~x���p|�roRDB@ACHJLLKIGFGILSY^]WP_nsuvmrjxmrp|lrpqkmblfjjembneb`mllnd^Y[[X\_[YVRNKMMIHLT]bafjohckrnflomgppqllkljjdojjnokmrroopnrtutvuyxyuyuzu}|~t}�uz�{z~�z{}~yz~yz{|u|}{{x}y~{zw{tw{v{x|zywwvutxwvtutqrusnrnpljklejjkffihdgikbagf^aadX_ba\^`bY]\]X\]]XZ\^ZZY[S\[ZVXWZTUTWTVVURURYOTSURVSSRVTRRURRPSOQQOOPOQJLMPLNOMMNOIKLNOMLOOLLMKJMNLKMNIJMNJHLMLMKOILOOKLONKLLJFKOMJLOMLNPMY������������kB1123568;@HV]nshLBA??<:<;879877865442001/,,,)+,,*+,+*+.,+,,,--/4Jecjolmsqqrqs����wzyxwvwwtuwxussrpoeK;96677565665332102320021..0./0//00/../.-//-////00111113442131/0233421442122200220,.,,,+)&"$+0342243001//23/.0.02/-././10/00...0/////-+++)((&$! !!!
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ������������������������tX^kbG��������|Sh�¹��ʹ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������������z�����������{���x���y|��~v~�|q}~�xylYH@ABFILMIIJJHGFHNSY]YTYintwtgvwvivrvpnnphhimh^ihjbbdmojc]WV[]\][VTSPLHKKJPV[beciihfhklmjmomhspqppjonmhlkokmqrmprtsruwxquwwty{{vz}�x{y�~}x�}{}zv||}{}|{z{}�y~}~txx{wvvePmx~tux|wvwxtssyuptvqmpupjmnlihjkfhhjhfifacdfaadeZaabX`__]^_^Y^]_X[[[\]Z[[]ZZW\YYXWVXSUQSUVTQTVRSQVPPSUMOPQNPQRMNPQLNPNLMMMGLKMJJKKJLPJHLMLJKNLGIKIIMMJHKKHIKMKIMMHKKLJMNLJMLLKKLIGLMKKKLLONQO_��������������yJ2332668:?IQZirmSCA=89=:66976674552/...-+))(*++))**)*+***++,-.5Njhotrqrrsrrs���~xzxxvsvwurwwustrpmfI;66665555552022101100/10,-.-..,/00./..00120024555979;89<9731,+*,-++)(($&$"!"" "(,/132112/011///00//00.//./00../-...../++*)(($      
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿�������������������������oY_naE��������xPb�¼��ǻ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���������}�����z�~�}���{{�~}wz{}qvaH@@BFLLGHIKIFFGGDMRVYWWbrysbto~fsrvompokiiffcnkjcgkmh`\XVY[]\WRPOOMJIILOW^ecdfkeefoigmpnhmpsmostknppjklthosvmnqtppvutksvwpx{{vy{�w{{~y|}�x}{}zx~yv|}|x{|{uxy|v{yzpxx�tvpYDQp~ttvzstuyqrrxusuyophPZjmokhkikjlgiikhddhbeccabbhaa]c]^__]\Y^WZ[]WY[\SX[[SWXXQWWWQUUUMQTUNOSSPPPRORQRPQOOOPLMNMMOMOOLJLMLJMKJIKJIJJLKKKMMLLLJIJKIJKLIJJHJMJJJJIKJKJNLOLMLNKJMKHHHKIJKKJMQMNSj����������������Z=33557:;?FOYeooWD>:;;:8788646455310..-,***,*)))**--+++)*+-..7Rnkqspstsssst���zw{wvvuvwtruwuusqqneD94465445332101110000///0-,+++,+,-.-./.120/,*//,,+(&'(%$$#"!
&.0/02/00//0..0/,-*&%-0.-0/-./,+-.-,,+**)(%  ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������İ�����������������������tV\nbF��������zVf�ÿǼ�������Ĳ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���~���y���}���vz~�x|z�|x{}vzx}qystJ>@BFIJIHJLJIFEDEKQVXXX_svj{hjuiumqommnielojehillc_[VXZ\_[VRNMNLKGJMRW\dg_bhk_afphfjpnhlnqioorkmlsklmrfoqvomptppvtxvvwxszx|y}{~y�x�{}|�w|{}z{�|yz|{x}yzxyv{z}y{xyv|xxiYSIRyzwuxvwssryrstuqqnrZ;Gmomlihfiiiegihcbgiabdc[`daW\^aX_`_Z[V]ZYZZUWY[PZYXUWWVT[YYSUSUSVTTSSRTQQPQPRQQRRPNOQLMPOKKLPNKLNILOMJKJKIIJHHIKIJKKIHHLJHHIIHHKFFIOIGHJEHKKHIKOIGKMIHMLFGHGFHHIILMKLVn�����������������hE366668<?EKVbns[F@>9:9878856653233/..,+*)***+)*++**+*)*+++-6Roprrptststsu���xwxwwvwwwtsuvutqppogE84676456552122112100//...-)*#%-.//.,./...---$*-../---.,//++**)'#  
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������г�����������������������uV\kaA��������vYg���ξ��̽���ï��®������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��������}���y���x}��{��w���y{z�||z}|{{}zs{v�v~t~qT?=CDHLJJJKLIFEFHJMSYZV]nt|lutyinqxnkpsf`lpkcgnlb[WUW[]][URMNLLJJJPSVZ^bd_cdgbgdihhinrnmnslrlokolrmnnqouruusquzvutxxutvx}u~~~z|y�z}}z}|�z}|�}wz~~w|}�y{}|w{}|vyyzvwyz`VUSIXr|stuzpqruppsuommiNGBQfghjhdfhebffeacegbbdaY`_aY_]\X^\[X[X][YXXX[V[VXVUWYXXX[TVWUPRSVOOQRNQQPMOSQMLPPLMMNJKMNJJMMKJLKCFIHFIJHFKJEGHIFFGGEFHJHFHHHGFFDEFKFFGIFIJJFHHHFHKJGIHIEGHGHHHIKLJJLUs������������������vG773679;?FMUcpweJ<898778756552121.-,+)))***+))*,+*+**++*+.:Vqssqopssrrqw���|wxvxwxvutsvuttstpmdD95754456552101001/./,,,-,*(#$',.-/.,,...*	
+&*.,+.-*--*(*+'!!!����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɲ�����������������������q]_keF|�������wYg���ż��ķ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����~~�~�v|z~}~yy}{yzz�q�|~vuwwbA>DHLLLJJMMGCDEEENU\\ZSnxntpuflptjlsqhglijgjib[UUXZ^[WVTNMLKKJLQXW]_c_abdbd^gadgllmrrolrrsgmnrkqtpjosvnswwqnuvrqvsvqvyyq|~}xzy�wzy~xz{�xyz�{vy}{u|}|ty|{szzyuzyxmuxxYUWVPDOVVbfpfqpslosupnhZHLE>Vhihkgigfeeddcecefc__cb\`]_Z]]`[[[\XY[YUYY[UW[YTUWVSVXUNSUSOPTVLPQRJNQOMPQPLKMKJNIJJJJMKKKJIJHFILIIJJFGIJEEGHFDDGGHGHHFGGGGGEFFHGGEFFHHJGGGHFGGGIHGHGGGIHJKHFIKJKJMQu��������������������[:7799;<@FMX`lrlJ:::98766644331/--+***)***)'***+,*))***,/7Xutrqpqsrsqtx���ywxuxvwvutuwvvrprrmbD96644555442410..00/...--,,' ',,,,++--(	
+"(*,'!$'(()'"����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������²�����������������������pX_mdB��������xec�¹��ļ���õ��º���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}������~������z��z~~�uy~�wu{�zuy{o{x~xtr~{dH?EILPONPPJGEDCBGLTZ\UU^swmtnwpppuqmmokgijg`ZRSZ\]\WSRQNNMLLNRXXU]]_\a`d\b^f^dfjmnppnmtqriqrqkpoqiurwnrsqnpwrttxuzvzuzy�|~||x�|}z|y{x}y|z~~{x}{s}|xwy{}v}yzy~xxpytmWVXYUPHE:A@?AGLMKNTSQQMQQIEAFikegjmbdgd`dfc`dfe^^d`[^a\VX]bYW[[VVXXLZ[XQUXYRWVTSVTTRTRRPQSSORPMMQONNTNNKLKLLLJHJMKMLJHGIIHHIHGGIHFGGICDFDCDHFFGGDEFHEDHJDGFECEGIDFILFDEGEFGGFGEFEHHFCGHDBHIFHGMU}���������������������hA68:;=>CHOS_mynP<:99777642230/.,*(('')(((((()))((())(),3Uqqsqpqossruz���zuzvvtuvvsuvuwutspn_E9533556742221010/00....-.,'
+#(('(&$#!  $!%%  "�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������s[[of@}�������xf`�����ƹ�������¸�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���|���y���|��~y~��y{|�vx|�xwx|xw~{zr|y{v{puwxrL:>FLNNPNOMIEEDCCFOUY[NZrklownkuvmelnhghc^XVV\\]ZWTPOMKLILQSTYYU[]`Z`\a]iaealgkprppntwnqotnqqvlqr|ntuvrnqvxsvwtrw{�s~�x�y}}yz|�t{|}zx{|st|{xxw|zuwvwrqrtqnnso\VUUXWWOGA==>>?@=?EFEGIILOKCEBOecgif_cfb_fedadee_]__[\^\VY[\VZ^YTVXVU^YTQXYURWRUVVSSWUOOQRPPQPJLPPMONPLLKLFJLHGIIJHIJHGHIFEGHEDEFFDDDEBBEBDCICCEGCCCDAADGDCCDBDFGCEGJFDDGDGEEGGFEEEGFCFEBEIHEHHMX�����������������������xH9=;<>?ADIR]lvv[=9855662101.-,*)(''&&'&''&&('(''(&(''+6\srsqqrppstuz���wvyuvuuvvtsvttuurnn`C743444541110//1/.,,+*+,*+*%
+
+	 !  !""####
+	#'�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t]_mdA}�������}f]������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~|~|�~x�y|{}y~yvu}}xz{�www�spv|wmWBBHMOSRQTNLIHEADFJRYXWWbmrvjhutihjeefa\YXXX^`XTQPPOLKMIKT[[Y\[`\``g]`bmadeofhqxpkjrrlqssmqrvmptwpswxnorwvsvxwtwy{q}}w~�~oz~vx|�ty~�zvUS\^ZYZXbfffgijiiijkjkotXTUVVWWUNH@==??>@@ADEFGHGKP@?A?Yfgefddbdfideefbcc`_a_^\[[]WYX\WWVVTWWXTRTYRRUXOTWUPSVTLPQQLPNNIONNIMNLIIIKGIKHFIGGGHHIGFGGGFGFEDDEDCEDC?BBDAGFEEECEDDDEDEDDACCEBCCEEIFEBEFGEDGHEFFEEIGBCDFFEGIGKa��������w����������������VB<<>>ACGMR]kw|aB<865645211.-+*)(''''')'&$'(()(''()(,6\qstsrqqrrrtz���ruwuwuuvwusuutsssoo\84246555523430020/,--++++-*#
+
+	  !"%&('$	
+ $(+����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƴ�����������������������s[Znc>}�������|kV���ſ���º��ʷ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���}��}y}�w{�sz~vvy�uou{twzzssv�kls{qps]B@IORRQNOPNKJEDCEIKUZYV_ntmpqmjcbcb^[YYZYZYPNPMMOMJHKJRVc^UW_^V_bdU`ck[_clghorohottjruvktozpqtykuvvlqtvsqsuzuxz|z~{}|��v|||y|||x{|�zuG@PYVVTWYX[`dddcdffhkjmsfPUUVVWWWPG@?<<>>@CACCFFGIJK8??Jlcbeeb^bddaaed`bc^Z]`]TUZYSXWWTVTTSVWVPTTSOUVSPTSSPSSOMPLPOQLKJNLKKLJLIKHHHIJIGIIJJFDGHHEFHGEDDDDFDBCBEABCDBCDEB@BECCEEBBCC>BCCBBACBDCD?BEFCDFEBDDDAFE@BCDDDFGFLg��������������������������a?;?@BCFGKR[gv~mM=94567101.-,+*'$#%&&(%&&(&%'&%&&((,5^qtsstqqsusu{���tvyvxttwxutusuuusrm[%!,001555431110...,,-,**+,+$
+	
+
+		
+  "%%%&&'(#
+		#')*����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѵ�����������������������uX]mdF{�������~m]���ɾ��ɿ���̹�����������������������������������}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������|���z���y���z|�|x{}�qz}|oww|utxyxuvtxqwq|pvvwtwttoIAHNQOPQPQPOKHFF@CHVZYSYnrsjfcZ^`\WWXZZ\[WOLLKKLKJJLRXZb`WZ\\UaaaZebb_dblhkmppoppqkuttptozsuuvqzwrnwxryxux{zzz~~~w{��{{z~xy|}vz�z{yyA;AQXTWVXWW[`accbcdgijily`QTTWXWVSMFB<=?@ABAABDEFGKOC:=@V`acb_^`a_^`bb^_a^V]^\VTXZYZY[TXWVVXTUTYTWVXWVTXTUVVQQQRMOPNIKKLJKLKHIIGFHIIFGHIEEIEDEEEEDFDBCBBDDB??BB?ABD@DEA?BBCA@ABAABBBECDBBCB@BCCACBECEDBBDCDBBC@BCCDEEEGSm���������������������������oN>BAADFIMR[ft�wU<3563//.-)))&%! %$&&&%$%%&&%(),5apssuurqsvvw{���pvyuwtstttuustssrqnZ$$.220//-.--,*****('((#	
+			
+#$#%%%())$	
+	
+	#)(*,����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˴�����������������������r]]jdEx��������o]���ù������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~�~|z~�{{y�q�}|u|{uuyzsyzzqzwturztorxuotpQAGLPRTSRQPONNNFD@HKT[WTizg\XWXXWVWUX[ZSNJKKLMJJKMW]^]_a[\Z]_e]_`d_bcicfhrmmptqkqssnrttltv{stq~qnq|urvzwvy|zv{|}s{}~uz{|wxzyrz|y{ug=8<=LXVVXWWWZ^bbbcdfggjio{YLTVWVWVRNF?=@BB@BBBDFEGIGH=:>BWba`^^b`^`abb`]]^\`]\\[WVXYWWYZVUUUSTUWRUVYTRUSOQRRLOOKKMMKJMIHGIFJGFFGFHIGFGEEFGHHFEEEEDEDDDBBCBA@@B@@AADADA@@DABAAAB?@@BBCB@ACBAAA?CBCBCEEBBDCBEGABBBA@DDBBGTv������z{����}����������������W@@BEEGJNSXdr{z_@951021.)*)&$#&'&%&%$&((((+6`psrrsqnrvuuv���rvyuuqtvwvtwutuutql\:%%.10//...-,,,*++))*"
+	
+		
+
+"#%$%&''(*($			
+
+	
+"(*++*����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŷ�����������������������v\]iiJz��������o]���½Ž���÷�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������}��~}��~x{|�xxyxus~ypy}ynvw|spryrprvrnsvrVABLPRTURQQRPQRJGDCIQVWS]ZSQNMNQVWUWWRNKJLMLJILRU^bg_[_`[T]^`Y^`a\`ek^fkqkkptpjrsrnttwlzsxntvxursxsuxzyzz}zz~}|{|y�|{x|w}y}|}|qR<9:<=ITUVVTUX\^_acccdefhipwTPSVUVVVTJD@ABBBDDDFDEEHIIG79?Jcb^_cb^_d_]_a_[Z^\UX[ZSSVVSUWWSTSSPSRSRSUWSTTRRSSSRTONOPMMLMHHJLHJIIGIHGGHIHFHIHFEECAEFCDEFBABB@A?@???@?>>A>??C@@@A@A?@?ACB>>=@?@AA=@AA@CEC?@B@>CE??BA>?BA@CHV|�������~���������������������gHFFFFHKNQW`m{{eG6321/.*)'&#
+#$#$%%&&&&(+5_lpqqrqosusuz���pvzvusuwwuvvuruutqjV<>=+*..//-,+-,)))&&'!	
+			
+!$&'&'(**-.(
+	
+#%%&�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������y^]kjGr��������sg�Ľ��̾�������Ĺ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��������{���|���}��y��zx�|r{~qyysrs{upvxvouuyusttoqqqtrsnoilIFJNPTRRSTRSURMHDCDHQWROKADEHOTUVTOLLIJLMKJMQ[\^`f]Y^`^W_^\Y^`a[bdj^ijmllnrqrrpusvrvt~uxr{uttutvx}yx|�{x|~w{��x|~�wwx�w{|�xz|�uF:;9::>e^OTSRUWX[]``_^addegokKNTTUVWWRHEACCDDEDGFFEHGFHC7;@Q_Z^c`]_d]\\`^ZZ]YWYZWUUUVTWVUUUTUVWTTUUTVUTRPSTOQQOILMLHKLJGILKFFIGDIEDCDFDADEDBCDAACC@AA@@@AA>=<>=?@>=??@=>>A;=@@??@@;@A?===>>?A?=@BB?@CA>>A?=>C?>@@?A@?BBG[����������x��������������������uNHIHILLNRV^mv{qS>4/.-*(&&$
+		
+ "$$$&&&),:^moopqpoqtst{���qvxsrstuwtuuutrtqpkP,.39<(%--+**+)()'&&%
+
+		!$'((***+,.-		
+ %�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������y\]kiNs��������kX�����ɾ�������ŷ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������|���������������}���|�������}�����~�����x{�~zv�~{qzx|tvsxuuvswtzpvvwrnoyrmuxtlmoo_NBLQRTUUSQUUSSNJFDCGPTRD59>BFORROLJIIGHJKKQ]icddd`d`_a]_Z]`b^a_g_cdmiknpooptspxxtqwy{tux{qpv|tvy}uw|yu}zwz}~uy{syx�vvx�wy{�n@99:;:D��_MTTTVUWZ]_^]^aaabfo[MNQTUXXYPHCACEEEDFGGGGFGGE79=DT\^_aca_^^^^_``[YYYVX[XVX]ZTTVUSSUTPSTSQTTSOORPKNNNIJKJHNIFFGFGDEDBAFEEDBBCCDCCBDCADCAACBB@B?>=>>=?@@@>BAA??=A?A@?>=>>=A@???<>>@?>>@????><=?@@@=@>=<=?A=<@BC`������t}���u����y�����������������YOMONMOQSX_it}uX:2/.)))'#
+		
+ %%'&&)+;\loonmmmruts|���uvwswtwxxvwvvtttrppN)(*,273#%*,+*''(('%#
+
+
+		%())*+,02-+(
+	����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������´�����������������������z`XfkKm��������mX�����¿��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��}x|�}wz��tvx|tsr|rmuxsiwwxipv{lmrvojtvsjjkpddUFKNRUSRSUUTUPOKICACIRPA98:@CFGHIIIIGIJMUcbffld`dkc[`c]P^_bR_\hXaembelomhptolyvpouuwkuvyorw}rx{|w{~~{y||{y}}x~z�{||�xyt}z|{}`9::89?S���`RUTUUUX]__^_`aaacinTKPOSUXWZPJDDEEFGIIEFFGGGGE8;=F_]\_`_[\_]Z\b]RWYVTVYWQUXTPTVSPSUTRUSQPQPQPQPQNONNLNMKJOGGJIGIIIEEDEBDEFCCCCBCCDBABC@AD@?@@<;<=><>?>?>?:=<@<>@@===<:=>?<=@=;=<;;=>=:<>=<==>>?=<=;;:;=><=>BGg�������v{���}����������������������kRQOOPPSWY]gu|ya?3-))'#!	
+
+		
+!$%''+7Xfkmlkllrtsr}���vywswutwuvvwvvrurokG$&(,/63""'% #%%#!
+	
+
+
+
+	
+		 (*++,..,/11+#
+	����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̺�����������������������z`^diNl��������t_���Ƽ��û���ǲ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���v���q{��vwyu|}~owy}oqs�okwvolutxostxkntshhtrpjmkphlebJGORTUSTTTUSPNMKEABGJJC957:>BDGFGGHKNWen`cglabgldZ^a\Ud\aXgcf_bagelnmonqtqqzuttzt{w|svw{uxw}|{y�z�}zz�}y}��v{~~xz{~xz}yzP<;::<Ah����eNVWVVXZ]^^^`baccgjkORQQTUVVTPGEEEFGIJHGGJJGGH@9;?SWZ`^YZ[_\Y[_[UVXVTUWWXYWUTTTSSVUUVWQSTSPSWSMQSPMNPNJIKKEGHGEFHEBDEFBEBA?AAB?BA@@@@>>>?><==;;::9:<<===?9;;>;<<;:;<;:==;:<>:8<<:;=<<:<>>=>==>=;;<:<<=<<<@?CLn��������������z���������������������z^RSSTUWY[[dr~}fB.+*(%$ 
+!'&+8Zeijkikkqtrq~���wxyvvswwvvwxwwsutnlG$'*-394#!$
+
+
+
+
+
+		
+
+	
+
+	
+#)+-../11210)����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѹ�����������������������xa]dhLi��������o`���÷��ú���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��������|������������������������~����~���p���z�}q|z|x{yyuzx{rsp|rwwopqwrxutqrqvqjorrimqqgihpcgcSJQSVUTRUTRSSSPHHGECDFE9337;@BFGGIPYillbedhcjhkfc``bchT``h\`bf\bgojiosqjouvqsxzntw}otv|vux~ywy�zvys|}wt|~}wxzqvz~pw{}vqJ;99;=>������iPWVWVVY[]^__aabcgmePNOOTSTVQNIA9;DJIHIKIHGHGE:9;@R]a]Za`]]]^\YYVTXWWVVZVRTWUQQUTPSUTNPRSPRTOE>FJJNNIEFJHDFCAACCA>CAAAD@>=ABB@B>>==?><===<<<<=;;;;=;<<<A:;:==<999:;;:;:;<<<::;:;<;:<;<=>>=;;=:;9;98;=;9<?;ANs�����~z��������{����������������������gSVVWVY\\_ht}}kH5-)')+($"!
+	 )9Zfiijghimrrs����xxwrvvvwtwyxwvturroB%(*-01+
+	
+	
+	
+!)-.00242352*����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʸ�����������������������ybQciUl��������sW�����Ŀ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}��zz|��y�}yxzq{xwput}vorsugvusnptwlgoqqblomdifnagisUPPSTUUURQRTUTPNLJGDBEA80148=CGKRZfqfiklcgjkgegg_\cd`V_`cY_cf[agjfffpmeoqrouvsktvyrun~vvx�yz{�x{�~yt}{xx}z{z|v|t{x~ryzzuk>;:;:=N�������bNUVWWWXZ\]\`bbbdjqUJOPPRSUSTNE?<@EGJJMJHGIFEB67;G_^Z\_]Y[\YUY\WUWWQIKURORSRNOTSMQSRNQQPORSP;,/>JMJGHGIHEGAACEABAC?@BB@@?@?>?@=>>=>??>><<8:9:;:9:::::9;;:7:;9899::::;99:989:968:87;<;9;<;99<998878;<9:<=<@Pw�����v���������z�����������������������mZXXZ]^`cgq}�qO5''(-.)('#!	
+5W_digedfkpqr����yvtqvttvsvyxwwuttpi=
+	%(+-.,&
+	
+	
+
+	
+
+	
+!)01232455531+����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĸ�����������������������{dY`iKg��������r\�����˽���ò��Ķ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���{�������}���z���x}{�zy|�yq}~zstv~lxvzlrszpkrqvktnqmqqoijjlmfonjepepelilhjONPWYTSQSSTUURQSRJDA==93115=BMYgls`lmj^hklgfge`^ea_V]]a\`ac\a`dhighknolruwttrxuzuxqx}z~~�}|�~{y~z{}�v{��wzz�wzz~vzy`;8::;Aa��������YQUUVUVX[\[^`a`abkqTKOQSRSRSSKC@ADEEFHHIHGEDD=7:BMYYX\\[ZZVWZ[WWUUI45BQQRSRQOSUTTRQPUQNOODB0#&.FIFHHFDGGD@BEAA?@>>>@?>>><;>@=<<<;:<=;<87887777888:988999778879897:8966578:8856877:;<7999::;9:;:8::<:<=A?ANy}�~|{}��������������������������������޺�vod_\adbboyvZ6()./,**(&!
+-V^acegddjrqt����zwsqtsuwvwyyvwtpsqg<		&*+,,-&		
+	
+
+
+
+	
+'./2458987770$����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¸�����������������������{`V_iMh��������sZ�����û��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~z��}{�yyzx}zw}x|uus|r|kxozutrstortvlsswllmrljnssilnofmhpagfzdHNQSSYUSSSUSRV[[QMGB?>:3/3:Hbdjgscqhjdpkihijd`djbb]d]]`d^bababfjgfntolsxvqttwpvx}qwv�xx~�yw~�ywv|}u}}w{�}swz{tyy|uyvM<:;;=@{���������WRVUSSUXYYZ[]___cnmLOPQQRRTVRICCCDEDDEDEDCBDD86<?R^Z[\]ZY[]\YXWTQC*,1@UTQQTPPSURMNQPMMK@1*!&9DGHFECFEC@AB??@><>?=<>>><<>=:;<<;:==;=<:9:9779968<989:99988989689:9:987:;;677786799:99999:;;:9:796:<?BNXHPvzzwsmn~����������������������������������������vkhcdiu~zd<*.1.,+))&"*R[^aabcciopu����zwssqsuwvwzwvvtstpf:
+	
+ +,-.-/(
+	
+									
+
+(145899;;<<<8����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�����������������������|aTbiNg��������uV���ƺ���ü������������������������������������������������������������������������������������������������������������������������������������~������}�}}���������~������y|{uuxywvxw{�����������������������������������������������������������������������������������{|�vz|�z}}~~uzzoz{{lsu{spqyuhptucqpteinrkeoojdgio[mhm^n`qb``RMRVTNSSWRQW[[ULKPF@;966Jbpmofohtdhfribgpibaif[[`_W\^bUZ`b\_dfacjokfnprpqsuotwymwu�ww|�tw~tsz{|x|{~y{|{uyyxt{yztzoC8:;=@F����������ySRUTTSUWY[YY[\]\aoTHNPQRSUSUSGFFGGGFA<AEDCGF@57<F[ZY[\XUW\XRTXTH6')(/EONQRNKORPKMOMNPL:,""*9GGFDFFDB@A@?CB>>AB==@=<=>>:9<=;<<;:::8899977967889<??<:>?;<>=:>>>=;8757886555668657888886=G@27699;;AMgyp_cjnie\Uaz��v����|������������������������������ƴ��������qE110/-++*)
+'QW]baabchmop����ywpqptvvtwywvtsutqeC!
+			 +/00/2(
+
+
+		
+		
+	
+	&.588;=?>?@����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϻ�����������������������{dZgmR���������rY���̽���Ĺ��ɵ�����������������������������������~��������������������������������������������������������������������������������������������jfmqmjkhjules|zyzyvp`dsyxxwzvYtyrprqpooorv{�����������������������������������������������������������������������}���}��||��ux{�{ww{{u|xypuwzqnswpmsyrgpqqhvmsiljnkjmjkkojpepajgudknntfRNPSTUTSTWW]_YJAJJGA><?Ramqpckquahisgagnea^fbZ_\^T\^a[_^_^adcaejjfhpmoppnvqtvvv{tz}|~y�~}{~}x{}{w||}yyz{vxy{vxx~c8:::>@\�����������yPTUUWWWWZWXXZ\]^ekKKOPQRTUUYQKONONLC6=FHEGHG=79>IMNTXXYWYZWUWPD.%')*0BKPMKLQPONNMPSRK8)""*<DDEGE@AAA>?EF+7??:<=;9:;:::;989987:9776767874566<VTOIINMIJKLMMMNMH>8456656458866778:=;<<J`z@>><?HIO_uqspbVQKMTRXcv��������������������������������������Ŀ����������{<100,+**(	&LU\```abgknu���wwqstsuttvxvwuttstfC/+				
+!,01222'
+
+	
+				
+
+
+
+%/59<>=?A��������������������������������������������������������������������������ʾ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������һ�����������������������{cXimj��jv�����s[���Ǻ������������������������������������������������������������������������������������������������������������������������������������������kcjpppporxi]sxyyxuvjPdvwvwywm^xvrpprolmnruz�������������������~����������������~�����������}���{��������}����~�������}}}�x��qy{~zxwzzrzs{ourzqsrvqsuqpksmpqxjpnoiklrlgjoqcjhm`hlodbnpqmeRPPRSUUY]g�eSSI?IKIH@;AMgpt`nmg`nhlhgcgefab_acY[Y^[a`a[]_dc`chgaellgnqtksttovwqz|�vxx�wv{~xvy{{ty{zszxxry{ytwyQ9;:9=@p������������xSTVWWUWYWWWZZ\\_e]KMMQSTVTWSPLPRRRSNHIMJJJIB96;?A;FTXZVWYXRSN@,&$%&(0<ACDIJIIJIGJMLJG3$$/@EGGD@@AA=?G];&4;::98:;:99::99877778898876765867>fgaZSSQRSRTSUVVVPI:75456614652576;J\ICKXbsLEGOU`iq{yruvgSNHKOU\ev��}������������������������������������¿���������A00/-.,-,&&KTZ\][^agjmt���xuqsrsvttwxvwvttsqbA1,(!		
+		#.35420%		
+
+	
+
+
+
+"/8=>AA����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȼ�����������������������zfXor�Şez�����uV������������������������������������������������������������������������������������������������������������������������������������������������ohkqprtrxwYjzz{zx{vaPkxwxxwsc[vtpoqrnkmoqv|���������������������}���������������������������������������������������}���~zy�{yz~yv{x|q|vzmvs{lpoxtlnqrepsufjmohejribkloegfmZmkkdesnjc[SQOPPSWZ`gyM[cS;IMMNMGBBIfrkrihjqhikodbejeZ]faYZ\[U]cbU[^d^[`fa]aggdfhkeppnkzvzjzzyt~y}tx{{vzxz|xyw{x|wzw{xyyxs|F9:;;?E��������������rOVUUVXYYY[[[]^^adWJMPSRTRVWTOKSTVWVTRUSQPNIB99=A>:LW\RPWUQQN7(%%$$%(-<@BDFEGHGEFGIIJI+!(4BFGFCCCABBGTN#(8<:98:;::9::9;866666886664356867>]kkh[VUUUTVVWWWYXWH934443/2531456<UljPMUbn��TJOVZhr{�~vy{rXOKMQU\et�������������������������������������������������=0../211.+((KTX\\[]^fjmv���xtquttwvuwxxxvuttqc?/+)&&!
+			
+#077643*
+
+
+		
+
+			,8<@����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ú�����������������������|dXmp�Úp������rY����������ķ�������������������������������������������������������������������������������������������������������������������������������������hhpqturr�pYoz{{yzzqTXrwwvuunWoyqonppmlmnrtyr������������������������������������������������������������|��������v���y���wyw�ywy|tn{{yntxykssyjpl|qipnpdsqtilklhfimghojifkem`whikqslbSSUTPMNPRVZ\\NV|]?IMNNPOLJHHZow`glqcglm`a`ccZ\aXTX[XSZ_]KZ\aV][dbebegd_abemkrn{syp�wyv~wyw�{x{zx{~{w}|zty}|tyz{rs<8:;;AS���������������iSUWXXXYZ\ZY\]]^amPDLPQSRTWWWONUYWWXXXWVUQNJA8;=B9@QZNRWTRTI-&%$""#&%0>@CEFGGEEDEHHJK?& "+:BGHEFEEEEEMQ4!-:979878996788644446665444356566<]jknkbZWTTXXYZXZZYXK7453212523236:WpvvYPRbn��hKRV`ls|��w{pXOLNPX\es�����������������������������������������������x80..0232..-!(JPRXZ\^afknu���~wsottuwwuwyxwuttspb<.*+*((# 			
+%6::;:5+
+
+				
+	
+		
++8����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ü�����������������������~eYnu�y������vX�����þ������������������������������������������������������������������������������������������������������������������������������������������tintuuwtv~ict{zyyyvkTgtvvvvshNtqooppnklmnmoip�������������������������~���~�������|������}���~�������y���~���}��}w��w}u�{xxzqt|wyltrxqvqwnxf|qoplootkrlqhgilkhfonclmn^hftgmpqkaTSUTVXOJNRTWRRU[_DELNQPOPSNNJK^s_kloelhgaha``[ZWWXZWYX]Z]Wc[^]f[aalaafc^^abffmpwqxq}sux�svxtwz}vty}vvzywsw~xrsuwpd?CKRM/h����������������dQXZXXXXZ[[]]^]^fiKLNOOQTUWWYWVVZYZ\ZZZYVTOI:8:@@<BUSYTRRYD+&#""!!$'*4>ABDFHGGGFFFGIJ5#&1?HHGFFGGFDGOT-'4778889886776555667545656685468@[ejnrok_XWXXZZZYY[]]P?43233334225=\pswzhQR`n��_NSZcnx���twwk[SPPUX\eq����������������������������������������������l3.-..002100**JMQW[\]`ekpx���~wtqtuwwuvuywxvuuusa<.+,++*('&!		'5<=<=8-
+		
+	
+
+
+�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ο�������������������������������ü�����������������������~hZot�Ǚx|�����sX�������������������������������������������������������������������������������������������������������������������������������������������������gkrvvwut~w]fwxxwxxtbWjtvvvtpcbqpppoommlllkde��}��������������������������~���������~���}���z�������������������}��|y~�uyz|wxrz~oyvyqum}lqrzikoytfnqnbnjqcehnheinibmki[dbrnoph[TRVVWWSMIJPQTUSNKJMOOQPQOQTQQRPMR\shjjmdbepa\]]YLW^[KXY]SY[^TX^cY\_e_\dc`a`a`aehmewlxlijrlrs|oouvrmpsmosoooqrnjjlqqlcirwsOy�����������������]Z[YYXY[[ZZZ\]\`f_IKMMOQVUZalxWY]`_^]]\YWTQI:;<@=;IRWRJTW:($! !  "&)5@BEFGGHHFFGGGJE/(5EHJGHIHHEEIRM&09:9789877755566677345544452346@\chlossoc[XYZZZYZ[Z]\U@2232/00/03=Rhnuz}wUN[i{�mRR]gqy���zyzp]UTRRWZdq���}�����������������������������������������S0-,//003431.&*INRW\^^_dkp{���|vtrtuwxuuwwvyvutrm[>.,,+++*))'%"			(8?B@=:/
+
+		����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʽ�����������������������gVlt�͝yft����x[}��Ǿ���ʵ������~������������������s�������������~����������������������������t{�����������������������������WDi�����������������~����������������mhowutwuvo]qxwwxywo\`qvvvuto^qqqqpsqnllnlfa}�{�������~�������������������|������������������������{���x���}}���x��wy��puy~sstzzmwyudtrxgnqwiirtscrpjbvjtbghjfiljhaoff_jhopmdZSUTTXURNJIILNRTTROLOTUQPPPPPPSTQOMQfbjkk__di[WX[[LW\YKTX[P^\ZW]\\X_aa]]hec_`a_^^]ef{ouhXPJSV[][[]adcdghgiighlljhhhlrrmpv|{s~������������������XW[ZZ[\\Z[\]]]_bkQFLMPRQUZgt�wYY^^ba_]\ZYURE69=?<?KUF2=K-## !%)8CEEGGIIHHHGHKJC' ,>HKJJIIJHGIMT0(9:7577755536654564123434441235>X`eimsvwuk`[YZ\]]]]_a_[I632000/008I\iouy|aGUcw�~XV^fp{���}}}vc[USUV\go��������������������������������������������D..,.//035841/%+GOU[][]_ejo{���zurruuvvuuwvvvuuusqW;0//-,,-,,,))'!
+
+
+
+
+,<AA@BB6!
+	
+				
+
+	
+"+--,%$#"#%+����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̽�����������������������}hZkv�͜uX_iq|�xb~��ŷ�����������}�~~��������������w��������������������xsw�������������������AS����x������������������������J Gg��YFq����������j]fffrwz||~��������fjrvtuvswwffuxyxxyujWjtuvvuumfrqpqrqqmlmonim�~����{�������|����������x���{�������x�����}���}{���v���q���~�~�{~|w}~|n{w{qvvxxnwwskysukzqsnrsprlthgkxhkhkihfqjchnqaeenonh^XSTTUWWOMHFIIIKOOSRLFHNTOMMLLKKOPQQRPMPjlj_f`cY[XZ[S[UXPUVZU]XY[`WXZbaZ^biec^^^^]\[\`iijbVRMMOTWWY[Y\_bcddefgfikjkkilqsstw}��������������������lY\\[]]]Z[\[]\\^ajHGKNPSU\n{��zhd[Y^]bd_\YUK::<=>=BM2 #'  %,:EHGHJHIIIIHIKH7 '/AILKIJKIJIIRK(3;6777766457434652212555434556BS[`fmqsvy{sf\\\]^_`__`__M921000114=HZfnwz�}kPN]l��]R]epy���z~�}g[VTUX]en������������������������������������������7.,.-./1368852.
+.GPU^^[^_dioz���}uqrssuvuwxvvwvtusrW>2/...-/./0/.-,("
+	
+
+	,;GFFHE3
+
+
+			
+
+		"+1@@8,()))$$#"#%&#! "!!'*/9=����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ͽ�����������������������}i\jw�ɒj[cjnruw{����������������zjrqsvz|}���������������������������~}�hUX�������������������MTs���p������������������������yG8WjXOM|����������^Y_]Qalsxz|��������tcnsqstts{obnwzyxxxrc[ruvvvvunqusrrqspnnopnl~������������||��}�~�����~|���z���}��|������������������}������y}��y{~�zyz�us|�qpyzzptw|lqtyhpouogmpn_glodcjmeccmaejmfUhkmlf[TNRRUVXNIIGIIJHHJMNQLEBDFFGGGD??AGNQTQPMU`bbh]^^a[RY^[SWZVOYYZSWY[UV[`ZW[dje`^__]\][YZ[aa^TTRNLOQXZ[ZZ^aadcddefgikmnllnqvxw|����������������������^]a_^^^]^\[\]\[\fZEJMMRV^w��yzyukhb[XZa`_[VJ;;=?A>A,"%-<GJJIIKLJJIHHHF1#,7GLLKKLLKIINY=.7<946787476333422452345132447APX]cimqtv{}oa\\^````__`_Q=1./11127<GVgpvz�vTQ\g��[W^fo|������}pbZZZY]hmx����������������������������������������r/.*,,,.3457:853,/HQV]^]^_djo{���ztptuvywvwwvuwvuttmY<300010111231001/-)
+-?LLMLJ6	
+**" !!'(,11//*)(&$#$,ESO[H;86641,2-1=?58CNd_VSNIEHHD?����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ������������������������iXfs�ď[Wbhmptvz���������������p]]afkorvx{|��������m��������������~yowfTc�������������������\U^�r��������������������������{=PI2DMt����������ka_XKXdmuy|���������flqopturswgfvyyxxxvnVivxvxvvrkstsrqssqpopqpu�����}���|��|���{�������}�����������������}���|���{|�|~�}v}�tz~~qxz~rqw{pqtzskptzhoqt^qrrihnpodklnbehkedfnaggiiamnlaVSQQRRVSOIGFGGHGGFKSWSKGA<?<>@B?:9;>CKPLRQMNW_hZVY\QOUXXUXXUT\UXS[XWVYX[Y[]fhc``_^\\]\WVW[]\PRSRONOTX[[Z\^abbddeghhjklmonqtxxx~���������������������v^c``a``_^]\\]]\_aLGKMQUd}�Ϥ��zvtkg`Z\^[[WI;=<@=7," &/>IIIKLLLLJJJHGH(%/?NOONLNKIIKV\8.<826766443032422320242121217BNTY]cinrwy~�vga__``___`a_X?.1010148=FTepw|~�{^VVdz�`Y_hq{�������wg`]\Y_fht������������������������ŵ�������������^.,+,,,.2347:;962-.KPSZ^^_`gkp{���ztntuvxwvvvvuvvttskY:4234445335533466566.0AMNOPK1
+		
+
+
+
+dm;-('&''((%&%#%$" !!!&$&%(&)+5?I[U>@B@AA@>><=@BA@BDD;<:8:87787����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ż�����������������������gYhp�^\aimptv{~���������������s[XX]cgkqtvz{}~~����o��������������wzfnfUe�������������������o]O�u��vx|�����������������������cWJ*=DNd������|skj_OLQ^isz}~��������nipustusrxj^oyzzyyyvf]uxwyywvrsutrqrtsqpqsssy�|�}�{~��x���z����~��~�|�����zy���|���z��{���{���vw}�}}|�~xy}~q}u~uyx~ptruxroxtjtuviqmseuskkpnioimhkhkhkgiffgjjhljkh^VRNQQTTTMIGDFGFGHKMUje^MKD<:8889:979:;AIOQROKKTh]]X\XZ\XX\ZVXW^TYX_XVX\TWZb_hhdba`_^\]\ZXXVXXZVTSSQNOSX[[^]_`bddehiihiknonnsvxxz~������y�������������ldbbdcdbb````__]`_HGJOVj��Ӳ�����{wsqomga]XSC<<=@:2(# '/BJLLLNNNMLLHGI?#)3JORONOMKKKQ]]48@68776433252214432422342126ENSVY`djqvx}����rea_^_```abd`F5200/137;BS_nvz|~q\Xas�e]`hq|�������~i`^[\^ddo�����������������������ŵ�������������O,,---,.2258;@?;84+2IQTTU[`agjp|���{rmrtuwxvvvxwwttsrlV;667667856889879:;==:61$#:BIMLOK+
+
+	
+
+
+ c�eE=B?96AK?9:604898;>>=>DFAFABAIVU`^8=@ACD@@>???>?>>=<=<;;998766����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ż�����������������������gUdo���b^ahlotvz}��������������w[VVX]adkqsvwz}���~v��������������{yegeVPgp{y����������������hL{{��dT]_cgghnoqqppvryusy{�����zdW00AEIOUV\^YUYZbgcMIJWdoy}}��������jnvwuvwrvueexz|{zzytXgwyzzwwwqsusrsttqqrttssz��{���~���t���z���~}}��~�}���~�{~������|���}{������|zx���~}�|zy{y�v}{}u{tvqsuvmpwsyrrntkonuncnvmflnmciik^cehebfjgelkbXTOOPQSVQLGGEBEEBEJNT]kc_\UJ?97654576678:AJMOOONMQWdZUW`ZVV\YNY[^KYZZTUVYWV[b`hgeb`_^]]][[[XVVVXZZSRVVQQW[\\\[_bceffgghfjlmmnpsuwuz~|������������������l`efeecd``_^`_^_`TGINZm��ˡ������{vonpnmjd`YNC>BB8.& '4EKNMMNOOMLKIHL5"+8MRQQQPNMMQW]F+;:8644301331011002332210/14DMOSV\_fntxz�����{ka`abbbabccaH5.//.1569CN_nsy|~�|aU]l�j^\isz��������od`^^bdew�����������������������÷������������C)*+--./0458<@BC?;7+9JOSUTY^bhio{���xolquuvwtuwywxutuskR=88767:;:;;;<;:<>>ABAABA;.&7275-**&
+					"/.T��~gbX[Y]_UOPQRMTLJHJLPSVNGHBABCCESW?>A@CDBA>=@??>=<;:;<;::99665�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������æ����������������������h]em�_Xaflquw{���������������yRTUW\`dhmquwy{���{u~�u���}�������|haaSJLRUX`flnnsqury}}}��mR_r�kZ\\bdfgjkmopptuxyvwy|����hR6(@CJLMQVUUY[]afjXC?Q_jtz{}��������ljuvruwsuyn_tz}|{z{whYuyyyyxyuosvuuvvtqqtvurt�}z�{��~�}��}~�����~���~���|��{���~���~~�}{���}���w~��}|�|tz}�txu�wvvxpowzol{txjpktgmnrhglqifmjfbmgi]ceecdgfijjbROLLQQPRRLHEFFDEEDJOVcdk[]cjUD996524532466:=CJMPNNJPWWWV_[WWZZP[X_Q\[YVYWYX[Z_ehfcaa`_]\[[\\ZYXWXZ_]WVVUTV[_`_^_adfgfefhhhikmnmosuwvz|{uy���}��������������onkgedfddcc```^\IEO^s��Ý�����}{tsopomjljjicUXV>5(!!)5FPORPPPNMNMLLK,'/DPTTSSPOPPSYa6196444313322101/2222001//07DLNQTX\cipux}������vfbcdddccddcO:/.,.237;AM\gox|��kY[fxmbdkt{��������rd````fl{��������������������ƹ��������������;+'*-.//1459<@CHNJD@8?KORPSYaehkp{���uolsuutvtuvxwxutsqmQ<;:9:<=?>?@A??ABBCDFFHJJMLA-0/3, & 	
+
+#>Px�ylgfllkfb\NJGGGFCA@@ANJA??=<=>=9:A>@A@BCAB@?>>?>?=<::;<:;99766�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¨�����������������������kXeq�Ð^Xaimpux|���������������yPRTX]`ehlpsvyy|~��}|q\WYcnottvy�����gWZOJLOSWZ^_ciloqquxyz|��uVNiz�oY\_bdehjkjmooqtvxvux{}����mP<!:CGKMNRSTVY[_dkgD6C[fpwz{~�������tnswuvvvuvrbgx|{|||ztSlz{zzy{xopzyvwyzvstvwvtv~{|�{yx��~x{~�z|~�~{��|��w��x|�}w~��w~��xz}�zz��t~}~l~~�z|�~{vx{{mzt}trsvoouxqkmphtirjslpiroliiodejvbighcbaihgji`RLMILPQQPKIGDDDCCFJRRXa`eV_`dXZG8642441224468;?DLNNLMIL]VY\XZZ`W[U^\aXVW]XV[bXZhhecba_^]]\Z[\ZYYXXUVX[YTUVTW\_``b`cggggfgihhklmmlpvxuw|{so{���������������Ț�r���qihfhfedba^_^EK`��Ƶ������wsrpponklkjfdc[J>91$(8LQTSSSSQONMLNG (6HWXVWSRSRSV_W,47443235323312442011//0029FLMOPTZ_ekov{~������jbdeeedeffb[?*+-.178;@JWcpx|���wcWelofdkrx��������xkc``bgn}�������������������Ǵ�������������m1+*)-011246:=AELU_YTOONOQFKYaeilr���rlnssvuwtsvxuvusqplP><;<>BA@@@CECCFGIJKJKLKLKLICA,"&+1:/!
+	1E{�KKMGECEFEEFB>>?=<9;:<>;69<<<<;::=?<@ACCAAAB@>??>=:;:9:89:66644�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jWdo�Ñ`[cinrvwz~���������������ySQSX\`ehlpsvyy{~��|zq[W^djnqsvx~�����iWVOJJOTVX^^aglmoprvxzz�aPdv�v`Z]aeehjkilmoorvyxtwy{����xSE$1FFHLNQTRUWZ_dlpQ7<Taksyzz��������orvyxvxvvxjXt||}}}|{iW~}|}|wjv|yyy{|yutxywv~~x��{vy|�u}}�|}�zz���~���u��|zz��x}�{~|�~�~�y�}}y�{|}}xx{{y{z~w~xytssyvrqz|hptzkpmygkiumghmo\gln^gih]^ciihg[LMKJLMORQMIGEDDDEEKR^^^babZh\ddi^I;3344111212358;=DJKKHISRU]`XR[_[Q]`bRVW]XU[cW[igeec`^_^^][Z[[YYYXUSTX\_UVWVY\aba`deehihghkijlnpnouxwwz|{tq�����~����������l�Y������snceieecb\O@_����������~urpponjkjhida_TE?>:-##,>OTUVVWTRSPOOT:#.>UXYXWVUSUTYd<+975334511121231/000.-.029GMMMORTZ`gmsw|��������|edfefggffdaC-..0258;?EQanu{����kZ_iqddirz��������zmgddehoz�������������������º������������]/+)),.00248;>BELWftuh^YYYJF[cgklq����qmpututustxwsuutqnfG8;=>@BCCCCDGHGHJJJKIEGCA<;:1//+!22/'	
+)=C~}GMJJIHEGGFEA???><9:<<=?<<==<:;9:;>=ABCBA@??>===;:8997643432122����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ������������������������j\el�`\ehosvw|~���������������wQMTVZ_dhlorswxx{�zyt`V[afkoswx{�����kSRNJILSUZ]adfkkmoqtwwx{��eLXq��aZ]aecghkjmooorvyzuxz|����ON0$DDGLMPRTVXZ]alp^96L\fpvxx~�������rrvzywyywzs`i}~~vZv���~}}|qe~}|{|}}{xx{zwv|�w~~�|z}|z�w��~�~~{��}���~}y��xz��y~�y~�zy~�|y�yx~�wx}�|v{�zo}|rwx{potxmhs{smmovekkuajkpfeiklbjkj_igeXchiibRIJJIJMNPPNIFFBBFGJMVYZ]abY]al]_`hZVN8554422000224359<@AEJLJU^[QU_\ZVb^YOZYcX\\aVahfded`]^]^\\[[]`\ZXVRSWZ^[XYYWZ^``_abcdggggklkklqqqsvyyy||nt����}��������΀R�A������Ǹ�|tniif^W4a�ѷ�������ztrqonlhjjigb_\OD@?<7($-@TVVWXVVWUTSTP0(5JUYXZYVUUUWa`048423330/11000//00/0//14;GLNMPSUTZahptz~���������reehigghghfI1-,/1578:AP^ms}����t^]hogeksx��������~oifggjn}��������������������������������O/-***-00357;>AFOYgu��~jbb^]`ehkms����nkotttvutuwwvuvtspf='257:====9795210+*('%"!!! 
IK�sFLJHHGEEGEDCCA@@?>>=>>?>==?<<::89==@CB>;:877889866876//--12.23������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������k\dl�ďa^einrvy}����������������zNKUVZ_dfkoqrvwyz~�xxxaUYaeknrwxz����oUPNHHLQU[acdfklnpquwvxz��nTWlu�gZ[aefhhkkmnoruvxzwwz|�����bR:9BHKLNRTUY[]ajolD5>Rcnuxyz�������zpv}zyx|yzzn_{�����mh|�����{er�|}~��}z|}{wz��y{{~|~��x�~�{|{�}}}�~w���zz|�zvx~}q|z�qzy�yy}}uu�~�sw{�uyy�vu~}uo~yzowtxqqrvlpxtqrpmujrkphrnneklgkhjeffj^_agig_PIKJDJKLNOLJFEDDDILNU^ZW`]gS`_gV^[dZa_VA444431122011333686=EHMSYW_cYZ^aVWZa[a`lb`bghededa_^]]\[[^dydZZVRSXZ\]\[\YY__``aabeeffffjjjknqrqtvyxy{}yu|�������������^6q6���������z{~zusj[.V�Ъ���z{~~xrppomnmkjhec`ZKCA@?=1% )1EWXXXXZYXUUUTG(#0<QWZ[[YYXYW]hP3873344210/220/22//10014<KKNNPRSTY_djov|~���������hijjjjiihkV7),/2557:AL[hv|����~c]emhfkpw}��������phjjjko|�������������������������������@/0,,+,,0246:>BFQ[hw�����njjdfimov���jiqrttwwttwwuvvsqmb=$%$$# #$ 
+"%<I�hKNJIHFFCEBDDCBCAB?@??????@>=<;99;==ACA;8;844444677764/0/022333���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�������������������������iWaj�ȑf^ejpsxz}����������������QHTWY^bejnprtuxz|~wtv`PU`flnpwwy}����sTNNHHMPW[aadgjlmpptwwxz�}VJhk�kY\]bfhilkmnpruvy|yvy}�����wXI&(<DILNQTUX[\`fmlR98J]isxzz�������tu|~{|}}|}wcp�������|dr�������s]}����}}}}}{�}|v{||yzv~~uuw|y|~{w}�x|}zww{~}x�}�x�|w~}x��|�{}x}x|{z|�~xtx�p|xyruuxost{wmrvrimpvlikpkgfok^fikadbfV_ffe]LHKIDJKKKLKHFDDDFIOOU^_[\cYbXf_`Zl`b]kfd_C84201111..011/.1049@FJOXb_OX[aSY_^Y^ch[\chgdccc`_^]\]\Z]`��f^VRSX[]^_``]Z]_a`abbcdddcdhjiijnpprtwxuw{zur}����������Ёav=n���|orwqjw�����^I������lyozysonoooliigb`[RGCBBAB;+ &-6MZZ[\][YVVWVR>(2G[^]`]\YYY[``739420110./10..11/./..04@HJKMPQQTW[`ekpw{����������njkikjiikh\5%,12479:?IXgrz�����iacgdfkqvz��������solmkmq{���������������´�������������700.---,0248<?BEP[ix������unpmnov���~hjsqsuwuuvwwuvvsqoe45M 
+#6*
+$',%
+
+!EJ�eFLIIGFEDDEFECBAAAA?>=<;<==99;<99===BBA=:<;98866667985455047975�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǫ�����������������������iWai�ǐc]ejptx|}~����������������UKUVX\aejnprtvxy{|xludVU`glpptvz}����xWKPHILNV[behhjmnqstwxz||��aE^a�v^]_dghkmllorsvwy}}yz}������bP-3=JMPRVXW[^_dkmaD7EZgpvzz�������zu{��~���rh|�������xj�������tq��������~���~~~{sx}xz{~zzyzz}{{}||~y{~�ztz�~x}�x|~�y|{�yw�u~|�t{|�vt{�xqvz|jvzynmqvnltxqgqtqjkltigiqfahvhYehjab`i^fhdWJGKGHHIIKLJGEEDGHLQRWba\\bfWbajZ]`h^^Zh`[`gQ:301221/..00.,-0236:FINZ[V[Yc[_`_]ddibegfdccdcab^^^^^\]i�MVd^WY^]^^`bbbb_`befecceddcdfijjjlnoporttuvxywy}���������ŵ���T8q�}tquwurz����)[�����zs�grutpmmnokigea`[PGDFFDCD8( $*2:JX]\]]]]YYWVS4.9U`_a`^^]][\K(1:700100010//101//.-.04@JJKMNPQRVY\afmtx}�����������nljkkmkklke:&.10467<AIWenz�����kbbceilpu{��������xronnoqx�������������ƻ��ol���������s821121/-.26:;?BHR\kx���������zutls���xglsstuvttvwvuuutrqc3$9	%@Zf9
+! "(-22%5N�gJJJIFEBAA@B@@@=;;>>=:75546746;;9<<?A:A@<=;;?=<8;=;<<;BCEEEHESL�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̫�����������������������mZ`k���a\fkptx{}�����������������bMTTX\bfimoqtvuwz{wjvfWX_hkopsvz}���z`JOEJJPW\aehilmoqssuwy|~��fJSY��_\_bghkmnlqtuww{��}{������rU:*9FNNQUVXZ^bbjphO6<Tbltz{|��������w{��������|qo��������mp�������o|������������xwq}�{ux}~tyy~{z||{x{{|pz�soytk}|}ry|�twztt{}{q�y�mzxvvy|vsxvwkuwxlqptnnqspirrnjmouffesceimhegcgdd`feeaTEGIFDHIIKJGFCCCFKNONR\d^V]^_SbagU^cdY[]f\X_fcL71212220/////001333;BFIQbd[]]d[[chafemifddcdcba][^^_^Zu�SY[]\^a_]^`bcefdbcfhgfefffeghjlklllnpnorrssuvrrx���������ʜ����E6b}xvvvuuy���l'�ϱ���swx]jjkok\jmkkhdb_YMHEFHGFE?4% $(1?;K]_^^^`^][XXO%&7G\_baba``]Y@",6@522/001/...-/0/-.0/4@GHKKMMNRUWY^bipv{�����������olkkllkllml@*-.146;>@ETcoy�����pcbdfhkpuz��������zuqqppnw������������Ź�������������^7412211./03:;?CHR^lx�����������y{z��tflrrsuvsttvutturqmb/1oQ'	"$%&(-3;BC& 
.E�bHKJJD@><<89::8:7:899764213866998;:Fg`Z]_[T^\T_\TWRF]ES]SYLJCDI�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĭ����������������������n^`j���dahmqty|}����������������bNUUY^aeimoqtvwxz|ya�jVV_fmqorv{}~����cKOEILPX]afhjlmoqssvzz|~�~hQMXy�d_`cfjkkonotwxz}���|�������[G"!5?MNRUW[[^dcjpoa:3O`ipx{{���������{���������xc�������i���������v���������������|{��npxz|pwy~xx}{yuzvty}�osy|wsy}s~z}x~}uy}z}}�wzts||}xww{zsxtyttsvqpovspptunlnriqikhigmidinh\ehgege]RIFGFFHIJMKIEEEDHMKPTOZ_`_]d]^^b\b\d`^[aaa`gc_bj\?5212320103563566458=CIW]YW^c[^ehbbblheccdddbb_]\\^__�qV[WY[acba`_`behhebdfgfgiihjjiklnooooqpprrrsssppv{���������������V5/Kr~wruz����:8�ş���~wvqicbd]Tgikieca^UIGFFHGHHFA0# &*6SJDQ\bcbba^][\[B#"1CQacdedca^T;&/;>51.000/.0-,/---//.4BGGJJLLNRTVX[`fmrx~������������smlllmnmmme>+-127:;>CGRany����occefimptz��������{wtttqrz��������������������������Q432322421037;>CIS`k{���������������xcZgnorttosuwutrssrlb)
+"2&
+	!&%&'*/78?L\Z=!   )<�ZFKKHE?===:79<7889::86778558888:9:<Fux_IqmcheRn^fkaAgNYaUXTO?AA����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ������������������������lZ`c���e`gmrux{|���������������cPUTV\aekopqsuxzzzy^�oYX^fmpqtx{|����fIMGHLPT\cghklnpqtvvzy|~��lXHTb�j`_dfkklnqmtwx{}���{~�������`Q/2:IORVYZZ^bgjnqpD0BZgnuz|~��������|��������~op��������{r��������}{���������������~|xruzyzrxvzvz~yz||v~w{}}tuvw|~~uyz�u}|�zzt}~r}��ovypv{{qrzyvlvwvlqsumloulfpwqjiqsemikfeeld_ike[efgfbZMFGJGFIGIJLKGDCEGKMMQY]a]ZaaaX^bcXacgX\cf^\ae^[ag`RH411124137899::986219CHJVW]c\acieg`jhfdbbcc`ba_]\\^e�a[[VWX`dca_`_`cfhjeehjjjijjjikkloooqpqsqrtusttrnty�������f������w=8n|tux{���#\�Ǘ����{wl`]]^X[Ufgdb_\SKIHHHHHIIG>-!&,<RZTCLV[abbca_^^[7!! 9FXfegfdb^UF*$)2;90121120/-./.-.0//7EGGJJMLQTSVWY\afmtz�������������opoooomnnnf9*-148:<@CIS^qy�����oeddfjmosv{�������|yvutprw����������Ö~������������D631443444225;>CLVan}�����������ź����aO`jlprspouxvusssrnb&
+.Y5!
+
+86774<KU\ZY[_U-#%#! +eJEHFDFCA@@B>==;;<:<<:<>?;??BCMPE<<;LyzbNejghgQ`[RfkPNNMOJJJG?<:������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nY_c���f\flptx{~~��������������iMUUW\bgjnprtuvzz|~j|t]X\flprrx{|����lLEIILOU]ejkkmmqruuvz{{}��z[JR[�q^adgjmmoqptwy{}��~}�������jW@/9FORUX[]abfioqt^36Pdntz~��������������������ya��������r|��������z����������������ztvwxtzx|swy~{wz}{szw{tyxxro|�zow{|uywsqw|zty|lvw}nvu{rsxvuptwpjsrtjolspljrmggjqgkdhchejefheiekffbWIDFGDFHIJLMKGGEEJJMMXWY\a[Ua^^T]a\VbbaR]_b\]`aYYab\X`L;20033369:<><;;94118BFKR[ecdcghldhdcbdedbaba_^\\^o�Y_\YWW\bec```abdfklggikjijlkklmmnoprrrttrtwvvwwrk{��������}:�������g)2?wxwx{��{$��ۘ���mxzuurndYRKPhfdb_YPKKJJIGJKJIE<("(1GX\fSJMKLRYbbdb`]R,% +CH\hjhgea[S@&&,5:410//1/--/.,---,/:DFGHIJLOQQTWY\_bhpx~�������������ppqqppqqrr[/*,147;=BEKQanx���~mjhhjlnpru{�������~yxttrz���������Ǽ������������~5566787676645:@EMVcp~�����������������qN`gknqrrrtvvuuurqm]&*#'
+'@Z_d`^idbbb`cd; "!==CMP@LNKLFLKKMOAJKHFI=QXVVKLHISS>>=ANNUSHAJSSOMDLJCFAAIFCCDCBA;������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mX^d���f\gmqsx{{|��������������lJWVX\`eimortwwxz|~txtbY_flpsrxzz���pR:IIMPV\bgjkmossuwwy{|~���mXZL�|bbdhloopssvxz}~��{������gYJ($:DOTVYZ]`cehnrqg>4D]jry���������������������ni���������r���������z����������������vvsvnvxwqsz{wuxzyoys{tzswqu~~vovxzr{v{ry|xyz{yzuyu{t{vzxwwqwuutlhwnpjumqqqnppnkdnkocdgpabgmg\gghdaQEFHGDBFGILMNKJFHJLLOVa[\]^YZd\ZYc^\[d[dWb]```]_^ba\^cbZU?41210387;===<;:5008?HIR`dd_deiffedbcdbaaa`^_^\\{rZh]YWVY_cfda``beglpokkmmmllnomonopppsrtutsuxywwvqy��������w0P���~uvm98%Xygt{|�G>��Ք��{bwocuvrmmg]_fdba]XQLLMLMNONMMLG8!")3NXg��[PNPPMJKUZ`]]C$$ >YD\jjjhd^[R3#).870,--/./-.,+-,+,.:CFFGHJJNORTWYZ\aekt|��������������ytspqrrrrsQ1,/148;@BGLR`mx����|iijjkknpqv{�������~ywwt������������¾���������q',68;:99:99979?DJUbp~�����������������jP`fglproqsttutsqojX#(C#%-4M&=R;	
+(<Xdkifce`cedee@"3@Ytj]`niVjcZ]laZeLOe<XgRfaLMKHO??=T]R\XZ[SLMOXMJKFLLZ@XNFJJNC@������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pX\\���j^fmrtwzz|~~�������������pLXVX^aekmpruwwxz|xxwa[`fkpsqwz|��ztX9MJLNU^bfklnprtvxx{~����sZ`Gw�cadhmopqssvz|~~���|�������{[Z85=LUVX[^^bdflrqoN58Yfow}����������������������e��������{}������������������������{wsxr{vtqvywuvzzyv|t|x�xzy||sxxwrzwrxx}ysv~{txzzovx|rry{tmvxvnnqwllnsklqvlhorlakmnefin]`dmdXgfe^TIFGHDCDFIKLIKKHHLLGOc\_\`]^^efX\bg\^egXacjX_dg\\^fbW`j`\]^L951..249<=>?=;;7214<DGN\c`fehgfdcbab`aa_`^^`]d�^iuc[[SS\_cecabcfghv}rmmoponqqpqrrrqruvtwxvvyywwzwt��������sM+@RKB,. 3%<b-e~~}(b������p]p[`qropnlihfc_^\WQRQPPPQQQRMMOF.  #+:SUm��h[XWWRNNKNOQV]8&$E��b[_ijid_YH/$,5;4//.-./-,-//-,./;DGFFHJMPQPTUXYZ__gpx~��������������gvwusssvssO.-/47;=@DGNVbp������vehfjlortw}����������}||��������Ķ�nu����������d1),6<><==>>>=;;CLVcq}�����������������aO\ejooqprstutsrsoo]"5���������*.::5
8WQfhkgUO\acecV?&42,$")?]siOSjobieUhiUd}]QkS]i][dVKEHC=><Oola\YhlS^gUkeVPQL`AKJBD>C?8�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɭ�����������������������q\\^��}k^fmrswzz|}}}��������|���qJUXW[afjmqrtvwyz|}ywyd\_fmrtsw{|���wsa8JHKOW\agilooqtvy{~�������`^I\�lbgjlprsssw{|�����}��������mbJ!/7FSVY[\`cfilpusd:1Lbnv|����������������������sq���������v��������������������������ujvu{qtrvuruzwszzzqux}utwzwqyzvotx{ntr|qoy{yhzz|hvvzpqtusousqmqntjmnqfjotjfmmefjikehehciegedhbWLEEGECBFHJKMLHJLLIKKOYdUZ]b[Y^b_Tbc_Y\aaZ_ceO_abQ\^``Ved^]`a\O?40/-169:<=;<=<51/3;AEJWdl`ehfdbcda_^``_]]^^n�Toqq_[VRX[^acbceeegz��unpqrrqsruvuvvvvvwwxxvxzyyyxvy�����ǟrnU7@LC630.E96;.p�l'��ݮ���jnuWg^aedighgba`_[XUTVVUUTUUUSQRPC%!)(-@Y[|�|k_\YVTSRNJJJGF?0)*#E��ȩo]^cd`WE)'0;@31.,-/,*+-,*+./<DEEFHHIMOQTTW[[^_els|�������������_'.7DVdpwvF2//58<ABDJQ[j{����ɹ�zo_jlnqt}������}�����������������������������Y:2%)9@CCBCCBB?;@IWdq~������������Ŀ���eL`flpqrnqpsttspqohY%x{ibtR>;G" EE>3
+>sD^eehYNRURX^R4)SVD44//'! %:[j|e^ikijlTdl\ZbiQFPJKIEAIHDF?<;<GIb^Y\S`YNQKFJREDC=@?>98542/�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɮ�����������������������nVVZ��oh_gmqtxyzz{||~}������|���sXUZY^bhkmpsrvvyz{|yvwgX]govuvw{~���}xi;IEJOV\bikmoorvx|~��������dPJN�q_ilmpqtttx|~���������������wfb,%4BTWY]accfjnquyvJ/B`mu|�����������������������k|���������~��������������������������hsrvhtowrqsurr|vtmvv}stuvttxsrpvuvptnwv}zuxu�wxrxrvuzsquxwmprujpqqkkkqqnlopeborajjregfpd]eeaXKEEGECEFHKKJLKKKLMILPY[`PabaY[_^\[d_\[a^c\bbbXhac\ea^cfjbbbe_]^YB93/.1578;;=<:730036>DEThZehecccccb`_^^^^__~x\lnaYe]VVXYZ_dcddeiw���roprtutuwyxxxzyzzzxywyz{{zzxu����Ȋ|y{t_L,-<[]V)/2f���U:��ѕ���~s\bUWVYNWgfdb_\WX]]]ZZYWZYYYWXSP> %%/F]j��ylc_YSTUSRMMNKH=3//5E�����ҽ�mTQVT9',4=80.-.,+*++*(+,-;DCEHFFILMPSTVYZ]adlpx�������������A$9[gR;3/26;ACFIOWdu�������ƴ��ncksy~z����|����������������������������PB=5'):GHHHIHGFCAFRcr�������������Ŀ���bLbhmppmmspstturrogT-B�`!:K0 @sM\fcbUKLMJR^O(9TA492/($
&:OGPYQREEUTPJ>FLIBJGJKRJKJKULEB?J=C@@@AC>=C>=>;9;;:;9::9866630�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ŭ�����������������������r[UN||gh`glrttwz{z{|}}}~����x|��zbXY[_egjmpqruvy{y{xrsh[]hovwwxz~~����nAHDJOV[dhlnoqsvy~�������hRP;�~bhlorsuwxy|�����������������os?#1:LUZ^`ddejosv{�e5:Vhsy�����������������������|h���������}����������y���������������snowkwouvutswx{tsqyvywzvquyxpqsxqsu|opvsmw{opuvinsyojvyqeosselqrigltkfkqk^fpiQkllbeehcaf`UHDDFFDFGIIJKLJLLIHHJVW]Y]\ga^]d_[_ca[`b`\`cc^`cg]`ch[]cd_[`aa[_``PI>50--2589:;89730/04:BFMZjfdcabbbb_^^^]]^d�cijifWbdZWXZ[[\^^bghu��{hkptuvwwyzyyz||{||z|zz|{|{|ztq�����z���}~�W)APPO$#(IDMS(Y�ݴ��}w|}zql]ZYcOdhfca^YK_iqlfb`^_\]^]\\XR4"2IXp��uka[VRPTTQOMMIG8.636Cz��������c[G&!,8=3---,+,,+++*+0?CACFEHIKLPSTW[Z\_djnt|������������,$<><414:=ADHNV`p�����������Ǩ�mffqvz���~����������������������������NGD@8(%;LMNOOMNLLGJ\r~������������ƿ���[RchlnonlprssssrqqmV)^�^?D;--,=6(BmafpTQTPMKJJOF"#
'?b]]c`bd\RQ\[gW[OMUZf@WTTMMVUKEAODBCBBA?A@A@@><;;98:;;;9776510�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������®�����������������������r^UHypahahlrtvxx{yyzy{|~���}x��eZ\^`dhlprstwyz|{|zosi^`hrwxwx{}������uI@IHOV\cimpqstvz}���������rTJ;w�gfjpuvwy|||�����������������|rU$*5FUZ]aegglptvzy;6Mfpy����������ɮ������������gm���������r����������|���������������{rtsyqruxurtyysqrxrqtxsjq|vlptxlprtjmwwojtwylptyikkwlmuuhfqorhpookjiqejnokdkjhdrjgakgfge\QHDEDDDEFHKKIIKIKJIJKT_\^W[_bWZ^d\Xbd`Z]a`XacaUc`cZccc^add_`__d^da`ZeRB82,-02368898310//37@CMcfdabbba`_^]^\\]k�]{�f_Zh`\\\\\[X[]ahiq��geipsuvxz||zz{}}~}~�~|}~~~}vg{����|������t=I@Ij\E<6&('$��؍��gkuzzttrlkg]fhfa^\WMbESfmnmjgeacbaa`_R##4M\{�~rh^URSSRSOMKHHE?9558M~���������˺���\C2D=0.,,,,,++-+,3AABDEHHIKLNQRVYX]^ejmqx������������"(:??:645:?DGNWao������������Һ�}fl�m������������������������������wMLKIF<("6PVWUVV[YWUUh{������������ƿ���YUafloonnpquutusssqR2Il�ZB[K%?6$
+#4+#)9_coxUHSRMMMIF;#!)?Yijgb^omJdoWfrfPU[QcENNKGGCF><>DFDEEDAA@@??>?CQFCVC;:::887531�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ý���������������������������������������������������������������������������ľ������������������������p_P;phXg^fkosuuwxxyzyzz||}����v��~bZ]]afilnruwyyxz{}zovl\biswywxz}������zSBKEOW\bhnopruxz}���������}VB>a�hilqtwxx}�}�������������������;.>WZ^`ehilnrvy|�Z9:anv����������Ť������������]����������a���������ss���������������loqsnqruropwuportnqsvristsopquhpntowwrlpvuvosmtnqntqsyronqlpmwjloodjksohjjpdfipeceoieg\OBBHDCCGGFGKKIIJIHDCMWSZZZOZXbX\^b\[cb`\`^_afb`_g^a`ia``ed``c_\`dc\[`fXSF;2-+,.1457653211/15>H^dbaaa```_]\[Z[^x~\RlgYVa\ba]]\\[\^`ddn��dimoqrrquwwyzy}~~�����~���~zji��ƍ�������HYP\g^jRObCG:>�Ʈ���oanhsrtpolkigfcb^[M`b,/106IfsqkgfffeddM#4Ld�zocYUWSSTRNKHGE@?<655T����������ü������l?/.,,+*))***-3?BACDEEHJLMPRVWY\_dhlrw~����������v(%+>@A><976<AHNXdq���������������������~~����������������������������lOSROMLF3$/Laa^a_`acb_k�����������Ǿ���VYbflpqomqstrstqqrnLF��KTs*YQ.
+.%!3OWouTKOTQKKQE8 !+=KXed`bZ`fZYVLMWPGFEDCC<===;:6:=<@CEEDBB@@????A�dG�J<:98765320�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ú���������������������������������������������������������������������������¾������������������������t^N4ndWh`fjkoqtuwvuuvxyz}|~���t~�|gX]]aghlosuwwyz{}~|wxp\bhrvzxz{~�������^:HDLV]dlnoqsuy|����������bF@N�mconuxx{}�~��������������������_2;PZ]bgjjnptxx|u=:Wmu~����������ѿ������������vT���������ci���������<~��������������oipmrtrqrsrtnpqsntttqqsqrpqlrowmspxtnr{vlrpxjrquklrytjosrilpsdiooceptnbhkj`fei`behfbVIEDFEDEGGFGIIKJIIGEOXXaY]]a\jVead_^^gc\`cbZabdZafe[^ag\]bf^]`e^[`c^Y_eaTZWJ>4.++,/255343////05Gddbabcb`__^]\\\c�m\]khZ_[[bb_]\^^]]]`ct�uckrqrrrrsttuvxz}|~������������ud��ǅ������OS�pp]Ro\D^BD)lد���|n`aehdgmmeakifc_]ZOpL..,*'$(@fksqmmjhhi< $6Mj�uf^WSOQRNPLIFED?==720d����������Ŀ������a5.-,,,,+++++-6@CCCEDGHIMNORTWY\^dhlqvz����������X*#-?CACB?=::>DLXer���������������ſ���Lr|���������������]W`p���������aMUWWUURM?)#C]fhhjllmlmq�����������ǿ���XXbgmqrnpqturttqsqhJ>�<4&9.!
+($9aennWYXVSOKOD: ,<CD@FGEGCA@>AAB=@A==<=<<;<<98:::;>CEDDCB@??>@=:|WA�@;:98644230�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ų�����������������������s`N/nbTf`bfjknpqqpsvuxwyz|~���s|��hV\_aehmquuvxyzz|}~xypcbgrvyy{}��������g9LHNU]floruvxz~�����������lOD4��dmquyyz~����������������������},-7IZ^bhlnnptwx|��Q.Fhu}�����������ҳ������������io���������P���������6@��������@�����~qnnstnlquqmtvsnoquqnourlppqfopuhosxpipsodqow_pmrgmssmkmkmflmldpkldeinlehmg`e^jcdggcUKBADEADEGHGIJIJKIFGMa_Y\`_W\\cX`aeZ^_g^X`c\Xa\aXdfd\`bd]`ad`^a_]`ef\Zc]Z\d^ZK>3/,+,-.1113321/-.Igccbaa`^]\\[[Z]d�`ecjhSlXYba]]ZZ[[]^af�nhmnpqrssuutvvyvxxx|~����������|q���������Z^y��sM[QjWZGD'�ԑ���xsf`Y`YX^TWjifc_]VNl8/-,)*(&$$(<��|sokm_$!%7No�|odYVRPPONOMJGFEA?>53;n������������������N1-*+-+,*****,6?ABABCEGHKMNQVUX^`dhkouz���������<"#2CEFGGEC><<BJUbt��������������������6Sx����������������������������_UTW]^[ZYVJ2/Qkotrrtvwy~����������ƽ���TXdhorqoprtwutvusqoJ
+
+
+-<$-QL1
+42$#&?xzh^[ZTUTRONE: /=CEGGEFFEDCBCBABBB@@???>=<<;;<9:;?EDDDCA@>?@A==�UG{::::877421/�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������α�����������������������waN*jbSjaafjklnpqstswww{wv|���x���kQ]acejortvyz{{|}~�xxtgcksy|}}��������q;NGLV^emrvxxz|�����������vT@8��lnsw}~~������������������������M!/9\`dhlnpsvyx{�i69^rz������������̣������������u{�������{HS��seYZWT3L�~_VRSEv����oqnpqqhissmktspjoppjllvqiqnphsnrkrsspqplqkxgzhzkonwpoooomninknouihhnhjkkkgggj^kffe^PEBDEACFGFFIHHIJIKHGO[b\P]]^V[_cX`ccW^`b\\caX[gadakcb^ebdag_^dd`\chd^YdfV\cgVZ][A8/**++-/--/13321Pdcbcba`^]Z[\[Z^sy>MthcUZX\ba[YXYZ[]^`j��jjooqrtw}{��~}yuruvx{���������z�Ğ�����ZXz����pWQroOD7Eѽ������}wqg`[YbOXekgc^YP]a42,)()&#&"-Bi��~sj7  ##9Vr|qc[TQRRNNPLKKHHHE?:26I������������¾����w;0,+++++**)')-7@@@?ACEFJKKNRUW\_bcglrvz}���������&!=FGHHHKKFC?AGTev���������������ľ��hDZdy���������������������������]ZZ[^ccbb^[XF2&>cyvwz|������������ǽ���U]bhppompqrwsuutsomA
+4R*E>#/$ 		$BC/"?uucYYVUSSROKD8"8CEGHIFGGGFBBEDCEC:@AA@AB@=<<<<:;=@CDCDBA@??@@=A�QKvA;::986542.�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ʋ�����������������������tdM"ftps[[_dffkggjjq|������������qU^`cfjnqsuxy{~~�{yucdmt{~~����������wA?ENW^gotvxy|~�������������Z==f�prvz}}������������������������| *2Scdgjortuyyy}}I5Oqx�����������ְ�������������>mRI@:9595!I.224<?=(#^=06571^����jprsmnmtuqksvookvqmjusrrqtlpoumolwooquohlrwgqn{gkovmioqkdjlmhnonfegpcbhsjWgkk_fcf^NHDBCBCEFGGFGHHIHHGGP]_ab^^[[Wa\_Zb``]ea\agb]]bb^biiZ]dg\[ce[Y`d^[ac_[]`b[]a_V^ZaPF6,))')++,-.2328[bbba`_`^[Z[\ZX]~aTUZn\U[SWTZ[YVWXZZ[_o�{hkqssx����������}uuvv{}����������������dF�y����oUIothM.cۢ�����{yvplcaAQhfdb]XHkR3.-+)%! 9XaR8("!"9\oxi`e�bHINOMKJJHFED=90:W������������������e5/-.+****+*)+.9@A?@ACCFHJLMQSU\^achmsvx}���������*EGJLMNNNLKFBDPcu���rVp���������ļ��JPvuuqt}����������kU]j����������`\]belpndgdc^P<)9Vq}�������������ż���O[chopomqorvsqsrrpj?
+
+W�i.Dp
+1fN)	
++1+2*Gva\[WTSTRRJA3->DHHGGGFEGFDCDDEO�`D?@>?@=>><<<9<=?CEBBAA@@A@?=D�EVr89:9976432/�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������³�����������������������wjQ%g��ȿ�������������������������qY`behlpruwy{|~���|zxfdmw|~~����������}M:DJV_iouxxz|�������������c<<S�wqvz~���������������������������/&-Ecfkkqtuxz{{~��`*9fv��������������б���������ʡR128:9<=0 B6888:;:''bA99882j����inrrkknuqnlurhmkrikntkfqrrinmlclktjmpsgamlvbpkpcgjrlhnpmeijidokpbcfmccimi^iehef`TIEDDCACEFHIHGHIHGEFFR]g`[\a\TY\aU\Xb[Z`gYZae]Tae_\cfb\ccf`bdb]^`eb`ba`^_^caa`]\`X`\cK8.*((''**,034Daabb```]\[Z[^b_czVWWPYZX^\`XX]ZUXZYX]dv�pflrtw~������������~vw|����������z}�����V;Bw�����]LVtigN"'�ڋ�������{ttqmidZffda_UIk4-.*)("&41%"$!";^qsf^X����SNLMLLIGEC=83<e������������������Q2-,-+((*)*+**1;?A@?@BCEFHLLNSUZ]bejmrxz|�������e0KILNPRSSSTOIEM_t���belj{�����������:]y~���pY�����|�������������������{ku}��wjghjidN:0Jn}��������������ż���S\chmponrqrvwuspsrg<EA%D7' -'	#(=@3*H|rbZYVSRPMHA:07CFHIHGFECEEEEECBD�^?@B>?@<>=<:::>=@DEBCA@@A>>>?E�<]r=;99866420,������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wjQ&l�����������������������������uZ`ceilpsvw{|}}�������gdrz}������������~X8GJU^hov{{{~��������������e:7C�{jwz����������������������������4#0<Zfkopsuwz|}~��{TR_w~������������������������ĭU347778;-
=3777:<:)#]A89764g����hmoqhllopmispipmojnmnjmrlnkojljqgqjtqmkmsjpjtllhmiprqqkkjkfgi|coei`hemkgfgnffd^QIEBABABEEFHHIJHIGEENT[]e\W]g\RZZ_T`\c[^aeVdcb\^cc^^gb_ajaabec]`f``aea]aa^V^b`ZY^_T^[_SM8-+(%&&(+/17M`abb`__\Z[\\]{gaRWYVVPYb_`[XYVWWYYZai��lkpv�������uw}������zxz���������sVR`jrvqBtaAhrkS9$%)q_aU2������������}vsmlljhdaa\QY_25+)(&-CB/"%.!(Gcqoe[r|���pLNNIIIGCA955Fz�����������������u<0-,+))')())*)1=@>??@BBCEGJKNSWX]bfjmrvz{��������L>PQPSUTYYX[XRKM^w���MH_��������;��[3k}�����m���v{������������������ұox�����umnoje]G3Jh}�������������Ż���Q[bkmonrooqusstqsqc9AjHeP]$
+0\=	#.452*IgVRIIGEGGB82*9CFHHGGGGEEFDDECDE�P>AA@=>>@=;;9;A=@CDACAA@@=??>K�8`j:;::875531/����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ������������������������xlU#k�������������¿��������������{_dffilquyy|~�������kdpx|�������������^0GIR^iqx|~~��������������n2/6���q|���������������������������=9>Rcoqvxz~|}����}WIl������������������������ŮP3467689-<35679:<+$_?7875/d����nvnqlqmmpvoopqsjortlinspknpqdjosgonxkfmsqhlosgijphkrtm_gklafmr\dhi`eimc\ggjed]PIDABA?DGFHHGHJJIFDEOY[^adbac_Y[a[_ai_\\e`_`h_^_da]_bc]_da[^ac^[`f]\Zc`[]`^Z_`\V[_aU[Y\S`Q@/*'('&)-2<[baba`^[ZZ[ZZ_��gQUXYXUNJV^a_^VUWWXY\cp��kkt~���|x���}|{z�����}}���������dATcpz��pT^Q4-+#.QqQX��AQ�������um�}��|pjmkfdb`[IeOHwZ;&"4TojB2<C-Tksjc`����}RMJMIIIDE@805U������������¾����i5/.,*)*)())(*,4>@??@ABBCFGKLORTX]afjnsvz{��������1$S[VXY\\^__c`\VM\x����gYb�ɪ�yVKp���L9iw|�����j���iy���[m�����������b���}o�������ovvsqk`GIx�������������Ǻ���OZbjmnnpoqruvtsrpog5f���nN
+,;6 	*024&O�`CPGHFFJMHB:+  #=DGIHGHGGGDDEDDDEM�M:?@@?>?>=;99;?=?CECBCA@@=<=;Z�Brq@<<798653/.������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ymT"k�������������¿��������������{cefgjnswz|~~���������nemw~�������������\+;=J`fmu~�}~����~�{ywtnd\WG1',��̍{z���������������������ƾ����?A@KR^bda\[XQUKZ_M7.7b�������������������������ñL5535686*934479::+#bC89753d����rsionsqlltpknrqhmoqgfmsoghnmahpqahltfdmqo`jirYhioaerojafkj`fknchcf`fff_ahdeaUKECB@?@CEHIGEGGFFFDIV]]`f`\af^XY\[V[]`ZZ_e[[^dYX_a[W^aa\_^aX`_c^[bcZ^^bb^^`]Z_]\\`\]X_W][fXTC1,*&%'+2E`^``_^]ZXXYY[\ggKRVYXYWRKR\ba^\ZYXYY^fu�wimz����q��̶|wzzz|��������������U;c`z����~s��������uA�0 ������}oZUytrwzyyphecb\YD`7S|]d[:5\qz�xrvD'Rqog^g�����FLORGIGFB?85<h��������İ��¼����O10/+)**''))&).7=>>>AAABBEHJJMQUW\aeintwy|��������%*T{�rigefhijigb[\v����Ѹ�������NM~��@Edpw�����d���gz����xu���������������p|������y|}}|zwo^cwqqjhjiimv}��´���U]cllpoprprussrrpok2
+
+C0
+'	
+!)(R�g^ZUTXYZUOF@.$#4?EGFGGGGGHDCEDCDDP�J>???>>=;<:8:;>>@CCCDD@>@?===GXBEF?98786630..�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˵�����������������������znSg�������������¿��������������|egjjlqvy{~�����������fkt{�����������~�Q'Dfl^__ac]^^\ZPMMKJHICIB?<8(���������������������������������C*bLVCD<...10+($,0/)'3^��������zwx}�������������óH1126798,8466799:,%dB:9974l����speolpomnupimqnflkrehlnnhiklbllnelnrikmnngnjq`mjlgnojjgjilfkhken_fbheecfib`TKE@BB>@CEGGGEFGGFBEKY^Y\\e]Z\c]ZYZ[X][`]]^bYaccb`_a\^abbad]a`f_a`e_[[gb`baa]\^a\\_cW[[aZW[aWURK3+(&'+3Ud^a`^[YXWWWXYWRHOVXWYYWSOQX_bba]YZZ\`gz�oinx���sv�����prrlw}�������������LCZ`���������������|:x0�������qc_[dwiksryvrjea^RM\07Xr�}�}D4!:^p{�����?Btqe]~����hIMLKJHHDA=37Ft��ò����Ǡ������}@./+**)(''))'+-7>?=>@@A@ADFIKNPTX]bfjpuxz~��������"3Qmuwjnrxwqqsqjbq������Ķ����xG9^��3c}x{kdlouhntgx��|q������������������r{��������������~wfDLWWZ[\[[]l�âzvqX\bilooqrqstrssonlb+
+
+"ZVSamq
+"(#Y�gb`WSTUPJH?>, (' A@DFGGFFGFGCDBCCB@R�B???>=?=;;:8:<@>@CBBCB@?A@>?<>=<:;:87796643/.�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˶�����������������������xmX c�����������������������������{\gmostw{�������������lnprrvwxzzyywph[gPK�zXXWQLHGDB>>:85541/0+,43*���������������������������������HD�beOO=/.+-,($$##'$ %:��?8.%"F��������������F4256576-423479;<* b@9:;91f����pqlsjppoosrkpllkohoiqljopojmksmnmqmkjpmgkpnbhhpgimtjejnjbfhlehin_adjccfcd\PHE@?B@AACFHEFFFGFC@GW\_\^]_\__``a_ZZ^_Z]^dY`bkb]cb_[\ha`cee\`ab]]_cXX`h\\bb`W\]_W\]_UYY`[XX_YTTZM</,().V^^__^ZYWSUVWXVUONUXWWWVUVRU[__`\YWX]en��lhnrx|xs�����{jms^ty}�����������w?Q[elmtv�����������Gh^N����{n``qoRa=Lolklopk^]FWP./,)0Lis{ulOc-Lcw������9Evq`]����UHLLKKIHC>938U��������Ƿ��������b2/+()*)('))))+.9??<?>@A@@CDGJPRRZ^bfkpsx{��������c.5797CFPfonrssqlq�������������ܚs��8q�|����zz}�������cTW`|�������������ww�����|��{xuvxyzz{qJANSUWX[]_j�Ǟz�wS[cjlooprqtsrtqoom_&
+d�zrsU
+	 66]{d\IIJFA???=A',-+QDFGGFGGGGEEECCDDCV�>@@A>>>=>;989=C?BDBBCA??@?>><<:;;9:;:8655421-������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yo[%^�����������������������������~alrtw{~����������������~zyxvwwwtutsmd^_N2�~78.'"'+),*+,**./0144AW@"���������������������������������G,�g`X[7/,'&$"!!2$(R��������������C4557755.4323688:,b@;=;90g����inpulmpupipsrjkkrmmfpg_lsnginrfinoifiljYlliUjim`clsgfinhbcfjeeei[aciecd`WMEA?@@AABCDHGDDEFFEEK[i^__b^^`eb]`f`VX^_X\^b[\_g]W`b^Z]c`W_ab\[^cY[\cY\bdV]c`\X[]aZ]Z^\\X_[[V[VYVUYYA4,*3]^^^^\ZZVMMQVWVVICRWWUUVX\\YZ^a_ZZYZ`hr�}jkostuur����wpiltovxx�����������c=]bltl_fju~��������~PH3 x��oofaXc}uB9)>hlnkemj^\Fa;..68*%M\���fP-:iu�������&Pol[l����{NQKHLLKFC@71:h��������ǲ��������M-,*()*)'&'''(*/:>>?><>A?ACEILNQQZ_afjosv{��������N(:?#"-4;=;=:=C:@?CUap|���������������깇:u�����������������������������������������]FYWZ]^X^`^YL<BOSUVXZ]co�ƛ~�wX\cjmoopqqsvsuqppn^(%4P'23%ftf]QJJMJMHJED% =dIHJHGGGHEDEFDCCB>_�;@A?>===><;::>?;@BBBBDA?@@@=>=;<:9:9:977422.-������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zp\"\�����������������������������{iu|���������������wh���zwuttpommnkfaYUK'��=@40/59<=@CDHFJJLGB@U�D%���������������������������ÿ����EJK<35+'&# %'&N������������÷F0346686,62135989+
e@QqL94k����`morgjorlimpncgerjpdjcgnpkcjln`olnkhgilfojk`qinekjkjljjecfbhgjdecgdhgcbVIA?=@???@DFGFFDBBC@BNV]aR[`dUY\d_Z]^[XY^]W^]^W_^f\^a^\]^bc\c__]]^b`d`cZe`^^hb[\a]Z`c`Y]]]U_Y`SXX[RRWYTE9/B^\\][YWXVSOKOSUUPKJPWWUWXbb___a^Z[[]dix�rijorssrx���gVffcsptrq{����������O>agprom^^Tghq{������`1 /���{ZIV^[n�vE:DQ_fkigjd\UK\2-,1Gcq=*Eh\zT#Fq������\1dpa_v����^Ebz_JEGFA;51Iq�������ʷ��������t=.+*(*)''&'&%(+0;==>=<>@?@CFIILRUZ]afkosw|��������C! .@>=(!!%15BIG@42<?@<<@V�����������������ҷw@z�����������{���������������������������hN:<9DKFDB@@:2=JPRVW[\co�Ös�qT\fjmnnpnqturrropm^#
+		Qt%/1(juea^YXXXWSRJC#VnIHIJFGGHEDDDDAACDm�>C?>@==<><;:9=<?BBCCCDA>???==>==:8:9:967430--������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{n]%T�����������������������������v�����������������zo6>��|vrnlmjjkijie`VUJ#fwUWYURPMKKJGGFGE??<532D. ���������������������������������DB@90-(#!&&F������������øE1455677.6233788:2$hA^�T71i����hpnnlnlnhmonnmnhopthhjnpklkohnmwikjmiempocllofjgmhilmicdfj_cho[`fhfec]RC?>==>?A?@CEGEEEC@AGPX\[aS_``Tbb^_`cZ[\^]^^c^^^e`bacb\_a`^_baZ`ba[_ab[]^c]Z]e]XZ`ZV_c]R[\[QXV_WYW[WYY[YUP?P][\\[YXVRNRPJIOTTSNBPWWWYabaaa``_^^bgl��kilprtsr���p__�5\npmTr����������<Mdktux{trei^nofr~��zf"E���{jceky�wNJLLQ[dgijc`IXI,-;F%)a��o?%@$-Ur�������1Fsm_f�����VQq����LF?938Y��������ʷ��������_6.+*(((&&&&''(*5>===<<>?A@ADFHLPTZ]agmquy}��������)!&38Qu���sI4<>AC^�����������������ñ_D�}�����������n�����������������~m���������yWBEFCDB?A=80&3=DAJMQXbr���|�nT^fionnqnpssqtqpom^ 
+
+	%7*320fmbcaYYXXTURJA hmFJJJEFGHHFEEDCACIt�CA?>?>=<=;:;8<=?BBCCDCBA?>=<=><::9999986331.+�����������������������������ž����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƺ�����������������������{lZ&P�����������������������������\uoQNMH;6/+)#!" 6��zsojklhiihhhea[UI@UBAC?:10.0/+(%%%##!!$3/!���������������������������������B6=:0,&$!%&BMNS`mw="H������������ĽH1345476/4124689:1%hDa�J83l���osjlmpkjismfmpmbjmofckqmclpo]glnfjfnhbhnj[nlkeeekdfhke[gfc]ceo^b`edb^PC<=>=>??CCAABEDCCADHTYbd\`^e``[j^[`b^Y[\]W[^aV\_eZX`e[Yac_Y\a`Y_]_X^``Y`a`\]^b]\]_[Zd`[X\ZYXaU]Y^XWY_XVY^YT[]\\\XVWUPHMPQFFNSVRHHOWYY_adedababbfiq��kjlpqrtt�Ě���i�i^wuuw���������o8Zdksuwyy{��wutfgmytnX o�����wou}�tDKKIIO\hkh`ZB^?,*D�qG0Y�V+"=e{������m,Zqe^g����wFWv����r>?73:e���ɶ��ģ���������H/+**(%&&&$%&&&+8><==>==?A?BCCHLOTY\ahmqux}�������|+5Dc����޼d9>@Ii�c�yFZq������������T@vx{���������cu�����������������mkx�������������zoj`]UO:/575;;:>I\s�ŕt�kQ\gkonnrpprrssqromX%<!/8]/
+"-/0moa`^YZ]WPQOJ>rlCJKMHGGGFEEEDCB@@KP@B>><<==<<9;9=>AEBCBCB@A??>>==:;:988777533/.*�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̺�����������������������{n_$M�����������������������������L/!5��ysnkklsghfgefb\UN&4)$'&#  %&$'&)*,--006C=$ }��������������������������������D3?;2-&$!'@������~@"F��Ի��������ĹF2344276/31457679-&d@c�P:3i����hpgllojkjlhdonhdjnnefmlgelmk]mol`iamdfgllaohmgdbjgiegddi_abfcgehaa_XMA<>?><>>@BCA@@BB@AERWS\bbV\`aOY`c[[`^XY\\[T[``W`__[]\e_]``\]^aaacZa\b^][f]^^`^]^b]Y]ccY[[\W\_]QY[]UTV[WTVZXX\[[[ZXVUSPPJDOOIFMSUVMBKZY]^effdbccehku�{hjmppqtt�����Y6a�n]twx|���������U9`fuuuurx{���~�yps||}l"2�������{y{�u:DEGIJB\md]UIZ0,,;��r[4*$Kj�������D3npc\i��z�mM_�����L?<33Fr�������ǝ��������s9.)))(&&&%%&%%'.9>=<<=<<?@?@@EHKPUW\bhmruz��������_$/6Ar�ŕ��ؚ;?AJw�a�lDNk���������Ⱦ�BNglw~xtsjknjho��������������������u������y������������[7BTRSTFDE^t�����fU^hnpopqqqutosrqooX.���������<	&%"!#"!$3+3uk`_YXSTOLGFB=rdHJLMKJHGFGFCCCDB@A>@>?>;=>>=<:;:<?ADBBBBA@@@>?>===><::99876220.+�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȹ�����������������������{ob%L���������˶������������������O"0��z��xip�pemirl_ZVLBPC<?>::;=>=@CBDEDA@?;<B;&!y��������������������������¿����I5>90-('#%$3H���c!#H��ґ&"$&(%Y�ĸ@3345367.11147898.
$hA`�U85l����eommkmmpihjounikpkncnlgejnjihnjmimhlhpkhljnhligagelfdeki[aeg^bfhb_VE=?A@<<>>?AB@?>?>?@FJ^^X[a^U`^_U_a_Z^_^\ac]^Y``_\e\[^d^^_a`\^ca^bebX_abZ^_dY]_b\V\_\WZ`_X[YZQ[[]UYZZSVUUTUW[YZ\Z[YXWVWPGOPJGGOLJMRVSL=MV\]bdfcbccfhk|�qghnppqtu�����M.6pllmq}���������DDJHhpxv`aev��{~~}|}}�bQ������������sL;;BPYRQca\KNQ.**.=Vrq�lF6D,Yo������1Mwl_d�����QPe�����9>804Q��������ɰ��������c4,*+)*(''(''&%(19<=<;;;<=>>?@CGKPTY^chmrtz��������H#15N�Ү/H�Ϡ;@CR��g�qKTe���ӯ�had���2dzv���{{|����|����������������������������X������������Y1;HQSXXWXct�����bUahqqpoqoossrssrpoT
+	 T@70(!	#! &&<wj_PDKIIEHA=A;"ydIJKKIIHGEDDCCAA@BA?>>@>;=<==<;:9;=BCCBCCCA?@>=>>=;=<9:989:632.-,������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|pc%H���������ˮ������������������O 0�����}���������b]VPSjaSMHCBBBB@?<<7882+)(+22&"w������������������~�����������B4<930*'#%&^��JV��;H��ѡi|H-����÷>4345347.500689:7."d@h�\;7j����blpqjkpsiglppjilpbmbsgbhklcjlmejgldgilgcmjjaigeYdcjfbdhaXabf_ab`\SB<<>?==>>>?>?>>===?FVZ^a`^_\]e^a`ha_Zc`\_f]V[\`Y]_dUZ_cWYca[V^b^[``^Y\\[U```Y`]`[[[_`[]Z][][YV]Z[Z\VTU[RUX[YXY\[[[WUTWVSMIIQMDHNPKIRUTMFP\_aafcccefjn��lilsqrrs|���}~>D`jkigjm}��������t6M&")2K^ioe`\lsw�~~}}A~�{�����xwu�ylXRSagikd_[DZ?+(%%! #?aj�w�K9^u������e/hsgZc����rRSw����oB=729T��������ŵ��������Y6100...++,,*)(+4<<>?==<===>@@CGKOSZ^bhmrw{��������.%5:X�ؖ/U�ҏ1@C[��}�ɢ��������{Y^n�8s�����������|����������������������������oktsrpu~~xqE2<88>ET]_fw���{�`TcimnnoqpptstrqrqmQ
+
+
+
+	&.)! :uj^NJJMKKMLFE:%�aLLJIGFFFEDCCBABABA@>>@?>=<=<::;;=?BDECBACA@@@??>;:;:9:9699633/.,������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pd$C���������̰��xzyomi����������U !!0�|������������c\SM,?@9/+*(&'#$$####"! %19%s�����������������a��������������A2;:41,%!$;��05hII����ѡ2$L���ø@234<=:5.324;C9890$fANdE>8j����flnngjmoegjkpjfim^neibejglcolianhk`fgfabmgf`pgf]j`ihdddc`cada`^WKB<<===??>>@>=<=<=>>EVd\X]c_Z^``U]`a\YYd^Z\aVT]Z\V]\aU^]`Z^e^[_a`^`b]]^_[]]e^]^aY[^aZW``^WY`[VX\\VY[^QRV]RTVYUSZ][ZXVTUVRNNIDHMRKKPPILPUWPKX`bacbdcehkp��iilppprqw����qXe|tttprv|��������`7L!):Wempmdz}~~}s*'������{yxrjfhl{|sokgjic]UBS1)%"%:`ik"Lg������?:wod]q����tP_�����HB<54;^��������¤�������{H;;99:9867765434:>@@A@>?B@?AAACEHNUZ\bilqw}��������()59T�ݜ?l��t8>C`�y�����ž�����eQ[s�j@v������������{��{fz�����������������������~yropmnmogjbV9;BDCGGR\_bu���oxVN_jmompqrrttutsromK
+
+		*2, :uja[\YY[^VTPI:,�_KJHJHGGGEDCDEBCB@A@?=?@>=><;<<<<?ADCBBBBC@AA@???<:::::96765320-*�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rd$A���������ͱ������������������V " &'"#%3}�vzwtqlklmnijha\QK)3('$###%$&*,-00344458:@B&q�����������������k�ؘ�����������>2=961*"#(?  H��Ҿ� H�B&��»?127FE:63645CK;891#hD\nG<8m����hnkmjnnmelkkoljimgvkfglmfnmmeigvgkdmdffmmbgdqcfdobgfgcbedeab``VI=;=><;>??>=>?=;:;>A>T^gVT]_[W^\[RaZ^X]^b_b_^X]`Y^^cZ`]e\]^c`[^b_[_ebY\^^Y]acYZ\`XY[bZS]`\XW_ZSYZZS[[\PUWYUVWWXW\\ZXXVUWVQEEJLFCPQMHMOKIQXXQPZ`abcccgikw�w{�{~qprrr|��ultkywwvwww}��������E@C $9Man�����c>�������}tqiee]pyvkhlj`\JBH.'$!#*.[t������|-[zk_Y�����LJl����uAD:58Ka��������¸�������i>><;;;==<=>><>>?@@@@BBACECBDCCDDELUY\chlsx��������{!+5<M���g���S:?Dp�b�����������˓r[s�YG|}����������hv����}jm�����������dp���������������xtnhQ?FRYUYYZ]_dy��{�RPajmnoppruutvsrromH		14.
Bukd``]^a\VUPH8.yRJLIJIGFFDDCDDBDCADA@?????=;<=<;9;=DB@ACBA@??A??==;;::998767532.)�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĺ�����������������������~qa%:���������ʯ������������������Z )""+2/*%/}�vsoojmpljkihg`\SJ!EbUKAAB?ACCGEDEEGB><;688<$f�������������ˬ����Ȉ�����������9*<969C965857730/+!D��ҙ!Y�׸`R���C239GF84/646DG>8=3#gAguG<5m�~��jqhkntnjgnmjknldliqe`fmibkmlajkqacan_djkf^jdoYd_j^cgh]^eae^]_XE;8:<::<>>=<<<<=;=@AMS`^`S\[]]\_Z`]f^a[e``_d\[\b`X\_cX\_dZ]]b\[_a\X^d\T]_]X^__X[Y_Z]]_Y[]^[\]ZWY]XXW]XYPZVWWYVUWY]ZYXUTUWVTOIHLNJJMONJMPJLUXYRT]_bbcehhk�o��k��xsrsr}efpewwxwyxy~�������}8O+%/G]r��Lh�~{������usqrlbeipponf_`BM>,&:a|������d)luh[Sn����OOwvt��cA@86=a��������ª��������\A?<<==>???@A@A?@AAA?@CCDDDEFEEFDEHNW]cimsz��������a*17Ee���G�י?>AJ��Z�������������Ȳ��JO�|����������^p������������������aqx�������������������oAJRY[]^\\_by��p��NUdknmnpoqtwtvtsrpjF			 $#	$$%L|hb`[WY[WQMKA95ySMLJKKHHGDCDDBBDCAB@>@?>>?====<::<<CEBCCBA@????>==<<:9:87767531-+�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʻ������������������������rb)8���������˳������������������^,>41?>6( ,w����xj���fkchhd\UM =e[NFJD<<=;96350.,'!" %,4$c������������ͻ��Яuv������������7'@97L[;?;5/1A'!
- "(29% I��Ԓ_���Ⱦ����C526MF85.25:JK?:<4 dBg�R=7j����plcjkthggnnhlmman`maedjhdkgj_mdk_h]i`fiif^k^l\g_jbfde_egab^XPB:78<:8<?><<;:<;<>@DLY\eZY[e^[`abZ`bcV]adYW]_WZ\_\Y]\`T\^bX]^aX\`aa]_a^Xa]ZX`^\[_Z]]_[[\_][Z[ZSV]\TWXYUTSYRUWYRTWY\VVTSSTUTTSTNEHPOKJNMINUOOT\]TX_bceehkq��q�sk|��qswurromtotxxywx{�������j2[ !*@N(���������ywtkkeahpyua_Z>J0'#"Ng}������BDxqeYSQSR`eLT����tYC>73Cq�������ʾ��������xH@>=???>?A?@BCCAABCA>@DEFEDFIGHGFIGJQ[cjpv}��������K !  )66D�ܭf/`qK;@@Q��R�����������������4R�z���������gu��������������������z�������������������u;NUZ\]^]]`b�ǳv��ISfjopprptuvtvsssriD
+	
+)	"' ,3-IxaWTPNPRLBBD;0#aU2$ B�MMLIJJGHFEFEDBCEDBB??>?@==>>=<;:;;?DDCEDCCB@@?>>><;:99:9686564/,)�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȹ������������������������rc*1���������ʵ������������������`'8Q<&?\F17)(s��ƚ|����������`TN #93.$+#"$ """"!!"""""%+4@#d��������������������������������5&<84GE# %=* 2�������YF��Ҷ�ɾ����Ⱦ�D739JE64-159JH<793cAd�S89g����jlelhogjmkllnjljvgofnjiilpekipclisdghkiefgn`fbkdcdja_fhd^][L>7589979??<<<<<;;=?BSS^\cXTVc]X_]^Z^__Z]_`Z[]\X^_\^]_X`\b_^Xb]\Yd`[_`^\]`d[[]`YU[_V]][UX^][WYYWQU[YTTUWTTTXRSRYRVWZYUVTTSSRPIOTRMHJQRIFJLHQSORZ`\Z_badegnw��z�iov���{~wtvtvwywxwwy{��������O4J(�pv��������zsrrstqmgaaPKM-'() ,Wo�������7]zl`XVa[VUKKTr����CA<74N�²����ǻ��������f4<?@ABB@AAABCDCBBDBC>>DEGIIJJJKIIJJJKVagpw��������:3PM=.&58@n��ƌ2/<@??T��Q�������������ƽ��/Jgqz��������qh���]Z}����������������������������������l@QW\]^_^]aa�Ŭ���OUdhorqsqstvttsusok?	
+74KP	..(RzWMKGLMNLCBF>,&~|O<300,)"J�LJMKKJHGFEFEDCCDABA@A@??=?==::9:<=@DFEDCCBAA@?>@>;;;:988685552.,&�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ù������������������������qc+2���������ͱ������}v����������a" #=e��`8.'u��������������}\SU!/C:3*.,-..22589;::>:9<>@F(c�������������ǯ�����������������5%@;2KN!"$:'!-e|��IC�\@���ď@/jöt���A139FD450467EH:76-eCM{a:7k����eljrdklnkhjonhjmrgfeokehlj`hfk_gfo`aekg^ceh[e^ja_ai`\ea]YSI=7589867:>?<<=<=;99CP`XeZc[\X`]Y_Z]]_\^__^]]_]]^b^S^Z_VZ]a]\[bZX^cYU__ZV\]ZW[^]WWX[W^\ZX`^Z[\ZXVVYWWZ[TXUUTWVYVVUYX[XTSQSTTRP<@PRVRNNPQGCLOLOURV]a_bbccehm{�v��grz����iwy�z|zyxwxxxy~��������=D:5xAGHay��������zxuokhea_@aL+(5HA7-!3Wn������b3svi]Y[ylfeXRNQO`{jEA958^��������æ��������K +<@ACCBBCCEDEDDEDDEA8=HJKJKKLLJLKLMNR\eou��������&<��YPig*5;AK���߬^=AACa��c�l_����������Ź�{0^ip|�|smjjuvsw������~����������������������������������_?R[]^_a]^a_�Ŧ��}KXbjnqpppsusrtstsnj;
+
+Y�))\�f
+
+ 05&UxZQPPUTWSSSNJ-B^<0(&%#T�KJLLJIGHFEEEEDCCA@@B@>?==>=<;:99<?@BEEDBBB@@@@@A=;;::9853653410,&��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˿���������������������������������������������������������������������������������������������������������������������������������������������������������qf*2���������ή��ytnlia}���������` " %h�ҭM(;0(s���|v�~|tqjmdkgXSU%GkXOKQJHHFFFGEB@AA;752169&`������������δo��r|�~�����������6$C<8MM#,'))=FP2' G���9>�UG��ӧ*J_��9���D.18JE661446FJ996.hDRt^;9l����ekio_kqoebimlgjlmgebmfbiigckbgZkbj]_gfb`edhZi[f_`af`^`[YOC;6578777;<>@?<;:964J`Y\^gY\Zb[Z^\aZ]_]U^a_[[_`[Z[`[T\XZT[[`\[[aZZ^bXX^[ZY^\ZZ^]\[]Y\]`YYZa\XZ^XTW[XUXYZTTVWOTVXQSXYXXVSQQSUSQOF:AJRUSOIMRFEUQJUVUY`abdfefjo��p��iw��q�{}l|�oz���zzxyz�������o8R'-b��R``Tq8,6Hdu���~|ytspkgdeaX?`7(*<C>KJD4+!!@^r������QY�th]SWsr|ysdYRPLNLB?:6@q��������Ě�������y2/CBCEFDEEEHGFFGGFEEA6@ILKLMMPOQPLOPPVeov��������G�;-j�ˁ/6;BV��x�ӫFBAIj�]y�PLk}��������ɸ�k?x|}���������������������������������������������{mw}���SET[\_``^]a\�Ƥq�yKXdjnqopnrttsttrpli:	
+6[dH%b�aE
+!04'!\zid`[Z[\YVUOI)!X�GKJJJHGGEEEEDCBCCA@@?>?>=;=<;:::=??BDEEEECB@?@@A=;7542..-0/1110,#�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¹������������������������tf*1���������̬������������������b# 2����SH:%-r�tknfc^__`bcgeeaXT&2\SHB@786310.++)''$ "+6'[������������̴��џ�؝�����������;%C<5KNi����>\o<)"0��N����( D��֊.��@nϔ�ú=129FC74,727EI6960iCc�_78n����gmgk_oliciklkijjkjkhibgnggln]i_segcjlcbdi_fehXa`faab_\UK?965775469=>>?><9659JX`P]^fQYYcVV[Z^[]]\S``]Z_]aY[^_XZ_X]Ya[]Z`]a^c]^Za^X^]^[Z]_ZY\]YZZ\TX]]WTY\SQWXUQYPVXXUURVVYTVVVXURQPRTUTMDMKF@LTUSMHMOILTSSXVP]abegfgls��h�~��ww�lxo��mn���x���}��������[9M#o����� �KT�y^D- &=RgyzyqomigfcaNCZ)(,E6#'1<CEC5-)New�����sB~��sfWQTY]hpu}s`NLDA>84K|�������ȹ��������g!2CFFGFFGHIIGHHIIHGIB7ANNPSQRSRTTURQU`ny��������a`�H��S� -29CYs��}�Ħ8C@L{�O��KKV������������YF�|����������y���~���������������qr}���������������vv���HGV\\_`^^]`]�Ƥr��MYdhnpoqnrstuutrond6"Nis�C
+
+
+$77"c{fb`^_\[XWUOD'^{@KKKKHGEFFFECDCBCAAA@?>?>=><=;;;@A@CDDDDC@@@>@>>9641-/.//.,+*--, �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ��������������������������������������������������������������������������������Ⱦ������������������������uf,'���������̰������������������d "  Dq=18;D<-&7������������������a' 4/)#"   "!%&&()(++28@'Z������������̵��̝�ќ������ÿ���:"A<8NI9z���=%% %W+*KC* @��ي%��3~�1�Ƹ!=227@<56-607?B8873"fA^vV97m����moflgslehnmjklleijmedcll[emlZegk[cckb\cdbZdeeU`\g\Z^ZRF=9888666799;;<;8631:N\W]Ud\aPd^^U_]\[__[]\e]\]g_^\``[Z`[VZ]_Y[ZaUZ[`ZXZ^ZX_]\[[\YVZ\ZYYUXV^[[X\YYUXXVUUXPYX^STTVRUUWSTZQNOPPQQRNCDILHDMUVQNIKPLMTUO^ZX_bdfgjov�}ikhlmge��qtq�~tz�uv~�r|��������H(-9w/3�JU�M6Cc|��qAE262J`orqmgdc^EWN(%3M-$.9DL<5Wl~�����^M���~{rmbVQOLTbdpPID@<79\��������ã��������S!,AFJJJJJIJLJKKKKJKHA2DQRSTTWWWXYVVVXgy��������HW�Iu�hO�'05<Ly�����̏5AAT��O��p]k�����|^KOY��IS|����������q����fn{������������is|��������ó����������CKX_^_^^^^]W���~�}IZdjmnonoprtsutrood0 =��F
+
+!
+%3-!oxgc_]\YRUXWMB& irANLKJHFFFFFDCCAACBA@>>=><:=;:869>==@CDDDD@>?>><8532100../-,*'(,+!�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ������������������������vd,'�����������������������������b!#'(SI ",>-$$=���������������Ľ�d+2LE?:6766789<<<CB@><?>=AD(V������������;��ˬ�̳������¿���=#B<1NS[����;*=1( A��դ"<��y�ù"=547@A=7.335?A;9;7#gDe�V:6j����mncjikgdejhdjmlbhglgf`gh`ggi]eeeVece^^cbc_a`eRb_n^\YOA<86787569<;;;9741/2GR^bV\[dX]]d[YYd]Z]b`X\]`YY\a[Z]`Z[^^UT\ZXW^Z]P^^]Y]][W\`\\^]YX[^WWXZUX\bXVY`VTVYUUUYTPVUXQSTVOTUWSSVNOOPPQQPNKIBEOMIFMRRNGNQKR[NY]Y[acegjp�qjmkmnmquqv{p�qs��rv��{}w�������|3NQKi��znX=.9Y���>�/-7%8M[efe[;\5%!7L'*D: B_s�����}Lg���|zyyzsqeXONJRLFB>:5Cl��̱����ı�������wA(,HMMNNMIJMNONMNOMKH@5PUVWW\[\\[[ZYZ_u��������0��͚I�[ .8;Cvʾ���kNA?CX��P��廨������rPS_��=`~����������W��������������������ty��������ô���h]g���z=OZ`\]_a__[V��to�sJ\dlnorqrqrsrtrrppd*
+0+S�j:R>'#+,&"lwhea\YKDJUXND%!omFLJIGFEGHDBCBA>?A@?>=;6413688406::;;?CDCDA@A>=822210-,)**+-,('()�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɽ������������������������sj1&}����������������������������e!,=O/#18! "#:���������������¹�j1?h]UPJHLHGHIIEEHGEDA@@AAI%W�������������Ŭ������������ÿ���;%=<8LQMr>I.Da�S&"?���éx~�������"A539IG<61445CL?::8!jBHTG=7l����jjhqkhhigfimnmkimdjemedkljelgh^balba`ffZccdYb^jba\YK><747843479;;;741028CR\d]OZ]aV]]`QT\a[[]b_X_]^^`Y`^_^_]a`[Y^`YVY`Y\Zc[[[`\U[_ZP\\XSXX[RWX\TYZZVVXZSUVYSRVYVSVTVORUUTUQXWWTONOOOPQPNLORFKPRHFLRSMKORNU[V[\Y]dfhkq��jklmoooqqponj}ju��ty�}�s��������h' Mo:V5�����kR1"<L*�ast@,8D?9G&"!<J$(F**Mgx�����qB����}zzz{||z{yn^TIFB>89K��������ɾ��������f7."/MQOPQPOPPQPQQQQQMG:?T\\]^_```a`___m��������)8F'9P6*8@M|�jXA*1@@>F^��f�����º����fOM_�~7j�{����������Z�����������������������������������������qCPY_^]_`^_]_�����mN^flopploqrstsssqma) l��`Dr7
+()'%mvhe`[YPNQXTMFseDHCBA>=87=>BCC96??>;9425753063.29<98;@CBAAA@=62220.*((&)+*++*(&$��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˽����������������ƾ�������������������������������������������������������º������������������������sg4$z����������������������������f 5F0#1*!>�������ͱ���������m3@h_YRLJLIIGGCD>@DGEA@BBBE"S���������������~����������������?$<<;LO#?63%&!-�{B!F��Կ��������º%I31=KF:8304:JL=9;9oD?@>=6h�z��egkqgdhmicinnfhkm]feqcZjlf`effZ_bi]]aibX`adRicg^]UG::855851259:;851/02<VWW^^XX\a^Yc]\V`^^\b`[\_aX__b]^^`^][a\TZ[[VWZ`WY\`XZ^^XW[[VV_ZWTZWZVXT[X\XVTWUUUUUZVUUVWTXSUQTSUVWQTWYNKLMONPQRRMJROLKPPGCIRTQJRTPW`V\^Vbgjmu��gilnnnopprsrwzww|us��t{o��������R Fr/Fn5N���������nI+ 2GmyzoN; #<9$!4O01C!5Wk}�����[S���~|zyzz}|||~~zpbPA5>W����ƾ��Ⱦ��������O62,2NPQVSTUSTSSTSUTPNF4@\a`acdeeeeeddhx������{C���{R/-8Eb�Й���A;>?Gr�i~����������ƴ��}�o,`wv���������vg�����������������������������������c~����a@Q[\^_a_][Zc�����fO_einnomoqqqsttqol[%t�BH2	'1.
'nvhda]XYZXXTM?$�[@AACDC@9,36:=B7/9>:4-0336430/2-.47775:AA?@?>73340+)'(*++./-*('$!����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�������������������������sj6%r����������������������������j"211-%)) =�����Ѹ��κ�����n6?h`YVNLJJJHUMV`hHADB@ACCI$Q��������������ȴuo���������¿���A$9<4QLc�Z&<)7s&&p����Z3X��ؚ-#"*����÷#B328GD;81.27GF?;=;"qE;A;<7k����efmpc_fneafikalkg\jdka[fhf^f_gYd_e_a_e`ac\c_la^[RB9:754760/1798850038DR[[`_WU__[[[eWWZaXV[`ZTZ^aV]^ZT[__ZX[_ZV[YXVZZ_W]Z\T]_[X]\ZY\\YXX\WWY[U[Z]WWY\STUYUWVWSRUVZOUXTNTUUPQTVMKLMOOPRMOQIJLNLEIOHCNUTMNUUU^WV`^ahlr|�vpnmllmorrrtu��}xvrrxu�u�������=!=���LXe!j�����������xgI0$ .I^}J=1%!&DB%&?4"Bar������Kn���|{z{|{zw}}|}zyxncNEl��Ű����Ţ�������lA;50)5UWVY[YWWWWYYY[UURK7>^geghghghiihin������a$p����܏ 0>Htĺ����~5<@R��W�����������������^8]emyyzzu}ywdau�����������������������������������]�����VFT[]a`a^][Vo���vs`Q_ekpmnmqqrqstspro_ 	Y2)1*-jofaa___\XVQKA(�J07BGIGDE>5.04:9341.(%/554.+)*1112/3505>@@A>91242/($'-//-142-))& ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ľ��������������������ſ�������������������������sj4$j�������������¿�������������h!('$ 9�ᧄ���ʪ��̼�����q8<c_\XOLJJJi�����_<CA??ACF%L���������������ϓ��uj������¿���C
+%<=:MOJ���m:,%Q�/!9�xs��_e��֨k}m'����ļ"F235EE780135CF;9;9h?^qL<7o����iiiiffgjgeedggsieblgebdhdffj]c^f]fbcZ\_g^Tbcf^ZN<78864572-.26964017<H[XRY^ZUV\\RYZ`TYZ_UV[]WV\]^Y[[ZZ^]][\]^[[^VXYZU]XaY\[dZXZ^XWY]\VZWXVUVZUVYYVYY[RSWXTTPUSRUTTNRUSRTSRNSVRKJLNOOQSNGMPNGIPLEIMJLQTRPRYTR[Z[cfins��o����llmqrtv}��xxwvwwvywn�������p-(!\1E�2����������~~||{teR8) $;'#1F@0# *?@#*Jbx�����sE���~|zz|{yobY_w|}zxupj^Q}�������Ȳ��������Y?;73/%Cb[]__`^[[\[\[Z[WSJ=Ohiijikkonlkjlv�����>&K02^_$)0;P�ы���̈́3=AW��G�������������þ��UG|ux��}ss|���x������vu�����������om��������������|s�����NIU]^`_`_]ZQv����tYR_flrpoprsqqstrpnl^	1	! 
+%,#/uleccc`_^WVME<-�>-38<AA=??;,+,0:85/* $07752*#%.454/2004;@BA=51332.-(*/00.3641,,&�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ý������������������������te/%o�������������¾�������������f!!;�涕��˼����������s::id_ZOLMNJ_�����a?CA@BBDC'N���������������И���������������E$?>;KT-����$7)%J�2.����z:f�������E����¸!H027IH:84315DH=9;9!d?oF<8o����kkeghledgieagkoecflbdcid_dgkX^`d\aagW[bf`Lh`]XM;556457630.-2452018?PY\WWZ\YUWWZY`Z`Vd]^W_^\[a`Z[]ZZZZaXY_bYWXZYSWZ[U][\U\X[VXYVWYZUVW\XWUXUWTY[XUWWWQTVXUTSTTVXUUSVTTSTRRRTSMJKMOOQRRVTJLOQLMNLJJOLMTVWQSYVZ]V\giqu��m�of��mloqu����unuvwwwy{�������Z 0pw/4aV����������~~|{{uttjg[K8*1<# .?JG@FC@(;Wh�����^X���}{yz{{zhcnid[lzxupeY\��������Ȱ�������~PA>874/$'`haadcfca`_aa`__\UI;Zklnlopqpooonp~����1"ơ�j:%*-3?V��t��{�r5?I\��A�������������½��Gb�~����������~���{m��������������mq}��������������������CKX^`b``_\[Oz���x�XT\dmqooppsqqrrrqlj[
+
+
+	("
+0}lddee\YTPKF@7'oN"-$-�>1615=>=911(++)3<=7.'*45-.6+%&/65400..19?A@923331/30,/00/5642/*&�������������������������������������Ǿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ï�����������������������tg6i�������������¾�������������d ";��ֽ��������������v?7hdZ[ROOMNc�����OCAAAABEF(J����������������sn��������������K&?>3IT^ʹz>8*(K�/F���m8!c������կ����ƻ$E239KF767406EH<::7!bAn�D>7o����oj_ijjdd^fe^hgjddfk]beh\]b]jW`]b^aahXebabPcZVG736633662/-,,.02259@Qh\XX\_ZYZXU\``XZ\_Y[Wa[X]_]W[][TW[^UZ^aUUX[URYYWTZZWN[S[XZXS[\\Z^_^VXXZXVWZUSVXUTRVSRUUSNRXXPPVWOUTSLQSWOIILNNOQRMFRPOIPQKFMNFKOMOXZYVX^Y\]_glrw��x�jk��usrqr~���wkvxwxwy{��������B!.(2���Tv��������~~~{zxtqokljiaQ1G6%#H+7;:.#>]p�����~Ks���}{y{{{wpjgjrgbxwskaUg�������Ȼ��������qJD?<:83/!N��ncekmiededced_[T?Cduqqtsspsutrru����'0CUu���1#-8@Q�נÝ��[9?Lm��N�{Kr���������º��>l������������{�������������������hoy��������²���������}CP\__ab`_^[H������ST^fmqnpqrrrpqqromnX
+,M!(05J 		
+!7xjYOGCGGCGG@>8%�wF6.,+:+9�:345ADFDCA5()++4:>80,0951/5/%(/663..0006BA>73442.196,/1/165531+%�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ�����~������������������ug9b�������������ý�������������f#$9����˵������������wB9haZ\TONNNR����\<E@A@@ADH&L�������������Ǚ������������¿���J&A;7HWHc0=*)Q�3'��`+!_��׶��}pdXN�ɾ#C339GG657306BI<:98!dBgv@<6p����ekbnjgeccihemhpdefi]lfg^dh^jbc`gahdc\fa\^XZRC626840353.+*)),/27<DS]dZSW_\VYZXSZ__SW]aRVS^XU\_[U[[[UX[]Y_]ZWVWZWWZYZZ^YXTdZZX]YV[^ZVZ_[TXWURVWYQVUWMPRWQQTTMKTVUNORSOTRSOVQVMGIKLLOSSPDBQSLKOPJHMLKNQJQY^\VZZZ_dhms}�p��dkw�r��}~{wl�{quwwxxy}�������|-/,b�vW-1���������~~}}xxuqnkifdbM5Q($.ãH*Je{�����mJ���|zw{|zp`dfw{isyvsk`S�������ǭ��������^HFBA=:84-%6* )6N]kopmhgfba]OEPytuvvwuwyxusv���z&No���#"3;>;���̩ʴE;?L|�bm�^;V�����ü������?p�����������wu�����������������������������������������q@Q\]^bca^[WG���~}�KV`glnoooqqsppprpniU
+
+1���������!	
+
+!HI#"3=B}kLKRPTSWUTMD9�aNJ<4(/(>�66@CIGHFFC>3&*19:<80/8:79842((/340,153/7A@9441.0-396--.-231/-+(%�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ�������������������������������������������������Ǿ�����������������¼�����{�����������������qe<Z����������������������������e'&8������������������uE7bc^[TONPOd����~@GDAA@AEJ&H�������������ư�����������������J$?:7K\!"/X >/)H�66!#\��֗06?N�̽F538GH444436CG;:;8b<s�G=4p����akmmfbcif`ghkcebf`dalb]bmi^ecd\`bk^^Yb^XXUOA524641143/-,+*+.26<IT\Z^XRU_YVUXWR[\\R[[aTYZ]Y\^^]\`[\\_Z\[`\XYb[YXX]VWZ]WX[[VVT\VWWYXUWZQQVSSPUQXQVTWOVLXUVSUSQTRSRTRQPUQQPVPRIFIJKLQRNHNGEMONJKPJGKOJNNMU\`ZV]]_fgls��o�zdn��m�snk��t�{pwxwvxy}�������c$/<T}��C��������z~}}ywrpnigec^C8K%'CM+���ײY-3Sn������S\���{yy{|xpdfadw}}yupeY\��ƽ����Ǳ�������~NJGED@>:83+**(0MhimqmhbYOAu|suvyyyyzxuw|��\L��N/"3:EVg��{o�~=>ER��T��UKWy�����vtlp��lBx}���������Ug�����������������������������������l�����dAR\^_```[XQG������MW^glmpoqqpqpqrspniM
+*wkcSD:7L
+		'�|rphNJ�o\TX]`^\[XQF6"�}WQ7/)."A9EJIJJHGGFB=0.9@8;843:778757/,257425443:@@831,-.-5:4,+))--*('%##�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~nb<S��������ҽ������������������f+#!$$'+6������̲�������û�zE5da_]UPOOJa����h=FCBCA@AE(G�������������ʷ������������ÿ���M"=;8IW)FU��: ++H�6]��ع�[+�����ǻ"@228FJ662.36GI9:;8dBg�S=8r����aiikbb`id`fff^caf]ddf]YfgaRb`eW`afW^\b^XTE;411631/461.*)*,/38=P^[^YbXX[\\[\\[[a[ZX`Y[Z^\ZY^ZVZ\^RXXYXZ[`[YY_YVYXZTVZ_V[YXW[T\[_WXYYYWTVVXWUYTXVYVWVZSWVVNQUWTNPTUNQTPPSSRPMHGGILQTSPCHOPKGMMHKRLJSNKU[NW^]W[_aeims��u�skx��s�{~o��pw���{yz|�������I!'0|=MT^������Z���~}ywsqokjgb]:B:'C�˜"']���[ ?]w�����xIz���~xyy{zthhpfmy|{xtl`Pl�������ʼ��������pMKHHGCB?<97,"!!7MdreZD]�~qstwvvy{ywx��8'e^.F�($08H|˷��eJ2>?F\��J�߬��������VXZt�N9~~���������lo��^dv������������������������������~pn���WHW^_`_^_\YRO������MW^fjmonppppqprsqplL
+
+#G
+	
+)����qbAT�lea___^^\WRH075''ExAIIEA>ADFFC?>3:B<863354.02770027<>97725?@?820+,2..4/)+'&*++*(&!"�����������������������������������������������������������������������������������������������������������������������������������������������������������Ͼ���������������������������������������������������������������������������������������������������������������������������������������ƻ����������������ž�����������������������}oe>S����������������������������m.0$ "#"#"##&)%#%'+--/��������ķ�����Ļ�}H1ab__VPPQJT����TAFCCA??@C(D������������Ϳs�����������������M$:=:J\j�ξQB<N>++Q�5 )4CR3]��پ~&@����¼%A236CJ772.35BL9:9: b?PhY=6m����difgde^ffefeedg]gfieaZcjd^Yg^a^k`_Xg\YVPG5/.0230/231.,*+,/48@NV`_dT]\`YXZ]^VV[^W[Z\RUZ[WUTYYYXW[SYUYX^X_\^[YXZZX\ZZX[Z\UWX_RVXaWUX^XRUWUSTTUQSUXUSTVRQTRNNPUPOQRVLPQOPSRRNJGIIINSUTTNHDLOEHLLDGMIKOOMTQR[a]Z\cgkox�|hyx���a��fhp�tqy�ux�������������34����V't~������q#-Jt~�}wponlhe^P1S-(nY3��y�NT.*Ke~�����gH����}xwwyxmahfft~}zuqh[O|�������Ȼ��������`TSONOLFFB?<5)&FVa_Np�nlilqqvzwvqsl#r�ǳ���-!+4:;p����׹O=?Gg��J�����������ON[��@6et{���������o~����sq|�����������l~��������������~bh����LKZ^aba_`]\MM����qoMX_gkmmmoqsrrpqtqlfH
+
+
+
+
+JaWqj=V|ifc_][_^^UQI+ S|FJD>>=<=CFCDC8:@<784212-28:72326<>9654>B@>742,-22.,+**((**+++'# ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|pf>O���������û�����������������j55,'%"&0'%'$*?PT1%-,5�������Ź���̿�û�|M0ce_\UQNNM_�����BGDC@<>AD$?�������������ą�����������������M$:<;CY0JT��'=!' ,R�5)�������vZ��۟o�It�����)<125;>775/26?F<:;7#fMy�L>9n����gjbeghbdfhdcefdZdhj^`agg]\adZ`dfW\\g[TL@60-021..032.+*+.269BT`Z_[cU^WaSVWXYQZTZW\RYW^\][`W]\^\X_]_Y[_h\^\a[X[[YSW[ZSY\YPXT[STPXVSTXSPTVQOXQWSRQWVTPRPSOSSSTTORTPUOQRRUVRRSKGHILPTTVUTPFEKRJEKNCJNILNMOYWW`_^]djnq��kfkgbbdm�}vrz�mu��uu��p{�������g!R+?o;~}������ri\7"+AalcmmjhfaB4F%*�/;�����P4Ul������Q[���{xxz||ucgfiu||zupfXZ��������Ȱ�������{QJRY\YYQOLGB>4%5^Sl|PNNUUW\cnwwtmI+=Qm���>".6;?Qboz���I:=Hp��[���������һ�xcX��6Neluxwmjmnmidr����������������|��ku���������������������BNY^ab`^`][HX�����sLW^hlklmppttrrsrpliC
+&!.%''% 
+ -+.fti:\lfc_\]]][VQG)XxGE?@=;95:ECED<=?<677521389845536@?9539@BB<851)+241*)**))'%&&&%$ ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ʋ��~�����������}{{y}�{pkDI���������º�����������������m42.-?m�g*)(A�����n*.3������Ģ���ɻ��û�|O/ae_]YOMMM]��ƽ~7EC@?>@@C">������������������������������O$9=9A^$("(G7.B:(&F�2MYR�x,�w]��ך_�Ҳ������*=50056563.247689:7`Ly�D=8u�~��hj[b`d]bbde[eeb\`gfU]bd_Ya`bX_`dV^ZVUK;3/-/10.-0340/--0259HUZ_`d[aXcW`Za]\\Z[XZYbX[Za[[Q]UV[]ZR][\WY[bR[_\RUXYWSWYXQZXXUXPWTVUVUZTTUTYTTUZOQTWOTW[QPQVPRSSMNNSSLQONMOSRHRUIEGILPTUPPUUTKAIKG>LNINMGQTPU[QYaebgims��igijjijmmhrk��pw��q���u��z�����U&����Px}������~��ze^9$$RqmiedZ894$|���qZ
+aZ @[u�����zA~���}zzyyzxnhhlq{}xrlaSj����ɶ��ı�������iC-+0=K[^aXTNJB-! ;ZujJOOSXWWYWV_n|j1F--.;B]����{<B7<<M��fp�����������Ŀ����:h}w��������������q{�������������}gu��������ô���fp����|GQY]acb_]ZVCa�ĭ��~FV\gklnopqstsrrqplf?
+
+HtJLT_pt~c%!
+A���z`g@^{kcaa`\]^[VQE'\mCAB@?:635=BCA<<<70/344699:855147@>747?BA@;74/(,11-'())('&))'($"!�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʽ�����������������������������Ű��}���������|}zwz~~}|rfCH���������ƺ�����������������r12/-U��l&*.x�WCJg�P*1�����������Ľ��ļ��Q.`da_XQMNMO�����]BBAAACBF#8�������������ȹ�����������������Q!;>:D^')$!""<]�T-*M�6 �u�tY��غ����������*;61254575134575774iAp�E=8s����elYo]eag`^fdjdb_d`f_gd`^ci[^`c^`Z[VSF7.+,021.,/344521356:J]`\^adX_]cY\``[ZY]ZTYW]SWY`X[TYVWZ[YW[ZVP[\[Tc`\V\]ZWWYWX[`VTY\TWX]VRX[VPTWXPUWSMNPRNSVWNOQOJSPSNPORPLOQMKOPQNUQEDGJNRTULGLRSQLFGNGDKMMMRQSYRZYR^ccgkms��fjikkijklnpmoijx�}u��kwi��������7Y�N'&RrD'h|}��������z]��qNeqjidbV2@+!5��FQԴ�@(Md������lK����~yzzyyukiigq�zupi_Vx�¿����Ȳ��������L"%2Hgfb]WC %.ixn}��ne^NLNSZ]P?+y����Z4$4:Hs����ڷ<:?@V��N�����������������sCz�����������������ukk������������xr�������������������oES\_cdd_ZYU?i�ť��RY_hjkmopruuqrqppli=
+
+K��������_
+"	A����C;:atib``^b_^]VPE"dpD?B@A;:99;?B@<:<:-'&'.388846328:><69?DBA@:53-,.01,&(++('',//,(# ��ÿ��������������������ɿ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɾ�������������������������������������¿������������|y|{{}~~|zpfCD���������Ƹ�����������������p70-,W�\A#)4\#.&:[$,�������ϻ������Ž��R,ce_][RNNPX�����t?AAA@@@C'9���������������̥���������������T =><HY'&#=/&))D�:,(''O���I6g����þ.;504466550135679:8!n>m�S<;q����]idrabdja^ghfYa`dX_\c\Z`hdQ[_bS\XVO@0/)+,.10--0435346436JZ_cZ]`iU[\dZYXYPWZ[WRWX[UXW\X\XZW][Y\]\Z[Z_YX\a[YW`ZSVXVRSWXTRUZNSVXOQYXRJTWUORRNLQPQNSRROOQOOUSRPQPQQOQNQQRMPRXMFEEHPSSSSLCGNPQLCGMKGKPKNSQVUV^ZY`einox�yfjjhihijknqoqpqvseq��|{p�������t$I���t@=|~���������{/u�n\lnkgc]D4J%Jl
+P��!3Up������We���{yxyyyrmnjgv~}xupfYY��������Ǟ�������y3BUX^skE[��vA
+4ryf���������cV\T3 .8j���n)78O�˗p���k6>@^��8�����������������`G������������x������������������������������¾����cr����cEX^^_ab_\[R=u�q�~NX`kllmnonrprtssqmh>
+9T(#&	
+;�]2*'\ohc`_`a^_]WND sg>A@D?<>>=9;AA<<<;4(!"$$+1367448:?=8?BDBA=643.05/-(&)+,++++.0.($ �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��||}|~{}}zypgF>���������ȼ�����������������s8/1*F`ae,+6bJWa(;wp',�����ͽ��������ǿ��W(_f_^[QMLNKYp�jL?DCB@@AA?(9���������������ͦ����������¿���S"=>8Q](&" = -(G�8 R��ܰ2:V ������/=400358635324569:7l?l�S=:r����ZafjVaa^[]ef`SbcaWi[cY\`b`R_``VVSJ;1,,+,-0/+,/24554231=N__^g_[`eU`bbZ][ZV^aYY\_YXY^VXX\WWYZ[X\`ZRW[[UVZ\UVWWQRXVVSTXYUVVXPXTUSUUUORXVRSURPTURTTZRPQTQOQSQMPSPPPPNLMNONRSQEDEELPRSNLPPCFPTRJAHTHFMNMXURYVW_[^hlmq��jfiljjjjjkoppsttwwuwsqwu�������[!$f2M��[������D-l��V )1YnokgcY3=@"q��|6LLA[v�����vD����|zywyzuphjgez|zvsncTf��������Ķ�������d%8; ?te#%qcX���tCwqg����~}���ă\Q%:WhG)49R��i0Y��]8=@b��:��c���������¿���TR�~����������\���p|~������������������������������u�����ZIV\_ba`]\YR;y���x�zRXahkijnnlqqtvstqld7
+
+
+G�e4&#lsgd`^___`[UK@"�g=@<ED;==9:>@>769/3+"%&!'27743689B<=CCAD?966427:2*&&(')++,,),0+# ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ľ��������������������ɽ����~������{y|z}~~}|yoeB<���������ƾ�����������������v;23,Jsqr*,3���&��J)(|�����������������{X+`da\ZSOMJKJ`�i8;?BAA@AAA(2���������������ɸ���������������S#?>:MY'%" !<@XP- (=�=HIO��ݓ"��W_�����-;31113673-20156885i?m�P?8s����O^bmWfN>>DQac]i^c\oXe`ie^\_h_ZTQF8.*()+..0,).230232/1<Vabbbddb]`^f]ZZbWUZ\[TV\ZSQX[RTV[ORWXRRX[WQZXXRZ[YU_XURY^TTV[VWVXVUV[RRXYWQRZVQQTTLMQQLQMOLPRRMKNROLOPNNTPPMONONPQLCBCHOSSSOHEKLIDNSQQQRNJJOMT\TW[XX_agknt��_fijhiihikooqtuusuwtuwxwv�������<# zGlW$p~�����}Stc}Cs_T`qnligbR2F4 @^���jM%He~�����fI����{xxwyypf_`kx|yyuqlYT{���������������G"7G3<)�Ou�Z]� Prop���ymX}���yWJ4�Sr.,4B_��T0`��C4>FqˏH��>[���������Ŀ��9U�}����������O����|to����������������������������f�����LMU\_bb`^ZXO;��z�xPVeiilnmpqtqsuqrnle3
+)&.C
+	""
+W����o`=nogfe`]^_^YSK@$~TAB=BD>:<8?DA9214/.(&,+,36931598<B=CEDA>835753:<8.((+..*)+-*(+.%¿��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~��������{{{|z~}~~mdF:���������ǿ�����������������t:.2+N�~++*7=DG>H))//r������������������] %ahb\WTPMMHCGhU�~AD@@??C>+2�������������������Ѹ�����������Z
+=;8E[*&"  <!"*(?�;(=��S��ݟ!��Cm�����0C61046872-31478768iA_mB;9v����DB\i`l63:679<RSHbbjUZbk`V\cdXRPB4/)''*-/-+))-10.00-2FU[\_^Xbda[^_cT\_`UVTYUUVYYSYZZTZZ_P^YYW\[ZZ\\TXW_YXX^TQU[UPTUYPSUWPROVQRSTQNPSRPQSSLPQOMQPOOSNOMPQQMPPNLRWMNPSLNOQPICBFKPRTVSPKHJPMDLSRRSPRLLTTW]UZ^Xaehkpu��bhhjiihhhlnprutsttwuuwy|������~/ @���H5}�������{h�m�dlyvrqmjhe`F2E#3E0;Lq�'6Tp������Rc���~yyxyzyscecdu|zytoeUW��������ȱ�������x0970' 7���RzV+^wk����jJTh��eS@''_��278\�яn�ì=:>M}�t]�aHYk�������{����/S�z����������h����������������~��ar��������������we~����EOWZ]`a^\ZWI=����q�vRUdfjkjnorrpsrpspm_-
+
+s�"A�(
+#"	F���ykc8lpjhd_]^]`YUM>&�C>BCBFC:;?B?:1)-365+).46610168756;A@=A?8037757=>:0**-362*),)(+.&������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˰��~}�������yy||{~~}�qfE :���������ǵ�����������������t:+3)?RWn,*3����ʜ),/,y�Ľ���������������^#&]e`]YTPKIO����ˡ0?BA?>BC*/�������������ʻ�����rz����������W;<8CY(&# ;! )';�:z�������V��۰)&5������1>:1126764(33467667 jHY|^=;r�{��G.Kd_i106665365;\`aSZ_b]TZ\ZRK=/,'%()*,-)''),-***/7JW^`^`behbcae_``d]ZZ`[[T_\UW[`UWW[YYT\VVXa]Q[\VNUWXSZVVUQUXNOWQRQTUUPSPUSVQRPTRSUTUPOQRPORVOLPSNNNQPMMOMLKOMKLMLFOMNLFACJOQSSSROSJEHKB;GNPRSUOKRVRX\W[^agjmqw�veiiihjiiilmpqststutuvvz|�������e%,#jtRL,N�~������U"5H|YbyuqonkidX78;t��Çn&>[x�����yG���|zyy{|xqqqgkw}yvql`Uh��������ɹ�������^$,NJ:K:S��t�54itd���|nQjl��\U31r}tTAH*"576=���οŎ2<?V��[s��GK~������WSV`��6Fbo|��������kn���n��������������}ox���������Ĵ���{�����~@M]d_^^^Z[UEH����s�pM[delmmonpoorrqspk`+6
+ 	TzAM[9urifa_ab`\XTK<,�9:?C>ABBA@@:0*'(/;<83/.-/05898417;963773.0455;?><5,+,4890*+)(**(������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ǯ�����������|xx{|�~|}~|pbG!6�����������±���������������s?+4,O��s/,*@k�n#R++/*u�ſ���������������c#$\f_]ZTNLMQ������8AAA>?BH-)�������������ǩ������{����������]"A=;I\#&$!AR_-(<�AZkWLA?��R���įio�������1@61234431,33777897kDo�M>>l����E,4Xcf8/26433434Y_aZab`^[^TPD7/*'%&*+,*(&&(++)((+;VXX^ea_`eaZafcZ]`aVWZ]WTTYVQUVWTVR\WWUVOUTZWPZXSQVW\VYRXTVVWSWXRSVWSUUVTUWYRRTUQPSURNMPQNMOOIMQPIMOPIMNNJLLQMLKMLLNLOJCBEINRTTSRLLOHBHLIFHORSTSTNYXVZYU`cgjnr��jghijjiijknppqtutvuuvvwy~�������H.2����^%j��������tta>*3owwrnlke`Q3@-^_C���-(Jf������gV����|yzy{{vhbceq}{yuqi[Jt�����������������P<*t�z8?%C����R� Aorc��qX4�����_P,1Wb����X*446:G���ƬX4<D\��F���̤�����plZSXk�`>kms|{siceajgg�����ep���������|��pw�������������q`i���q5:1I__WVZ[W=N����v�mT]ehmmmqoprprqqrnj^&
+
+
+$- :Z10	@Nh�rlV=wqhebaa`__[SI69�<<=BB<=BB=52-*'-7?@=<8658:9764579:867350,/3:>?>=>7.,-3885**)'(&&����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ������������������������������������������æ��~������}�xtz{z}�~~}~|pbJ$4���������ɾ�����������������q?)7-Crpk-).m�}���,+-(s������������������f!#\f`^ZTMJKMj|����@EBA>?AF/(��������������������é����������Z A=9E`$'$%8@31*+ )=�A2:L������ɺ����¾1@90343432,31456984nDi�F>8l����B.,;[d='02233321LYcVMVZ^[YOE2,)%$'**)*%#%'((&(*/AVfZZYdaZ_d`[_``WZW^QXZZVVUVVSXSWVUS[TYTUS_VXZ\]TTW[UYW^X]WXXRTWTRTTUQTSSNNTTLORTMNQVNLOOMKNOPKPOOKOONJONNKOPQKKOOLNPNND@AEKOQRSRRMGJMJDGQL?IPRSVYQQZTTYV\dhjnu��eiijjjjjilooprtsssuvuuvx~������}/$&F� % 3v~�������}���wvwusqnkkb]F,=:�zS�|>1Rn������So���~zxzyz{rcr}q^etxtmdXY���ǚ������½����vB? �u�E0+!,S���"Kpne��iKk{n���VD)>01Z7-;8/9830@J@/:;C`��>���������ɸ�JELi�NL~~�����������������������������������������ȿ����w�����b322!AcCGZWR7U����z�hU]diklmqnqtqsrrrml^"
+
+\T-vZ9
+)
+"o���{[44xohgdb`__]YTI4<�4>A?BA>?<6-+,+,4<>=:><;;:885458:9:864667214<@@?<61/.06974.+*)&$%������Ŀ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��{{}}{yx|{|~���~~|odL"1���������Ǽ�����������������sC+9+FqkW/,0fZ-X2$-,+v�ſ���������������l'"Ze`^ZTOMMP�����~DEBBBABE2%��������������������������������_<<;B_);�/8! (!(<�@"O��ܢ8*)*���ļ298-264453+20256998iGf�H@9q����=++-A^C%,3-+-/1.BRZS@DQWTQB1*$$%%(*)&&%%%&'&&*2I_]_U_`aa``\bdd]^\aZaWgW[YZWTUXXSVZZVWW[UTX[QSWYVRVWXVVW^UWXWUPTTRQRSRTVSTQRPROUPRNSUSQQQOOORQOPSNNNQNLNOMLNMLKNQMMJLNKK@?BHPPRSSSPNIGINMFGLJCLSUWXXOU[V[_\ehjnz�~iklkjjjkklopqstsuwwuvwxy~������f,T���}I~�������G*0HkcyzusqokhcZ845j�Γ$H8>]v�����wI����}yxyzz{xmedo{{{wpj`Ob�����������������bC@$/ry`9,1&aaO.*Wlol�wK>E�x��oQ;WI2P�6289<?=;9:<Fm��F�����������������@_������������}���ifu������������������������������pn����V,CeN(X<T\WQ2`����{�`O^ehjllonqqprrooll\
+
+EH|",")
+ p�|^9':wojieccd`]ZTI5C�.<F?<?CDB6,,).6;?<23::<<;6266899;<977779646<A>=8311015885.,+)$$#
+������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ��������������������˾���}}��wrqz|vxz{}�}~|rbL&1���������ȹ�����������������oD(:*')&***.MY`L43',,,l��¼����������v���n% Ye_]VPOPOG�����@BFDBB@BE6#�������������ǔqx~~�������������b@@;BaV³��,7N_K*!'9�@@EG��ݧ]kd(������3273254652)/1225995aJt�D@>o�}��? &''R[(+2,'(*/.<NLJ>9DOJ=/)%!%(('&$$#&$$%'*4O_jZY[ja]afbY`gj[\bcX`\fRXX\ROSVTMSTVPUSZRSWZQQVXXTXVUTWUYTXTTTSTQPOSPTOVSWQWQPQYSOPSROQSQMMPPJKMNLLNMILLLKMKKIKMOJNKLLKJ?AEIORRRSROJOIGHMNGGMFFRUUY[ZOY]Z^bfhlq�kevwhiiijkmqpstutsvusvvxz�������P**P-&EG [~�������^wtU3!Yxsqnmie`O/A*$mq+y8�*"Hc|�����cT����{yyzz{wv|nhj~}yuof\Po����͸�ȵ��������GD5cQJ+1N.&*Jk�wH 4^nm|�}A-8Ck���aU8$M" /8��?,48<9:;;=I|�x]����������¾�����Al�}����������}�����������������������������������~������R,(Z\+B:SZWL4f������WP]cjmmmmmpqpqrpomiZOSzV.B
+
+ *
+!cr' 5CFxohkgdda^[WOH6M�EAAGA;6;>;5019>>>8.49::876775788898<@@<96449>>=810--37871--,&#$#������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɾ���������������������������������������������|qefjt{vxz|~�~����~seQ%-���������Ǿ�����������������rG&:$&$$"#)%!W���9*-*g�����PXRSPTOJKk���n' Ue_[TOMMJJ�����8DGEB?@BD6$��������������������������������a 9<<CcD����*5FC0' '<�E<t��z$M�������X{�����3495444543'22224896"fO|�H=>o����B#$"4X,#//'# %/7KD=944:4)&# "'))&$##"$#$%,:FY`hXW]g\\bf_XbdhY^abZaV_OTYYPNXUUMYWXVWU[JZWVT]YUW[\UTW\SWUYTTTWSQUXSOVSWORRTNPNRMOPOLNRSPPPONMQOKKNMMMMKLMQMLLSNMKONNOOF@CHKOQQRRQNDBJKIIMMDJLJLSUY\^WK`[Ubeknv��`}��|hiikjkposutsuuttuvx{������1'H����A-o��������~���kEhrsqnlicbE,E +�ґ�5�-/Rh������Rr���~zyz{yzsgeihv|xtneVW��������ƽ�������}<J"
+)LuuV.34>dph������mx���ZX5"%9S<%50D�ÏD+65;889Q��Ss����������������tF�}���������pk������������������xo}��������������~j�����ND5<cY!'YXWI5l���ur�VR`dkmjlooqnprrroljV*h]
+! &$" *}�vqmkNP|qmjifea^ZVQF3&S}HG@CFD?<:;;;?@@?=6./386169622486662:A@>5///6>=>;5/,.4896.-,+'$%#������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̲��cWWYaq{wz{|~�~�~teN$*�����������ƺ���������������uG)?CJPW`p�ZR���v9&,-)e�����P]Z\W^cdbu���s)Rg][UNNLLO�����@CFCA>?AC4#�������������Ȣ�����������������_9;;G_"'!3�.6& (9�MBs���P* I��ڽ��Ι����Ŀ35=4554666&22455797#lGo�H?8q����?  !:;..'%.DF<4/-*(&" $'&&$" !! $(/CXW__g^^[fafda`ciae`c^_^cY`Y]]XU[\SVYZTQUWVVQ[UQT\RRXYQQTWXMUVXSUUTQOSSRPURTMRQQLSQUNSSOPTRONSQMLNRNLNOKKMLJLMNIJLMKILNIKMLE@DHMNORRRPLLGDJIDHPQKJMKNVZ\_^SS[Yafjnv��M���jhhijlmpssrsuuutvwy|������r$'.?F�eD{}������h[p��hewproklfc[7/?9x{��Ϗ;Zn�����xD����~{zz{ytkffijrzzwpiaVa���ʿ���Ŧ�������^3H	3?,!Jhqh�����������VN,<_IE]��e+4583\�Кl*7667Z��>����������Ŀ�����YA��}���������hj������������������srx���������³���ng�����?DD8DT^VZXVG:r���ukuMV`dhkkknpsnqrrrqnmQ
+
+
+$# 
+
+0���~qiMW|qnkkfeb][VQG-/mwW8)$##$atHKC;:DBBBCBABA@?<5)/366368/034954<858951.-25;;9640-./.03*),+('%!������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϯ��mYZZT[r{z|{}��������ueP&+���������˼�����������������xJ#E�������eH�?10--+e���¦�������������s-Tfa\UONMKL�����EDEDBAAAF5#�������������������^�������¿���^>==B`", ":GZI+!&1�Mw���iSG1J��޻���`S�����58>3456554)30156799'nEm�I>:n����D% @#(-&$:EA8/*'&##&&#"!!!""#(4HRb\e[_ah[Zcha[ada]^bg[]`bTYY_WRU[VRQRTMRUXRRUWMSWWRQUUOQTVXTXWWSXVTRUSQSQVPQS\STSUQQOQOOQTNKNOLJKMNJLMMKLKKJOOJGKLMJHMLLNPL@CFHKMOPQROFGKGFKKFKPMGLMLSZ[^^\UT_egkn{�qG���mkihijlnqtsrsrrtuxxz~������a'(y��NJ"b}�����~8@.1HBuvsrnnmhaO,=0"�}.@#&Fau�����dT���~}zzz{{tckh`mzzytnf]Rq�����������������J25
+
+
+22)Pjof����������|\H!9z���jH&25<TO14k��q53:Cd��6����������½�����E<tx{���������rz������������������kiu���������Ŵ����z����v85741%'V\YT>Cy���y�FT^djnkknoqnqsqrromK
+
+
+
+
+""
+"gsox]BCX|rnljfec`YTQF&5��[B3+(&&vuCHC9568ADDECBA@?<2,/267111-26871465696351,03331220/..+*)%'-,)&#����������������ƿ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������­�wYV[_Z_ux{|{|������vgR%+���������ʿ�����������������tI#GkS;50"=KM�����C)-+c��ſ��������������v0Qgb]VQMMKK����:FCCBAA@D8"���������������ν���������������c?=;Ab/SJFDAQU@)  (7�L*8_���}#J��ݟ(#U����59:1124663+5235679: hFn|F?=r����O3!'%#.(1@DE4'$#" "$$$!  "&-8W\S[ShWY_f\[ae]Zbca\^[g]]^bP[W]UTUUSRUTTPUT\RXUUQZYUWXXRQWXQVX[UUVWQOQTRLQRSLNPRMRQQKOOQOOQPLMMJKLMLKJNLLKLMLLOMIIQOKLKNKJNQGADJJMNNNORQJCGMGEKLGIOKCIMKY]]_`\P]gijr��YMjfihhijmortpqssstvwwz������A;�Jt�0.p|�������h��mD+asrqrmlf`F-E"'��ҫc,2Wjz����|Qo���}{zz{{|ynnphozywskcWV{�������Ȼ�������s78(
+' 2Xlnt����������_S= K�=	438m�C85X���<8Eo��H�����������������9Obiovz}~��uhl����������������|�����������~�������ek����d/0^``bA<\WP5G������FU]fhjjlnnoosspsqooF
++	##]\HC[yqojfcea_YSMD#,<4/$xk8HE::8//8?BCA??>=5-/.31++-48994.3668:8672.../1/00/-..,+*(*-+)& ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿������������������������������������������������������������������������������������������������������������Ŀ��eUX]bcixz{~}~�������vgW%*���������ȼ�����������������tJ @_K /�SJ�owJa;,.(^��������xo����w���y4Lid]XQMMOPv����FCFCBAA?B:#��������������Ø����������������c<<<?gV�ξ�2:+!)5�R9c��yY5# E��ޱ���Yq�����3=;2134465)42226798kGp�Q>:n����W?+*++@CIG("""#!!$"!!&,>ScdX[[e_^ac\dcdcdaabcd]hbb]a^d^YW^XUWWXTTTZQXTWRTRXTSSWUOOTTOUXWSRSSOOQQPORQRMQNPLQOQNQPQPRONNPQLMQPKLLOLLKMKKLNJIJKHJLKJHILND@DHLNNPNIPQQKFIJHEJKIMRIHTRS]a`_`^cijlu��P;�rcihhhhimqrqprttuvuwy|������u&2'2Ymd0Mx}�������z���cRsvurokjdY80A :4a��տ�=bw�����pO����||{z{{zz|yytxzxwqhaTd��ɴ����ê�������a$/		"N�J*=^nXb����������\Y3D�R#49<I�[Cl\<L�^3F}�m\�����������������;j�v}��{uwx��r�������������������������������ľ����{�����^3R_]\CE^UN5M����w��MX^hkkkmopopssssqnoG!$&:4"	A���{ud@]xqnifdeb`ZSM@""zj;=E<861,18=>=><;93,.4872/+08:71,29;::87862-,00/---,++.110/-)'$ 
+������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɽ���������������������ý��\UX\`cjwz{}~���������vgW')���������ʿ�������t���������sJ#>Y������.&(%iy�:+.*Y�����YKLN��KJKs���}:Ij`]XNLLLN�ǲ��O@DBBA@@B;"�������������ʭj������������½��i;>:C^*n���+8:E:+ '4�Wy���mQD2E��ݿ��ӽ������45<5244566)43446777cDTmI=:l����\G8#%)$$?FHTB'!""#""  "&.?]cY^bcX]^a]_ac]]dc^Zbef\ccaWZ]^WTW[UQRVMLTOTHVVTNTSSKPRRPNNQUSYXTSWRSOPRPQSSOQRROQOSPQPTONORMLMNKJLLKHJLMKKMKIJKLKLKLJJKKMJKMKACFJLLMMNBFJQPMFAJJIMNCKNLQWTZ_`acdeijny��Y$��egejhhjlppprrsttvtxz}������dC RAzl_|������S51=quoxssnmlhbQ+71,O��%Eav�����hZ����|{{{{{wqsy}}{zxtng_Sm�������ʵ��������G)B		73!]��ȯ�`=$GgjWF^es�������UR'9Z(5:Pu�����B3~p0P��Zq��������¿������vJ~�����������������������������l�������������������l}����RC>$.39OXRK4O�������OW_fjmkmnoppttrqponD9uejpu���$;	D���bXO0bzqmjgfec`ZRK@'�cE>>>611,4;?===:;:415:=:8436994/,2;>==<;=;3--0210.-+)*//..1,)'" 
+�¿��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȼ�����������������������������������������������Ƽ������������������������h[XVXZ`r|��~����������uh['#���������̿�����������������sK">$R���t0 +*"`��-,0-R�����Qci��pTh����;Lgc]VNLNLPv����ECDCCAA@A<!�������������������������������h=>8DgU����$?T^C*"'9�X(>Nk����#C��ޮf�zb������4193475554'4136889: kAe�N?=q����WK?.	
+'$	:HHOR9" !##!!$)1A[f`U]d_L]\bZ`cd^\`c]^b_c^dbaY]]^VXVXVSSUSSWSTQ[UTRYTTRVTQSSURWYYSSUZSQPQOKNQPJQPOMOORMMPTKMMOMKLMKMMJJIKLMKMLJJNKLLNKKLNKIJJKKGCGFHKMOONKFALPPKFJNIFNJIMPPVXQ[aabdejmq��s]'[��nkjggjnqspqsuutuvwz~������J-'���t"-v�������nDjvI9myvsspmke]A*<#2Sl}�����`s���~|zzz{ys]]birzwuqleZZ��������ʶ�������u7IO
+		%P2(,m�����վ�"+OriTPUUWPLQ`t�m[H*57F�����O;8B=3\��P���������þ������hQ������������x�������������������lz����������ĸ���������E@X[C:L`\XSF3W����r��JV^fkmjmoprpstrrppn?-nc[UF9*#;!	B�g564jwojhfffb^ZSJ?*�]HGA=642247?@A?==825:<<:8799652-,6CBBAABA;33310///0-(.2-+),+((#"	����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̿������������������������������������������������������������������������������������������������������ɖja\WZ_ns���������������vi[%����������������������������sL#ARsw�{*+\�����M-.-Q���×��������������>Ihd]SLLLJJNWMS@CDFDBBB@B; }������������������aj�����������k<>9?c0\)80<#&"%3�W,7E����6�+9^�����54>3565656$5236778;k@b�HA>k����YNC7!
+	!#	
+2FEJRP0""#"!  $&+3EY_c\X`][O`Wg^cc`^_ba_ab\dfk\][eY^V_UXWXXTTXWRUVYRRUXRPRTQOSVTMQTSNRTUNPOPMLONPNRLNNPPPJOLOLQPOONLLJPNIKMMJLLNIHJLLKLMJIKMIIJHHIGDEFFJMNPPMJDBHOOMHFLHGJHKTRR[TQ]`affils��bdS"{����{nmqrsssuvsstux{������}10/^�2K��������hY�ehgvtspllgcU7.<B8?[t�����~L����|}yzzzvpnyui_hstpkaRb����������¹�����d-Z6
+	
++V%,1>4��s���2bvhOPVZeXXWTWQZY>1345PQ3M�H7;94;g��D�����������������VY�}����������d�������������������prz���������ö���dq�����A/MQ3-K]YRA4`������~LW`hkmklopporrsrpnm:
+!)#, 
+(	M�hDIQZ:dunieffda]XPJ?1�TIHGD@:247:?><<84.588997125512520;>ADBCA?611311.,,//*+-,**.+'$!!������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̘jeWX\ktx���������������wi]'"���������˽�����������������sM!@g��~}r-(@hXeCM5,.,J���â~�������������ADib]UNLJKLIB<>;GDDCBAB@A;!x������������˹�����������������oB<=Cc&:!8565+%%3�\G��ޝZA�P���ſ5/94345554&3127689?#bCh�B>>p����XNF9+
+ 
+	*FFGNVG*!!"#"#',5LWd_[_chZ^]f\dbf]Zbj`\_a`[dglQ\_fT\Z^SS[VUPRRQPUTVMRUVPOSSPSSSTSQSRSWTSNWONMOPMQRPMOQRMMLRLNPPLMMLIJKLMKKMJIMNMHIJLJHILIKJJHJKKJIDBCFGKNNORLJNIBGNLKEGLIGKKSWSUYQX`dffjny��cdeN&j��������{uquvtttvy~������h5�#\��������xi�uSkkvtrommgbS,65~�e5 Hdz�����oa���{{zy{yteg[fw}vurnh_Vu����������Ŀ����H/{#
+
+
+9P1%D���kee\O��=jocWWUYcYVZZZ\^V:#)%133:>>8J�k6:88FuǒM��������¿�������Hi�����������W�������������������ks{���������ó���������z7)RCDbL6UVQA>f������mLWaijklmnooqsrrtqlh6
+		"T���P
+	
+*	[���sf:krmlghfe`[UOJ<7�SJJJHF=46530577/(/:837:5/14/05876@<:CCB=91,2441/+.67/*+./00+'&!����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ�������ʾ���������������������������������ŕiXX]aqx}�������������vg](��������ǻî���������������wM =Xvq}��2*,HPrgoD'-+L���¨x��r~���������F@fb]UNLJKNRkpwz}AABCAA@@= s������������ʼ�������������½��p==:AeN��N,=KD=,!&3�]&*D��޴M7��������6.63333454%402679:?"dDw�GA:n����YKC90"AGEJQPB(!!"#&+<IWTcYUbc_Ta^bU[`fZZ^e\]a_[XecaRbYhV`[[UY\XWRTRRSVSVPWQSSVUROURPTRQOQTSOPQSKKLOLJQNLKOOPLNMPKMOMJMLLIIKMLJLLJJMKJJKJIIKJKIKKLIKIJKICBDHHKLMNNIEMNG<HLPKCKNGKSUVXRUYWZcghkpz�vffijR%:�����������xsuvwx~������O 6'q������|}��j.Rqvtspmmd[H(;#|��ٰr9)!-Rh|�����\w���~|zzyzytah_fkxwsnje\X���ý������������w8;|
+S8:I����ð���W JpnYWl`ccTU[ZY[ZR1(Yf!%5348AZy8J;2779K��pa�����������������8k�{����������g����������������p���x|������}��²���������n8!C4G]L:VXP8=i������rOW`hjimpnnopqrqrnmb2
+	
+@t@ 87
+
+	
+
+4	a��kYN5nsnmiifb^ZWSJ9 3{QKKKGA98=;109:7.0;;64881/10/67989=:4265112-0.,-.,1981*-254/(%%!	���ÿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������×c\ahr{z����������������vi])~����������Ǹ���������������wMA^���nr5+Q�����U&+*F���¾��������������H@d_\WMMKKLs�����PBBBA@@A>!q������������;�����������������p<=:?`$����(8'*!'1�cQ������!>���������ȿ���56>3343454"(703789<?!iY��C<<k����XLA8/&	;GEGLRR>& !$*;YTZV_[]d`_Zf^e]abe^dcdbei_aim`aai[a]iZVZ_XVUWVNQUUPRQRNSSUPOQRONUPQNOQQPQROLMLONLOLMLPNNMQMNKMNNMOKLJPNLLLMIINOIHJKIIJLHHIJJJJJFJIFABDFHLMOPMKFCHLC@CLNLJLKESVSZYSY[Xehjmt��jfhffjbH.6e������������|x�x���~9<|�������dLw��w}ytsslmldY7-9;G 3o����J9Zo������R����~{yzyzymjrmkp{wslgbZ]�����������������iVt�TX#V+.D$q���������<+OnrOQacngPO\]`[XM'/Q)$,i;&549=BZ�7;6567<Q��Vy�������ȿ�������p9\tv~��������tt������������������������������ɿ����ot����aAE;'AR9LVTL5=l������oQVbhijkmonnoqqrrqnb.
+		
+\R#@# 
+	
+2
+j�D!" /uuonkhda_[VRJ:>�QKKGB:7;C=3011126:62696,+.059878;>;655663212.&'-/7:82.0465/'$&#�¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž��������������������������������������������������������������������������������������������������������������������������������������������������vigs||z���������������wm`+z������������ÿ�������������yO>7���du1(8bY]hzL"+*E�����}�������������I>gb]XQMJJMf�����HACBB@?@@i������������ˎr��f������������q9=:Aa+����.3)%--!'1�gx�}��[��6���|vdVI71d��4/?4444545")7336798:!aR}�D@=l�v��JK>3-'1GHIINPO7#"'.<TY_X[_c`Xa]hS_`dZ`bc`]dd`U^ebU_bcQ]\^VTVZPSPUQHRUSPTQPNUSSOOQOMOUQPPRPSRSROKPNONQOLMNOKNPPLKLPMMMOKKILIIIKJFHLKHHIJJIJKHIIIIIJJIKLBADFHIKOPOKHLFBGJIAFMOMDOKQUPP]VU^_chimw��dfggfgffeWF7<Pt����������������p" [��������bG2/CWvvsrqkif_P+42v�T#*d�3$Kgw�����cd���~zyzzyxcJXaktzvojc_Wn����������ý�����Tb�����SXcC'3=;1����áx��#3VmqPQTT^[EKV\edTD#?F)de�[.49<Juui1:6876>[��A��������ž�������aHomq{xslstqrck�������������������������������û�����|����T=NWRFQRZWUL/@s����n~hMTegjikmnnnprrsqppa)
+
+
+ON&�[	
+
+%
+q�tA8|vnljhda^ZTPH7E�PHHA<65:@=62013303/28:4*)+277657:<>989;:6001.&'/06:6103664-'&#!�þ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ���������������������������������������������������������������������������������������������������Ʒ�����������������������������������´�zy|�������������������xk^-!v���������ȶ����������������{TB_�6A�q+,I�����M%+(B��ƿ�w����l��������K=ca\XPLKKMh�����<DDB@?@@?g������������͘��ˆ�ʇ����������u9>:BfSßb+9?JJ-!)2�hv�n�n� ;���#??rt���4+>2479744!&:545768;!gH\vF@;s�}��@GC2+'	+JJIHJLQF3'*07BUeWV^eZTcafQ]_dWc_a`_a_\Z\baY_]_Te`\YXTVSYSPQPTRQPWRQQVUSSTRPMQTKPRTPRSTOONQNLMOMLMNMJMNOJLMOMNLNLKJKIKJKJHJJIIJIJJKIHGJJGHKIIIKH@?CDHKLMOPLDDHEFFLFAIPPGHOQSTR[\_`beginw��ffhhfefileje^L=ETw��������Ȕ��V-/&2>K^h&r��������{��n<<qtsqokge^F'>'x��Ѣz!6-Sp������Px���~|yyyzxssqhepxxtmic^Z�����������»����{Kt����������`kT&E)E��l\8T�ƾ�;[knYhjfj\HRWZXbX<Bv0?���> 47NeU��a+97747Ak��@��������¼�������Sb�~������������������������������������������¸���ws�����H84CA+":ZWTN,@x�����iMYefjjkkmnnrqqrqqra'
+
+=A:
+
+	 
+#y��b<xrlkjgc_^ZVQH6!R|I?<6566;@<667345-*+49<5-))42//479B?98;<930/.,+,00031-,1541/,("�����������������������ƿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ��������������������������������������������̾���������������~�����xk_0!u��������ͺ�����������������zU>U�r}��A',Gv��S()*+C�������������������M=da[VNLLMIX����U<EDAA@@AAd������������̑�����ń����������v8=8?_+D!7$'"'2�lk�#QlE`6���.��_d�����7)>14:EH61 "75;D?88<#kBY{RA>o���59J7,%
+	$FLPOKLPNK6-/28G[X[a_Zag_eY_Y`^g]`ed`[^cacd`bX^_eZ[\[WUTZTRUVUPSQTMNQVONSRPNPROFPQQMRORKNMNMMMMNOPLPKPLNLQKIMRLLMMKIJMLIIKIGHIHGHHIGGIIDEHHEFGH>>ACEHKMKNNLJABKJDGKEBNOOJNTORVU[`acehlr�mdgeeefffeUc�wswrl\K>DZ����ϻ���8-$�������;|����������S_ztqqnjgd\6&A44 5y��+7Zq������X����}zrwvyxe]^ehtwvpkf`Y]����������ÿ�����jL�������������ƬDMU���ͷ��y^!G^jcQtsjeSHTX\VXT0i���j�)$(37C�����N27748ExǎM�����������������Lq�~����������~����������������t��hv����������÷����r�����?.LP?aY5VWSF(G}����z�`PZegjjmkmmmrsqqoroc%
+		 :Y'
+	
+"!	Bb[=Cyplihea^ZWUND1\yA9548<9?C>75745;5)+19854,,2.,*15:DE>;9841/.-,/112.-,*(,11..-)!���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɾ��ǿ���������������������������������������������������������������������������������������������������������������������������������������������������������zl`1 s��������ȼ��ñ�������������yV<]����wD&1|����B**->�������������������Q8db\VNJLMPtķ��k>FDCB??@@a������������˝�����ī����������v9=8@g/HE!6--+(!'4�m#+8���"��WT�����8+>34<CA54 #73>J?878 l>f�RB;m����9.H=.% 
+		CSTUSPOPQI60/5;IVee\`haXb^g[_ag\\df^[agd_addW_c_RX^^URTVPKSSTOQRSMRRSNPQQOPQRONRQSNSPSNPMNMOPKMNOHPMOLMLPJJNOJIKLIIJLIGHJIGJJGFGGKGHJJGHGHIKGF>?@CDGJLLNLIIJEFJIFJLHIJPOOURQWTV_bdfhmu��effgfeeeecVo���prssutmYKIM�ի��x&N�]% +O{������ev��~K\xurqnjeaS)/5j�,FnIbu�����nf���}vkjrywlaa`buwrnid^Xl�����������������Q[����������������Rl�D`����ҽ:,Ofi^TfohdTPZ[\\[P+&#AH'D#-6<:S�Ґp37678O��oi�������˿��������M~������������������������������}gt���������ĵ���o[P���C*6J7`W8UWWA*Q�����}�[O[fhkkmmoqqqsppoqj]!	
+
+#<o|l%
+	
+	
+($5P{okhgea^ZXVOC.`o<<9;?>>BCA=6::<@?62481060/,*(*259DGD?<:51/-.23233/-,(&).,-/*&!	�����������������������������������������������������������������������������������ɿ�������������������������������������������������������������������������������������������������������������������������������������������������������������ȼ��ȿ����������������������������������������������������������������Ź�������������������~���ykb2 o������������°�������������z\!7B:DxYSC&6Q$@N()+;�������������������P5cd]UNJLLN`����S=GDBA?@@@ `�������������������������������x9>:Ac0�ºy;27,(!&7�l"3���)->+����¿<)<35>B=55&&33<G<669!kHj|GA?o����:+EA4'!
+
+;YYYXURQROI6/04:J`ZVcd\Yc_cX_bgY\cc^[[`_^b``We^]S_^[QTUTQSTSRTVSRQSQPOVQPPRQQQSTPROSONLOKIKMJJLMMGKLIGLKMIMOMIJJKIMKLKJJJIJKJHGHHIJLJHILFGIKGB@>@@CEJMSMMH@GIFFJHFLNIKQRQPWVQWU[`dhjpw��dheedddfeaIe1a��uprrrtvwz`9����[3#Ya$h{������D/;Zglyvtrqmjd\G&8&��֭s�s+Ok~�����Uw���~{sjkkrrebdeiyvqohaZW|����������������tKq����������������>,t��Ww�[[�� 8RiiTUmqnfNR\YWZVE$7qdU?,,7@RSW����i*83>W��S|�������ǽ�������pK�����������hz�������������������~t}���������ò���������pBK-#-6?UTN:*R����ww�XR[eglknorqppsqopqjW
+
+Fq|U&		
+%+
+1|�-?IVvmjifd_Z[WVOB,
+ce2879EECEC@>;@?ACA:2331-+.-)*,1313BGEFD?60,,273233/-+'%*-,/0)%!	
+���ÿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ž�����������������������������������������������������������������������������������������yjd3m���������ż����������������y^:Z�V+T|?&=|b-B�e&)+;���¹|�������������R4jk\TMJKMP[����]>EGDB??@>[�������������������������������}7=;@`R�QU�.2  +$(:�o"+2=;3��⾙[V�������:'?44=F=65"$53;I:58:"fGn�L@;n����9%AF9-"	0Z\XXWVUSSTE4/16<NZ^d__be`a^dbf`caaacba_cf[^bh[ZY`YYVWSPTTRLPSTMPRRLPLRLNNNPMQRPLMJONNLNKOOKIMMLNMPKKJNKMMQLLKLLJKOJIKLJHGKLJHIIHHJJFHHIFGHHE>?>?BDGMb�ZNKGBGHCELFEMKJRTSOPZRWZX_finr{�{dhfcdefffbOkW0=���qtruuw}D����>)�Q<L�\7{�������~{|^<&LxvtopmicX7)>'I;e���L4Ul����rO����~yqgcvy{ti^`cpvqleaVb����������Ŀ�����cK�����������������&B����nVXu��#EXikRPW^dZGSZXXbY=!8r�����@/5Bp�ML?���J5/Ca��B��������ļ�������YQ�����������]z������������������������������������������^GW[TQLRVVTM61X����w��VT\dhmlmmqqqsstqpnoY
+	
+OuI
+			
+/-
+4���@=[JTvkigda]\ZVVNB(fc97>;DFDED@;?AABB?7+,144672-+,0.,0>EAD@:21.-4;62331*'+-+)-0)%$ ���������������������þ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ������������������������xkd3f���������������������������yW%77Y���S(+/q4e}>'*,7�������������������Y1hg[TNJKNO_����r>EGDCA@?<X�������������������������������~8<;?dU� 4�61I^L/")9�qf�������!3���Ҿ������ÿ=+B23:B;59#"54:F:78:!fAh�R@8k����:!4D;1$
+)W\[XXVUUPVQD3.15=MdbVccbY\_d[]`f\\ce`Z\cfT^b`V[Z^SWVTNNTSQHRRQKOOPNQMRKRQKNNPQOMPLQPQNNMRMLKMLJNMPIKKOJJMNJKLLHIKLIIJKHHGIGHIIIHGHIFHGHHIJIE>><ABEHLr��bKNHBGIDFICFKKRTUSIUXVX\afios��kfjhfdefdgegkll@����xrtvx|}O���r)"0[c��1ypM~�������q����^nxuromkfeV)28cm^ZA]r�����ci����}{k_l{yyxzytkeomje^Mm����������½�����PV��o7LYt���������xV�����ͽ��8%J]kiS[]c`PHWUYN[U1#=-3U�6"57E�h*TtJD�b33Ap��C�����������������GExz}���������k������{������������������������ļ����������SIV[[`_\XXWO.8`����g��SS^dikllmqqrrsoprpnU
+	8IG=
+
+
+		A%
+
+*��zpV)Qrlhecb_^YVTJ@&g\81<9@EEEE?;ACCBA?8++0147885.)+**.47886302118;6112/(%)-,),*%"# #"
+����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǻ���ÿ������ǿ���ſ��ǿ�������������������������ƾ����������������þ������������������������|i_2_��������˼½���������������}["8C~�~yw<)7xQY�iJ)++7�������������������V1cbZSNKLOL^����g=GDDBA@>;S�������������������������������8=8@c?�5:x'3CI8,$'7�tGm��u8l� 5���3B����þ@(>0379977' 437:<979#jAo}F?8l�|��8'8;5)
+"N^][YYVUTWTOE1/26=W_Pba^U^`dZ`acU[bc^X\af[`_[Y^VZUZXROSTRSPUQQNQPPNSNUMWTNPTRMORRORPSMMMPLJLLLILKKJLKLIJKKIMMKHMLKIKKJJIIGHJJHHIJIIHJIHIIHJD>@=@CEJR����iKNHDGHBEHGIMRUYYTOXWU`dflpv��cefffcbccdemkmk_�B@��{tvz~�|���_$]JCL&NF&f�������@G_wzUvytpnlifeG#8*�⻀>mm&Lct����{I{���|{xtqxtiptxyyrolhc[Y~����������������{Am����yjL^�is�����TC��YQd����H*LckaOsoijMMg^XTYP%"_7"n(56;��.P��|</43J|ʈM�����������������C\mr~��|{}�yudv�������������������������������ĸ����ij����KJW[]]^[YVUN+5c����x��PT`ejilmnooqrsnmqplR
+.F
+		
+H(!
+
+"Ayzr]2Ytlhfda`]ZROK@%
+lR/:A2>FGEB<<@BCB?>7+(+159::9521-+/31-15532116874/.+'%&,1-(&#"""%!	����������ÿ�������ƿ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ļ�������ƿ���������������������������������������������â����������������������|ib6[��������ϵ�����������������}_1@NFG)9R5��2R,+-3���¹z�������������Z.cg[TOKKMMX����]8EDEA@>=<A��������������������������������:>9Ae%3!'#&0�y'k��x0~� 6���8L�IY�����A%=23;AA:;( 126>>99;#jCl�HC8i�|��= #*56/ 
+	
+Ha\\[XWVUSSQP<./38?Q\d_^Yd]_]d`b]fe`^`]\dab\\]`RWU[TPPUUORUSPPPRPQNQLPPTQNOQNLOPNLONOJLLLKKMMLKLKLKNLLKLJJKQLHLOKIJMLIIKKFGHFFFKJFHIIGGHGFHB<>=BCFL\�����iMJGCGHEJMFMTVZ[YRW^_cfhmnx��afgeddbcc_\}iofd�uXl��{u{~�����M2�k��S5x�������gZ7.*Aswromkge`2&<%ad����^2Vl}����iL����}xqvwywm`gjdbuqolg`V^����������ÿ�����dA���d������X~�����+'7t�ȹ��v��'2SfhXSbkbVPT]a[ZWJ b�6/:gh-9:9V����m��758R��om�������ȼ�������yH{�y���~x����u�����qlv���������z��iv����������ĵ����������BKWYZ]\YYVRF+8f��¦l��JT^diinnnportsononlL
+'
+		?"!#
+)Nr�yom=]tliida_\XSOKA!	qG2?=2:DD><:>AABCA>8,)*48:;:9863-+.46/27754303896.+''&%(.,)$"!$%'	���������������������������������������������������������������������������������������������������������������������������������������������������������������˿��������������������������������������������������������������������������������������������������������������������������������������������������������ǣ����������������������{la:X��������ξ������������������c"1N�e2O�Q'.h��Otm.,-1�������������������_-ed\VOKJLLa�����HEFDA@>>;?������������ʴ���\������������� ;;:>].fdga$4<RO-#(0�{h�aa���U:���!��sK�����>#>35<B>97&75:A@9:;$hDj�I@:j���8""$*31$	=_\[]YZYSTTTSP<--35>_i]\ahW[_h\Zce][^a^Y]`_Y\\\SWX]RQSWSMSRPOQLPNRNPKRQSOQTQOMQPPNQMPNRNLMNNNLNMNNLLIJKMJJIKHHILHGJJIHIJJHHHHGHIJHJIIHIIIIIA>>>@DFJn���z��vHIGCGIFPIKTVZ[ZXUZ`dginr{�qddedcdddcWe�������¹���y|�}?2,Y�YPBS������������vhsywsonkhaR*1?V��&7=?`w�����Xc����|vi^qxwn[def^Zqnkg^Wm����������½�����RU������kj��Xb^Ok��A�������ĸ|=VgjPenmi^S\`XPVVB)�����15:ASc��zO��-5;]��S��������Ļ�������mR��������������������������������}mw����������ǳ���vde����?OWWX]ZXWWQB*?o��¹���JU`ejimnopoqrrqppnmL
+
+-�����I	?
+D��~I3ZAbwlljec^ZXUQMF
+	
+$>AA528A9-.;AA?CCB>9268:9::98751,+/650588767457670)''('(*-+%#"%'#����¿���������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ����������������������������������������������������������������������������������������������Ǽ�����������ƿ���ƻ�������������������ǽ�����������������������{kb6S����������������������������` -<F8:�Q$*4S@ /,(,(-�������������������b+`e]VMJKLMb�����gADCA?><7A��������������м�q�Љ�����������#7>9;eK����44LeL,$&3�;>:C37���!w�Ce�����="?33=F<75'!74<D>869%gDi�I>=i����:#"$#&-(
+0YZX]ZYXVTSSSQJ6,047E]Z\dbT[`bZaeaZ]_b]X][^U^\]U[VXQXVVTTUPQPQMSPTNQRTQPPSPOQOOPQOOIOONIKMLIKLMMJNKLKKILHKKLJJHJHJLKIJJJJKKIIJJIJKKGIIJIIHJ<<>>@BFQ��������{JMG@IDGLLPZ\[[\XV_cgjpv��fcdddccddcQ|�����th_`qux|��j* ^�K'0"e�����������~~}zwvsolibZD&>4~���.G&Jfz����~Ey���yuloklqghejp\colgb[R{���������ÿ�����|:l��������������d�pQ���ɼ��}V"HZfiHYhjf]MZXQNUS5IdP#47>j���z&9�� 8=h��A��������ú�������`W������������{��������������������rp|���������Ĳ���w{����u=PR0!#$1RVQ=,Cr������~LV_ejilllnprqqqqpngF
+`L@?H(
+			;
+G�q156j|nlhb`][YWRJB	'�=8;6.08404?A=>A@?<79::;9983.-/.*)38415755678:8871)&&(''',/*#$(' ��������ľ���ÿ������������������������������������������ÿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¼�������������������������������������»�����������������������{mh;P���������¹�����������������h#-?hz���\ (*()*,+*-,2�������������������h,_aZTMJJLNQ�����LAFDA@>=9D�������������ɬ�����������������#6=:7h5&"2-! '%%.��6���[D������=!A25=J;76%83;I?76;&dB_nI?;h����<"$!"%(!
+
+"L]QZZWWXWTUUTOF4/.37G]ca_Zc`b`ef`_fa^^`aZ]]bX[Z^UVT]SSOUUORRQMPSUOPRTOMNOMOOQMNPNNKOMKJLMJILNKIPQKLMLIMKMKLLLIKLNKJIKKHIIJIHIIHJIIGHIIHGGC:;<=>DH\��`w�����lIMBDLGLQJWZ[]]]\`eimrw��aeeccdeeeaHscQTVX\ekmqtwz|����W2Nk��h +6s������������|zwvrmjgdW2&:-�U'�'�41Un�����mL����~zsohrrn]`T_vqnlje`Y`����������ý�����h@���jE<C~��xZY?~��Eb�LU������E+M^kfRcfdbYV^SLDWR( 27<s�Jg��6Of.6EvĦC�����������������Vh������������s�����z}�������������������������±���ly����`=VI:BEB?[UO40Iu�������NVadkjoonlnprqoqnoiE
+
+,8
+
+
+	
+
+<J}L#(4I:iynnhc`][YVQJ<
+-�8-8E9663/46:::;:>:6::;<:50(##"%&+34112411689:::741,('%&(.1/'&)'"�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ�������ǿ�����������������������������ɿ������������������������|oe<O��������̿������������������i$,Q�����Z$*'  *8-/|������������������h*_bXRNJIKNQ����;EFCB@>>:F������������˲��͟n~������������#5<7=c 3/136&&#&4��1��������������>$C34=E956'639J>77:&cD`VA:h����< !!!"#"	@]OS\[WWUOMVUSPE2-146MbW]]d[]^d_[_d[Y]`aUX]^SYXZNUTTPQQTQLPRSOOPQMRQROOPNNQPONQRLMQULLLOJJMPOKLOMILMLILLMJLKLIJJKIHGHHIJHIKJIIIKHHGJGIIIGA;:<?AEHg��Aj������fOKEIKFNNOW\^^]`chjnr}�cfdbaddef`Fighmnqrrqt{xwz}��x<5!ev_''.dP�����������}zxwuqmkg`O)+7.�ޘ�?{)?^v�����Vc����}xn]pywogf^`ptpnid]Sk�����������������NW������s^������m��&+p��Yp�ZP��*6RcpWSffffYV_WVQUK %799H�L7��2554L�̇V�������ɽ��������Mu�����������[�������������������������������Ƚ����^o����RCZB4M:X5ZRJ10Gy����p�rHXbeiknnmlmoonmpmnk?
+
+
++@#*.SE
+
+		
+	8
+Y��|woh=kuljf`[\[YVPI;3�>27DC?=904734897525:;;97-(#! #*3413464479:9876942*$##&+00+'('!��ÿ�����������������������������������������������������������������������������������������������������������������������������������Ŀ��ǽ�������ʾ�������ɿ������������������������������������������������������������������������ȿ��������������������������������������ƿ��п���þ�����������������������������Ľ������������������������|mf>L��������ż�������{}���������m$+4!!2kZ..a������l#1w���²�������������j'_fYSOKKMPZ���Ë=DDDCA?=>"F��������������������������������$8>8=bG����9/&'$'3��1Zaep���(1��迲����lcz��>!@459?<74'638@<687%hFj�P@;d����;"! !!  
+6]RO\\XVULBT[WSP>,,04;MU\X`Y^_a^]]`Z[\_aVZ\\U\UYRXUQQUTSRPSRQRROQOVQOPTPMOSPMPSRMMPSHKKOJJMMIJMNLJMMLLMKMIKMKJKJKJJIJJJKIJLJHKLLIIJJFGJJF@;:<?@DH��k~�������^LJDKKKTPS\_^_aegkot��meecdcdfcegdmnopry~����xwz|���r&!1Lou�k!h���������y{�|yxuqkgf[C%1/4v�K[u��ӏ&Kgz����zFt����{vigmywgbchisspkfa\Wy����������������|Bi����������TYl����0����}\]u��?VhwSVfcebWW][VQTC*99=7a�_'9�s(58W��hl�������Ļ�������v7z�|���������}e����W��������������������������Ǹ����{t����JETS6K&2AVQL.1Q�����YzqHX`ehloppnnoqponooh:
+
+
+7����j*			!`���yi_6jtlhfd^_[XYQF8@�>46JJF=2-7:26:65406989:2#!  %/54034534979740.1,-*$$'''-2.(('
+ ����������������������������������������������������������������������������������������������������������������������������������������������������ƿ�������������������������������������������������������������������������������������˿�������ǿ�����������ÿ����������������ƹ������������������������������������˩����������������������{nfBM���������������������������g%)Cp�a'G��N����´�h"0x������������������m%YbZSNJIKNK����l7DDCCA>=?"E�������������ɷ�����������������(3>:;f'V�� 3'&"%3��W�����j[2���2%/5Dq��="A44:BC66';56==88;%gIe�IA<c����<#! !  
+
+,WWHV[ZWTL==\ZWQJ8+,/4<S_Yb^d_`]^\[\`^[[Y]YZY[VVXYRQSWSPNQPNOPQOOOQLONPLKOQLLPNMNNNOJNMNKLKKILNMMNOKLKLJKLNJJJMKIJMKIJKIIJJIHJMOGJJIGIIID;:<;@AFP�������v�����SHFENIPUSZ__`aehjot��dcdddbcdddm{����������sy{~��}`*u!	%`1u��������oA&"(IvztsnjfcT4#5!j���D#"@$4Wo�����fH����~znhipyynk_SVjqnjd_V\�����������������i;���s|���h���}g`��]?������ī�A"G\juPXacacXZYXPTR9'K`WD4.7A[�����]p�'5=Z��S��������»�������f7fxz���������wy�����Y������������j�����������Ķ����������AKTFUfVMTSPH,4Y�����]{lEY]eilknqmmooonnnoe5
+-="		
+&&	_�V@7/>;ntlhda`_ZXUPF5
+K�,'*>GD6*+57/:=71..50,/4,$%'/44.////-1//.+&&'&)+&$&'&),)(+$	
+ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʪ����������������������zof@F����������������������������k&'NxR*83XKC@'FL6_&*r�Ƿ�s�������������u$#^dZUPKKKNV�ǿȌ=HECA>>=B"?������������̩a��j��}������¿���(3=;;`&t��56%%!&1��+c���Y',��縙m(z����ƾA!B46:D@65&938FB97;'dDRgLD>l����< $"! 
+#PdMOW[XWO?8X[ZTNF3,./6?XU]^i][_cZX^`]XY\ZTZZYTWWWQSTVMOPSPOQQRLQMOLOPQPQRQPSRPQPPOMLOLLLNKLLOMKKNMLKJKIKKLJMMLJKKLIJLJIJIIKIKILGJIKKJHH@:9;<?BIc�������b�����|HJLRROUTT\^_cgjlpw��dgdcdbddf]^������}n`G��sy{~��|@+V�ikJK�������nANggT/7vvrnjfbO*)8/�5�ɜˑCbx�����M\����~ykjoyxvxxwvmiomjd]Ym�����������������QR����������G\op���7^���������T+L_kqKbdeb\\b`]OXP.7������<56Cw��������*5Bl��G��������¼�������VTury�ttounkbk�������������������zqz����������ȳ����������APW':]\\[TOF*6\�����T|gGVZeilimmkmnoonmmmc3
+	?~dAk	
+	$
+!b}@.3BX@sskhdb`]WWUQF3
+K�953;@=/&,20.7722232*'+,$"(.1/2672/210+)*)'($%'%+0+$#$%%%&)*"
+����������������������������������������������������������������������������������������������������������������������������������������������þ��������������������ƿ������������������������������������������������������������������������������������������¿���������������ƿ�������ǿ����������������½�����������������������������������yng@G��������Ͼ������������������k$'L`-8�Y�S&()Q���r )o�ɾ���������������q("`j[WSLJJMLt����KBCCA>>>=$:������������Ϋ��є�ԑ�����������(799<cH�ʱ�51)&%&1��4��d4`��.0���ĭ7!>������@#C35<GA76) :29HB::;(cGj�YBBl����9%#! !  
+@e[OPVWUPA4F]]WSND1+.05?P_\bW\`bW\`[YY[Y\X[XWVYUWTVSQPUSPPRTQNNTOOORNPPQOOPRQLONOLLMOLLJLLLLMLKLMKKMIIILLKJNMLKLKKKLLLLMMJKKLJJIJIKKJFH?:::<>@Fu�������_������kKLGRQNTTV^`cgjnt{�xhfcddcfdbV&r�nKTV^ei>t}vz{~�q-=F]�b "c��������[r����|@Hvqljb]C$.05�l�~uj�\'Li~����wAr����ysi[nxumuxwwwrolid][~���������ÿ�����zCk���tc{mu����z[Hv�
++p���������*6Pcm_CT[aS6Ca^WJVM#!OE$=Zv0;7:t�?��oP@.2EwğD��������ý�������Fk�����������������mt������������mky����������Į���~|����r@R[6%(&4STOB&:`����yY�eHVZdhmjkjklonnnmlib/
+	
+IeLKD+	
+	#
+"s���{o^>tqlhda_\XXTND1
+	Uz==<761,*/1-04/*233/.,*("$,,.18BE<5662-*''&)*+.)*0*$"#(*$"&'#
+����Ŀ�������ȿ����������������ÿ�����������Ŀ�����������ľ���������������������������������������������������������������������������������������������������������������������������������������������������������������������ο�������������������������Ž��Ǿ������������������������������������������¿�����������Ž�����������������������qgE@��������ͽ������������������f(&H��ũ�ʤ2$%#�ˮE%,j����ý������������t+$Zj]YSJKKNN�����zCDCB@>?B'9�����ý�����Ϩ��ƈ�ˍ�����������+4;78g*[YLC#0%&%%2��U�D5\**���=>�aL�����C%E48<F?87(73:F@99;'fGn�ZEAd����<$###"!
+0b`WJQTUQF88X\ZWSN@0-026BZ]a[e_][b_Z\^\W\]^WVWYRXYXSNPWSMRSONOQQLOPQLOLNLLMOMLSMNMNNNLMIKMNNNMOPNLLPLJNRKKMOLKKNLIJNIHKLKJKLKILLLHIJJHF<;;9=A@N~�������X�������XPOOTSSVQ[bdhlot��gfcbcecdecfO$G��tkppn_ktwy|~�~`%Z���A1|�������vf���{{oHrojha[5$1!��ʄotIT9\t�����dD����skmlqztdXZbpztnjf_X`�����������������c>z���HQKJT�mY�{���}3���lD<����ATky[HPUUB &IRRR[C&b"$585G�k'4r�0,37M��yY��������½�������Ix����������������������������������������w���î���ugl���aCQXOJNUYSRO>&Ag����}h|^LX^egllmljmomnnnrm^)
+
+:RH2(
+			
+
+ 
+#}���r_L>vqkgdc`\WWTOI0	jtEC7530(189233.+1/++13/+%#"))#$2=HI=8775/+,,-4224-+/*"!#)-)$()%
+�������������Ŀ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ���������������������������������������������������������������������������������������������������������������|oeB;��������Ͻ������������������i.!4-\Y5:1Csrs��6!*'b������������������t)"Xi]UOIJLNR�����WBFDCA?>?$8����ȼ������ͯ��Ȧ�̯������¾���*3?;9h%2+))$%/��%(+���J�ջ������B$B26:G@98' 639FE;99'dLo�MEBf����9#%%#$'%#Qc^OJOTUJ=2E_[XTOH<,-.27GW^\bXV\aXWZ[YQ[\]QYWTOTVURPQOPPQQMORNNLPNPORQOOOOQQOSNRPPNMMPMMMOMMKMMKLOQKLLOILNLILMNLIHJKJLLKJKLJILKJIIIJIC988:=?CZ�������~T�����w�{LSROSSXUZbdgknu��bccabd`adeij]10���knorqsvz}���wF&+8�eL��������ts���|wqQrnhe_S*%7+}~;�ش5Hd|�����NY����|rbfkyyoW_]O@Wpjhd^Yl���������Ŀ������OO�����������ofF9^�U(��������` #H[lrPJROR6#/KSRTU:aa(+58A/M��86�u+59R��]s����������������wP�����������������r�������������������������ɾ������q���UBQP6*73K\RP9,Bj�������]OT^egkklllmnponnkjZ"
+
+F����j!
+
+
+	#
+hxF0!!H{smgdb][XXUPE.lkFE53/5946520,-/.+&&,32,).-./)&*6;KH?86345257154361+.*$$#&+((+)"
+�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̾���ɽ����������������ľ��Ľ�������ƾ�����������Ⱦ�������Ǽ������������������������}rkG@���������÷�����������������n4&M}������f_�����4),)`������}���������w,Uf\SPKKLLR�����<DGEE@>?@(4��������������������������������+7=;<i!l��`69HE0$%4�� (��輮���������E"A47;E?75( 428DC:9:&bFk�VA@k����9#'&$'.-$
+;a^VNISSNC01^]ZWSNF2+-/38I`]]YZ\^XYYZZX^ZXOXVTTXUTSUSOPWSPPSSMQQSPRQTPOOOMMQPNIONNMPKLMKLLLNMNMKLMOLMLNINNMKMLMNMLKJLKLLNLILNOLKKKIJG?89;:=?Ej��������l�nq�����oWTKRX[]]bfilqx��dcccbcccd^a�ihI W��ojurtx|��~q3",7�{Bi+!f��������lK��}zwm^qlecaI$*4.OmQ;q�.Tl~����vAr����|umrezujXGZiienmfa[X|���������¾�����x?c���R\v������»���-%Nu��������Y,M_ulMKTPJ,#@SYWUP0%_���w379;e�07�����-6?_��D��������Ž�������kW�����������l��������������������������������Ⱥ���{x�{���KER>@KKG6WTK2,Fn�������POV]eiklllknnpnprnk^!		
+OS6'!				H|smgda[ZZVUOB.+sf=$hhGH=65@>21//*%)0+'#$(.2-*/144,*/87AE>7426=:7965623.*0.'(+))+,+' 
+��þ����������������������������������¾��ſ�������þ��þ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ���ļ��ɻ�������Ž�������ǿ������������������ƿ�������ƽ�����������ư����������������������~rhH<����������������������������o/!M�����|�V#k��Jj6'*(]���³�������������y/Vn\SQLKKOS�����9EFCE?>>=,.������������ͽ�yvyuT������������.2:9<g>��s�//$'$%2��-���ųhEb������H @46;D@79+629B>8:<(gIj�RE?m����6#%(&'/5.$
+/]c\VNNQUK8'L_\ZXSLB0,,.3:W\WZ_][]^ZVZ^]WVY\TSVZRQSURPPRRNOQOLNRQNPORONLNMOQOMMQNMLRLLNOOMLNQLMMOKMMOLONRMJMQJMLNLKIMLKMNLKLNMKLLLHLH=99:<=@F{�������z��,F������ZVRMW\]_ahjlry�tdecdbccdeQb�yfj[,8��{~svyz|�d5+"Qq��%8����������jt�}zwsfdljeb\9!1#+����9><]t�����`>����|siepxqe_]QEkwoje_YZ�����������������d@w���kbMEB}��������(�����̾�Ka17RbwdIRdZ7%%VVTUTN%.P40%?kC565Z��r1h��e.6;i��<��������º�������VX��~���������U�����cp������������x}�����������ȵ����g�����CJS63BJIBTRK-2Jq�������NPX^bljkjkloponopoiY
+	
+,Wm~b
+!
+Ivqlhfd^]ZUTME%1��`F/'&#"j^JJH=@FA1))*$").+,)"$+51-*-55.,386;A=532:@>89834431,-,&)11//,+(
+������������������������ý����������������¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ��ʻ�����������ϲ����������������������~oeD>���������ú�����������������m2 @[0KRU�QW��+**'Z���´����{��������z1TiZSPMKMPO�����@EECCA?@=+2�������������Ǿ��ȗ��������¿���-299:`T�V(�L*305*$&1��.���O.(@[�����LD43;E@76*869HC;;<%dLp�IEBi����;%$%&'*65* 
+
+(Pg_XPEIMME/8d]\YUPJ@0(*.2=OWZ`[\\\WUZ[YUWWWRTSWNRRQOPROPLJKLJKLMMOLPORRONQPNNORNOPRNNNOLMMMMKOPPNPMLLLNOMMMOMMMONNNOOLNNMNOOONONNMNE:56:<=AS�����������h|�������YXPQ\^_chjmt��gfcbeddcdfMem��okj?!|��tvxz}��Q9W��������pa���zmOVknjfbS+%A@�Y3{��!Jj{�����NS����~xpdmorjfhffMNlnjc\Xg���������ÿ������OO����m�j������cJP�r��jIQn���9@VfrfYcd?#">MQRTE!#EVJ��!635@\�˷i,$)14GxϖE�����������������@T�z}���������n�����e�������������moz����������ɴ����~�����9KUR*32OTOG,0Mw�������LPY_cmkjjklprpppppfU
+		F}cBJE	
+	$$ 
+9MJ=$P}qjjhhha\WUOB%BmV=1&%"#|bJJ@?GED@50/)%*,*/,##-76/))10,17988;7220:@?;5764531,)'$)00.,))&
+����¿�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ɿ���������������������ſ��ǿ�����������ý�������Ž�������ƽ��°����������������������qjJ;��������к������������������j1Cq�J@Lt�E4]u��Q++*V������������������s0Pm[VQMMMNT�����:DDCCD?>:*.�������ɹ����������������¾���21::=]T��e�>2UhR1#%3��/���'�g�L�����NE367>;67);69====?*dIj�REDe����G*&$$%'086)!
+	8ea\[RIIOM:*W_]]XSPJ<,)+/3@W[ZWYXXZXYXVVZWURVSWVXOQQSRMLJGIFBBDEKLJLKONMMPQMNNPNOPPMMNPNMPMMORRQLRNOOPNPMPNPQPONKPPMNOOMLMQOPPMMMNP?8557;<Bb����������|y�����~��wWWTY``ejkmt��dcdaddccec:e((��wjnW(��wxyy}�~x7.!o��������`-1G]q|wqrnlgbdE *;?�	Mr�,Zq�����u:k����zumhfuv]dUXe|ookhb[Xw���������¾�����{Bf���sU`VDn�cMo]|��DCm�7	*��$HYiyp_ae=!(9WT;+]4l�|F$533246d��Å<44O��s`�������ƾ��������D`mw���|��uk|������x������������hm|������y���Ʊ����s{���x;QYOKHGUYTNC(6Q{�������NSZ]djiiilmqrpomonlQh?*"
+
+	
+	
+		;fhH'Qzpjfddc_[ZSK>#�\IF=CDCB?<=;1)%'+//*)5982(&'(.75:::82/.,05750332521-%%&+0-*()*%
+ƾ��þ�����������������������������������������������ſ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ������������������������¿��ž��������������������������������������������������shJ;��������м������������������l2!<E�����qA�����8+**Q���º�������������w6Nl[RQKJLKi¼���DDGDCA>>=,,��������Ǽ�����������������¿���12=:>d'Tg2?#-/("'%&/��+���)�][Y���ĿLA349?:65);38?@=;=.eL_vOFDj����NF%%&$%(151)
+-_d^]\QKLJF2Fd_^YVRQI:)+-/6HTZ]^WW\[XTVZ\TTXXTVVXQSRVRJJKIJD@A=>EGHJIJHKLONNPRSOSSSQSRPPSROPRRQTQRPQSRNPQRNPQROOOPNMOONMOOPONOOMNNM<7757;<E\�������������y��tz���cWVT_bfkmpw�}dbdcddccdaTeiA%���ph=lvuxz{��h%)-,*~��������|�xR:/6HUgmlebX7"/*Mv��p@fx�����dB�����wnndpvun``]itomhgaYa�����������������fAy���{�������������!T�ø��jv�_+N\jyua`cZ("DR.!ws�b`1+54256Cr]9��56=Z��]|�������ż�������yI��w���{yvx��s�����������������s���~}������o���Ů���[c^���j>SP8,(-7TRN>*;U~�������IRZ^ejihlooqrqoqonlR
+	U6%
+
+			
+;[]C![}ohdbb^]XVPG=+TD@=IHDA;?B>/.,(7;:739;95+$$*/59:;<9361/-.0220113/0-)+,.0/./,*$���������������������������ÿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⱦ���������������������������������������������������������������������ɿ������������������������rgO 4��������ο������������������n59&.Q[UCD2@AJ=,)+U�����|�����������z6Ih[ROIJKKe���ǰBEFCA@>==-)���������¹��������������¿���23>:>h&'% " 3)%&#,��	 *���`}Z=������J<56:A=65'<36DA:<<-iI\sMFBh����R_&'%$%&*.0.$
+(Oec`[WNGIG;1``_\WVQLD2*+.07GV[]WWZ\VQVXWSRWXTTSWQSRVMGGIHKF?>;<ADIJJKKIHGIMONNQRQTSROOPQPNNQRQRSQOPRQNQQPMQQPKNOPOMNNONPPPOONNMQOG84679;>AGc������������Rg�k�����Z\\_cejms��jfdbadddddffgihG�_��~kmrvxy{�|S+ e^E��������tI[���w[B4Toif_S(&/D�}&'Ul{����~MY����|nenoswm]ajgnxplgb^Zl���������Ŀ������PR����:9Q`y���������
+%V������ȴ�.7S_mx}{f`f]G/),<%G�aCk285222=��2Lv*6?c��A��������ļ�������qU������������~��������������������������������������������XCOH5`aZXUSR9):\��������ERZ_dhgijkmppmppomjQ
+SR39H�hg}?		
+Db-]~nifb_^\XSPH<0�NABEGFD?8>A>02;2<<<65::86-&&1048:<>:6663/24130.,110,)*-1431/*&"�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ���ʿ��������������������ʾ���¾��ǽ�����������½�������ý�������������������������siO!5����������ɾ����������������p67/(&!$8i9E|����:*,,I������������������x:IsbQRLKKMKbueZRCGECA@>?>1)���������̾����P|����������¿���51=>>e.,'! 2,(%$0�� )���Ŀ���������P>35;A<56)=38DD:=<,kKg�TFCg����@h.&#%%%&',/)
+
+?fb`_\WNKH?3Je_]ZUTPKC2*-.08N^\Y[Y[YUYXVUYXXVXVYXXUSICDHHLJDCC@ADGHJLKIHIIKLKLNNNNOONRPPORORRSRPQPQTQMRRPPSPPNQOQPQPOQPPNPOPPNOPPC635789<=AIY�����������Rn�j��|���Z__bgkmr��`cdccbdfda_pieaP�A)u��qsvxy|~�z9&s�la��������^SXycqz|{vpmie`J!*46es����t;m����{oljjuvknoaQZkmkf`\[v���������½�����|?i�����g^Z��������^*����������DWbnu|�vfnlaYMKB@=,Nsa7633442M¿�n.6DuȵA�����������������\a������������������_��������������������������̽����gjx���PFRM+ARNTTRK7+9_��������ITZ`fggilhmpqpooonlM#����tmSG;	
+AF1b~rkgd`^]XUSKA9�K?EGGFD=6AA=3042<?<6588:80*,10379?B31345413232-,011,&&-33/+%## ����������������¾��������������������ž������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ������������������������Ƚ�������ƾ��Ŀ������������ż��ý�����������������������¿���������������ñ�����������������������tjQ"8�����������ƿ���������������l6:.*+),@p7*[���J$*)'L����������Ľ������|>I�qQRMJIIFHFHCDFHFCBBA?<1&����������Ƿ���������������¾���62<9:h?653-,<5-/+!#3��*���ĝ��}����¿PC24:A>67+=37AE;9<,gJd�VEC_����2p2(&$&$$&'*+$	4hc^^][XOFA39g_^\VUPNH>0+,-0<RZ[]\[[YYWWWYUWYZTSUYSLA@BEILKJIHFHHJJIJJIIKIIJJKKIKJJKKKOPRRQQRTXRRSQPQOVPQQTPORSNOPQOOOPPLOQQMOQOM<42246:88;DK[����������y��j~�����m]bdgkmq��decdcccbb[P�tsr{������}tvy{}�~o)
+'ZG.'u��������h�z{oVzwusojhc[;2/.Flt�����]<z����zrtwntsf_bjironje^]]�����������������j<u���{SaW����������4
+�������vM&IWdom|��me_a[SMLN:!C����!84312231Y�{8/3M�̜N�������ȼ��������Vt�~����������q�����l{������������u������������ʸ����������?MRE531+FTPJ0,Aa��������HVW`fhhdjglopoopoojJE+
+
+	
+
+
+	6O4 aztmfd`]ZXVRL>
+9~ADHIFEB55?:81*.08>;6873993-01127<BH7654354300/,*+..*%'.0-(#!"�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ������������������������������������������������������������������������������������������������������̿���ÿ��ſ�����������ı����������������������ujX!5����������Ⱦ����������������o8<.*))*.5.*Y���g)+*)I�������������������@F}hMRKKIHIIJJGFEEDDCAB?=4&�����������Ÿ�������}�����������81<7;QQUI@;61)% $4��,���4Y�����OF379><86)<26>>:8;/fNj�SECa����*hD)*&&$$%&()%	
+/^cZ][^[WNE3-Rba_ZXTRLE7**,-3@QZ[WTYZXUWWZSWYXTVWVRJ?;<@EKKLJIIJHFHGGIIIKKLKKKJIIKJIHIJLMMNNPRRPPSRQRRSPRQQQQRTPPQQNPOOPPQRQPRQPK51346:;984<CI]���������~��z��y����`afiloz�rbeccddc`_Uk�����������xtvz||~X 
+1(	C��������u:E[lq]zwvrpjhbV)#8!1�Z$Wpx����}IQ�����|xuzxxqdce]\\pnid\Zl�����������������MN����~v�uV��������� 
+1�]0S\ini���zbSYVQKNM1(`FNRp6)>53212111-(+08U��xg�������Ǻ��������O��}���������yj����������������z��dn{������{���ʶ����������<OS5LhcXSURI+0De�������yGVY`fghfklmoqppqnngF
+
+
+				
+	!NQT)"avojgd`^[XTRJ;
+@r>IJIGE>1:>862+04:?;4783785./1128;9;69854996.+,*)+..,'%())($!#$"������������������������������������������������������������������������������������������������������������������������Ⱦ��������������������������������������������������������������������������������������������������������������������������������������ľ����ÿ�þ��Ǿ��ƿ���Ž�������ý��ſ���¼��ÿ���¾��¿���������������������������uiT"3���������ʴ�����������������o9?2)*(((),=qYEDg?+**D��ƿ����m����������DCviNPLKJJIHHIGGECECA@A>@6&������������¢R������������¿���81=950.('$ #1�� '3&$���ê�ȑQ�����RE25;A?94(:58=A=:=-bIf}KHFc����(Ue+,*)&$%$')&!
+%Lk^Z__]\WM:-5cac^\YUPJC2)),-2AT\YZ\XXWWUYVZXVUXYVTG;879@IJLJIF@:;?BDEDDFHIJJKKJIJIIIKJJKIIKLNMLOQRRQRRRTQRRSRSPQSRPNORNRQQPQPPOC20155:<88;58EJY��������������~����xbhils��kdbccbcaa^J~��qc\PMQT`tsuvzz{}}?'
+	)h�u_��������wXS722[|xtrmfc]D"'79��ƚZ/4dr|����s5g����~zxvwxthdaccmppkid\Xx���������¾�����{;d����j������������q#4"&:T]hlg���z`GTQPMUN)-aQif5-7431001011/018`��[�������º�������u<��y���������xs�������������������iq|������y���ȱ���������w9P^/)EFJSTPI(1Ij�������uKV[afehhkloqqqoqomdA
+					
+XVH/!iunida`^ZWVRI7Q�KLLLHB6.6::63496:=93892276-+3218=483679<=:4.,)'),.-,&#&),%" $'% 
+�����þ��������������������ƿ�������ƿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ���������ſ��������������������¾���������ü���ż����������������¿����������������������������������������wkY#3����������������������������q>E6('(((**:Q<@"TL)*,>�������������������FAsqLPLJJJIHHHDDCBCBBA>?@6&������������˸g����Μ�����������;.<840+'# %1��f��V���ǽ���8���ÿSF139C@86-869AA99:*dPq}RIEc����(By/)(&''# '(%#9faZ^`_^^WG."Ufd_^[VRNIB3(),.4DYZ]ZTWXXSUY\YUV[YVQA7479>ILNNH3&&6EFGFEDCBCEFHHIGIKJKKKKKIIKKKJIKMONPQPQSQRTTRSRRQRRPPRPSRPNRRPP80/1579<97><9;FI_������������v�����kegjnw��gcadcccaa]@p\QWX`ijkoz~twyzz||t+!8�X/)v��������lw��yox{xtplgf^5 //2SG���ӰrBjw����_:|���zwwxyxs[b_`ospnhfaY]�����������������c6v����F9GDe��������J/���gL3$ CV_mg\��vTQWTLOUD$#gC�w375542//././14>j��=�������ƽ��������g>o~{���������s�����������������y���v|������v���ư���si]���kEQYC<828OSNF'2Ln�������pMV[ciefgjmoqpppsqog=			
+
++6YY1 $rvoida^\XVSNI1
+b�XKIKF=1,045873767>:6::42692,41/146:46<@C=6.0/*()..,-'#)*)#!!$$#
+��������������¾�����������½�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʾ�������������������������Ǽ�������̾����������������Ŀ���������������ǿ�������������������������vh[%3����������������������������rB@5&$%''*,D��e?�L&*'=��ſ���������������FBIQKIIGIHHHDEDCDBA@>=>8"��������������l����ɐ�����������:
+0@73-+'# $/��n��, ���T,)d���¿R>249D@99-947BB;9:.jSt�SGDb����--v2Enw|{{k1!&%"
+3Xe][__^]]]78iba][XVPLH@0**,-3HY[VRWWUSWYYVVW\WRJ<4158:EQMKE'!&?OLKJKJIF@@9>HJIJKJIIJJJJHEGHKJIJIHKLLMNNONPPQQRPRRRRRRROPQQRRM3,-025::979;=8;BIg�����������������dehiox�{ieacbbaca`Odegghhijinxxqvyz||{d#
+Fo�A��������uo|rTY�{yvqmif`O&%6 K25i�x!Sl}����~LN����~yrefgooc]aehpnjgc^Ue���������þ������PH�������vyq��������+&����ɾ��`*)KZci[Pw��rRQWRKPT<!7C%> 445420/--//.14EwƵ@�������ļ��������R_tlz��xrstpmdr�����gl����������������������}���®���|�����YDQRA:<ASUSK="5Or�������jOV\bhfihjjpqppnppli9	
+
+			
+
+=jiS*&supjea`[YWTOD.
+
+vzUJGGD60,*+2;:3545<6389633783011115957=?<92-.-,*-/-,+''+)%" !"
+������������������������������������������������������������������������������������������������ÿ������������������������������������������������������������������������������������������������������������������������������������������Ͼ��������������������Ⱦ�������Ļ��ſ���º��Ⱦ��Ž���½����������������¾��ú�������������������������xi^$2}�������ѿ������������������p@:0(&&)(()0kzol�J#+)<�������{�����������G@}�OPHHHGHHHFCFEDECCB@=?:"~�������������y������������¿���=-<61-+'#$3��l��N ��軀y��������TA54:D?89,;45?@:87,jHb�MEE!`����)ob�������!!$"	'Gc`\\`^]]_N+ eda_]ZWSQLF=.**,-5MZVY]XVVXYXVYXXXSB71.37:AROLI7&#/EJKMMLMLJF:6DILKKJIFGHIIH<>JHHGIHIKIIJKKLKMKJLNONOQQQQPQSTRRTI++//26985785;=6;AKk�������������m��bdfip|�rheccdcb`abbefffhghhjnttsvxwz|}I$.^ua\��������mrv;GQjyxvqlhe[G"(;o}Z63eq�����t5g����~xtlpqmbd^agqqnifa\Vw���������½�����y4\����{�������������((C������Z1R\giVGMVS_ORUSOQN1d���Y"$83340./,---./5P�ϓP�������ü��������S{�{�������������������������������������������ʿ����ag~���PFTG4CB8HTSM8'7Qu�������jNV^bjiiihhnnponpmkc5
+
+			
+
+?d^T,/xtnhea^ZWWSOC+
+
+{eCCGF=4/,--4;:22249548311.0564/-/188567:;:::75/++,-,)&%(("  !
+ ������������¿��������������������ƽ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʹ��ǿ�������Ŀ���¿��ý�����������¿��½�������������������ĺ��¾�����������²�����������������������yk]$1z��������Ź�����������������vD;3)('(((*C�����]%**6�������������������K;knKNIJGIJHB?PFEECCCA>?>8#v������������ʠ{���j�������¿���?,962,*'"%;��l�t*"��������������UA248D@56+:36=@99:/nKb�UGEa����,_���������W$!"9fd^[^_```XEMfc_]ZYVSPJG;+)+,.8N[ZXRUWZVWVZVUWN:50-18;>ITNKI>/*,:FKLMOMMMJEDIMMLIE>9CGIJ@3GIEA<FFHIIIJJKJKKJJKKJMNMLMLLOLLNMF6661/16714977<;9>FTw������������k�qbefjs��jfbbccpgbgdeddfffhijlnqrsvwxy}y.*RV-h&u��������u;;JwloyvrplgcZ8!/1oN;˯�~Fku�����]<z���~{vmqqqr\^d_]vrmhd_XW����������½�����d8r�����������������n/C?Ai�PGq�5:U]ggSHLLFQRTTTNTP(9Zx��0'7442//.,..,025]��jr������ǿ���������g��|������������������������������������������Ⱥ����sr����ILSB4ba^[TQL2*7Vx�������`MV^`hhhhghnoooonlmc-
+
+
+	
+
+?OJ#0rskifa^[WUUPF'
+wbEDEB6317;439<0+-/57:84.+)*0351,(0CC=:79;?@=67:1)+--)%!##! ##"	!�������¿�������������������������ÿ������������������������������������������������������������������������������������������������������������������������������������������������������������¿������������������������������������ľ���������������ǿ������������¿�����������ž�������½������þ�������Ľ���½������ɯ�����������������������vjV(0x��������ƽ�����������������uE96('('()+&Y��[UQ%+)4��ʿ���������������O9r{LOLIJJJV���MCECCBA?@?8!z��������ȸ���ʮ�}y�������������9
+-;73-*&#$3��T��F��������������XF048B?36-;47AC89:1yMh�YJJa����1L���������$#!"-Yfa\Z]c``]T34lea^[[XTSOKG9,*++0?XYXRWWSQWUXVXWE632-068;BPSNNLH>5.08BGMLLLLKJLMLLD:42/>HKHAEFD>/ADEJJIJKKKJJJJKIHJKJKMJIJJKJKGFDA>6.143267789<<ADX��������������dcfgku��eeaabbsrxqc~zoolhiijlmprtvxxy{c!
+F|[@A��������qpv�wZsytqmjg_Q*"5"�Дb)k��:"Uly����zIK����}yugepoldb_]Vkolhc^Xf�����������������JD������������������?/b������}{� EZbidLFFINOPSRVUWL @�3633/-.,-.-/13=h��L�������Ƽ��������}s��~������������������������������ir�����������Ƕ���zjx����@PUK"/4:QRQJ))7W�������_RX_bhiiihjnpoonnmm^*	
+		
+
+.BA!6oqlie`^ZWUROH$
+
+|MAFB91279;5345.**,26;:840))0012,'/AHC<969@@>62870-,+("   !!  ! 
+!������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�������ǿ������������������������ɾ�������������������������������������¿���������������Ǽ��ſ���ż��ǿ�������ƾ���»�����������Ŀ������¾������������ü���������������������������vm[*0u���������������������������pC.1&((((()2��W��I&+(2��ɽ�~��{����������O7lvRRMLIHF^���CBDDDB@???9w��������ǲ�����ʾ��������������3):63.+&$""5��o�t:��������������\G038;;76/946>?:::2lHjWJFb����4=�Uk�������.%#  $#Iea\Z[_`__[G)`gea]^ZYVSONF5)+*-4KWUTWVVW\SXY[U>5200/16:?GSOPNLJKH?<:8>ACEIJKIIJE62>E719GJKHG@=7>CDHIJIJJLKLJIIHIIJJJKJKKLLJJHGGFD=.-,0333:95;=:AKf�������������bdfgmy�udeb`addbmi]p���ylzrjimpqtvxyy{J#'kgsa��������ok}rWkxxytqjhbZD!%6"�z��Ό/ao}����p7^����|wrkkprc]d`fojkkfa\Yv���������¾�����|5X������������������!;��j|����`'IZciaFA64?>?CIURTA$m552/.--,+-../3Gw��@�������¹��������lk��~���������c�������������������}is�������}���Ǵ����u����w9RS9EQE<OSOG*-<Z��������YTZ_bhiihgloqppoono])
+
+
+		
+		)HM";oonjdba]XUROG"
+
+
+�\JIA0/5;9;3211.,/-.5889861,33--)&/ACA=<57@A?936:3-*(&$#"#$$!  " &�����������������������ÿ����������������ü��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ſ��ǿ�������ƿ��ÿ����������������¾�������¾�������¿¾������������ƿ�����������������������������������������wl[%,r��������Ź�����������������sG.4'''('&'3j;2-(**4��ƽ���������������P4p�PPLJJIKf��|;EEDCCB?@A:t��������ʱ��ƈ��vg�ot���������w.*8410,(';WVJ#8��[��Q��������������[A546;;74.;45<?;;:2kNl�_JG_����7�tm�!�����:)%!(;fc`^^]_`__V2Jkfc_^]YWSQNJB4+)*/=PVY[WY[\SWZWH:11001/48=CPPOQOKLMJJJEBBAA@BCCEB43<AD@8.0@KKHHEB@>>AEJIIIJJKLLKHHHIIJKJJKKLJIJGGJIF1'+).216797;:<IUy�����������sbdehp��kccb`ahtplb\]kusth����vprtvxzyx3%,cCb)���������^m>DOyzwtqjhaX5 -2):�k@is�����a2z���zskiizwm\ba_snkie`[`����������½�����a-h��ű��������������"���rOF\��@1NYelXE<((*,-+4JRQ8211.-.--,.,-/4M�ЧH������ɾ���������Tn�}���������_�����~���������������l~�����}y���ű���ft����gAQN-Da^\SRNC)/@\��������UOY_`iijgflnnnonnml['
+
+
+
+
+	
+
+
+		&88(=rqomheb\ZUTQD
+u[CE@12>@:;53102.,-,/677888214.(#(4EC>;>79CC@;3394+(%(*%$$%'%"!"!'����������¿��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ����������������������Ǿ���ÿ����������¿������������ƿ��þ���¼��ƾ���Ľ��Ľ���»������¿���»��Ž������������������������xl`'(m��������Ŵ�����������������tL-5()''%&(-BH[SaO()(/}�ƿ���������������S6l�SPLKJJN_��x;EDCCBA@?>; l��������Ů���x�Ȣ�իq����������2+653/-)(h�_d"$5��h��M��������������\=656<=750@37<?><<2nNj�YJM[����:	a{q�9����9*+#);bida^][^a_]A3iiea^]ZVRPOMJB2**-5JYXYTVYZUYWR@4+-012/29>AJPNQRRQNLMPLMNMKJJJHC82;GHGDB90,7HNJHKIC:07DKHGDFGIIJIHIKKKIJIJKJJIJHHJGE=$*-,/122878@=BM\�����������fadglr��hbaa^`nqrebmga��ke^krtzpttvxzyn"")Hmq.;���������h;4Z{ltyvrmig^O+",$O��o�N Umx����KF����}wqekqwvtsolkpoifb^Wf����˾���ſ������K5y��Ȯ�������������^#RY���ĸ���I=S^hjUG9)03+.&-IQN1#70/,,--.,--/07V�׀a������ƻ���������Ce�||���������r�������������������������������������i|����[EUU1%-.@TQM?#0Dc��������RQX^`jiihflmmoonmljW#
+	
+2:/76"	
+
+
+
+
+	7XA!Dvrmlic]ZXUUPB%vH=B<24>?:@>91044*-,-0687795123+%):HD=:@88>@=8401-)'(.0+)(*+(%"'������������ÿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ƽ��Ƽ��¾���ƽ��¾�������ļ���¸��»��������������������������������������������������������yjb+)h��������ĺ�����������������qK,6)*'&&)*)/P���c%)(,|�ƾ���������������S1`yRNKJILM`���<DEEBA@??@= l��������ʵ��ȃ�ͪ�Ю�Ĺ��������@,9730+)$=J++$5��#zU4)#��������������Z;548??770?26@?<;:2kKl�QIM"^����>	!`y�	E���U'2(!'(?\jdde^Z[^_bW/Xkhc`^[YWSRPKH>1+,2CWXXWZXUXZTI7.(*/0/01:=@EQRQSRSRRPQQQQOPOLLMKIGDFHKOMKD:.2BJFDEDC/'-AIFCCA=>DGIHHHHIHGJIHGGGFGIGE@$$2.-1403739=<IPs����������cbegjt�~gaaa_crog__��uznb��zqanquvwxxvP8!]r|![��������y�up�gdyxvrlhe[F"&/9���ם,,\n|����q8a����|vkeflswuwutspmjf^\Zu����ç���¾�����};U������������������2!���������L#EV]cgOGA3,2-,*8QTI'!2 %4/.++---,...2=h��gv����������������|G`ru|��������n�������}�������������������������˽����Qkn���NJXSMNVEAROI:%/Di��������QQX`djkidbmnmopnnmgV
+
+	
+K��xr9	
+
+	
+@WAJtsokgb]XXWTM?
+2z9:<3+3996?A=4,34.((+0246786133,%+;GE=<B96992/./*)*-152.00/,)'&
+-���ÿ�������½������������������������������������������Ľ���������������������������������������������������������������������������������������������������������������������������������������������������������������������Ľ��ǽ���¿������»���������������������������������������������ÿ������������������������������������������������yj`,+e��������ƽ�����������������rM.:*'('')+2���`. &)(.w�ž���������������U.g�OLKIJJI`���=DDDBBA@A?=f��������˷��ĉ�ʬ�ɳ�����������I,<61/-)%.731(:��0&z�������������_>538DA66/<37D@9::2hNj�VJI"b����@Y��P���)4/'/ 9aoiedc]_^\a];Amgeb`]ZYVSPMLH@1*/<MXXWXUVZ[Q?0*'(-11447=AENRQRTRRSSSTRQQQOOOPNMOMKHKKMMKH;..:CFE@4**0?IMLIEA605BHGEDDCEECCCA?>AGHGA/*,--242565;@EO[���������x`cegkv�ldcc`_egkedal�vx_mw~��prtvxxuq;8
+%`SN#x��������RHe{�zyzxuqigbZ6 &*ff5�R9br~���~Z5t���zvlhqtuwvvttsonie]\]����Ϳ�����������c3k������������������4z��������B*KZ_efJD>502105GVUD"#�}naI#.1/.+*,++-/.05Kx��F������Ǿ���������rP�}w��~omptzws������}��������������������������Ǹ����i�����HLVM4#*,@ONG3!2Il��������NTTaejkicajnoonmnpgR
+
+	
+'?!
+		
+
+	)C\9Ptqokcb^YVWTO:
+5776,(0541=@><20--((+/313565221(#,;FE;7=7443.,-/+(-36640032+))+$
+1�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�������������������¾��¿��Ǽ�������½��ƽ���Ȼ��Ǿ�������ƾ�������¹�������ú��½����������������������~����xk`++\��������Ŵ�����������������sQ,8)'((&&(/kY)+IW(*()r�þ���������������[,j�JKKGHJJa���;CEBDDA@AB>f��������ȷŵʶ�����¼�����¾���E)=731-*&\weG,E��'8/$u�������������aA437AA672837AE;:92eQ[pSKN"^����FT��(J��110+$-$+brnhefc]][\_Q3hjhd`^\YVSROMLH=/.7GUXYXVXYYM8/)&(,2569:ADIMQRRTUSSRRSSRRRQQRQQRQPOMNMKJGD?6//8DKH>0-6EJIDFFEC/$/?HGFA8.15;<:4/.7BGC8"%*-./133679?DITm��������f`bejp��hddb^_ipmcgt}�tmn��y^pqrsvxxuj&,
+4z�P;���������iY@96N}srtmjf_Q)#)!xyYUDgv����wCC����~xthhqwxvwvstrolgb]Xa����ȸ�����������F1�����������������mK���������e3R]dkeD@9,,-*/3KVT=1AVw��F21.-**++-./.4=_�Ǽ8������Ƽ���������b]��������������������������������lv�����������Ǵ����������BOU?=<>7ANMF0%5Pq��������KUW`fijgc`ilqonnmmfU
+
+
+
+98+		1\nY7Wtnnlhd^XTVUM:
+>~E73/'/688<>:::51/*+05642265121&$,=GD=67731010121.266651//,((()&/�����������������������������������������������������������������������������������������������������������������������������������������������ſ����������������������������������������������������������������������������������������������½��ļ��Ǿ�������º����������������ö���������������������������������������������������������~����xk^,*\��������Ǭ�����������������tS*<''''%&'+FWq���%((,k�½���������������Y*f�RNJIIJJZ���@CFCCDA@>B?e��������˼��˪�����{������¾���E);743.+'1'.K��S��ht�������������iD348?>551736@C<;<7eSLNMLM![����P
+0��D
+D�610.+#',!*<dsmhfhe][Y[[3Rnjfa`]YWUSQMMLF:13CUXXYYZYXK4/-)).577<>AEHMPRPSVTVTRTTSRQMKOQSSRQQQRQNLJHD?;68<CEJG:.4AGFEC>8.',;KLJIA% %9A92.+*2BB=&%++.0135379<CCMV|�������abbels��dcca`ddjn`aqp]v~jzpmz�jprstwwyW&"
+Nom/T��������y���w]_|_Mnlid[E#&2B�}NT�;$Lly����l2\����}xohctuvvuuutroiea\Xs����ê����������{&E���Co~����������<S���������9;T^fldE:12,(*'*ISP9-]}��.20.+),+,,-/39Ky�ӛ@������Ļ���������Wn�������������������l������������sqs�����������Ų���������yAQY1H:GEAQKF-)7Su��������JSX_fijg`^hkmpoonmeN
+
+
+
+
+	
+	1]bP:[tpnkgbZWTTSN6
+
+GzA400&'3;9<:4168871/58576254121&%(?HD=85783/.-0214888750.,(()''&=pp��¿���¿��þ�����������������������������ƿ������������������������Ŀ��������������������������������������������������������������������������������������������������������������������������������������������������������ǿ�����������ƾ�������������������������������������������������������������������������������������������������}���ym`.(^��������ʿ�����������������uO+E'&&&%%',Y���Q,"'(+e�ü���������������[*d�TQKJJLOY���>CFDCDA@?B@`��������˹��ȭ��m�������������K(<742,+'/<*"2J�� ^��Bu�������������f?246<=961935>B<;:5eRKLLLL'[����X��t	mC000/,  7%.6Ojtohhhc_[Y]D<pjgdb_[ZWUROMLKE:2<WYVX[ZY[K311+*16:9>>@HJMMQRSUTQRPRSSTRI6:PRROLPQQRNHF@CEC@<=@><EJ@308DHHD:*(5FMLKKJ7!/H@;952*9>:/%,)+222748<;AEHTc�������^befjv�xbba`bmpkdagx��xd_waaYtnorssutvB.
+ Vpf"n��������WWt���|nbnjgbY7 '2'*����q,Vq���~T-q����|wkhjwwvuttvsqmhc_[^�����Ĥ����������f%`�����-%"$F��������!+w�jUXn���k$FYbkjPC809616.5LQJ/��; #3/0,+,)+,./7Ej���xa�����ʿ����������Q|�y��������������������������x���qn������y����ı���~|����iHRVJ@>)0HQID)(8Tw�������}GP\ahiji__ilnnnmpmiJ
+
+		Emz3
+
+			
+."^tpnje`YSRQQJ6
+El0../+)+/6:>42029<;435155363051)')AJE?6389745526799:9761-+(+*('$M���������������������������������������������������������¾��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿����ɾ������������������������������������Ƽ�������������������������������������������������������~~~��zm`0+[��������ɸ�����������������yP*F)&&$##&)MoXIHB')(&d�Ľ������}��������^ 'a�RMKJIJM^���ADDDDC@??BB!Y��������ȶ����ǚm�ص�����������J*:632.+(F��t 5H��!Mnu3P��dq�������������h;458AE;50946?F=;;5gQJJJLS)X����c	J��T>2121-&:-"'G_sqkfhhd_\YM6bniedb_]YVRQOOLJC87SYVY\[WYU<651033?>=>>GJKOSUTUNAAHHKNUTO:4DNOI9DINOPNO54AF@?GGE=<C>964:?CIJA3(4DKKKJE,#39989/29><7' (+.0/488;<?FDMVn�����m_adip��mca_``nsqhanh`fvi]��wvrooruuusn("6
+6�t@5���������`H?9EGo|xqnge`O* -/#\����%9`s����t<>�����yqlfmxwxwuuvtpkf_\Ye����ȵ�����������I3|������hoqr�r�{�vw�%��f������#+MZdosLC7*11.0->PSD$-iT02sI)6.-+**),,01<W����[s�����Ĺ���������|P��x���������ep����������������������������q���������wpe���ZGRPDX\HGVQJ?'+:Xz�������zMT]ahijk^]hmnmlnnljD
+
+
+<.	
+
+
+%efT5"asqold`SOTURH3Ra*,+'&())08>:5/0:==80+,22160+4.'&,GIGD?9:99;<=9;<<999661+'*-+)%]�������ÿ���������������������������������������������������������������������������������������������������������������������ʿ�����������������������������ż����������������������������������������������������������������������������ľ����¾�����ú�����������������������������Ķ������������������������������������������������������~��zoa/+Y��������Ǻ�����������������yQ*E)&%#%$'-m���Ə-)(&b�¼���������������_"&d�[NLJJJHDRGF=BDDCBA@>=BX��������˳��ʺ������������¾���I,;652.*)BrU:2F��"\��MN�{?p�����������¾k?168BB772226AI<:<6aPHJJLO$\����k��D:5522/+%8@)*Kdtqmiihf`_Y7Hojifca_ZXTRPNKJIF>JYY[[[WPQB<;:689?A>>>EIKNSWVTO91?GB:CD??;<AFB88DJ<=IL=8@N?3=EB?BEA>=99;@FMMJ;01:EKLG< '7961 )>>=;0*+.//597=<CGJR]�����d``cfp��fc_``]`fhn`wibaij^b}��roopstvuZ./%%9!O�����������xd?6?Rqqngb[G %3<�����> Few����j*U����|xoqsuwvwwuttomie_\Us����Ƴ����������}->���ī��m&L��[c#1�_M������u�_3P_eqmGC@1(%''(BPNB!*������I-3-+**+++,17Ep����B������·���������jA��u���������n{��������������������������������ʼ����{�����OHXQ&H[RRTQK<$,=[��������vMQ[`gjki]YimmolmmjeC?'C(			
+	
+?OB&&jusoje`TRSROE0XW&+($$%*(-6<><848>=91'(/1130,2/&%*CFDDC@@@???=;<<<98763-(*/,+(!j���������������¿���ſ�������ÿ�����������ƿ�������ſ������������������������ľ�����������������������������������������������������������������������������������������������������������������������������˾����������������Ŀ��ƿ�����������»��������������������������������������������������������������������������������������������������{oa2%[��������ɶ�����������������wU)L'&$%$#')Yq]QQ[%)'"Z������������������_$%b�^MMKJKJJx��Z@CCA@@?>?>W���������µ�š��ő�������������O
+):754.)'% 2E��$T��?��Yk�������������jG237?>87/014?D;;>5[OHJLLM+^����l	
+n�+#6G;9541/.,#'/& )Hmurnjjhcc]?8onlhfc_[YWSQNKJIIDGVZZXXVH>;<><77:>B?=>DJLMRVVURI6<G?53:28>;8?>:=IO?4;MDB@DD78?7/6>;AAEEFGGHJKHD:04?EFG* 5@@<7+9?=:5& ((.0056;?@IFMVd����X]acgr��ba```bf_[`ZZ��th|y|��mnppqrtwF(
+a<	!n��������~����ilnV`rmgbW8)7F�`�'Peu���}T.p����}xvuuxxvtvvurokgd_XT����˿���¾������k5W���ɀO~{Um�����kS�5 I>3OkjOMn�><VagtoA?1(&%+/7IMM=09,Ih{�E31,++*,-,.49Z���ʭ>�����������������]<��w~��������u���������������������������������ɸ����]e|���IMXS(%%.ESRG:%.B_��������rOQ\_fjifX[imonlmmnc?
+
+ZVA%	
+
+%4 &kvspic^XUTSNA.VP.3.))+-'+46=<4-4;<70)()/13/.30+'*8;<<<<==<::;;97862/.-('+-++'w��������������������¿��������������������Ŀ�����������������������������������������������������������������������������������������������������������Ŀ���������������������������������������������������ȿ������������������������ÿ������������������������������������������������������������������������������������������������������������{oa5%[��������ʺ�����������������wT$B&'&&%#&(EIIj��/)'%V���������y��������`"!\�ZKKIIKMR���c>EBA@@@??> T�������������������������������O
+'9852-))GZce! 1B��%'��}O�x0k�������������kH158==750213;A;9:2\RHKKLL'Y����b	4�=%!"JE=965321.++MuurkkigfcT6Yqnjgd`][ZUTPNJKJIHR[YUY\N?69>>:8:=BB=ADHLNQSWWUTNOPJHIL@9DC69;6>LP@9=RPKC17<AAD<5835;<AKNMLMMLKIDBBDFB3'$6?@@<=?=782%)01388?:>IFPWk��nV^_biv�vbaab_hy{tbc]nrsh]i�phpnppqrrqn2#h�84���������Qg|��u{�bJlie^M(!+.1r
+&Kg2Ylz���r79�����{spuvtuttuvurqic`\We����ɷ�����������QGw��žn'$!#T�X����r�yâ�ocw���"BYajmaEA622,.29JOK5 P�O!"81-+**+-,28Jv���؋N�����������������TWsow��zswusmi}�������������������qk~�����������ȴ���{hz����HQVRKPGHUUPG1$2Cc��������iLS\aeihgVZjmnnnmmmb=
+
+
+	SQC	
+
+
+
+
+		1rrT-/mwsoid^XTSSOA,
+^Q8>=8785+,978;5,4;=80,0*'-5/-11--.86540230-031./-,,))'((&'&''%����������ÿ����������������¾���������������������������������������������������������������������������������������������������������������������������������������¿��������������������������������������������������������������Ǿ���½��ɿ��þ�������¼�����������������������������������������������������������������������������������������{pf8%W��������ɵƶ���������������|V!A('&&%%&+L����T()%%P�ž���������������a%Y�bFJIIIMM���c>DCC@@A>>A!N��������Ͷ��ʰ�����������������Q
+);743-),MvRY "/C��&%��|e�l�f�������������mC348AA862735<I>993cXKLKKO']����a
++�\%9MC>;953420/)	)X{xpjjiifbDErpmhfc_\\XUQRNKIHIKMPIJHFA76;@@;:;?B?ACGNPQSYZWXUXWWWWWTQRVK@?==HK87@CHKN94GM:5AIE?49:=GOPPOPONLMKKLKGCA;559867640+78&#&+31668@7BEHT_��`[\_cl�h`ab_`gglmev���gdlnw��pmppsrqrb!TqQ+N��������}TB49Bj~vmdlkf^A!#, %Bkb0<�LBbs����e+R����}wmfovtwtuussqnhb^[Rr����Ƕ����������z2J���ȳz���rh�)--)C|oX�������̉)KZbjnMFEDIGA;BDKOG+&l��?+#40.+++--06Bc�����ka�����ȿ����������O��y}���{~������������������������inx������|����Ʊ����p{���wCQRE'- (=ML)%2Ed��������dJW^bgjgfSVkmnmnnlla8
+		U^U				
+
+	1S/$4qwtngb]XTUTQB,
+jW:85=B>7//8516449;;82/43,+52+(,.42>841/43-'+0/-,)''''(*.)#"#!!����������¿��ÿ������������½�������¾���������������ƿ����������������¿�������������������������������������������������������������������������������������������������������������������������������������������������������Ž��Ž�������ļ�������������������������������������������������������������������������������������������������~~���|pe8%R��������˺�����������������~T"<)'&'&&')F�sO&)**%%L�ž���������������d& X�aFKJJILP���j>CCC@??>?> H��������ʷ��Ŵ������������½���S'9842,)&$$"0Ez�($��{gc9�f�������������pE448AB75.445=B<<<5cWILLLO![����d(6by.QKD@@?;77300-'"4f{skjkjhgW<kqnihc_^_ZVSQPJHGGFC>89:;?:6;@A<98<BB@BCJQRTX[ZYYY[YYYWWWYXUSPPMHH:5=:4:G?9@MC2<KMH658;?GOOOPRPOPNMNOMKGDC@?=;:30.09;3%+137<9B?>FDJU`l\[[]dn��aa``__^\Zgfqd`^}c^�{qqrlnprrquL( F~U%n�����������~gMh|UbpjhcY1#.O����t8+Nj{���yP(l����{uhcfuuxvtsrpmkea^YX�����ò����������j1Q���ɛ+1H\w�r��lh@�J +Mv�ib:3U]gonDDHHXVKMPOONB)-v:V<n1",6/--,+.029Oz�����My�����ù���������{S��v����������������nr���������v���nr�����l���������������lFSSAF\L4?/HJ&&3Ji��������_MV]agggfSVjmmnmnmj]2
+		3+>
+
+
+
+
+
+	6sC#9qwtlf_[UTTSOB+$jM;(++,.0/33013-0<:72,+.22,/,$%/267?80-063.+*//01.((+*)-1,$!! ""
+�������������������������������������¿���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ʽ�������ľ��������������������������������������������������������������������������������������������������������������~~��}pg;$S��������ɴ�����������������}Z:+'&&'&&(N�����1(%%H�ý���������������f(T~\JLJKLNP���n;BBC@A@>?B"I��������ʯ���µ����������������W$<972-,(?TS_$ /Ay�)��lU��ee�������������qD457@B840435AE=;;7^TGJLLQ#\����` ,26C�H:PHBABA<896420.'"HxyqklkhgfBWupkedc`]ZWTQPKGFGFD@:757<;48?@=979@DBACINQSV\\Z[[\\\[YXYZXVXYXWXUPMLJ568759?=8;GEB:159<CQPQSSPOPPOOONMKGFECDDC@?>?=<9& )-46<>:B@ABBMVYXX[_gu�^b`^ahnp]`drfd]kdZ^Zpllmnpqrqq2.!;ni8���������mKAY��yxdjphe`Q&#&��;g~�9^r���s>2|���~zsjiouuvvtrroljd_\Sd����ȼ���½������NHn���čo^>)'�9,91fx�%X}M7#"i��$>X^hpnB?5.@:6>DKTN8"6��}r{&$013/+,,,-09Dk����Ǿ?������¹���������qW��w���������w�����������������������������j���ʾ����iex���YBSQ"PgP6B8FE#)8Pp��������UKR]dghifRYlpnoonmkb-
+
+
+ VTM	
+
+
+
+
+
+0dM"<sxrga\[UTSQNA&&udG,pHD5-)$'/66.+-3/-88-)*+/14-+(',563492-+150/..26:93))**+12/'%$ !!
+�������������������������������������������������������������������������������������������������������������������������������������������������������������������ǿ����������������������������������������������������������ȿ�������ý����������������������������������������������������������������������������������������������������������{pg9#R��������̺�����������������~[8*&'$&%''S�yoos-('%E�¼���������������d+T�_KKJJLNL���p?DBA@A@?=A$G����������þ������jy�����������Y#<962-*%S�``'/=w�)!�u{?��Q^�������������rC246@F85.634AI=:;7[THKKJP&\����^",647;g�QMHCBA?<;;8620-+&% (Xx{ljggghSDwpjabea`\ZVTRNJGGFCBA;569;54=@?867>BB@BDJNRUZ^\[\]_^]\[\\[YXZYYWXVUUC8=D:::4:47A@9;88<=@MURRRPPQQQPONPLIGFFFIKGFDCA@?9$+488A;>BABCHRWYY[^gw�ncb`^`kvvo^Z��koidddWawjloqqrri 5
+
+!mwJV��������q7K\L\{{ytsngb^D !' �B�)a:Ifw���c,L����|xphaowvuwtsrojid]XSn����Ŵ����������}/Q���Ʒeg�����y��K!L�b��í�|��$HY`jwlE<0*2+0/4JWP5/����w,52/,+)+.3=T�����ҦF�����ý����������ec��y���������[�����������������������������{���ȼ�����v����OHTT>"857=9H> *9Rp��������SMW]dhjiePUjomnmmlic*!?C8
+	
+
+
+
+	3VL$>rxqfZUVVVTSM@""��gF0+&%"h<EB8//202.''(,.-15,&(/320/4125634-0/-/11-.,,3;>;2)*+*(-0,%%% 
+(��������ſ�������ž������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȿ��Ƽ��ƿ������������������������������������������������������������������������������������������������������������}pj<&S������������¿�������������^!@.&%#$$&)N�����7&%#B�¼���������������h,Rv^MJIIJKM���`:CB@A??>??%F��������ϸ������˞�������������a&9;62-)$%+0Bx�*r�gL��l`�������������t
+D/49CE85-533>I<9;9XQHIJKM)Y����_#6957:M{THB@@?AA?=<:53/.+*%$X�vlfeigbAivo\M]a`]ZVSSPMJHGHHD>868:94;??:8;AFF@@CJRRUY]\Z\]]^^^^]^\\ZZZYYWVVSF6=GB?I@<8675168:7;<ALPPRQRQQQQQPOJIGFGJJLNLFEEDA=.*2899B@DF<DHLUWX[`h��eb__`^fcei_i[Ya`fdefdckjmoqqquT"2dz/$w��������b\���ev|yvpkfbT3&))D)��}%(Vl~���{O+d����ztmiesyvuurrpnje`ZVX����ʵ�����������i:f���űuEWy���Mp����g#D`d����ž}.PYbjnWC605:.6+5LRI2$PK'"$72,,+*-15Dq������wP�����������������Kg��x���������a�����zh�������������������������ȷ����a~����MMTXN7+AUMKJ:"-;Ur��������SKV]dhiidXYinnnlklma%
+%LX5
+	
+
+	2NADuuqdOHQXWWRJ?!(LA0% "f/8A9144.,(!$&'*2/.++(+...1857530.+1112320.-+2<>:/(*+)&(+)$"# 
+'���j�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ��ǻ����������������¾�����ÿ�������������������������������������������������������������������������������������������������������������~qg>'Q������������ÿ��������������aC1%&%%#&*T��{io7%%";���������u��������j.R�bLKJJJHL���I<DBBB@?>?A&D��������ͱ�����Ǚ���������¾���b%:962-))5[[Y-Aq�- ���I��a�������������yG35:BE65/124=F<799\RGJLMM(X����c7B88:JULB>?@@CB@?<:8630/,(!)o�ti_bghMKwobAJW^^ZWTRQNKHEFHEB>;:<=46?@?:<CIJEBDGQUVY^^Z[\]^^`_^^[\[YVVVWXZXXNFEC;>D79;:9794725;;AJGJNOOOQQRPLE5/5;>INSSHHGCA?8'17:5=@BJABMOVXY[an��a`__^`glcf^|��~kgoe]]uijmoqqrp5.!]}k5���������nr��~jv|xsmie^M(!))[�15�r;8]p����q76z����yqhgnttsttsrpnic\ZYc����ʮ�����������VLz���`5)!M��k,7�8?�kH:&8b��_7TZaiiPA3)034:1>KSB-t���z.&9/,,-,.5>Z�������ao����Ž�����������?_�xu��������x������y������������kq�����������ǲ����u�����@RWN55'#TQOI2$.?Zv��������LNW]dgildPXhklllllj[#
+?<!
+
+
+
+/XAHvuoiVGSYVVQI?%g5==96422.' #((&13+%&,+((-57330/,,+331333211./;?;/)()(&''(&$# 1/-5���������������������������������þ���������������������������������˾���¿���������������ǿ��������������������������������������������������������ƾ���������������������������������������������������ſ������������������ľ�������ĺ����������������������������������������������������������������������������������������������������������~rh>(Q������������ÿ�������������f":.%%&%$&'B^JGV�;$&"6���������w��������j/Nx^JLJHIIG���GCFDC?@>>?>(;��������;����ɚ���������������](>:73.*+I��#->p�."���.4��na����������Ŀ�tH257<=842545<A;785WTGJLKO(`����a,C?7?TRJ?AA@ACA@@?<:7620-*&7�ue[cdWBxunRCDOYYZVTPOLIECFEC?=:8=71=AB<?FIJFBEIMSVX]a]\]___`__`]^]ZSSTTUTVVVSNME@?;3;58?=:A>5;<>D::DDDIRRRQKB$(0DXWOLHEDB=0$4:==:??BE?HSWY[]eu�taaa_^cpzcb[i|lpsf���}rilopppof#G
+
+4`nYN���������{�~`Ix}vqmhb\B"!)!'�?9��qHfu���}c)K����}xnagqvustsspqlha[VUn����ĳ�����������C^���Žw^���q�l�|mq]�?���­����8 AW\gk`L@-)*(,31DOP<,.@U��!)4.,*+,-7Kv�������I�����»����������{Hcmrx��������s��������������������dpx������w����Ű���|X���y?RTCA^V7;TLH,&3A\z��������KMV\chiicQUjkknpmkhX!
+
+
+
+	
+
+	&,0OwwqmhYWWUURH: 2j5;=7<967::0'+)%*2+'%+//+0782.,(*+3;54364322326970(''&%&&&'&%#
+)��þ��������������������¿�������»�������Ŀ��ƽ�������ſ��ƿ���½��ƾ��ƿ����������������������������������������������������������������������������������������������������������������������������������ƿ�����������ż��ƽ��ƾ���������������������������������������������������������������������������������������������������������������}phC(L���������������������������~j%6.&$$$%&)95/e��4%%%7������������������m0Lm_JLJHHIHw��KADDB?@@?@>+9��������̻���ŉc���������������^%;841/*(:_N9+;i�0Ul^.F��kY�������������zA246;@<645369?>897XSIKIJN%Y����`#;C9JQJD@C@?@CCBCA?=8842/,)$V�ue`bZ?]uoaA>AGRYWVRQMKGEEC?=<4/36/;AC@CIJIFBEJKPUY]b_\^`_^^_^^^`_\XUSQTRSURPPWYRPOAE>8>>78>5;EDE=3;@97HPRMHG5!+P[TMKHGEB<%"3>:>?@ABFHHTWZ^_hy�mc``^`c`a]bx|s�yj[]_h�yhjnooonV$	
+R_a n��������g7\�vupzxupjiaY4"&����x�)Vly���uP&b����{yreaertttqpomhe`ZVY�����¹����������lDq���ư�,+��������ulv^sZh������'IX^gk^F>/0<<;9<JMP6''IM*"=f/2-**+,1=`������ϻG�����������������sS��u��xmjmqlz�����������������y��xps������l����­���}j����gITZ5B`Y@9SKE)'4A_��������MPW]cghh_QTklnmmmljY
+
+'CA	
+
+		9QU=OwvqnjaZXVURH8@m:22/77<=@A;230('+($!(-./4750--**&?D866:><3455310-**('%%&'(&$#&���������������¿���Ŀ��¿�������¾�������ÿ��ý�����������������������������������������������������������������������������������������������������������������������������������������������������������ɿ���ſ������ȿ���º�������������������������������������������������������������������������������������������������������������������sjF%I�������������¾������������~f&81&&$#%&(&E��m5+'#"1������������������m/HoaMLJIHIGJFRDDBCB@@????*<��������̴���������������������`#<951,)&$& #)?k�0\���Y�������������{
+A545>D=51136<F@8:6bUIIHIQ&[����c-HGKJEBBB@@@AEECAA>;;9510.(#!&y�}uh`]HG|tnI=9;@LUVURNLHEEE@<91-+427?ACEHJJHEFKINU[^bb_^`a`^^_`__`]\ZYXYWXXSQOPSTSYUTRJKNB5;75JTROJJLF9?I?.*-,()E[TPMIGHFB5-;=?AADFFNORX[\bm��hc_\\^jkgd_gcYsrhhmfYtoilnpqmj<-
+	%h$i.���������]oa~Sj}{xtpke_Q%&+4W��6Q{5_r����n53t���zvwytlqtsqoonmfc`ZWf�����ë����������^N{���ɥaqt�ux�������K�ī���h[je.Q[cnkWD<44>D@7>LNG4""Q�����2/-+**.7N}������ԟJ����ļ�����������gc��y����������������uy���������������������a���ʾ�����|����YGTZQ%*.4HNKF&(4Cc���������IMZ^dghhbOXmknonnkhS
+"[zya	:f@QwurokcYWWUOH4
+
+
+EwB6429<@BBB?69;4,02.'&(*-664321//-9;8646:=;6531//.-+*)&%'*)&#  "&����������������������¾������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������¿�����ÿ���¸�����������������������������������������������������������������������������������������������������������~tfE'M���������������������������~h):6%&%#%&*F��]AV;($$/�����{��q���������r2EnfIKHGDGFFDECCDBBA?>>>@)4��������ΰ�����ʿ��������������h 8:620,(U��l)*>h�1T}�NF|qkY�������������x>227@G;7/345;B?884ZSIJIKP'W����g";RLEBAAB@AAACCCBB@>@;942/+&!!1x�}te^Q:pvpY;788>LVVSPKHGED?9962)-43=AEFGKLJHEGHKV\aefc_``_^`a`^^____^__]]\XVVLORSSQTSSUTWNE@:DMNPRTVVTKMA$':RSQKLJHGE@0,8??@@ANJLRUY\^dp�{c`^]]ennbabiaaaahy��|fiklopolf':
+
+/vRJJ���������a�y�vhxxwrkkc[C +#)�a$53,6Dcu���z`$E����}vqkhvtstsrqqmkfb\XVs������º���������>^����'D�nm�����(X�����ż�J,UVZgsuSE<0)*-/)<JQF0"-LXq�h* 4/*)+,2>f��������~[���ɿ������������^v�|w���������������������������������������V���Ǻ����������THSRASHJMWOIC$)3Ff��������~LQZ_fgigaNYllnnmlkdO
+$3	
+,5@1Uyupnh_VVWVPE5	GfLA>:?CDDBA?96><79:84.)&&+-.0.--/-9:75553676631/--.,(&&(*+(%"!
+!(ÿ�������þ��ľ�������Ŀ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƾ������Ļ�����������������������������������������������������������������������������������������������������������������tiG%H�������������¾������������~k)8;'%%%&&()CgznK+%$$/������������������s5C}~IKHFFGGHDDBDDBCBA@???*5�������������˻���}������������i9864.)';64.*@j�4���gX�������������x
+E527EL971855?H>895ZTJJILL(V����l
+0THCDEAAA@A@CBCBB?@@=;852.,&1z�}rdS>U~sfB9:78@QVROMIGDCA9762-'308@CDGKMMKFFGKV\afjjdcbaaaabba`aa`_``^__]\]ZVUTTOQONRSVVRKLIHHJRUWX[WUM"&*049CDHJGGGC:*7=><DDIMLQWY]ahw�pb`^__bll_`]bkdadnhXXmokmmooonT2
+4tNn��������j32-RW]xyuqlgaV5"*8� U��]	)Vhw���uF ^����{tnj]dvtssqrokid`ZWY���������¿������u+W���ó�ri]gQ���N.w�0Pp��E1�d[o�vMDA==85,'@OO@/!-F>6,/A1(60,*-.;P���������ap�����������������U��y{��������`�����������������������������{@a��˵����������JNWT%BSVUYPI9$)5Mh��������xFQZ_eghi_KYlmmlmmneO
+
+
+
+( IL?1Yzwtrh^WWVTPE/]mHEEBBBB?:>@=7<>>=:9760)'((''#%(**97465545555541+*--(#"#$&&'&$
+	",�����������������������������������¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������ƿ��Ǿ�������¿��������������������������������������������������������������������������������������������������������������������tgH%H�������������¾�������������j(7=%&&%$%(1z���};%# ,������������������r9Cw|CJGGGFGFFDCEEDB@@???@./������������������đ�����������j!;:74.*$$#"#*Ag�5k�s!!%V�������������y
+C516EH870436?J=896VVIJJKJ)W����e
+6GCADCA>??@@CCDDC@B@>><84/.*#4x�~s\F<~vqS;:869GNKLKIEBBA;653/(/13>CEHJKLKGEHRX\bfjnhbaccc``ccc__``aa`a__^^^][\ZXUSSPNPSPONNNMKRWYYXUTQ+!(,-*'$$/AGIHF?)4;?>AEFIRSUY]aj~�e____]gog\bbbcyg^wlachmkklpoon<
+	\tn '���������~{ydHHs|wsole^N&$'(����f|
+5^m|���p10s����ytghrnssssrrpjeb^YWd�����������������`Gl���ƩX"/Or�8T,d�~i;= *k^_m�vEB@44285;EOMA6-@�����8&#-7.+,,5Bm���������F���db}~����������wZ��s|��������Vo������s������������do�������wDVs�������������DQSW."$"3POI0%,:Lm��������oDOY_eghgdI]klllkllcI
+
+K�Q'	#LQ@)&czyrpja[XUPMB-ccAFECBBA:0=@>64;<=:98:70-)),-)(-0.779;:646533275-**)($!$(%!$8��������������������¿�������ľ�����������þ���������������������������������������������������������������������������������������������������������������������ʿ������������������������������������������������������ɽ�¾����»��ƻ����������������������������������������������������������������������������������������������������������tjK&I���������������������������l*8<&'%$$$(5E;G#$%*|���|{������������s7ClfKKIGGHHCBNEACDCAA@=?B/.�������������������Ä����������l 8784.*&F��~+(Bi�5��^8bY+ W�������������x@347GK<70/44@K?898XWGIJJJ*Q����hICBAD@@AA@ABCEDDDAA@>?=:73/-*$2z�}nS2`{taC;885@JAEJFCAAA=4452,*/0<BDGJMLNLGHUY]cgmrnfcdefdeeddbccccab`___`_\^^^\][ZXUSSRVQRRONSWYZZZXU></#+/..(!"1AIHA<%1<@A=EIDMQU[_cp��]_^_]`ghf`g����n_X��ukjkmpnkj,(
+
+)c_F���������w����zvqlhbYC  (!)TL�Xy\	"Haq��}_$B����~wrkjnnrrtsqokgba]WWo�����������������@V����ͣZs�x\��ym�c�<s�2PW]p�aC=3,1*-0DLPMA;?S�NHW] 00++*.<W��������ηE�řK<m��z����y���g@��u{��������v~�������������������dqz������[l�x��wwuwwurtkgVAKK@V`YUOPNI0&0>So��������kFPY`fkjgdKZlmnmmkkbI
+	<<
+	&cUA)&i�}uphb[YWPLB.jZ:BC>;;:806A@:345;;:99720/+'*1.153<:<DB>;65/220-,*)))'#  "$!	
+'>��þ�������ý�������¾�������þ�����������ľ�������������������������������������������������������������������������������������������������������������������������������������Ǿ��������������������������������������Ƚ������������������������������������������������������������������������������������������������������������������~�rjP&D����������������������������m,4:&(&$#"&?��k9�T$&%'w�����������������x:=giMKHHHGGT��^:BCBB??=></.��������̽��ƚ�����ʇ����������h:9220*'Cqm\(Bg�6o��:km.!V�������������uC425DJ?62073?KA998RREIIJI*O����l+D??AA?@CACCBDDBADBA???><:622-)#7w�{l6FumM<8768FA9?BA@@A=63541+/.7AEFIJLOMHIW]^ciouvmgffeeffedcddecbbba````^__^^^^_^][XWXUTRNMPW[[YZXUN/;Q:"%-100+"0>-#*;CC0)@CEBCMNNPUZ_fs�sda___`jmd_bk\\a`a[]loiiikmolkS!
+
+Q9aAd��������l=2-?Eh�xupjfbV3#.0��K+)Qgu���xJ!Y����{uof`qrssqqplif`^\U_}��������¾������y9h����ǌ%Jbw��>0Kcvq�}p9UY_t�[C8.0B>:-<IMI?9@Y�B 3-++-3Gt��������אQ�΃4P?����lSMFJr�XIfsw{��������v�������������������gg{������F����zih`_`]\_VUABE6'HUTUPJD*%.?Tq������y�iKRX^fkjgaQ[knomlkid@
+
+
+
+		<'
+$_N3#'j~|wqic\YWRMA+qYBA@>??>=53:>><97548<:63///-(+.366>;8CGFD@9422/'(+)))'#!""  !!		&8��������������������¾��þ�������¼�������½��������������������������������������������������������������������������������������������������Ŀ��������������������������������������ƽ��ʼ��vu��������¾���������������¸��������������������������������������������������������������������������������������������������������������������}�thM&C����������������������������p,07'&%#""#*p�Z2U/#%$&s�������~���������v=<fiRKIGGHI\��p:CBBBA?<??.*��������ͻ��×�ѵ��Ā����������m8;62-**&#(>f�:��-p�0O�������������~B355;B<40242;C>:98VUFHKJH(S����gFAAAA@@@A@?@BBBBBCBAA@@@><9850+)%$>{{tR2wzrcB;986?C817=@A@>93463/+/3BBEIILOONNW]_dkqx{tjeffeggfgfffdccecdc`a_]a`^_`^`a_^_]\\YWUUSUY[[\ZVTI(D1%(.542,(#1EF@* -<A*@EEEEFLNPVZ`gx�nb`b`]\^```jg`abacb`cdfhhjlnnmH"	
+Z:7.+~��������qw�t\1Htutojc_O&%,8���ԻWJ6`my��j3.p����{phifpvtpnpnkjga]YTf�����������������iN���Ǿ�F2;�{xH*Pr%�0 <871+)+!ATZbnsMB9-(++..;ILB:0.[����:&6/,,0;a����������pa�ժes|}{}��v<AT~}Ri�uz��ymmlcbf������xip��������������������p)|����|zvqljlja[=GA>`ABZRSQJF#&0DXx�������zaNRY_digfbL]joonomib?
+
+
+	
+)[[))m}{wric]XURLA'vR???:=BE@98:;?=><935662.+*/10*-264A96=CA>@=620-&&(&(*($#"###" 		%1���������������������������Ŀ���������������������������������ƿ�����������ſ��������������������������������������������������������ÿ�������Ŀ�����������ƽ�����������ƿ������������¹�ʽ�f^^^^\_ZZio�������ż������������������������û��������������������������������������������������������������������������������������������������~�~��ngN'D~������������¿������������m/)3$#$$"##%!###%%&&#!s�������r���������x?9dkPKJGFGHG��TABBAA@>>>>/(������������ǣ�Й�˧������½���n5992-+&#%:a�7s�� @X2L�������������~A:47>C@665738DC97:PSHIJJJ.S����l'OB@@>=AA@ABBABECCCDC@@@A?;;:94,+*&J{tdDT~ukI?;969D9+,2:?A@<51334,,,:ADGJLNOQSW]aels{~|leeeghhhfgfhghhgdccab_^a```_____^_]]]]]]][[[[\[ZVUN&% &.37640+&,@GD/,?9FFFDGACLPUZ_i��ga``]\\]^^\[Zk�{c``aceggimmmkb3,
+@m>G���������|���vUvxvsnjaW=(($d+$H}\�cFbr|��~\#B���}uk_hmsuqmnnmkie`[ZWr��������Ŀ�������Zf����ȳg�����v]�l�y�M0�!+18<@B:%KX\bkfIBG6/))0<IKJ=2")E>BYu2'/,..5Gz����������O���������xc\W?8K�{X��t|������������������������������w��������.A������������~~HK9B_?A[VRMIF$)2@Wz������[LT]ciihgaU]kppnmkh^@
+
+
+
+
+	
+6aB"+i~yuqkc]WTRJ?$uSEDDAAA?=99979;<884312/)%(/02.-355@526;96341,..%#&'*+*'#!$&%#"	",�����»��������������������ý�������ƿ�����������Ŀ�����������Ŀ������������������������������������������������������������������������������������������������������������������������ΤtWWZYWXYY\\\QC8Zy����������������ü����������������������������������������������������������������������������������������������������������������������shR#Az������������ÿ�������������l3(5$#$#"##$%&&$$$&$##l�����������������yA8bpPKKIHIIC=79BDDBA><=@>0&���������ƹ�����}�˭�����������l6883,)$ $9`�;h��+t`&J����������¿�~8948BMB731;3<FG89:NUKIGIH,M����l3NB???@ABBBABAACCCCDCA@?BB=?<<83//+'!RtgUCzzrYB<976>>+)+/7===6/021--+5ACFKMPRTUW]dhnu~��qgghghiighgfffefgdb``````a``a__b_a_`_^_a`]^^][[TTYX)#(/36740,*$$%##0=BDE_XDAADFKOSW\j�~ca``__]^^__�}vo`abccdgkklmmm_"+
++nt*g��������sIm~�}rywtpmg`T.!')��{E$!'Qgv��|J!X����yokcbqtrqoopmkgc^YZZ���������ÿ������{Ou����ƣ\^R_f��i���l|(5�,3,$#+923QW]dh_F@A-,$'.<INN=,0~�4*+1,.2>_���������Ծ:���������������o�xX��v�������������¿������������������������e--F�������������JK)/F*6IMQMIB$)6E[|��������TJT]dhmkfbOZnmmmlli\:
+
+Yilp[
+
+/HG /l�yupjbXWWSL?##lF>>>=?>:74351/..0-&%*00,+,044/+144:5459:;84-(+0*&*-./-'$""$#" 	!&����������������������¿������������¾����¾������������������������������������������������������������������������������������������������������������������������������������������°�TVZWXXXVXVWQGCLY_ce_`adgl{{�������ú���������������������������������������������������������������������������������������������������������������~����ukV'@r���������������������������p5(:&$#"$""$$$%%$$$%$j�����������������B5fPHIHIIIB?ENECA>??==>>2 ���������Ÿ��������������������o5:94/)$""6c�@.���7^S;G����������¿�8525BV>63-84<OH8:8LUIIGHI-O����l
+1G>><>>@@CB@?@@C?ACBBA@ABC@=9:98430*# 'PbYRk�veH?;76;?1'')-6<>70.032**->CEJKPQQUZ^elpv���{njjigigggfedecdeb`d``aa``^_a`]baa_^`a^``]UX\Z[I<BL*0-,056744/-&)JIJHGWoNFEJKMPTW_q�r_^``]^[]^_[Zl]aca`beddfjknmmgE" 
+
+.(���������sS?+,;twrvpmi_M##((�����d7\jx��}p5*o���xh\ejpqprponlida]YY`�����������������hS����Ûs3CR�g?33��-`0:>AB@?);TX`gl[D@>00!%'9JLF:(A��rYb61-19O����������ӣ?���ǿ������������uZ��w���������k��������������������fz�������mJ>=Pe_\aacee]^\S>VA-),$%8MMJ7$+7H_���������PIS\cgiigcQ]mmlllkf\0	
+X^[9;
+	'TD-r}zvpha[WVQL? 
+)z?5//56531/-/0./,*$  #*-/0/3540+/85;355646852,*,,,2554/(&$""  !#����������������¿�������¿���������������ÿ����������������������������������������������������������������������������������������������������������������������������������������Ĳ�eSTXXWXWVUTRKACQ\]a_a`_acb_bb`a`aikw�������ȸ���������������������������������������������������������������������������������������������������������������wkZ*?q������������ÿ�������������q5&8%%###""$$$$$###"# f��������|��������}B6k�OIHGHHJ[���GBABAA=>?=0(�������������������������������q78641*&#!3_�D,>L*oqV"K����������Ŀ�}7444?O664,64>N>8;9MUIHHIJ,O����t	+G;<;<>=?@B@?@AFZlW=BAABBBA@?=<;62/,)#)O^VXtXA<968B:(((*0<>;3.022-)*9BEIKNOQTZ`glr{����}zytrrnjghihfeeb`aaaabcab`^`abbbc`abb`^_`G/3>CG>2' $NW402674860+"!>KKJINqnHGILOQUZbx�k`___]\[]^_yrXvfaaacdcghjlmmjd66C�������������z\tk@mokg\?$!!-P�EE\ny��y^%?����}vpfagrqsqopmkhe`\ZZr�����������������We����̼oR`<i������Rgi
+"6417638$!CXZ`koVD9,*)#05EJMC<%RsnBg7,.3?k����������څT���ƽ������������nl��v���������V��������������������ds}�����{GWrbTVE@:;81/23/2CR[`]YUWROLG2%,7Id���������QGS[bfkieaX[ikklokf].
+		 @!T]<
+
+	7v~}woia\YVQI< (n51,*.2110,*--./,.($$'--.//3442).58?765344550+(++.4985/*)()('$# 
+
+����������������¿�������ý��¿������������������������ſ��������¾�����������½��������������������������������������������������������������������������������������������������Ʒ�fWWXWUUVVUUTKBBMT\^__^^__^^_]]^]^__`aaZ^ffdjt~���������������������������������������������������������������������������������������������������������������vj])=k���������������������������q9$5%#"#!!"$##"$#$$#" a������}��r������~D/^pQLIHHKLl���FBBA@?>>@?2'{������������������������¿����t=<742*'$-[�EI�������������~6535?P:74-54?Q?9;;KRIKHHJ+O����n=<:<>=>@?ACCAEe���U?AAAAACB>=<?:620+( %JWR_sziH?:55;B/(&&*5=<7/-022*(1@EGKORTY\ahnv~��������������|sljhf`_babcacbbabbabacbcacc`_`][UNA4)#%K^710565?P7,%&FMLLMb�`BLORTTZf��da^[[][[^b]eqz�~bbbccbegikmnl^!*c���������`2);x�{ymnlgcU/$(�X.%Pdq{�}qHV����{upfaipqsqmmmgeb][YX���������¾������|Jo����Ư[%%!b������}?);=@C@>1(IW\dlmRC8*,(%'/DNKA=S�}�nZ#:006M������������\m��ǿ�������������Mk�yt~�������~c�������������������}`r~�����sQeyydKDADD?=>882ESWYZXSTQOIC/&-;Kh���������MHOZbgjgf_T[illjkjfZ)
+
+eJQ1
+
+
+			;u�~wphb\WTQK<
+
+)`).-(.2000.,,0035662/1/*+.+/44/(.2:=832/12341,++*+1773-,++-.-'" 	��������������������������������������Ŀ��ÿ�������¾�������������������������������������������������ȿ�������������������������������������������������������������������������ϭ{`WXVXWWUSTTNFBHSYX][[\\\]\]\]][^^]^^\]^\]_a]__]]`b`clpx�����������������������������������������������������������������������������������������������������vjZ(;g���������������������������s9#0&#$#"#"#"#$$$#%%#a�����������������~G3`rSKJHHKK_��|9BABA@?>??4#z������������������������¿����u7<851*'$ *\�E'i��(A��������������;435@I988*35?PB:<:MYGKIGF*H����r,?;>;<@BBBB@Bh�����I=BACCDB??;<;863/,("%EONS^tjF<879C:)('(0;970+/22-(+<DHLORTZ^dipz���������������������woieedbbaaacbbbbcbbbcbbbb`a`]\U+ ,K_[.,.456AiR/& >SOKMT~�RIQRSW\j�yb`_]\^\[_^\_^XPdcbcdbbeilnvrkQ
+&���������ur�zKWyzwrplh]L!!'",��ŝ��3Yiu}�xe1%k����ytk_dnrrrpmlkhe`\YWd�����������������gT|����ê����xrh���~`v5?<959:-/PU^fjbODC>?>@@=INH?:%(\����1-:14Ej������������I��Ԑtw������������J_�st{�������|w����������������z����{������ej����~ifb^][\[UQ>JTX\[XTSSPKE,(/<Mm������~��JHMYafihg]N_lmnjlje]&
+
+7k9
+		Ax�~xqh`]XUPH:
+4b*-0/221..-.49;98868551-+-+,/3.&-08<548:73561*,.,+.130,++.0/,&"	��������������¿��¾���ÿ��¿����������������ÿ�������������������������������������������������������Ⱦ���������������������������������������������������ɾ�������ȿ���������ų�bYVXTVWUUTQOG?CMVZZY\\[Z[\[[[]\[[\[[[\Y^[[\[\^]]\^`_[\]_]]_cbgjwx�����������������������������������������������������������������������������������������������uj[(8h���������������������������t8"4'$#""""""##"##%$$"Y�����������������F1]rUKIHIKF^���BCBA>=>@>@6"x������������Ǽ����������������o7>740)&"+W�P1��y@��������������
+@803=C8:>-64:EA;98IXFIHGF*N����o5;<;=>?@@CAd������~G@@EEDC?>==<88630-*&DHHJHnbD<:9>B+'%(,6974--112*(4CHKPSTY_dgpz�������������������������~wqhgc`bccbabaabbcbba`_[YU< -G]]N1+.436Bej;'!9POMNOi��ROQQX^o�l`]ZZ[]][]\``_^^`a_bcabfiklpog7"
+=���������i���dyyurolfX=$#*s�O5.[��`?]ky�x]"9~���wrggdgpppnllifb]YXSq��������ÿ�������X`�������������14(%1hw46;BBC>*9WZ`hk^IEKHGFJHJLMH?7*Dj1'#"341;[�����������ױ=���Xf�y����������Uwnovz~�}��|op�����������������������������vx����ylkfghde`[U>LWUXXVURSOKB()0ASn������z��CGO\_cghf^R[imlkmifX$
+&W44
+
+	Dy�wph`]VUQK8N\08632368;63<?=91+*.0730120001-&../548>AC;483,,--*+.-,-,*.3/)%"
+	�ļ�������¾��ÿ��Ŀ���þ�������ƿ�������¿��Ľ��ſ��������������������þ����������������������������������������������������������������������������������Ž��������������ſż�kUWWWXVVVTSOIAAISW[]Z\][][[\[]Z[[Z[\ZYYYZ[Z\\]Z\^[Y\\[\ZZ\[\^__^]a`][\cekt������������������������������|��������������������������������������������������������vl^)5f���������������������������t>#8($"""##!!!"#!"#""S����o{�����������I-^sVKIHHIE^���>CA@A?@?>>9v������������ǻ��ſ������¿����s6;74/)& !,Z�U.���A����������Ŀ��
+A424>E:67-55<J@89:LYIHHGG)O����m&E==@A@>@Cc��������u<@BCCC?=??<<<:54/-)*HGFA=d]A<9<I8('')1754.*/151*1AFJQRV[`dekw������������������������������yohfddaaa_aba``a`_`Q?*';R\_S.+-/468Elh\-%9RPNQT_}�yQTTZav�e_]\[Y\ZY]_`^^aaabaaaddeilllle&(Z���������Pu��ynzwtpmjbS0 &^���d,E*Qaoz�}tES����~vngghmppomllhea]XWZ���������¾������}Nr����Ⱥ�������t~rfFsO	.)&3.-7" BUX_ioYEHNJHGLKKMNDB-?������6635Gr�����������ܕE�к@=Kw�{rmfVST|�yc��q|��vqsv{q�����������������������������������ƣ�������{o@QX[[YVTSQOJ?&*5BVs������}��FGQ[_dhhe^TZgkkkkhcQ
+
+"1":Hw��xph_ZWTPJ8
+Y_36999:@@A;9@>:/*%'--011334202/+2./109AFD?631--00,+.-*++-02.($ 	
+�����������������������¾�����������¾���¾��������������������������������������������������������������������������������������������������������������������������������ʼ�tYYXZXVWUVWTLA?EOVZ\\]Y]]]][\\[^\[YZ\[[[[YYXYZY[Z[\[Z[YZ[ZZZZZ[\ZY[ZZZY]YX\]^ZZ_`flpmu{���������������������������������������������������������������������������wm^+4a���������������������������o> 6)%#""#"!!"##"""#"L������������������L)YmUIHGGHJa��y;CBBA?==>>9q������������Ŵ��Ļ������¿����z79650)% *W�W(���$>����������Ŀ��?547CO;64'64<QC599GXIHHGH)L����pJC;=>A@Be����������e>AECCB?@BC>;<9920,& (EDE:5WMD=<EC+&((+3530)+155--8BJPRVZacadm����������������������������������xkgeb`ab```a`[Q<>DWa_^a\/*+0546Xkih<(#"CVRPOQVj��uRW[e}}a]\\[Y[ZZ\_^__abab``bdefillkkR"'|���������g2;QSkyvspmh`D!)&}.�����-4^hu}�zk/)f����ztj`dmqpponkied_ZWXg��������ÿ�������lO}����ư������^%G*C^�(&*(KVZ`hmRCEGHHJLKKNOAD$"GGN`|�9 41=\�������������tS�ԹKmho����m9C[��hq��u������������������������������y�������������ư����������DT\][YUSSPLI:$+5B[s������~�|BFMV^ddgd]R[hlkjihbP
+*]`J
+
+
+	Ly��xoh_ZWRMF6[_B8/7=ABCA<;<92+%!&.10-04000.//.4149;=BDD6-./1/00++/0,+,.01.("	�������������������������������������������¾�����������������������������������������������������������������������������������������������������������������������������Ƴ}^XYXXWTVUSTOE<CLRW[Z[]^\\]\[[ZZZ[[[YZ[\[[[[[ZZZYYZY[[\ZZ[Y[ZYZYYZXZ[WZZY[XY[YZZ\YVY[^ZZ[a_aiqvxz}����������������������������~��|t��{q���|~�����������������������~�wm`*7[���������������������������qA4'#"""##"#####"#$"F���������v��������J)[gVJIFEFGBNCI@BBA>?==>@<m������������ĵ��ǹ������������y8:971+'!+T�^>�pg=����������ÿ��?:57BQ=76'63:KD997HSGHIHH.I����s0@=>>BF]������������Y;DCBB??AA?A@;;64/-&-BEM/6VNA<AI5)(&(1812-*/261.2>FLSY\`abhu����������������������}������������|tk_cb`_`aU8=[```_`bcB()5>:4\njmZ,'"!7SYVUVV_v��qW_k�s`][ZZYZZ[]]]aaa`a`__`bfhjjjhi= !>���������t�uWSn|xvsqmj`6$&!�2h�xX-sQBanx~}vY$=z���~yrhcklqppmnlheb[YXUq��������¿�������Za�����ħ������m���Vs"2SV[bjkNBDFGGJKKMNH@E&55J{�������������Ul��ͪ���tmowh=@W��j��xv��������u��������������������hs������������ĭ���������wGW]YWYUTPPLH6#,6EZu������u�y@INWaffje]PYgkjiiibP
+.):
+
+		
+
+!Vx�}umf]YTSNE5][J@02?DCB@=;<9/)%%,00'(11*&+/.,1.6?B>:=@4(/57310,'&++*/120+'
+���¿���¿��ý�����������������������������¾�����������¿��Ŀ���¿��Ŀ������ľ��������¾���������������������������������������������������������������������������ƿ��ƻ�bQUXXVUTTTQMI==IRUVY[ZZ[\]Z[XZZXXYZXYZYY[Z[[Z\\Y[[Z[YZZZ[[[Y[ZWXXXYYYYYYZYYXXWWXVXZZZXYYXWXWWXYXXZ\dbfkpsry��x}������������~���}�}~|}�}yw�}z|����������������������~wla-4[���������������������������sB1'#!!""#"$##$###$#H������������������Q")\gYNJHFHGI}��M@BCA@>=?@;j������������ʴ��ļ������������u9;;61*'",S�`.���&;����������¾��B746>L;45)74;IA588KXGGHGI,F����o2C@ADY��������������Q@DA@>A@@@>?=<953/+$,@FE-6SKA?JB+)&(,511/*-3872.;BGNV^a^W\x������������������������}z~�������������zpgaca_]]^b_aabd`T6+1IMP_hihnF/'"#5RXYXY\g~��v_p~h^][ZY[YZ]]]_a`aba^_`bcfgijigc)$b��������~<D]qo}}xurpngN%#���mW�KY3'Tdq{~}sER����}vljmlqronkmlgc`[YX\|����������������~On�������������.Qf|��d
+3$).2686!:SUYeleGBBEGFFILMNF??*8>a��������������B���ɼ�������{sml��g��tx��������P��������������������piv������d����˾����������lHW[ZZWTSQNKF0#,7E_z��������r?KQXaghoh\JWfjjihibJ
+
+*\[G	
+Wy�{tmh]ZUSND1nYKE;6>CCB@;::60(! $*-.%#(0-)-.+(**:CC>744/-1342.-/*%#%),..*'$
+������������¾�����������¿�������½����������������¿��������������������������������������������������������������������������������������������������������������ɿſ�lRYXUTUTTTSQI=8CQTWXXYZYZ[ZZZYYYYZYXYWZZYXZZY[[Z[YZ[ZYZ[XXYYZXYYXZXY[YYXXZYYXXYWWXWYZZWXXWVVXYYZZYYYWZZXXWYXZ`deltx~�������������}}~��z{��|}���|������������~���������zk`,1X���������������������������r@0'$#"#""""""##""%#D������������������P"%ZhYNLIHHGV���Q?DB@>=??@9d������������ǲ��ź������������v6<<71,&" 'P�e8���/9����������þ��9645;<744*759;8698LWGGGFJ,K����n:B@U����������������MBABAABA@>>>=:761.*!!,?F<%7XG@JO.*'()04/1*)059508AEIR\`^SWw����������������������������|yz������������|smjecdba```aa^`O-.I_`ccfefWB3(#!8X[[ZZ`q���~pza]\ZXX[Z[^]]]]^^_^^``adggikklZ$)����������yjI+-i{wtqon`<&%dZ+�ɨ2Yiv|~|l/&e����{ujflnpppnkkjeb^]VVf�����������������lNy����ķ�������[M2t7	4@D?><3 "DSW\hi\FDFGGGFGKLOEE<09K�������������ܩA���ǽ������������|]��nz��������Z{����������������v���rx������g����̼����������_FWYYYWSRQNID($-8Ib|��������oDNW]dhjmi[PUfkigghcE	:LF?#
+
+
+	 \z�|ulgaZWSPD-xUFA???CCC@;:;72)##(+/,#"%,010,()+*:=DA;52.,/1/.--.,)(('()*($#	�����������������������������������������½����������������ÿ��Ŀ�������¿��Ŀ�����������������������������ǿ�������������������������ſ��Ǽ����������������û��μ��˽�v]TUXWUTSSRPLD<AISWXXWWYWWWZZXZYZXWXWWZZ[ZYYXWWZYYYYX[YWYZYYXWYXVWXXXYXZYWVWYZWWXYXWXXXXWXXWWYXYYYYYXWWVXVXWWXVVUWVXYX]bchpuwux��vy}}vw{|ytw{{tu{�|u�����������������}���~�wmb.3T���������������������������uD/)##$%##"!"!""#""$C������������������R##\fYMKIGGJU���A@ED@><==>9c������������Ŵ��ö������������v4;:41+%!$N�f,���#8����������ÿ��780567784*92358878!K[GGEFG,G����p*ET�����������������}D?AAB@BAAA>?>9832.(!-@K9%>VDDV=*(((,5.-,'+27724?FIQYbcae|��������������������������������}�����������~vjdgegcdec^WA-$%4gd`_adca]OPFEGY_]\_aex���xr`\[XWYYZY\\\\[]^^__`aaeffgjif@
+<���������|���u_xwusrojY.##KeD_myvV>x���yskagmonnmkkgaa^YVUp��������¿�������Z[�����í������m��d�b&771-'%#*LSX^hjUECIGJIJHILK=I*5Cf�������������߆J���ĺ������������pC��lw�������o�����������������������������n����˷����������ULXZYWWTRPMED)%-9Mb��������iFOY^cikmjVMWhljgfgcD
+
+FfW7(
+
+		!Y{~|vnic\VUNC+
+ sL??EB=ACC?64974.)).01.'#%,11,'%(.*8?>?<641./-++*,,+)*,*)**+'"!
+������ý�����������������������������Ľ��������������������������������¿���ÿ������������������������������������������������������������ľ��������������������ž�ǫ_ZUTVTUTSRSLC<<EPUVXWXXWYVXVX[Y[YXWWVVXXXXXXXYXYXYYXYYZYXXYYYZYXUTWWXYYWXXWWYZXVVWYXVXXYXXWXVVXWXWXXYXXXXXWVVXWVVVWVWWTVWYY[ZWV_bbbhmootvsrotsyt|xxvt���������������������~�~xme.2L���������������������������wJ/)##$%$###"!""#!!"C���������v|�������U$#Xe[MIIIIJV���=@DCA==>>?;c������������ȷ��Ż������������~89852+&! )P�g���5����������¾��
+9:1276576)72434758 JZGGHGH,B����pC�������������������qB@BA@@AAA>>?;:761-*0@N7'BPFTP0+)()34*,)(.4842=EJRZ_War�������������������������������������������������{re[Yadfec]SI7-Qfhieed\]VOMS`ccabbcfp���ri^YWVVXYYY[\[^^^^^__abbdfghiib,
+Y���������Rcvoa|vuqqngQ+#!%Rdp{sDO����{vtqompponnkhfb_ZUU\���������ÿ�������Qk�����ä����������lej0<?@@;7'1MSYbjjPDEDIIILIIMJ?O%%>S���������������b\�����������������i\ruqw}������~�����������������������������v����ɵ����������NTZZZYVTRPKEA%%/=Nf���������dHQ[_elmlfXR\iiiiiiaB
+
+8HEO*	
+
+%4ART@#^~�|tojd^XVNE+		#l=7BHF@BDCC=87121-,1123.)%)--+(&),*99:=<:6344-(()()++-.-**,+(#
+�����������������������¾��¿���������������������¿���������¿�����������������������������������������������������������������������������������������������������^WUVTUTSTRQMF=:BKQVXXXWWYYYVXWWYWXWVWWVVWWVWWWWXXXUXVWWXYYYXWWWXXYWWXXVWXWUVUWWXXWUUVWWXWXWWXYXWVVVTUWXXXXXYXWWWVXYWXWVTTTUVWTUVWVVVXUXXZ_`achqlps~uuwz�����������������~���}�ynf33L���������������������������tH.)$&$%&&%%###$$$$"8���������w��������V&#Yp^LIIHIHP���L?EA?>=>>?<^������������ɼ��ǿ������������x38852,& 'K�j4�L�85����������ÿ��	782344472&60234697"IYEGHHH,C����q W��������������������j@AB??A?==>@=<;82..&0BI-*JOP[</*()06+,,'*0896;DJRZbZb����������������������������������������������������~p]TSV`bupjf]YXefjjie^MHMUX[^dccdkw�tn]YWWVWXXX[YZZ]^^_`_^_abcfhhhiX!&|��������~X@51:ezvspnmcF+!#1Zht}~~i1,c����xtsqroopnkmkhea[WUVe�����������������nP{����Ǿ�����������YvM
+00&23.7*;SUXdjfL@EFFGGGHJMICR-On���������������Us�շ��������������f��oszzullojcl�����������������������������������ȱ����������HSWYYXUSPNKE= &0=Ok���������_JR[_fjknf[U\kgfihf];
+-D>2	
+"B]��}|dQ%a~{soj^ZXUND*	
+
++yJB?GGB=@AC@>=3//11/04763/00/--)+)-B7479853550,0.,-//1...,,,+($
+	�������������������������������������������Ŀ���������������������ÿ�����������������������������������������������������������������������������½�����������ǿ��uVRXSUTUTSSNI>:?KRVWWYXVXXYYZYXYWVWWVVVWVVXXXVVWWXXXWXWWVXWXWWWVXVUVXVXWWWVUVVTVWUVUWUUWXVWVWXWWXWWVVVUXXXWYYZZWWVVWXUXWWTUVVVVTUUUUVWWUUUWXXVWWX[[^aagkm����������������~}���}�znb20N������������¿�������������sJ/8?</'(+%'*).163)" 4������������������Y(Tr_PLIHIIS���FACC@=>>>?=W������������ȭ���«�����������z3;:41,&"(H�o3�-v73��������������573343343(9125779:HXCEHGF,J����r2t��������������������a@A?@?>=??@@=<:63/,'5IB'+TSXI/+)),50*.'&-489;DKO\fjt��������������������������������������������������������znbTIMKVfnrkjmlnkheVFHOSYZbecdgkn_lVUTUXWWVXYXY\]_^_^]^_`adehhfeE8�������������rpzxvrpnmZ@%$#C`ny~~zY 6u����xutsqpppnmlhgd_[VVYq�����������������X`�����Ŷ������n-;<G[~)	,32.'(,!!DVV]ekcIBFECEGFILOHGO:^����������������K���u>XX{����������d��tv������������������w�������u��sgz������i�����Ư���������|HVXXXXVPMLHA6 (1>Rl������{�TMS^bgjkngZU^lfgiigd86N]h
+
+	2\�������`'	)a}�{rlic[YTMB*
+
+,rLF@EFD>:?A?>?92--/0377767664..,*'0F?>9:9889:5110/2574/0110./,'		��������¾�����������ÿ�������Ŀ���¿��ÿ��þ�������ÿ�������ÿ��ÿ�������¾�����������������������������������������������������������������������������ſ���ġxSNWXVUWSSRPOB7;DLSVWVTWXWVVXXVWX[VVWWUWVWUWXWVVVXWVWWWVXWWWUWXVVWYWVYXUTVVSTUVVUTVSUUUTTVWWVWXWUUUVVUUVVVVVXXXWXWWWWVXUWVWTVVVVVWVVVUUWVWWWYVWUWVWWWXZZZ[V����������������~}�����yof3/L����������®���������������xO=~SR]<WmPcacqUfo8"3�����s{|��������V)Sj[NLIGHKL���<@CA??==??;S������������Ʃ���������ÿ�����z1<960+("(F�r* 8������������Ï782363552%61345698!DWCFGFD+@����n1y��������������������R;?A?=>??A>=>;8710+&7M?">g\W8/+))/6+-+%*08<;BLR_hr�����������������������������������������������������������xoaTNHEGKT^afnpsrh]QKIMWbdefhf_bTQSUVWWWWVYY[][]]]a_``bedggf`2
+
+[�������������}|{xtomlfS>!#Tgv}sC T����}vussqqqnlkjheb\WTS\���������ÿ�������Ep�����ǭ��������nO,9r'8:=??4 (LUX^fjYDCDEEFHHJKNBPE -/"!&N|��������������ۧI���Y2McpbaefROQz�i��qz����������ÿ��}��qh�������o��fiw������d�����ë���������lEXXYVUVQPOII3'2AVn���������MLT]chklleYO]kgbikh`:
+
+.-+1	
+
+
+	0�����w^M*	)d��~pig`YXUNC%
+*lPHACFED<::???;3/345787666773/-+'$0MIFA@BAA?>8545557642310/0/+'		���������������������������������������½�������������������������������������������������������������������������������������������������������������������Ͳ�`TWVWWVUTRQLC:5@OTUWWWWVWWVWWWVUVVXVVUUUWVUVVTWVVVWWVWVUUVVWUUWXWWWWWWXXTTTWWTUUVVTVSVUTUVTTTUTVUUVUTUVUVVUVWVUWVUVWUVUVUVVXWVWVUUWVYWUWXWWVVXWWVWYXWVWYXXXZ�����������������������|qd6/I��������ͽ�����������������{NK�:M�Gq�e�iu�UGxP#".�������zw��������Z*Ti[NLHGIKJ���<AA@>=<>?>< Q������������µ����������������y1987/)'#)I�t":����������ÿ��782243657'91244688"EWEHGEE-A����r#>����zx���������������I;?@<?@?@>=>?;;72-,!"#@R22ig`B4-)(+4/*.('/6<=BMWclw��������������������������������������������������������������|si`VQLIECDFL[gluqje`Y]^]beif[K48@JOSSXXX[ZZ[Z\]^^abbdeifeV $|�������������~|{uspng^N4A4^n}��~h/(g����{wsrttqppmjigd`ZUTXi���������¾������mAw�����Ƨ����������ndY.80(#)3*2RTZ`ijVCBCFEFGIKLLB`B$2>8()6g���������������ۅP���OHA\����]<CJy�to��s|�������rg���������y�������q��|mx�����b����������������^GXX[WSRQQMIF-")3CUm��������KLU]cfjkjbXR`oohiifb6
+
+(:D
+
+	
+	
+$67.)"!!	-c��}rjgaZWSMB!6pLJF?GFEB>67:=<955887888667750))(%5MLIHEEFC?=::9876464221/.00,$		������������������������¿��������������������ÿ��ÿ����������������������������������������������������������������ƿ����������������ȿ�������������������ơgTRTVTUTRSQNI=7=HRUVWWWXWWWXVXXWVVVXWTVUVTUUTVTSUUVTTVWWWUVVVUUVWWVVUWWVWWUUUVWTUVUUTVVTSQVUUVTTTUUVUTSTTSTSTSUTSUUTVUTTTUUWVWUUWVTSUUWWUWUUVVVWVWXWXWVVWXUTVS�����������������~�����}pa8.T���������������������������wPB�P��Y��^cauO?FC%!+������������������Y(Mc\NKHHJLM���=BB??===>>=R������������ø���²�����������z 3:950+'$ 'H�x:����������Ŀ��581255563';035666:#@ZDGEFC-E����v!I��zv����������������~G>??@A@?>>?B?>;81-) $MP2]pkP40,*+02)-)&,18>ALZgoz����������������������������������������������������������������yqh`YSMLIEFGDKQ\fikigeegjhT+!)9NRKSXY]]]\]_abbeidcD8�������������~{zvrmkeZL+!$j/'Bgv���T!:z����wtrrrqppnkihfc^ZTSZq�����������������Y]�����ȿ�������������6*<?:#&"9VT\ahgPCDDFDGGIJMK@e<!#(&%M�����������������n^��ˏ�xhfjv�l8=O��o|��t~�������e`���lqn~��bq�����������������w����̻����������WNVZ[XTRPNKIF( (3DYq����¸{��HKV^bejkk`VR^lnjhfg`-
+	4KB	
+
+
+	 ,+#(+*)!
+	4j��ypjea\WSMA KjKJIACEEC>82/3:823788998677762..+(;KMKKGEFB?9998774243122111/*#
+������������������������¿��������������������½���������������¿������������������������������������������������������������������������������¾�������·�xUSUUVUSTSQQK?9<DPVWUXXWWWWWXYVVXVVWVXXVXWWUVUUUUWWUUTTUVWWVVVTTVVWVVWWWVVWWTUUUUSSUSSVUSSSUUTTUUVTTTUUTUTUSSSSUUTSTUTTSTSTVVTTUSUWSTTSUVUVVWVUVVXVVWVWVWWUVUUTO����������������~�����|ne8-`���������ľ����������������zO/IIMLCHC4..(*+)$#"!,����w�~���������[,Je^LKHJILQ���ABBBA>>>>??O������������ĳ���¯�����������{"5:951+'# 'G�y:��������������4;2145563&6/368788#@[CDEFE+A����s!S�u��~���������������w@?C@A@@><?BA@=962.&=TC>bt];2.+*-3.*,'',4=@KZgt��������������������������������������������������������������������|xqjbZTOMLKKJOPONPNUiqrpSA1(U^0:GUY^aacddeff]6&W�������������~|xtpmhbYI%"-XU=:5/+!#Rhw���t?!O����~xtrrrrpomkkigb\XVQ]���������¿�������Pp�����Ƕ������������� -:AB5) #BXW\dlbJCDDEDGIGJLIDg"+i�����������������Zw��Ⱦ�����zjd\ZX��P~�|q|�������i{���s||��{kn�����������������������ʶ����������IQZ\YWURSOKFD! )5G\v���������IOY\afjjg^WV]ijmied_*	
+
+1F3		
+
+	+,!		8v��ypkfa]\TM;
+TcLIGC>BDD7(+4772059:;888776764344,=PLKHGFDC?:<;97974543543430*$�������������������������������������»������������������������������������������������������������������������������������������������������������������{YUUUTTTRRRQOD75@MSWXXUXXWVVUWVWWUVVXWUWVVWWUUVVVUTSUTUUUUUVVVVTTTTUVUTUUUTVUSTTTTSSTTRSTSSVVUUTTTTSSSTSTUTSVSSUTUTSRRSSUUTTUUUTRTSRTSTUSVURTTUTTVTTUTVVUUTTTTSRRM�����������������~����|pe;-g���������ǹ����������������zP+/$$$$##""#!"##$!!!)z����|�y����������`(Gb\MJGHHJM��}>BABA>?@??@J����������������Ž������������{9:861+&"(B�{9����������¿��182103342&90336776#>WFFEDD+A����q#d��|uz��������������n@B?>@A@=>A?@@>9630)$DI<6fnR91.+,23*-)#*1;?K[hy�����������������������������������������������������������������������ztmg_YXUQPTTSSP58P\fI pS!8]s`MB6 -;EW`fiffV!d#%w�������������}zwrnkf]VA  "=097>VOH'</[kv���e,)_����zwtrppqpomkihfc[UTRf�����������������hO}�����˯������������u5%2/358=)*LVX\gm^EAFEFDFGGKMCN`8����������������ܴO���ǽ������������~Od�qsz~������u�����}~tk�w�����������������������ǲ����������CRTVTTVTOMHD<)5G]w������v��FOW]cfgid\STU_bhigeX*
+;GD				
+
+	 %$"  	3_{wme^XPLD;-OaCEB?<BC@3!")00379:99777446655569-<OMJHIHEB?===<::97664554441,%	
+����������������������ý��������������������¿���������������ƿ�����������������������������ǿ����������������������������������Ŀ���������������Ŀ��ʸ�hOSTVUVSTRTPG<8=JRSUXYWVUVWVVVWUTTTUVXVUVTXWUUUVVUWWVWUUVUVTUUVUUUUUVUUUUVTSUTSUUSUUSSSSSSSSTUTSSTTTSRVTSSTTTTSSUUUUVRSTSSUWUTSUTUTSSRUSTSTSQSTTTTTTRSRTTSSSRSRQQRL������������������������{pf>.k���������������������������yT,-&$"!#!!"! " !"!"!%v�����������������_-Hc\JKIGGIGGCEBACAA@@??>@ E����������������ƹ������������{ 58973-(#(Fր6��������������293323342%61335677$@WEFFEB+@����v2��{vvz|��������������d<?=>??<=?@??=9662-%)@N86bpH61+,17/+)$'/7>G\l��������������������������������������������������������������������������}{wlea]YYYW[[R0,:EE*,nC?\L50%=Yck{29re^I*f.9������c6Mn{��~�}xvsnje\X694 !-8@P?;Afox��zT!9r����xussqononljgfd^XUUZv�����������������KV������ͤ������������M? 8;94//(0TWY^gl[GCHGFFGGHLM>\R#N����������������ޞM���Ƽ������������yd�ogu{wxqwule������qn�kf�������|��bh�������������Ʊ����������BQG:?16YNJHE8#+9J_y���������CNX_dghjc[RRTdvkhifU%
+
+
+
+	
+
+C_,	
+	
+	+-55 "	
+:KTFJA;>9==990Yc?D?=@BBA?9347899898854721675436;/>LKIJGEC@>>===;98887677531.,'�����������������¾���ü��þ���¼������ſ�����������¿�������ľ���������������������������������������������������ľ��������������������ž������������sWTVUSSSSRSOK?67DPUUVWWUVVUUWVVWVUUTUUVWTUSUUSTTTTWTTWUVUTTTUUUUUUUUUWVUTUUUSVUUUTSTUVUTRRSTTRSTSSSUTTRQTTRRQRSSSSTSQSVSSTRSSSUSRSUUTTUSTRSSRSSSSSTSRSSRPPRPQPPOPOOQS�����������������������{nk<0t���������������������������wS)-'##"#""!!! ""!&q�����������������b/Ee]NKHGGHGFCDA@BAA@?=>?BA����������������ȿ�����¿�����}19863,' (CԂ
+5��������������.92433344"62334677$@[FFDDC+@����um�usx{}���������������D??>??>==?@@?<:930,#)GP/EqkD70*-41*+'&+3:E]p�����������������������������������������������������������������������������|xqmgea^ab`O*+>:;eZ[e�jy7I&.C(,>E ciX7*2'&%&#!S�����}wdJ;A:KaisvtpmhaZU+!']<(3A'&/"!Rkv���n=N����|wsrrponnllkeda[VSU]�����������������y8a��������������������)"T%.69<4!=VYY^efQBBBDDDGGIKH@l?'^����������������ւY�����������������o{��o{��~}���z������u���k����������\lz�����sr�����ë���������i9>+1I44QJIE3#-:Kb{��������}FNY_ccjl^VNKNZllhhdT"
+
+8e=
+	
+
+	@ap`aRQIKHG>7.eeHHFEFDBB@?:::88986995573365874?F2BLMJIIDCA@?===:9888877651/**)($ !$���������������������ÿ���ý��������������������¾�����������¿����������¿�������������������������������������������������������������ÿ�������¼�}TRVUSSTRSRPMC:8BLRXXXWXYVVWVVWWWWVVUUVVVVTUQSTSUTTTUUTTURUTSTUTTTVVUTTUUTUUUUUUSTRTSSVUUTTUUTSSUVTSTTSSPQRROPRRSRRRSSSSRRSSRRRSSSSSTTTRSRSRRSSRRSRSTTSSSTSPQOQPOOQPNRRz�������������������~��}pk@0q�����������¹��������������yV)-%#$$""!!!  ! ! !o����}���|��������d/CdbOIHHFEECCDB@AB@@=;=??$?�����������������Ÿ����¿�����y 19752.' (@҅
+5��������������/:2344357 93223587!@[FECDC-?����q,}{wxz}}��������������U?>?>>?==>A@?=<;621,"!/NMDkzeE4/-04,)+&(.6D]p��������������������������������������������������������������������������������{vqnkihid;%83' C^^#hJlNJucn~~On}�|`cU%+-*;=0.*''#$x�����__��e|[C<532Gfke`[O# 2Q6GPNAD8 /\iw���c*/b����{vrrrpmnnlkjgc`ZURXj�����������������W0g�����ȴ������������#633/-*01#EXXZ`faMBCDDEDGHIJIDr31y�����������������[n��ʯ�������������m���r��������z���ſ�����z�������u���y}�����b^����̿���������`)%1OSY2>IGB,$.<Mf��������uDLX]bcii]THDHNdiggeS
+
+3%	
+
+
+BJWIWDE@=8714-+&neKIHIFDCBB@<<<86897:85576777988R]OUMNJIHEBA??>>=:98996620./0.1436679:898787����������������������������������������������������������������ÿ������ƿ���������������������������������������������������������������������ͻ��eVTVUSQQRQPNH;7>IRUUXXXYWWVWWUUSUUVVVUUUUTVUUTTUSUUTUSVQTVRSSRRSSTRSSSUTSUUTRTUUTTTSTSTUTTUUVVUUTRTSSSRSRRSQSPPSSSRPPRRSTRQSRPPPSSRTURRSRRQRTSSSRRSRSTSSSSQPPSPOPPPQPOQMx��������������������yqi>,i����������¯���������������{S'.##$""#  !  ! ! n�����|}����������b0CcfMJHGEDDBCA??=><:86641"9�����������������­�����������|48763.' '?ω/��������������+;243534441124796#B]DEDDB*=����rO�yuyyyy�������������J>>?>>><=<=?@@=;9530) 3XaXy|e?50-40)+'%+3?Ob����������������������������������������������������������������������������������|xvrommR$4-EQMJc+/g1[qa`mFJ�`$�JU�=lbG"/+1o�{pWK=2*&#$#!!!<������pKGe$j�xTh_D6`gb\^A !%-DGGHEK9?0 B`ku�yM<r����xtrsrqonmmjheb^XTQXs��������¾�������=9i�����ǫ������������X04:<AA:-LTX]ae\JBCEEFEGIJJGOw#9������������������E��ͥ[gg�����������k��yw��������]����÷�����������������������ui����ʻ����������P/5=K?C53@BC""+;Og���������oCNW[bchc[QEB=?QjgffQ	
+
+
+
+
+
+G^;
+		
+
+
+	 XqfNKGE@EEEHH=!WcI-! p^IHHHFFDBA??><;::::9778766687<GTNBTMHBEE>==;:;97578=>=<A@FLNPUWYZXW\ZVXYXVP����������������������������¾������������Ŀ��¾��������������������ſ��ý��������������������Ŀ����������������������������������ľ�������¾�Ľ�kXWUUURRPQPOI?47FPWXWVYXXWUUVVVTVXWVVVUTTTTTTVVVVUTUWWWUURUUTUUTTTTTSTTUVUTUVURSRSSSSRRSUTSTTSTURSSTUSSSQRRRSRSQRSSSTSRRQQSRSRPOPPTSRSSRRRRSRTUTTRRRRQRRQQQRPQQQNMPPOOOOPOv����������������}���~�zqgB0g���������̽����������������|W!(2&##!!"  "   !!g�����������������j-:[_IDAAA@AFF;31/.)'$##$'!;�����������������®�����������{'37884-% %9Ό	 /��������������-+;2343320#92255787#A\BCFED,?����sZ�ywywu{�����������Z@A@?;<<=?>;AB@>;:872.) =qsQz~\:4025)+*%*08EHa������������������������������������������������������������������������������������|vusk70.G9'M[a]Mw;Op(�> 22a~UmV8*3(@y=8QhotrmUE=/(*+U���������yDMIOerZ@ggb\b3"$'!07H=QK:"Ohrz{k5$L~���wtsrqqollljgea[VSP\�����������������t*Do�����Ť������������7,'-&$'.715TXY_cfXIACFEDFFIKJBZmJ�����������������A���}<~now|��i`k��vm��pw��������N�����������������{������������|����ɶ����������@CJ+###%";=!,;Qj���������kBOX\fc`\SF8459G`iieJ
+
+
+
+ XwO		
+
+	
+!b��wke_^YTNNOJXeR3("#|ZDIHGFECBB@A?<<<;:867668;>ABDKLPFA?CF<IF=8:99?BDHIOTT]_cbeecbcca_^_c^`_^^]Z���������������������������������������������������������������������������������������������������������������������������������������������šsXUURSTTRRPLLA85@PWUTVVVWYWWWWVVUUUUSUVUTWWWVUUVUUVVVTVWVUTSTTTUSTTSUUTUUUUSTSSSSQRSTRRRQRTSUSRSUTRRRRSQSSRRSRSRQQRSRSRSSRQQTTTSPRQQSSSSRQQQPORSRRQRRSPPQQPQQRQQOOMMOPOONNORu����������������~�~}|riD-h��������н�����������������zU )/(%"!!"!! !  !"`�����������������f.8?0)$#!#+[d-!'"7�����������������ì�����������{(4;997.&!(<΍
+-��������������(92342335"!61236786#<\CEFEB-9����v f�uusty���������eAB@?=;;>=??;=>==::<@51-*!E}s[}�\=2050',&%,2<=,T������������������������������������������������������������������������������������}zxsQ-!4>:;T7�T_w!)Krr=WeR%)-*Nn''1P1FEFXfw`++-w������������zjNFB?Whd^_]%#)AQ>-&0!3\o}���Z$0^����|wusqqsqnkjhcb]XTMSg��������¿�������Y'Vw�������������������)/99<::;)=XXZ_chVEEDGDGHEGKKEma%X����������������܋L���jC:KxumgSCDIax]_��jw�������d�����������������~��{��������������Ų���������+:O+5.)&$@= .?Sk���������dDOZZddZZN?,&"#8]hghG
+
+
+		
+-09			
+#R��{rnha]VMNMC!''($"#wzYMLFHGCCCA<==><>?@?EEHJMORRPSUYYYZ[VXYXZ\Y]]X[^_`[_[\\^_````_^_`^^_^^[[ZXS�����������������������������������������������������¾������������������������������������������������������������Ŀ����������������������ȳ�[QUVTRSRSQONB94=JSTVWWVTVVWWWWVVWXWUUTWWVVUUVUUUVVUUUUVWUTUTTSVVSSTUTTTTTSRSUUUTTTSTSTUTTRRSSSSRUVTRSQQRRRRQQSQQQRPQSQSSTSRRPQQSTRSRRQRTSQQSSRRQQPPRROQQPQQRQPRQOMMNPPPPNOOOKj����������������������}slG-o���������ſ����������������~W#(1(&#!###"!  !! \�����������������e6,2#'Z_.!'#4�����������������į�����������)7;974,'#'<̐
+*��������������':1234447  50244675#6WCDDDA.:����u/iuoqwy{y�������mFCB@====?>======>;<AB940/,H�kX��\>37:,*)$).7F2&6d�������������������������������������������������������������������������������������yi. 'Uir[)e��=gaG-+.k[$(?h:jS-$%WZ#(<������������������echd_bN$.U@>RTM3#Ceu���|I ;l����zvusru{�xlggd`\XSPWw��������¿�������92b�����ʶ������������p)(()-35"%FVY[afgPAEHGEGGFIIGBxL!#!-k�����������������k^���ux�bsy��oACBkuTUzxrw{}}~���z���������������������^p������z������î����������&CRIH=4388?;%/?Tm���������YCPX[de[^H-'*)%-XghhC
+	
+(f[C
+		
+
+	$Ru~{tvr`VNJHE0"#(ctcRUYVVLLLQVOLMO\RXXZ_]afgcilheeecceba`_``]`a^^\[\ZYXUUVWYXX[Z[\]][XXXWXUQR�������������������������������¿�������ļ��þ�������¾�����������ÿ�������������������������������������������������������ž���¾����������bPORTTQQPPOLF;3:FQVVVVWXXUUWVUTUVTTTUTSVVWVUUUUSTUWWVTTUWXVTTUTVVTSTTUUSUUUTTSTTRTTRSTRRTUQQRQQSTSSUTSRPQRRQQQPPQPPONPQPPQRQQQPOOSRQRRPORUTRQSRQSRPPPPQPPOQQQPPNONMMLNOPMMOPNH9U�������������������~�~slH+r���������������������������z]#$1)'$#*-*(&" ! !! R�����������������i5'/!#<5%  ")#7������������������������������{%3:973.("'<˒	-��������������'<124546:$10135565$8YCECCB/8����r?sosvxw�������JCB?>>@?=@>><<=?>=>@CB=830-( Q�kQ�[?:>4'+%&*3GY9(+Ci������������������������������������������������������������������������������������j'0$(;IR'"7sw��Nj[:'5'/zV0/F[;pJ(&.fA#(T�����������������}oe^_g5'6X MLQR>0%!Qjy���i3#O���~yvrqqy�}~mgfc_YUPP[������ÿ���������q'?j�����ʬ������������I&&,'$"-LWY\_edNCDDEDFEGJJBM�3!" "!7|�����������������Ty��ɰ���~tlk_HKDikZ��nszxnllmqqy�����������������z��z`s�����<O����������������tF(PZM;+9GCA/%0@Ur���������YBPU\caZV:"*7>8,Vhij>
+
+
+
+$	
+
+
+Ihr{tgUJD@=<6*!$'$#*8UWSKKXXTRTVX_\XXY_Y[^_dcdeghedfdcdcccba`__^_`_`^[ZWURONNOOQONQRSTXVUUTSOQPMM�������������������������������½��½���������������¿���������������������������������������������������������������������������������Ĵ�hWSTRSTRPQONI>46ENQUWVUVVVVVWWVTUVVTSTUSSVWUTTUUUTSSTUTSSUVWTSTTTUUSSSUUTSTVUTTTTTPPSSTSSRSSQRSSUTTSRSSQQRRTRRSSRPOPPPPOOQPPQQPQONOSQPRQQQQTRPPRRRRROQRPOOPPQQQONMNMMLKLMNONNKB1H����������������������~slJ-p���������������������������}`*$0*($?cH-20*$   !!R�����������������m5)4%,E/&"!$)#3����������������ǽ�������������+3::72/("(7ʔ	-��������������$;2356434!1.045656%:\CCDCA-9����r"Xywvu|�������NCCC==@?@?@@?=>>?==@BDC@;42/-&%X�eY��_@BA++*().;ZqVC2-:\���������������������������������������������������������������������������������d+%%53DZZJLQV)2JYcV).6'0f}zlcVD]D-+:c,%+u�������������|{wqke_cc%!*?Bve)_DHIGG7!1Zm{��tW*0^����|zvqpoz�~{ohfc^XUQRg������ĵ���������P*Ns�����ǡ������������-'<:<<@=(4QX[]aldGCDDCDFEILK@[�E������������������B���Ź���������zhnej��vw������������þ������������l���|w������")t���˿����������dJT"#G9"0BEC@#&1?Yt���������TGPW`d_[V4%4>61\lkh>					#2DNVZI14:<>CBEDF?<:5:DHUX_gfeiffdbabbc```a`baabcdb`bdedcddcdccaba^a`^`_\\[XVRPMKKKKJKKLMMMMOPMNNMJIJJI��������������������������������������ÿ���������������ÿ�������Ŀ�����������Ž�������þ�����������ȿ���¿����������������������������ʦzWTSSSRRTRQPLA96;KUUWXVWUTTWWVVUWUWVVTTUVTUUUTRTTTTURTTUSSSRSVRTURTUTSRRTTSTSTSSTUTTRQRTSSSSSSRTSRTTTUSSQRQRSRRRSRQSRQQQOOQSSSRQQRQOOQOPPQPOQRRPPQRPORPQROPPPOOOONLLMKMKMLMNOMG;'D���������������������~tkJ-s���������������������������y],$0''.g}c=C:2)!  " T�����������������k4)9))]r0("#%*%(�������������������������������(/::75-)#*6ǔ
+*��������������"<1455527%2-024556%3\DDEDC/;����p2fzvw�������U?@AA>;?>>=>@>>?>==>>ABD@=750-*%(b�cU�~bGK8(-(&+5C\zzbRD82Ik�������������������������������������������������������������������������������M=BADEBMS8'a`G/.()*+8I_nwwkWMYY$&<��������������}|zvtojd`kS #.X0$�S4�S-%=@;^lw~}kD ?n����{xwttu|��yjfd`\WTOTt����������������~45]x��K���������������.51//1/">SY\`ai]BBDDEEEGJJJ@ly$R�����������������?���¹����������tiaz��uz��������������������������u����������~\'R}��Ϲ����������ULVN4@7:CEDCA#(3C\v���������MGPT]\__S5!-.Cionh>				
+	$-:GNSOTUQQY\ZWZ^\]`a``^[ceibffffggefcbbacdbbc`_`_`^^^]^_^^`a_`_ba]^^___^\\YXTJNKLMNOMMPPNHJIJKLMLHJJJKKKP��������������������¼�������»�������ý��ľ�������Ŀ�����������Ľ��ÿ�������º���������������¿���ÿ��¾�������ž�������þ���������ŷ�^QQUTTQRQPONG919EMQVUYYWWVVUVWVTWVVVTTSUVUUUTTUTTTUVWTTSTTVUTUTSRTSUTUSSTTSSTUTTSSSTTRQSSTSSSSTRRRSTTUUTSSSSQNPQSSRRSSRRQQQQTSRQNQPQQPPORPPOPSRRRQRPOPPQQQPOONNNNMLLLLLNMMNONMC2!B����������������������ulM,s��������ʽ�����������������}`.%0*&3HUey�V-(# !!L�����������������m6.;)%b�i/&!   !&&*#,�������������������������������*-8994.*"!+7Ĝ+��������������!<1345323%1/034676&9\DGFFE/8����q%M|w}������]?DAA@>>=>?>=??=>==A?@BBCC?<620,)$ 0j�\[�}eLI,/1+-2:?@Ji}scZH@?Zw����������������������������������������������������������������������������7./'HYW'!1<m\8",*,33-'*'%+4EWkvH(&R�����������yxz|zzurnhcbp<%/aC &�-Z�K*.#J`pxzzb7%Oz����yuux|����vhfc_XTQM]�����~��~��������f'>k��a U��z�qe��������X585-)-2$FWW\aebTBCEEFEEHGJGDzb+_�����������������V��ʿ�����������lgi���u}��t�����������������������|����������{MEdx�������������PPRVTPOSKGEA@")6D`{����ù���JIOR]]aaSE+'>]pnli;	
+
+	
+1<<AFLOY[\\\]__^`__`a`bbbcdceebdbefecfffedfdbabb`aaa`b`]\[XWWVVUYWWWXZ\]]^\\Z[\\[YURONMMPRTVUUWYXVSRSTSUSSQSTSPUTV������������������������������������������ý�������¿���������������������������������������������������������������������������ɿº�nURTRSTTSRRNJ?34>MSSSVUUUVUUUVWXWVVTSTTUVUUTTTTTVUTTTVUSURSUVTSTTSTSVVSURRSRSSQSSSQRRRQRRSRPQRRQQRQSUTRSUUSTSPQPPRSRRSPPSUSRQQSRQQOPPRRQPPQQOOOOMOPRRPOOLMOQOOMPONMLJKNMNNMLLNL?*C����������������������ukM-r�����������ƽ��������������|`/$/*(,01F��m(*% "#!J�����������������q5/;-"  ,m`9*(" #$$"!#&',!&�������������������������������//7673-(" '4¡
+'�����������¼�"<1112136#1/356555#0ZDEDDB/9����r-\{������qA@BA??>=>=?@>?><=<=??AD@BB?=9640,'"!9p�[X��iZ<6;4027?>36QamyukXIBGc{�������������������������������������������������������������������������)@LEP7:3R^1$RhQ)$*'7`[QID</.('$%',-*o����������|nltuzxsnjd_co('3\\`SB0.FbA44ajuz|sV)1]����}xurz�����|ofa^YSQNj����~���{�������M'Lq��5-�} 1!;QQZ����6
+-48:?@8,MVY\`gjNEDEDCDFGHJEL�A2z�����������������kp��������������{ddr��vu{�ua����{q���ÿ�����������������������pvmwou���|}xxvssplgERVXTPPMIHDA=$&3G`}���������IKPXZ^b`YM</+8Wopnkc5
+				!&' !$.=LRVZX[]`baadccaa``ccacddcbeghfedbcfdcgddcbb`aa^]\\[[ZYVVVURNPQQNPQPRPRUSSUYYXYVTSPOONQSTUWXYXXYY[[[ZXWW[ZYXVYZWVTV�����������������������������������������������������Ŀ������������¿��ſ�������¿���������������Ŀ�����������������������������ɷ�YTTSSTSTTRQNC62<IPQVUUVVWVSVUUUUVUTTSSTVVTTTTUUTVVUSTTTSSTRRTTUUTUTTUWUTSRSSRTTRRSSRQRTRRSQQOPRQQPRRRTSQRRRRPRRRQQSQQQRQRRTRRRRQRPQRPPSRRSPPRRQPQNOPPPPPPOMOONNLNNNNMLMNNLKMLJH;&?������������������������uhN*n���������������������������~_0%/)&)0/6j~|cC)  #!G�����������������n70<1&#$/R9)//(#&&$"#$').&)����������������ȿ�������������.38872/)#&6ä
++�����������¿� <1333433#2/116667(/\ACDEB0:����t $Bo�����yFDA?@???@=<=???>=====@BA@?B?==852.("!#!Ci�^X��uZ;A=446?HA@JPSYcp{}q`RHMd����������������������������������������������������������������������t,2VHdccXkgdq>&J;a`K*+%BK*H?CMUPOF<5&#);���������}e|ih~nxwsohc_m^!+McSUT][VJ;+(9Egs}�~rG=m����{xtt������}re_ZVSOTw����{��������|DB_y��!4�`"%(0*(?26
+%,.(*2,5TX\`bf_KDEDDDEEGIJBW�.;������������������O��̡i~}��������{c`n|�kt|�mg����ru����������������z��{_q������lky�����hcb]\UWYTWK6PPQOJLKLICC7##"&,Ec~���������JKPY[`^Y`TG<AJ_lnpmd6
+
+
+
+-8>BB>,"#&/9FNVY`a`aaabc``bcddbcdddeedceddbdc`cdb`cbccb``^_^][YXVVTSRTSPRMNNNNQPPPPONKMNLOKNLMNNOOLSVYZWXYXWXYXYZ[XYYZYZYXZZ[YXUTU���������������������������»��¿�������������������������¾������ÿ�������þ�����������������������������ÿ������������������ſ��`VQSTSSSRRQKF:58GMRUTSUUVVWTTUUTUUTRSSSTSTTSTUTTUTUTRQPRQRSSRRSSTTSTTSUVSPRQSTSUTSSSRRQRRSRPPRRQPQRRRRRSRPPPPQOPOPQTRPQRPSQRSQRSRRQQQSPPROPROPQQQRPPOONOOOPONNOMNMKMLLKJLLMLLLJB2!;�����������������������~umQ'h���������������������������~a0-'%(0159Cn�w,!! !G�����������������l61>3)##,E;982+'((%&&'++,&'�������������������������������018984/(#%1��	.�������������� =2532441"/..03368(,ZADEEC/8����t 4Sy����L@B@?@?BB>?>>B@?A?>?;9>?@?=AB??:623*!"( !Fh�[]��vR@PG=<?KLIIHJOUZakuz|r_POSi�������������������������������������������������������������������X>XN:g_nMagDRm`j�[2mY=!,')N89jYD;6318DY9"'Q���������x`zCYrfruqkd]dwB$.`81G4<LNOQXXM=1,,$Sdp~��~<$Qx����yvtw�������qe_YVRL`����~~�~�������i>`w|��)@�9 &-$#(6<>=;1=TX[^bfZBCHECEEGGIJ@f�!E����������������޻C���J�zt|~��xjfqf^_{kfx�{]d����z�����������������r��q`t�����x$(`�����pjhfdbfbagM>UE::7;;HEBC7))))-7Y����������KNRV[_`_^WJCFThmlmia:%" $(6JSUUVRORXZW[X]d_bbcacdfddeedddfeecbdcb_\^\c^^\\]^^^^\ZZZWZYWWUVTRTSUVVWVUVTSUXYWUXVXVSSSTPONSTQUVWYY[XYZTXYVTUTTTUUTUUWVXWUXTTVVSSU����������������������������������������Ľ��½��������������������������������������������������������������������������������kVTTTTTSQPNRM>34BNSSUWUUUVVWTUVTTTVVRQUTUTSTTTTSPPRQRSSRRQQQSRRSQQRSTTSSTUTSSRUTRUSTRQSRSQQSSRRRQSQPRRRPPRRPRQQQQQRSTROPPQQQPORPPQPOPRSSQPROOPRQPOPRPPRONPQPQPPMOOOOLMLKLKLLMMLI=*8�����������������������wnY(h���������ɽ����������������a1,(%$'/3<8SvJ   >�����������������r95A7+'':^iP;0)**+)*)),+/+#�������������������������������30<;641*$&3��)�����������½�"A2323431"31134369'0^EDFED07����t 
)A]���W>C@>>>=@???@?A?@B@=<=<@@A?>@A@A<742/("%$Jh�W]��wHP[PHJPPNNMKLLPW_gjry��tbUNVh����������������������������������������������������������������G+6( ?2S2ij)Ts_qI?iY+&*$1T+2BUaqj^B!%K6%*u���������}nsAopbosmg_\iw'&6e'EoZ2+)2@IKOLS:7^gt}�v`(0^����xurv������{kc^XUPRj�����������������ZJr���T+]�!)$!%.2+,/43"HXWZ^dhX>CHDDCEGFIHCz�$V�����������������N�̼pOSc�}peYCA@Rfe���u��fjnoli��¾��������������r����������j)N|��������������ZJXD71187HFDD6./1138Fw��������{EOUW\`a`_^RGH[jlonkaE<A@7041,&"'+,/,04468<7758=MORX\]^_]^^`]]^`c^adcdfdedeeecedcaacdb_]]\][ZXVXRUWWVXYWSTRTTPQPTTUXYXZ[\]]`_`_a_]ac`_`_^\]\^`]YY[\^\[[^Z[XVTSSPOQQQPPQPNQOPPRRPROOQOOOL��������������������������������������������������������¾��¾���þ��¾�����������¿�������¾��������������������¿�����������wYQURRTSRPPOLB4-:KRRRSUUTSUUUUSUVTTRVURSUSUVTUTSSSQRQRPRSPRUSQSRQRQPQSSSSRSTSQRQTSQSRRRSTSTTRSTTTSQRSRRSSRQPOPPRRQPOSQQQPQQOOOOPPPOOPPPQQQPOPONOQQPOOQQPQPONOOOOONOMNNLMMLMLNKKJD6&:����������������������wkX.g���������˺���¼�����������|c0 +)%""%*3.21$ !! 
:�����������������r95D9/*-Ej`>:2**,.,+***+3-"�����������������³������������0-7;750)$%0��(�������������� !D2422544%40345566(6\DBCED/5����s  6Pj�b@CB=>=>>@?>A??@>=CA>::>AAB@?ACCA>:5331)#+Mk�\^��yQ\a[WVRPQQONPNNSV^fjot{��yeRHO`�����������������������������������������������������������r69>:411'#!A@_srZy4Y_P*+'<P'%%$&AVRH)-N*%7����������{vcTSmhrojc[[sb!$C\&APdvl]P9,%*6N5Geq}��wIDk����|vsrs~����~ofa[VRKSv����������������v7L~���0%/{y$+2,'(.:>=93**RWVY`gi_?ABCCFFGGHEJ�h1l��jf�|�����������{X�Ϸip�mx~iC>Agja���|���~�����������������������p��y�������l)Eg|������������HQSOKI<BEGDBA45686758F���¿���yMUWZ_abb`^RDE^klmln_AAFJMNRXSNNNKKMGJKIOQSVWYZ_\\\]cdbbbb```aaabcdcdccbddbb`_^_]Z[Z[]YYVXYWVUTUSWRSSUSTWWVXZ[[^]_b__`___`a_`abb`_a_`a`_`[[\]\][Z\^b^[VWWXURPPNNKOOQROPPNMJKKIHIHFEDDDD@�����������������������������ÿ���½���������������ÿ�������ý������¿������������¿�����������������������������������������^SSRTRPQQRPKE70:GNUTSSRRTTRTUUSSTUUSSTTRSTTVVUUUTSTRSSQRRSSSRSRRRRRSQSSRQRQQQQQRQRRRPQRSSSTSSSSSRSTSSSQQQQRRRQRRRRRQPQSRSRPONPOQQQQNOQONOQPOOQNNNOPPPOOOPQPPPOPRQPNLJKLLMLLLLMJE>1!3����������������������xkU,e���������Ź����������������}b3-,$$#" $"!!!! !!  
0�����������������r<7E;1,*5<0-C:.-,02/-/.08/ �����������������Ʋ������������4'6976/)$&0��+�����������¾�C2320245'5/126577)-[DCEDG23����t%)BMGFBA?>=>?@A@@@@>?DBCB>=<>A@A@@@BBBD;8631''""1Fu�[c��}N]olb]QIPOORSRVSRU\dkorv}��weRHSk���������������������������������������������������������R38:99<?CIGIA;5'$+jzxo(hV? *))LB')'&)El][5=D$$S����������zyhpwppvng^TMuL!-]A$**28Mhjb[8#)7&#Rdr���{=%Qz����zuqqr|���~uic`[TPOZ�����¿����������X5��Ğ;Q�GIVHOeC	';:<;:2 2VWY\dheW>CABDEGHIHAX�K5}�lYgeSiV`put�����Yq��ʸ����}vj_K>8kli������������}�����������������ww����������gm8,Ps����|xxvrqnloASTTTTQKIIEFE;=BCE>315B���´��{PUY^bbccbbVJH`ikmliY9CLQWVYYSRPRQNPPSUXY\^]\Z\]]__^_db`a`__]__^^`_`a_^[]Z[[ZWXXXTQTUVTXTVZXY[YVXZZZZY]`a`aaaa_d`ac``bacccc`_^^b_]]]]]]\\Z[[XWZX[YYYX\WXPSPQPPLMLKJNOKLKIGEDBCDCA@A>>@><�������������������������������������������������������������������¾������������������������ÿ����������������������������aORQQRSQNOPOJ>34CLQTSSSTTSRUTSUSRRTUSRRRQRSSSVTTTRSSQQRSPRSRRRSTTSRQQSRRSRTQQPQRRRRQQPQQSSRQQOPRSSRRSRRQPROPRRSRQPRRRPQORORPPQQPQQPOQQRRQOOQPPPOMNONNNOPOOOONPOMNPONNKKLLMLMLMKJD8*-����������������������xjX*T~��������������������������|b3+(%%%" "! !0�����������������q=5E?95.1504C?61/110/13371x����������������Ķ������������4)6653/*$#+��)�����������½�">2321136'3.111476),[DEEDE27����w#"!5R%,E@@><=?@>=@>>>@ABA??<=?>??>?>@BC@<97420!!#!;F{~\e��zAAnspf[TPPPRUTTXZ[\_`cjntx{��yfTCQj�����������������������������������������������������84998;<>AAAFHMPRXWJG:Mfb=?cV.&*&.V1*./-/9M[[5F7%&p����������}meuxosrkd[O3.#"2`(%((&&(ASCB.#)E1Uaoz�|g.5_����xvqoox�|vifb_YSNTi����������������7R��xMc�$UcOR�`
++:=@@=:+=TVZ]glbH?HCEDCFGKNE[�2">��~`edZcVVQM\l����@�����ö�������yi{v�������������o����¿�����������f��h��������tb;0=Y|�q]]^[YVVW\L>[XURPMJJHIJNOUXZUQ=7772t�ʱ��}d\Yb_efefj]OHaeknoiYFMSXZYYYUVTSPTXZ___^_^^_a_]]]]]\\Z[ZZY[ZZXXZ\Z]XZZXXVSRTVVWXXVWY[]_^ab`aa__``a`abafc`aab`ac```_bcd``_^^`^\\ZZZXZXWXZTURTTQTPQTQ<:=9139KYb\PLGFHGFEFEB@:+$:=<>>;>?<<�����������������������������������������½������þ��������������������ȿ�����������ſ�������ü�������½�����������ľ��̞mLRRRQSTRQNLI@32=IRTTUUTTUVURTUSTTSRQRRSSRRPPQRRRRSQRSRQPSSQQRQRRSSTSRRUTTRTUQRSTSQTSSSRSRQQQQPQQRRSRNPQQQPQOQPSRQSQRROPQPQPRPPPOPQQPPQPOPPOOOOOPNOPPPOMNNNPONOOMMOPMKLLLMMLKNMLHA5%.�����������������������viT*F]���������ƾ���������������|b5++%#"!  ! "!+�����������������w<;LE@FE9516?@:5,'%$$(134-!x������������������Ŀ����������7+7784/)$#-��#U6)�����������½�" ?2542218)2/134765((WDECCB07����s '(GD <@>><?AA@@??ACBBA@B@;<=?A@@@?ADDB>:9741+!!' $@D��ac���@*Or{umcXPPQQQUXY]_\\^aeiqux}��zp^NVo��������������������������������������������������V58:8:;<??@AFHGJKMNPY]a_\WbXO -0*1_6322<@:1+%*M-'7����������`opnnuzvqiaWE##>U$)-+'&+XnVUG%.7<^grwypJ!Fr����~wurqptz{nfda]VQNRy����������������k a�LLx�@obQL7
+%#2;69:21# EVX[_nwc<?BCEDEFHQUUd�#(Q��������xrsjan��ݯ8�����÷���ŵ���}x�������}m����y\���Ŀ�����������rT�zor}�����~cC&9DK]pi[WSQVRPLTJDeaSPRRRPLMOTYZY[WQC4221)P����pb_bfgfffg`QPchjlmhWINU\][WWVVUYZZ^`ca`__][\^\[][\[XXXXVVXXVUXTTYUTPUZVUY[Z\][^acdbebedccdbddcaabcababcaacab`aa`aa^b_^ZZYYY[VRTRSPRQPKFE=973+)++0C^D%=]fbXGBEEDADED@5.:A>?==<;������������������������������������¾�����������½��ÿ����������������¾��½����������������������������������������¯}WPQQRSRRSPMLF627ELTVWUUVTUUTTTUTTUVTRTSRPQTTQRQQPQOPRRPQRQRQOPPORSQQRRQQRRRPRURPQTSPQPQSRQRSSSRQPSQPRROPPOOPRSQQRQPSQRRPQPOOQQOPOOPPOOOPNQQOPQPNOOPNOOMNNOQPONOPNLMNMLKLLLLLKKMKE>/$'���������������������tlW.Ko��������������������������}`9*,%"""  !")����������������uA :NG?<FS?48>@85%!0441u������������������������������9-96960(##,��!i�y<*�����������½�!B3323124'4.114337))[AEDBB08����o":T+$>=>=<>AA>>>>BBA???>=?>>?>@@@BCCB?<:9632!$%,FI��]e���I'Fj|xqjaWOLQQSY\]_`a_[_clpuwz��~raTXj����������������������������������������������w89::99<>>>@CFGHIILNQSUYY[[]UD -0+0MMOGA92215.5F&'Q�����z����V_[ffaqsnh^P9$dN*469876DT`bI%9($Odpx|we6#S{����|wtsrppprlgfc_YSNI[�����������������N9##G�s#5afM#S+#  ,3+'MSX]cs}_:BACCDEEHTcWp-`�����������������:�����Ÿ���ȴ���ym��������Y����eK�ļ�������������V#CU_kq}�����<64EYdaW<5=97544/6:AJVWVTVUUSTTUVWXVUSK6002.(.5Iz�sghfffghhgaORgihlkfTINUWZUTTQW\aabefba_`^[[[YZXVWVWPRVXWVRUWYXYZ\^\^]``b_abccbbdeccccccddbbcdddabdbb```a`a^^]][[ZXVUTSTQPPNPPNLKNNJKMD*!
:ZW)3SjmcNDEECDDCA74>=;<>;������������������������������������������������¿��¿�����������¾�������¿��ÿ��ľ����������������¿���ÿ������Ž���gTTSQQQPSRPOI=12@KRTTUUUTTVXUUTTUSRSTSUTQSTUVTQQRSQQQQPQQQRQQQPQQOQRQPQQPPRRRSQSRRQRRRQRRRSQQTRQSSTRQQRRPPPQRQQQRQQPRQOOQPOPPQPPPONMOPPONNMOOPPPOOPONNMLKNNONNMNNMNLLLLKKLMLLLLLHD9-"&�����������������������ul_0Y���������û���¾�����������{d<)+'#!!! " &{����������������r? ?PF<25JE6;::3(!/8882r������������������������������9,87862*$ #)��U~Nts*(�����������ý� @3244334'6./1003:((ZBCCCD23����n$*KU.>>>==>A?@??@BB?>>=?A>=@@@A?AFEB?<;9863+% #:GG�}ci���U7^v}wrh`XRNRTWZ^_`bbaa`gkotw{}�~p]MSh�������������������������������������������S2;<99;>@ABBDFHHIKMPQSWYYZYZS/$-(#$$%),5ALPKA=L:#'o�����o_jcmlrgaebkokeXK,KqUB2+*4?A;52,&>3^iv���f,5]�����zvtsronnkigea]XPEFk����������������}1 P$<f�M5XY_V3 *UE�R2QUXZb|�a=CDEEDEIOWpSzo*p�����������������p;�����ǻ���ì���uW;u������Z���f(T���������������TAKLWeikjgbhlVSTTYv��S?98;99@:BILNOOONRQVWVUUYWXWVUVE701/,.-Iz�ukhgjkkhgifSSejkkidPFLPQOTVZ_bccdddda^\[ZXWWWVVSSUWUZ[]_^`aabbfeffededddcddcabcddccecdeedddacbbb`__]\ZYXWYTTURQTSQOPONMLKLLIKOMKNOIHJC+	+Kk@'IlomWFDCBBC@7"+9<>>=�������������������������������������������Ŀ��Ŀ���¾��¾��¾�������Ŀ�����������Ŀ�������������������������¾��Ĵ�kQTQPQQQOOPMI>52:JQRTVUUWWVTUVUVTUUQSRSSUTRSSSSSSPRRRRSRPRQPQQOQRRRRRQQQRQPRQQOQPPQOPPRRRPPQQPPQPPQRRQQQPQNPQSQRRPRRPNORPOPOPPOQNNPOOONMNNOOOOOPOPQPOONNNLMLPPONNOMLLJLLKJKLLMMKLIC5'"����������������������~uk]2Y���������ƻ����������������~g;)+&#" !!   &y����������������sA#<PI>2-(.1;40'$1=??>6 n������������������������������;+7:751+$!#*��0�����[)�����������½�"=0002332$7//23468+$YDCAAC04����r 7\><B>????>@A???@?>=<?@=?>????BDBA@<;;8542&	 "" GBQ��al���Z*! 
'Cl}yrmd]SLLORW[]_eeeaa^fjotv{�tfRL_y��������������������������������������v:DJFBABDCEEFFGHGLNMNPUWZ[XYXJ &-%#"#$#""!%'%-7:'&7��������zrcXZbeaclh`]R?3NW[M5*',3/'.7B[hw���`?m����xttromnkigec_[UL=1a����������������`!C`"%V��)/ILXT34c��C%!5YUXZi��j:AFDDEHNWe`<�J'N�������������|���D"�����˼��������uF#3Ig�����WalL+,5PQYio{z|zkcfxpba`feedghfd`a__\Y\X^dh`YRRU\aRORTSUTSVXWWUWVXWYXZYSRP?400-,,L~�pjfeiijhhieSUgkllhbPDJQVY\abccffeca_^[WWWUTTWYY]`_caccdddefgdefeeddceeddeddbaadcc``aa\___]YZ[YXXVTTRUTRWUSRPPOPPPONMKLMJHKKFHLKLMMKIKH2	 Dn^)
+=\qphQC@@?<7'6<<<����������������������������������ý�������Ż��½�������ý��Ŀ�����������¾�����������������������������¿������êsZSRSPRRPRMNKD8.8EMQVURRUTUWVSTUVWUTUTSSTUSQRUSSSTTQQRTSRSQSQQRQPRRSRRPQRQQPRRQQPRQQPOQOQRPRRQPPQPOPQQOPQPORRQPOPQRPOROPSQPOMMPRQQOMNOOPNMMNOMMNPONONOPONNLNOMLLMNMOMKLLMLMLONNMLLH>2""������������������~��~�vk^0Q���������º����������������~g;(*%$"!   !s����������������uC"BYPD;4-%&+$#"#,;CFA=6h������������������������������>+:;740+$$)��T�����v-(��������������"A11/1012'20/13566)&]DDCB@05����q +Oa(@>@>=>>A@@>AA?>><>?>@>>>=?BDCCA@=<8653,	$'!)H=K��el���g(0Vpyzrjd\SNMNQVZ]`bhhc^_fgotxyz{|vhXQ_u�����������������������������������N5PV]SSKXLMRQJLLKLOOPTVZ]YZXYB)(#!!##  !!#! "!!"'K����������~qh_fTOIULRN7#:NX\P?2,7(#T`kvy|l?E|����|vtsqollkigda_ZQH1,t���������������L%,>By��"&*MJC2DlB'+(LUX_q��t@ABBEHKTe~O!ij+),3- %>EM_koww}��mS��7"?��������ķ���}o@.69<DK`kw��f]VSXTRQUXZYVRSTYWcfdedfgebcdcba`]\\XY[ZXXXWVYUTWXVVVVX[WXZZ[]ZYXXUXZ[WTYL62421/O��qkhiijjiijfTRimjjgaJFOZ^`adgfgedb_[[XXURQSWY^bbcdejheffcecefedgebcbaab``_`^][[ZX[YVRTUVTQSQQSSRSUTPOQUSUSJLPPNNQNMMMKJLLJHKKKLLMMMKJHI<	 :Z|?/UotsY?.1*'"
+	
+*8;����������������������������������������������������������������������������ü�����������Ŀ�������������������´�bRTSRPQQRROJE;05@LNSUUTSSSRTVUSSTSSUTSSTTRTRRSTTTTTSSRSSRQRTTSQPQQPQRQRPRROQTSQQQPQQQQPQPPPOQPQPPPQQOOOOPPONQQQQPPNOOOONPQPPONNPPNMNNMMNNNNLMNNMNPOPOOPLMNNLMOOLLLMMMMLKKLLLKLMMKIF9-"����������������~����xl]0O��������������¿�����������}h?(*%%#"   p����������������tG$B]XPD@D5' !%$%4GGE@;5b������������������������������@,:=850+$%&��$lhk�[^rE�����������¿�%@1100006'70..2465)#ZCBADB/3����z%;cG1C?>?@BAA@A@A?=@>=>@@>><>>ABBBBB?<98540')!)"1J?X��fh���s4# <`vzwmha[WMJNMSZ_`deed`belpnompw~zj[Wfr�������������������������������n2;GIWSTN`\X]c]W\VR[VSVVYYYZXW3 *'"!""!      "  #)k������������wuid`TQTM-/CLMK*L]oz}tb%1|����xtsonllkiifc`[WN@#,Y������������v9(#$$$%.F��{"% !'&,.1046@@K=644(%! & $1MZd���i8AAACGP[y�B'-05@C3.550,--1624;@KVY_baFPeQE<<9Sv����ǫ���zjPX_dimoonnmlgecegdacddedegfcdeedcddecbba^a`a`]][\ZZZ[ZYXVY\[ZXZ[ZY\[XZ[\]\ZZ[a`_^^^Y[[D47622U��ujghjkiikjeQUijhkh`ONV^agafhfe__\XVWVUWX[__`cdccaacdcde__`^^\Z^\\\\[[ZVXXYWWVXXRVTSRPQSSOPPPORSPQTPMOUPQQQQNOPQONKJNNMKLJEJKIHJLLKKIII>%	$Nyc*	@awxg1
+		0����������������������������������������������¿���������������Ƽ�������¿��ļ�����������ÿ��¿���ÿ��¼�������mVTRRTRORPNPI@34>HOQSTVTSSTSTUSRQPQRSURSSSQQSTSRTVTSRRSRTTSQQTTSRRRPPQRQRPPQPQRPQPQQQRTRQRPPPQPPPOPPRQONOOONNNNOPNPPONNOOMNPOOOMNNNNNPQNLNNMOONMMNNONONMNMMMNMMNNNLMNMMMMLKKMLJLLKH@2&��������������������xl_2R��������������¾�����������|iA(,&%"#"     n����������������tG$F`[RHFJKB523/#%)1:EEA?<6b������������������������������@,8986/)#"$'��*������U�����������ľ�%C3211107'7/.24557*#VDCBBB15����t"%'Jk"9A<=?@AA@@=>>A??@?A==>=??@@@@AA>=:8453.
+	* +>QA\��jo���|?'%Gj~spkbXTNMOPWYZ]`dfecdcdfgkorvyvsc[fp����������������������������7189?EJOMOVTWihRk\Yjnai_d]^WVP$#+%""#!   !" !"2��������������}|wurf\SD"0EXjuxmMA_��}wqpomjjjgghb^YSI97Tu��������h844321036ARUD711/2A-#%3A.+-3567979<?>@BCDFHFGKRN8358Of���o;@CEEEWt�u-"'-378=BC??EIIIOVW[\]behnqqqlgigaY\[_ef�{ttnleaeijhgfhhjhihhhihfgeeccecedbcdeeebcdcadc_a`_^`__a^^[]^\]`]ZY\\\]\_a`^`a`a`]\\]\\\\^[\Z\G8855[��uhhklljijjgXYglkjhbTS[`afabd\\YWTRUVXX[]``]\\XZZYX[]\XZXXXXWWUUVUZWVSWUVWWUUYVYVURRSQPRSONONOQRPNRTOMQQRPSTNORRNNNJKONHKIEJMLJKMLJJLKFC)
+ >l:	0Zs|wB$	
+��������������������������������»�������Ľ���þ���������������������������Ŀ���������������¿���������������qVSUPQRSRPPMKC707FLPSSTTTTRRUSSTRSSTSQSRQQQRPQQQRQQRRQPPRRRSQQRRQQQSQRQRPPQQPPQROOPPQRQQSPPNOQRQPPPPPOQOOOOPPNMMNONNONONNNOMNNNMONNNNNOPOMKLMLMONLNNLNONOOONNMNNNNNNKMMMLLLMJKMLJKJF<0%!~����������������~}���vi[2M���������������������������}mB)*$$$#"  !!  j����������������sF"E]XOHFGKPRG@?625=?BDB@>6[�������������������½�¾������>):996.)$"&(��-�������a|�������������);2310002'2./24557,"ZDCB@C31����q$0iR'F:=>?@?>?=><??=@>===<=??>=@AA@=<865451.$#)O\=f��nl����J/5Sm{wlf`YTONOQRVZ`bgkkdccchjnmmnvxthe^p~�����������������������Z!286;<?BCDCHKVXT_TWnpd}mmemTRC%42###"!!   .��������������|xtpkd\Q>%:H^xq[>@\Xbjlnlhhhddd_[XOD0!%#"!!'DUx����T?CDCB@??=?>:;????EMILN\MBAGFDHHGGKJKPPPRSTVPNNPMEBB?:O��{lIECEHGMxw@#$*8?AHPQTT]ebchjgllorqpqsuwyuvsqmkighd^b_Z^cfhieffefggggdeegeeebeccefdcda``_[_dc`bdb_bab`bcba^\``\Z]^Z_`__a```a`aa^a`^\^^_^^]^^^_a_`_VA=>3\��silmmljijlgWXhmljhcSNY`Z[XSVTVQQQSSUYYVX[UUY[VUYZTXZZVVVTVWXXWWVVWWZVTUVVVUUWTUTSPQSQPRTQNQSSQRRPOTROOTRKPQLPSOLLMJILMIJJIJJMLJKLJJKIEE0
+-^�_$&Hdz`.
+	
+�������������������������������������������������������������¿������������������������ľ���¾��ǿ����������]RSSQOQPQPOOG:13AIPRRSSUTUUTSTRQQSTTSSSSRRRQPPQQPQRPOPQRRQPPNOQRPOQRRQQQPQQPPQQQRQQPOOOPPPQPMNPQQRQPOPQPNNPPOOPOMOQNMNONNNPNLMNMMLLMNONMOPOLMNNMOOMMMNNNNMMLNONMOONMNNMLLMMLMLMNMKHC8){���������������}}��xj^5O���������������������������~jB&)%%$$# 96# !b����������������rH'A\WMKGEGJOK??98=A@ABA><7[������������ı�����x��þ������C*98:72*$$(��"/�������V }����������¿�*;22210/3*//034377,#YCDCA?22����r "!'Ch.0A@?A@>?>==<=?@@BA@=<<<===?@BB@=:9:6330#% %(*e]Cl��nm����[3 ;[}}xqkd`YSNKNRVV\_fkkkhfeffghhhlrxvrjbgv��������������������)#467===>>AACDDDGMPS[acppknlVQ7Hq<!#%('#! Ck�����������}zvspjc[N4!1QkZ4  $%1?R_lb_\VZ\\\TL@0&&),12254;;947=LUZSVPPONNNLKKLJIJLKMORQRVXRPPQRRTTSUWWUVZZWVTTSTPOLMLLHE@?><A>CBADDCH:*682--2Eaaeiinmnopppqopooppnlmnppnmmmmmjjihijhgghefdeeccdgdghfgfgeeccdc``cdccba^]_bccbccbab`a`bcbaaabecc^a^_aaaabaa^_ccbeefcb`aab`^`bcfebdaUKE7i��qknmmlhghieV\jmlggcTOV[VTTVUVYWUWX[WXYWXZYVY[WWXYZXWXYVVWXVX[VVZWUVZWTSVYVVWUVVTUURQRRPTUPNPRNOSPNQQOQSSLQTNPSPMLKIKJKKKILJIJKLKKJJKKHD5
+
+ #P}�>
+ 8\s|r4
+���������������������������������������¾���������������¿���þ�����������Ĺ��ľ��ſ���û��þ���¼��Ľ�½�^STQQPPONNOMJ?53=GNQRSRRSQRSUSTURPRSSRTURRSTTRPQRSPQQRQQQPOQPOPRQONOOPOPPOOOOPQQOOPOQONOOPOPQQPPPOQQPQPOOPOPQPOOONOPRQPQQPPNNONNNMNMMMNOQONONNNOMLMQPONMNLMNNKMMNMNOMMOMKKLMKJKKMLJG>3&u���������������~~~��xia4K���������������������������}iD$)%$$$!=�|%$_����������������qE'A_[UPLIBFRVSVR[JC@@@??>7\�����������Ή&*#9tDW�¾������B)9;<90)$"#%��#%vaYsv]rB w�������������*B1244312(1//34576,$[EDCB?42����s  "!  "%*=<#9B@A>>>B>>=<<>AEDA=<<;<=>>?ABCA=<;6250-3-&5oQBj��ul����d(*Df}ypiea[SMNNORV\dhkkkheecgggimlnuvvlihv����������������K%5657:;=>>@BCDEGILLNMQX_ajhZJ&'��?$&(+'"!  !+Mh������{xusqolcXJ,&$6;*!#$%&%&()'(+<@:4366H[XH:58;;?DHILPQTXZ_cgikf`\ZZYWXXWVVVUVUVWWXWXWWUUWVWZX[YYYZXYYVWXWVSUQPTTVTPRPMOORRW\imrtkaFBD?><;Egifjkjllmmmmmkklmlklkmlkjkjihggeefehfdhefdeeeccdeggggdeecabbacbbcgeeefcbaaaddacedddccceddbabbdcc^^`cdcbbddfeefgfgffdccedbaadbeedbbb^_T9j�jimomljihjgW]hmmhgaWXZ^ZYZZXYZZY\X[ZYZYXYYXXYYYWXZXUXXTSXZVUYUTWVXYXWVTWXUVXVWWRRTPNRRNQRQNNQPQQRQPORRPQROQQOMNOKKJJKKJIHIIKJKNNKLJHJKD7"	#8h�d*
+*RlurM%���������������������������������������¼��¼�����������¾��¿���������������¿����������������¾�������sYMQQPPQQOOMIA847ELPSRQRRQSRQSTQSSRSRRRRSSRRSSRQQQTSPRRRQPQPRQPQRQPOOPOPQPQNNOQOMOPPOOPONNOQOOOQRQQOQOOQQPPQPQPOPQPOQQQQOQROOPNNONLNONOOMKPONNMNLNNLLNNMLMNLLNONMLNLKMMMMNLKLLLJILLHE<+s�������������������wmd4H���������������������������jG!%*%&$(E���sAQ��������������xcK:(Fh^UTQPj�������^@B??@>>8Z�����������҂! .��,[�ÿ������G*99981+##%��"b�����u*!t����������¾�->3325441*41135677-"WBCDB?3+����t   !!!!!#"##"#%%((("",??@@?>A?>??>>ABB>===<<==>@ABA@?<;85532*	'0	OsHDq��xq����i,0Nh}{vqkgbZPLMLRW[_cghjolgfehjjjkklptyulhy������������v*,-/347;;=>?BDEHIKKMPSQVXWUE,��g/'&%*.'" !  +Cc|�ukrsmklloh`I'"%(3:)"""$)07:51/../.--.2213145666:MPDEGLNSWWZ\bfgknrqstrqlhdaa_^_^]\\\`]\\\[[YYWWUY[[][\^]]^^[Z^\^aa_]bcigijojlkkkilottywwupkU;=>:2/8J``^deffgfgfggegfccbffhhhgffffhgfdecdgebe`^c__^edehjhghhfgffeghgdcdddeddeddefedffeeedcddcdbbceedb`aggfffgfhhfdgihggfcacdcdddbdcccbc`_[Nr��ijknmmjhhkgS`gnojh^X[[]Z\\[[\ZY[[X[\SV\ZXYYVVX[WWYWXZWVTWZXVVUUTUXYVWVUVUUWUTVWTTTQQQTSRPQQNNPOOQSOMQOOPPMOPMKLMJKLJLLLKJHLKIKPMJLJIJKF@(	"'S��=#Edv|c,���������������������������������������������������������������������������������Ǿ�������ľ�����������YNROPPONOOOMD7.4?IOOPSQRPRQQQQRTRRQRRSRSQSQQQRQPRQPRQPRTQPOOPRQQQQQQPOONLOPPOQPPONPOPOQPPNNOQPPOONOPOOPOOPOMOOOPPPQONOPPPPOOOPPNOONLNONNOMMOOONNPMLNNNNKLNNNMMMMMMKLMLLLLLLLJLMLJIJJA8(m��������������|}����vmc=F��������������¾�����������~lE"$+%&%1��d��T <_og]ZWZXVPQKGCI?65'@a_VTSP����α��k>B??=>=7M�����������҄DQQC-U�þ������E(88762,&!#$��!?�c{��a$x�����³���¾�->5535333,9./145890[DLQD@3.����u  !!   
!""###$#$$$%%'))))*'%&4<?>??@@?=@??ABA@>???;<>A@BAAA?>:855530(&	"bbDIu��{p����r58X��uqmgbZPMPQRUZ`cimqsqpjfijiijjnrw~zxzy��������>"$+-159;>@BEHJMOSSSUQO7(:QD43-1578$#  $%)-,0<AELTVY`]Z`aM43468=@A80&*./3;AEFDA?<::;<>=>@A?@AEFHKNRUUUVXY[`dfikorsuyvxxurojhedc`abcaaba__a`aa``]^^c_bddeehgimronssvpvvxsuppnmljkkklmnommloplbT2274(&2ERV\\`aaadcabcbbbaccdddcedefccggfdgeffffgijfefeheehhigkgbccgfhfgdghhffjhefefecffeeggffffefedefefd`ageeffffgigeghfigcc`bfdca``a_\\`^[Z^_z�lllmkkjihkg\djllji`Z\][\\ZX[]Z\\YX[\WWZZYYZZWUYXXWWXXYXUUWYWVWWQTVWWWUVUUVVTUTUWVQORPQSQMLPMMPMNOQOOOOMMOOLLOLKLNMKKLIJLIGJJIJMNKKLKIGHG-
+!"Br�I	"9]qwv:����������������������������������������������þ�����������ý��þ�������þ��ľ���ľ��¼�����������Ŭ�bPNONLPPPPNLH;22<EOPSQQPPPPSRQQQQRQQPQQQQQRSRRQQQPQPPRQRRSRQOPOPRQPPQOOOPONONNNONNONOOOMPPOMNOPNOOOLOQONPPONONOOPPONNNPOOOPQQPQONMMNNMNONQQNNOONMNNMNNOONNNPNNNPNLLNMLLLLMLKKLKLNLJJF?1%j��������������}}����ynb?D����������������������������kG#$*(%##M{&=f% 6Vg^YUWWXSTSRQOMG>6'@]]YVTP|��ȿ���l;D@>>=;6L�����������҆3�����xO�Ŀ������J&78872,'$ ""��! _{R`�-%r����������¾�+?4433432)9,.3557:/!ZKq~H@22����u #"  !#""#"!#$$%'&(&''&())+,-/1.++/=@>?>?E><<>@@A@A??><;<>@>?ABCB@>956661.7
+
+/jWEHy��v����}B8b~��zvqlhd]WOOOSY[]ahouvsrnlljimljhmx}}wsr�����i& $(.37>@CGKPQQUWP+"'015367CW3+(&#$" !#(,1336798737794>@GOYYQFECBCA@@??>60)#,7<AFLNQTRNMMKHJKKJLNOPSRTUTY\_b]\]^^ahlnorvwwyxzyxwtqlkjghhhghhiifhjikknnnoonttqwvxvsrqpqqqroonlikkornnmmkkkjiiijjjhfdeb_7/8;/,0APBUWY[Y[VX^___`^aee`bbadfdggfehgchdeggdffeedfgedefgjknkhhhghighhfggigefggfhjhfhgehhiggeeggdeddfedbcccdddfcchggee``ad\ZZXXXZZYYXUXVWYdq�{mnlmnkiklmjUbkmmjibY[_]^\[\\[[][Z\[Z[ZX[[ZY[ZVYZXUVYWVXUTWWUTVXRTUWWYVTTWUSSVSTWRMORPQRQOKNMOQOOMQQOLQQNOONLPOKMNLJIKHIKIGIMKIKOLIILIFHH1 $1f�a!
+4UnuyY&�����������������������������������������������������������������������ÿ�����������ľ�������������iQPQONNPPOPNIB418CKMOQQQORQRRTSSQQSRRPNOPOQRRQQRPQQNPOPQSTRRRQPNMOPPOOPPPOQPNMOPONNMMOPOPOOPNNOONNOMMNNNMMOPNNOONNONNMOMNNMMOQRPOOOMNNMMNOPPONMMMONMMNNOPNNMNPMLOPNLMONLKKKLMLKKMMMKHD8.b���������������~������zo_?B����������������������������kJ$"+)&%%"!" #!!!4Sj_\TWXXTTTSNKMI@7'@^]XVUTuͰ�����q9C@A>>:8I�����������яc��Ѭ�]P�¿������P'69972.''%!!!��"-p��J&o�����ÿ������+E2322133(9/145578.!ZLlzN?2,����r!#"!"#$$%$$$%&''*))*++,,,,/02454425<D?>@@D>>=<=>?>?==>>><=?=<>@@B@>956673/&
+% 
+	AlUEJz��r�����N"$?[v��{tpmjd^XSSRQVZ^bimuyssmklmlkjjkpv|{smksv<* !"(,.6>FJLNPC'!'-/01436::=G?;75229526>ABCEFGHHHKKNOUUV][\[VQQNKKLJGHHGEDC?;7/,*&%;EIOQUWXYXVUUUTTRSUVXZ\\[^^^acaeeddbchlnpsuwuw{yyxywvtqpppqprrpqqsututsstsrtsqqqnonnonmlklmnlmonllllkommlmkllkkihggeghhgdcT=9>705:NW\`cgggdedffghiggghffcdedbed`bd^abagfdgfegikikihhjkkiigggfgihihlkkljihhhhjjjjkhfijjijgdhjjigkhdede``_]\ZUPQTRNNMKMHJHKJNUZ[YWYVXZXWiu��|omknnlikllhZenmmjha\]`^]\\][]]ZY[]YW[\WZ[XWYZXYXWUVWVWWWWVUWUTVWTVYWUSSUVURRUUUTSQQRRSRRQLMPOLQPJLQPMOPPRSONNOMMKLJHJIJIKKIJKGGKKGKKJGHG9
+#Yp1'Onuyg/����������������������������������������������������������������������������������������ý��Ľ���uZSQOMMONMPOMD803>KNQQQRQRQTQQRUTSQQTSRQOPONPQQPQOOQPOPPQOOSPQQOONOOQOMOQRQOONMLOOOOPNMNNNNPPPNNQPOOONLNOOONLOOOOQNLLLLNOMNOPNMMONNONLMOOLLOONNMNNLONMMPOOPOPNMNLMNNMMNNKKKIJMMLLJLLKH>4&Z���������������������zn`BD���������������������������mJ#"-**1++68:0.,51% 5[pd_Z[\\ZZ\b[_\XP=(;^^WTSTsŖrY���w9D@@@><8I�����������ӑ)3s�?P�þ������O'5<;:762/)''&!! ��$1aOm�붫���������,D4121324);/024577*ZCdzN<2*����t!$$$$%&'&&''()))+,-,-////03579;;;<;?JD>?AB==?<<=>@@>>=??><=>>?@ACB@;986652.	[sRCR����u~����\)&C_y��xvpnlib\UTUTXZ\_flrrtqqomnnmmopswvriT76.!"#&'(*+*-/6>?IJD579;<==?@CECCEEHIILQ[PJILPORTTTUWXYY\Z^\_`[^^ZYXWWWWUTTTRPPQQPNMNQMGJPTTXZ[\]__]\\[]]\]^^`abbdcfgfggkkkkjmoqrsvwvuwwvwxxvuututttstsqqpompolnmmnonnnmmnoonooonoonooooonmnmooknmmnmkiihiijikkifccdS?;769BPZ__bbddcfefffddeec`bcccdefiihhfhhiiijjljjiiifhgihjjijihjiiikjkjmnljjjjiijlhhiikihlohfb`^cdf_`YVRNJJG>@@@><=>A@AACCBEEHHLU^^[ZY\\]]bly��zpoomokjjkmh^emmmjg_[Z^`[Z[]]]]YVV[\XWZYZ[YYXYXYXXWVWXZWWVUUXUSUUWWXUSSUWVUSRTVSQQQPOQQOPQMLOMKOOKLRONNPLPPQOMONMIKKGHIIHIJGHJJJJLKJFHHEG?&
+ Am{.
++_sutt>�����������������������������������ü���������������������������������ľ������ľ���ļ��Ŀ��¾���eQRPONMNNMMMD;..7DLPRRSQSQPQSQRQSRSPQPPQRQPNOPQQOONNQPQPOPPNPPPONPPONOOLOPOONNONNMMLMMMNQQMNOOOOOPOOOOOMNOPONNPOONPPOPNMMOMMOQNMNNMNLNMNNMLLOPNOOMONPLKMNNONNOMNOLMMNNLKKIKLJKLKKKKJIF;. R�������������~}}}}~~zn`>C����������������������������nL$!-+.TibnezhiisO$ :����������������nG)7\^ZTRQq�ϸ����}9A??>=<8F�����������ӓ`Z$$O�ÿ������P*8<<=>:.'$*.-''��#!i��z�}�����/=3123435+7./02466-XCm{F=4-����r"'%&(''(')**,,,-/0111233467;;=@CCCCENXD?AA=<><=@@?=>>>???;;>=>CCDA@<9997420)		(miKEV����v�����g2,E`x�}ywtqnjd]WWSRVX[_cimsysqrrsrtrqnsv{�rM.  "$''(*,-01238:;<@FIJKPOQHFGHFHIKLNQQRPRVXUVXVTTRUX[^]\]^`a`baabaadba][[[\]]^`^^^]^]]\]\^^[ZWWYZZ_bcceeeb_deffgffhjlopooqqqstsrrsutstwwwvxxuututvsqrppnmmhknnmnonoomoqnoppoommmnonmmnnoonnnnonpmnmpnklmmmmnkljjkkjjiinjifhTB768Mfefihedddceefcdfggghljijijlkkighhiiikjhjiihgfiljjijljjhjkikjlijkllnkkefhgbb_\^ced][]TMJHD?AA>:7985789668789:=@A>@AA?BEHFJX\\\]]^^_`cmy��{nlopnkjklkh_gmnkjf]\[^^]\\]_^\ZYVW\ZWY[[YZ\XWXXVWXUWZ[VTVUVVTTUTXYUSVTTVSTURRTRPOPONOQQOPOMNMNOMLNPROLOMMMOPLLMKGJLIHJIHHKJHIKJJKLIEFHDDB-"1YC5`wuvxN%�������������������������������������������¿��������������������������������������������������aOPPNNMMOMLH=305AKNPOPQRRRRNOQQRSSRPQQPQQRPQPPRQOOOPQRQRQPPPOORPPONPPNNNMLKLOMLNONMMKLNPPPPOQPOONOOOQPOONONNMMOONNMOOOONONOMMNNMMMMNNMMLMNNNNMNPONLMLNNNNNNMMLLNONLKLNNKKKKMMKLLLLLJG?5*S���������������~|}}~~}znb=>����������������������������mM%"+).Owstep^YVP<#!7���������������~oH)9\_ZVTUr�ӻ���ʃ8A?====9 A�����������љ3�H"#,I���������V)8;=B=1'$%.A8%2&��$$m�쐘���������0B3334446.5-/02457-XDenC>1(����v&*)'*+)+,,,-/02345667789:<<?ABFIIJIKPk\=CDA<=<?@A@?>>=<>@?>==?@BDB@;99:85321	
+={_NJX����w�����u9"!/Nf~��{wutqmha\WUTU[Z]`gmrvxxvxwyvvw{���y1#&')+.13689;=@BEGIJLQTXXXWXWRRSRTUTUYYX^\[XZYXXVVXWZ]^_b_abddeeebbbccbbb__^`bbdbcddcbbbeeaee_ccdcegjilopppnoqpqstuwttuuxwuutvvsssrsqrqqpqqnlnopoqqrsrqoppppqqqpqponpppoommnpomlnopomnmnonmjlmnnllmmoqnnooqronlkkjjkljigjhjilbZFAGTfhloqomklmmkkkmmlmmllikljjijljijjkkjjkihhkjllmlillkjjhfhgigbeghgaa]YZSOMPGJOONKJMJCCB><><8986555633543456789<???ABCBDEFHLX__^_``a`afnz��|ppponkklnmhbimmiif[]_^\^][[][Y\]ZY\\XYZYYX[XWVXVVXVYXZVUTWWSSVUSWXSRVRRVTSTTRTTSNNQPLPSNNQNLOQNLMMLONJLNMLMPLLKKLKJJJJIIIHHGHKKJKKJFHGFDE5"
+ ')
+ )Urzw{c/��������������������������������������������������������������������Ŀ��ý�������¾������������OOMNMMLMLIC8/0<GLPQPNQQPPQRONOQQPRROOPQRQPPPRQQPQPOPPOOQPOOOPQPOPPOOOOPNNNMMMLLNMMLNNLONNNNNNMNONPNNNPPPMMNNMNNNOOOONMNMMNMKNPMNNNOOMMLMMMMMONMONMLKLNMLLMMLKLKMNMNMNOMKJKKLMJJKLKJE>1%M���������������}}}�}znfC:{���������������������������oM& *+,+0-+$#&# $#!5������uorz�����rF'7aaZUSRn�ɣ���Ɉ9C@>?=<: ?�����������җn�|\TdH���������T(8:?M4.'%);B42&��% h�띫���������/G3322346-6+.02466,SB^[?=-,����q(,,+,..-./013466689:<;<>?ACCFGKMORUVUfhBACC@?=>??===>==?@>><:<>AD@?;99996520-	UqWMGU����z������A  &#"1Nh{�{yvsqmje_ZZXYZ[\_eovy{{zutr|����z*.02469<?@@BEILMNQTTVYYZ\ZVXTUWXZYYZ[[\^]YVY[ZYZ[[Z]_acedddefhgecabbbabcefefijjlmlnqpppqxstvruutuvvvuxwwuuruusrsqqoqsurpppopqpmlilokmqpmooopppqqrqssqrtrrqooopqpppqqpqpnoopomnlnnlklmkmnmhmmonmnnonnnlllmllkjiiijlmigfgjijjheacVY`cdiklnoomnmmlkmlkijmkjjmmmlnkhijkjijjhikhjkjihdbcd_][WWXUSRNKH?5+.658<><:9;FIGDEHDAA@=<<;898656543344444778:<>=?@@ADDCFHLW``^aaceacfp|��zppqqolkjlmielmlkkfX[`[Z^]\Z]ZZZ[[ZYYZZYY\XYWXVYXTVVXWXWUSVWQQWTSWWTSTRSVVQOTSQSSMMQPLPRJKNNNOMLMLLMMONNNOOLQNMHJMJHJJGGIHGFIIIHIIGHFEDEDB6'
+"Cmrxyv�����������������������������������������������������������ſ�������ż��ü��¿�������¿�������qFLLKKLLKD801<FKOQQQQRRRPORRPPPRPPOQSOPQSRPNPOPQPNPPQOONPQPPPPPOQRONNPPQPONLLMMNONONNNNPNNMMNLKNOOOLNNMOPNOOMNOMLNOONOONNMNNMNPNNMNNMLLLOMKMNMOOPNMNNKNNMMMMLMLKMNMNNNJJLLKKKMKKJIHD:.I���������������~{~���~|qfF<v������������©�������������oO&*,-7VL249RIF:$$!)�����u^TPVZg����tK*3\c\UUNm�Ƣ�����;BA@>=<9 ;�����������ё$v�����k�Ŀ������V)8=AN2-)+6IC2 .!��$"d�뤪���������1F0334336-7/022786,VJ}p?>0*����u../..000013456788;<=@@BCDGHIKLNRWZ[[^_aTACCB@@=>>??>@@?======;>ACAA=:98:76311&"clWNH]����{������L(-- !!'7Qj���}|xvtqlid_XZZ^_bdmqyzofXp���À468:;=?BDGIJLNPPSTWVZ[Z[]\YZXYZ[]]]_^_`_^\^^__]_aacadghijijkjjkgggeacikloosutvvwwwzz|~~�|zyxvvxvvtsussropoponolkmmlnollmlknpooqpqonprpoonnpppnprsrpqsrpqoppopnoqnopnnmmnmnnnllnnonnnnnnlljjkhikkjklkjkkkkkigggedjhkjjkihigifebcgjilkjjijjjilkikkmlmnnnmnmkiffeededdgeccca^^Z[]VVQMNMOLJH@;96421& .16:<;96=HHFDEGBAC@A@<;=:7786443354455668<>>=@AAA@CDDGLXd`_bacebefu|��woponnmmmmke`mnjjjdZ[_\[^[\[\[^ZYYYXZ[YWZZWYXWUYYWVVVYYYUSTVUSSSVTUWTRRSUUQNRSPQQOPPMMPQOLJNPMMNMKKMMPPOKMMLJMKIJJGHJKGGIHGEFIGGHHDFGDCCDB8)	
+ !:`zut�������������������������������������¾�������������������������������������������������������OHLKKJIF?4.6BIPQQQPPQTQRRQQTTTQQQPPNOPQRQPNMPPONNLNNOONMOQONMNOOPQNONOOPOONNOONNMMOOMMOPNNNONMMMMMNMOOLMMMMMNNOOMLOOLNNLNNNONMMMNMNMNNMKLMMONNMMNNLLNMNLKMMLLONKLLKKKJHIKJKLMLLKJF@2(L���������������~}~�~~|qe?7m�����������ı��������������pO',.-=�vacnebV##!#����tYRTf��{����pM,/]k\VTLe���MnYzhAB@>=><:!8�����������͡��̾�Ĳ���������Z*;?FN3..6@LD9&#��'$d�뜡���������3E1255424,:20257892VGuq?<1'����t//211222347899:<>??BDEFGKLLOQRTUZ[]_adieQFDCBA????@@?>?<<??>=<=@CCB>?:8964320+	 	8xpUOI\����|������T58,""$'(*9Rfy��~|zxusqnicedegkkpo^ZV}��îO9<>>?DGGIKOOOQSUXXYZ\]][^``[\^\^`abcb^_aceefhhhkpmopsstuusssomsomlf_glttyxv}zxxwwxyyxyxyyzwvurssrsronqooqmqpooppppooonqonpponoppqppnlnnpoppppqqqqqqpqpqpponnnmmmnommmkkloqmlmmmkikjklimkjkifhhilmmlmljjjjjgfgfghgkminjgillhdddbdgheijlnnlljjjjklkikjhffgfiedcbb^ab^^]\\YWWRPTQSUQNLHHIJKGC<98610-$/247;:96>GDFEDCCCB@A@>>><9887344464355469;>=?>?A@@CDDEK]e`accefdfgt}��wlonmljjlljfcllkjjb[Z\^][[\XU[]ZYXZY[[ZYXZWXVVVXXXVUUYYUVUSTVRORWRRWTSTTTTSQQQOOOQQNLOMMOMIMNMLMKJKMMPPNKKLNKKJJJJHHGHHEEEEDEGIHHIGGHFECFE:*#
+%Tsw���������������������������������������������������������������������������¾��¿�������¿���zBJIHHG@306@JMPTRPQRRRSRRTRPTSRQOQQQOPQQPOPPOPNNONNNNOPONOPOONOMMMPNPNNNNMNONNONNMMNMMONONNNOOONNNNNNPQNMLMMMMNNPMLNOMNOOMNNMMMMLNLKMNNLMMMMNNMLNOMMLMLMKLNMLMLMMLJKJIHIJKIJKMKJHF=1"E��������������||~~}~}�}pdE6\|����������£��������������sN),/-)S`M9KRB>:$&" &����`UV`���v����pN..^g[URPUbUZ~tCLEB@=>?<9!1�����������������ÿ�����������\*==@P027A?JE8��)!e�유���������.D3344412*>11247982QExr89/*����r2254356679::=>?@BDEDIJKLOQSVWYZ[]_`ddgig^MDB@@>>>>@???@?>=?><===BBA@?=;9763240*"
+DdSOKBT����������]F<# !!!!" !$$%'+,+,/6C[r���~|{zxwuqoqqrurbWl�����7?ADCEJOOQRSWVTXZZ]^]_ceegelhhkkqrqnptttuuuvvvxzy|zxwvvyyxxuuutspnidbcclpsvwxvsutsvxyxwvvwwvvtqrqrrrroqqqrmolmlmpponoqopoopopoooqqqroonmnpqqpnnppmknnqpqpoppoommnonmlllklmlhgiijjijkjlkllljihkkjkjjikkjljjmllmmmllqoiggillkeb[_bbdeeheedeeec_b_^__[YUTQOZdifdeigcdfa\\[YVVXQPSTTUPNLGFHJJFC<:8610.%!0358:<:6=EBCCBAAA@BABBB?><:665333654555689;<><=>=>AAADH_hebdfegefjs~��rmqnmmlllnkggmmkggb[XZ^`\^\XVZ[ZYVYZ[WX[XXXXSWXVUVVTTXXSTURTUSSUUTTUTRSTRQTROQONNQQLKNMMMIJMNMNLLLLNONKKLJILJHILHIIHGIJGGFFGEFHHGIIFEFDCFF=,$ !Ag��������������������������������������������¿������������������������������������������ý���a9DAA=6/1;GKNQQQPOPQSRQQRSRQSQOQQROQPRQQPQPPOOPQROONQPOPPNNMLLPONMOMNNMMKLNNOOMMMMMLMNLLMNNNNNONOMLMLPPMNLMNPNMOONONNNNNMNNMMMKMNMLMNOOLMLLMMNNLMNNONLLMLLLMMMKJKKJJIIJKJJIJIIJGA8-@�������������~~}}|~�~qfG/Oo����������ź��������������tO(/93*&&#%*!(,+)3.#}���[S\����v�p��pO.+Z]WTQOI@\�l{tHKFC@@?=;5$/��������������ƺ��������������a)9;@M;@9/+HB3~�)#b����������1C0234303+81001478/KA{l>=/#����s335678:9:<<=?@BDGIIHMNMPTWYZZ^]_bbcfhghgdZFCA@?>>@???>>>==><:;<=?@ABA=;;663443/$("	
+@@MNJ:O���������jyeP8   !!"""#$$%&&'))*+,./0/..0/;Qg{�����~~~���}v�����Z:EHKLPQ[\\^ceedfhkklmopqsuuwwuwuvxxwvwwvuvuuutuwxwwxuvxxwwvttutpqokkfginsvtutsrtssuxywvuuvtssqqsrqqqqnpooonononopprooqooppplmlmmlknnlmnmmlnnonopqrrnooqpnmonmmkllmljmmlkkllkhlihiiikkkhhhiiklmmklolmlmnnmllnjlljhgkidcjkkifaVBJZaab`\ZWSPOLFA>;>CFHBFFGCCYinfikighib[``]XWXVUSSSSQOLIFGIJHD>;850/.$#14899<:6=CBA@ABBBADABCC>>>956644335554568:<==>==???ABEL\ceedfegegmt}��qnpppommlmkehkmlghbXZZZ_^^XZ[YXYYUXXXTWXUWXURUXVTTTTUWVQRRRSSSSTTTRPRQQPOMOQPPONNMQMLLNNKIMMMONILNKNOMJJJIIJJHHKJJIKIHHIHFDHEDEHIGFDBDDCDEA3&"4��������������������������������������������½�����������������������������������������������O'234/,5BFKKNOMMPOPRQPQQQSPPRSQRRQNQPQQQPPOOONOPPONMONOPQNMMNMNOMMLMOLMKKMNNNNNNMMMLMNLMMOOMNNNMONNNNONOOMOOONNOONOOONNMMPOLLLLNNNMMLMNNMMNOMLNLLNMNNLLLKLMMMLKKJJIJIIIJIIIIIGE=2';������������~�~~~}}~��tiE1Vp����������ý��������������sN&9haTH@@[dL]a]_ia){���TX�����tc\��pO,+X^VSONMXhYTXSYRIC?@?<:6#0������������������������������e#4;=IGD1((A<(|�+ c���������4D0235436-522/1566-OO�i@:3(����s66899<==>?@BBEGILNNPSUTWX\^]_`bbeeffghjjieWFCBA@><<>>>>?@@><;<<<@A@?@?=;87664622
+	&.)#!"#.69CNAZ����lL��pY{[U.!!"!$$%&'&')*,,-.//0241433334631/27A[n{����������������QSZ^`bgimpswwyvttwxywvxwxxxvvwvuttutuussqttsrssqsrrtsruvvvvvvvwxwuronmnpuy|yurpoopqqtuuuutttrqsqtqppponnnlllllnnmkkklmnjmnmmklmooomoppqqqropoooonkjkimlnllnmlkllollkjkjhjjijjiljhkkkkkllllmlmmnlklmkllliiihegc_bca_cehhikgccaZVX`ihfbOGJIIF>933335;BFFEFFCFSkijjkjhfeca__^YWYXQSUSQPKHFHIJIF@;9610.#%14698:929@@?>@?=?@A>ABA<>>967644345654447:>?==???BABDGL]ddeedfgggov}�smpppnnmlkiejillijb[]\W\^\XYYXVXYVVXYWWXWVUUXTUXVSSRVUVTRPRSRQPQRTQPQOOQRQOPRNNOOLKMKLOMKKNLLNNJKNMMNNLKIIJHJLIHLIGJIECFGGDFIGEHIFEEAACBBBA6%$�������������������������������������������������������������������������ľ��¾���¼������Ĺ�/$$#$"",37:?CGKMNLLONOOOPPPQPQPPPQOPQQPQQONOPPPPNNNNNNOMPNNNNNLNMMKLNLMMMMLNNONNMLNNNMOLLLMNLLLLMMNNOMNNLMOMLKMMMLMNNNMMNMMLMLKLLMMLKMMMMLMMMLNMMNLMMLLLLJMNMMLJIIKKIHHIHHGGHD9-!9��������������~~}}}~��qhE0Sg��������ú����������������rO&<x~ch\eym}oOMOK&v���Ys�i���MPl��rR+)U[TQNMM�����~�dID?@<><7 .����������������������¾������f4<;@E4,%$$"|�*#\�𖇗��������0H2/23013*71./1469-LPfW@80+����q<;:=<>@ABCEHJKNNQSUXZZ[[[^`baabbeihghillkkfPADB??=<>??@@>==<<;==?BA@@?<=:7895552)	86/**,$=.2>?%c���z3F��OA<[P((((*+,-.1225698:=>?@A?AFEHGGHJKNOOPHS\euy������������ucfhhlooqrstttuutuuuvwtsrsrqqttqrqpqqqssqpssrstsqrrrststuwvwwxvuxvssppqrnmquzxtsrrrqqrqpsqpnmmlonmmmnnkmonklknlopnrnopoqrqqpprrtspnnnonoqljjmllnmmkjnklnnjlljkjklkjklmlkmnlklnjlmljikjjiljkjjljkjhgigfhjhgedefggehihikmmlomlmlqqonrllifPFIIHE?:52104:BDDFFFDCJfmiklhehfa_a`YXZXSWXVROKHGHJJIF?<:600."%135889727<=====<=>??CC@>?>:87532335644646:====>>>@?AEGLdgdceffffhov}��pmpoononmmlfikkjig`Z\\Y\\ZZZZZYVWWYWXXUWWVTTXRRWUSSSSRUUSOQSRPQQQQQQOOPSTPMOPLMONLIKKLLLLILLLKNNKKLMJKMIHIGFHLJIHHFGGFDCFHECFGEFGDBCABCCB@@8$�����������������������������������������������½������¾����������������¼��½��þ�������êI! 
!!"#'+.37>BHIKLKLNOOOPNNOPOPQQOPOMOPPQQPOOPONNONNNNOQONNLNMOPOONMNLMNNMNONNLLNONNMNNMLKLNLJLMLKLLNMMLLLMLMNMLNNMNMLLLKKKKKMKJLLLNNMLMMMKKLKKLLNNMIKNMKKJIJJIHHHHGHHE@6)6�������������}{{~��~qhG(0=��������������������������vN"0OPJ12/.1.G4%&)&!s���o�nJj��MY~��sT-)X_XROLMri{swc`UID??<><6 )������������ƽ����������������j 5<::;7.&!x�,%]�������������/C4222213*70//2454+NC?><90&����u<<<>@BCEGILMOPQSUWYZ\]]^`aacbefegkkkkknnlli`K@A?>=>>>@@@=<=<?<<=?AA????<99::86441
+#<<:63;2.0-2!/k���T;E��0#5G82657=ACAFIORVWVWYYZ\^`^a`acbdedddggdf_^ecekq{��������okmlkmpqrstrsuttusrtuvvtruutuuustrssrrsstswuutvtrsrrqsrrsvuwwwxvwsrqprrrhcgjllmopprtsssrtrsroppoopqqprqpmmnpnponmlmnmnllmikmlkmooomnopnnoonnkmjkkjiiihklmmmlklklmnmnnlmkllkjiihkkjkkkkikiljjkijlllmnllnrmnprnlosrrooppoptqpopnnoopnpoijkZGHIKFA;863338=BFGGGDCH\fmkhikfbdca\[YWY[XUSNJHFIJIHEB=:61/-"&25679:8369:;<99;><>AC@?A>;:964333355346569;:====??=BFGPfidbefghfimw}��rnlopmnonnmehkmijk`Z[Y\\VX[TUWYTUWZVVWTRVVSRSRRWURTTSSTTTPPRRONROMQRNMNQQOMPOLOPOOKJMNKKLJKKLJLNJIJJIJLIGHIHFIKIFGHFDDCBEGDADEECEFBBCDCBBCA:&'!��������������������������������������������������������������������¿����������������������;"&+,/89<?BGLPPOMOONMNNMNNOPPPOOPPOPONOOPNLNPOOPNMMMNOPOMLMMNMMNOONMMLMONMLMKONLLONLLLKKJLNMLJJKLKLLNMLLLMMLKJJKLLLNMKLMKNMLMLKLLLLLLMLMLKKJLMKKJIJIHHKJGEFD;1"1��������������~|}}~|qfI$'8��������������������������vQ!(20(+F@(-116,#$! q���oYPV]UYt�rV/)YaYRMKHIBIEMCCHHD?><:96 (�����������̾�����������������n!6:<8653+'#! 
u�.$]����������Ľ�6F3442114*70014576-MA<<;82)����t<>@AEEFHLNOQTVWZ[\^^_`abccfghjjjklklmlmooool]CA=?>>??A?@><><>>>>@A@CA@A=;;;988352.)>DB<G1%5B173735F{��xc_b�jFILQPTYYbbahirnswsqnqqnnqpqpqrrpqoqomqpomiiihhhilpussxzwrqnonmpsrqprussttuuutvwvsrtttstutstutrstutquuvstsrrtsqqrpqssutvvsuvusrqokhhillihillolnljlompnnnnmlmoommnljiilkiklllmkikklmmommonppnnljmmljigjijgilkkklkmpopnlklkkjjjjjkmkknllklkonnopppooqspvrtsrtspqstqwutrstqpsttnqtpnpsomoonnopolqnjlnp]OGJHGC@;75126<?CFFFHFDUhhikjfdjb``_Y[][WSQNJHGHJJIE@<:510-")46669:835889;879<;>CA=?A=;8864223444445579;:>==>??@AAEQehdbdfggfhmw}�~snnqpooonmlikknkjj`ZWV[YSV[VTVWTWYYXVVVSRUUSSTTUUVSRTVRNRPOPOLMRNMQQPPNONLNPMOONOOIHNNKLMJJKKKLKKJIIKKIIIIJJFGIFBEGFCDECFEDCCFEBCGDADEA@B@<;-$*������������������������������������������������������������������������������������ü������@ !"%()-3<@GIILNKKKKLMNOONMNOONOONOONMNPPPMMMNNNNMNMMMLLOMMMNONNNLMNMNMLLNONMOMMNLKJJMOMKKKLKKKKMLJKLNMMNLLKLMLNNMLLLNMKLNNONLMMLLMLLLKKJIIIHGHHGHKIGD@7,2�������������~~||~}sdK 5�������Ǹ�����������������wQ"%1-,>vjhahpm@$" l����qd`dky����sW0$VZTQMKIHGHIHEFHGEAA><;7(�������������¿���������������p4:<84,))'#t�/$b����������ý�9L3453125-74454456/NB<<:70%����s<BDFJKMOQSVXY[]]_acbccdefhjjkmmlnnnmnppqqrqqmWC@><;??@>>>>=;=?>=@@ACD@@?<;::876541'
$),6DIQTP8<ELZCCGASSZe����zw��decffegikhkmmrptutssrrusrusqrqsstnmomnnoqopmlmmnnoonprrqnqoppqssqrpqttstsuvutvuvurrrqrprqqqqqqsrstqsqsqqroqrrqpooqsssrtsuvvvrqnkklnmnolloporsrpprqoomnnnokmnonnmmoonnnpmonpronnlljlmlmnlnklnljllmmnponolmnmkiijifhefifhjjjjlknnqqqstrssuruvyttvvuxywtuyuqvwrrwwotusorttopstprtqqrtromnoonpqpnpoljmmgVIIIHGB?:52258=BFGGHGEN]gnidegcccd]][]WSNNKGGHJKIE?;8410+!-6676::63445688779:=??>@>=<8763224345555689;<==<<=??@AESfgdedeggghrx|�~omoroooomlnjklljji\VYYYYXTWVURUWVUVYVSUUQSTSQRTSTURQSTPMQQNMNMNNOPOPPPNOLIMMKMMPNLHGILLKKKKIGJJHIJGHKLHIJJIIGFFFFFFGEDFGFCBCCFECBDD?BCA@BA>=6#% �������������������������������������������������½�����������û������¿�����������¿��ÿ���D  !"$%')-5=?CHLNPPNNMMMONLLMLMMOMMMPPNNMMMNMLNONMLLMMMLLMLMLKKNMKLNMLNPOONMNKKMLMMLLLKLLLKKKKKLMNNOMKKLKKLLMONJLNMMMLMONMPMKLKLMMJHIIHHGHGHIGHHD=3&+��������������}~}|�{reK /y�������������������������xR$#/,,5ZFSIJUQ3# g������z}������rW1'X_SRNLLIGGIFCFGFFCA>=;6%������������������������������w5;;91*&" v�6&a����������ľ�<N4213135.567643350NC;;;82*����s<EHKPRVX[\]^__bddeghgfhjjknpqoooqqprsttrtuutsmN?>=<>?>>?>=?==??>@@CAC@BA=<:;;97443349@AFIPZ^_SWZZ^SQ[Xabbei�������nfjhihiikjlkjpptustussusqrqqqqpqpooooonoqpoommmmnnoopqqqppprrooqqpqrststsssrrsrssqqrrrrrrqqrrttuuvsuvssssprrqqrrrstvxtvtsqpqoqropmlhcfgkmmlmrpoonpqqlppnonlkjmlmkmljkjihnlhjkmlkmlopnoqqqqqqoonoqqoomjjhfhfecgghglmpprrutuvutuvyuxyyvvwxsxwxuwxuruwtuvwsrsurswuqtusrustrqrvtrrqqrsssqoqqmoqrkprnknqog[QIHIIF?<74346;@DFGGGFJZjiedfffccccZ[VQOMLGGIMLLF><8410,!-7687;:623246776568;:?>><;:7663344345547779;<<<;:<>?ACGUhhcfeeiihkuz��}onpqnmnonmljllmnkk]XZYVXXTUUURUXUSUWTQTUPQSTSQSSQRSTQRRONOPLKNNIMQLLMNNNMKLNMKLOLJIIFIMIFIKHGIIJHIGGILKHGJEHIHEGHDEGEADGDAABBDFEB@A@AACA>>=;7!
+#%���������������������������������������������½��ý�����������������������������������������E  ! #%)+/6;>EHKLNNMMMKKJKMMMNNNNNKLKLNLNNMLNNLMOMMLLMLLKJKLMMLLLNNNMKMMLOLMMLLLLMMMLLJKKJJLLNKJJKLLLKLLMLKLMMLKKKJLNKKLJJKJHFHIHIIIHIHFDB:-!(������������~~�~}}��zqfL-o���������¾��������������xS#"1.**.&&&)'%"!# ^���������������v\0*T[RQMLKJHGGECGIJHDA?=:7)}�����������������������������y*7;;5.)&# $v�5"]�������������;O4432136-978:76662KC<;<;6+����qEORW]\^dfcdegghhkilmnnpoprtvwsuvy{wxy{{|||~��lE;>???>===>@>>?A@?AA@CBAA?=::;864444:ELPRSVWWZ[]^^abclhdcdffp������yejfjliinlknnnpqrrsrqsssssqqqpqrrrqpqppopprppqppqqrrqrsturrsssutrstustutuuuuussrssrsussssstrrqttpqsssqrqqopnpqpqqrmnonqpnqqqqqpnhhjgijlmoopoqpnmnomqmmlknmlkkljlmoooonnprpnpoonqpprqoqponqrqonprstprsrstqutvttuzwxuyyxvyvyywuwyxtvwzvuwyvwwwvxvxusxvwywvutuuwvwuutruwsrsusrsqprsqqsroqqrnqsshqqomopolkaTKHJHFD@;52237=BGGJHGETglddfe`ceb\]WRMLIFEJLJJE>=9400- .6558::60102352344699<<=;:8665233334433777:;::<;:=>=?CFTjfbedfhihksy��|tqsqnmppnllklmnlki\YZXUVUTUUTSTVTTTXVVTUTSSUTPQSOOQRMMPNKKLKJLMLNNMMMNNMOLIMLKJKGGHHGILJIIIHGHIKHIIGFIKFEJGFGFCFHEEFCABEDBACCCCDB??A??A?:;<96%
+#$����������������������������������������������������������������������������������ľ������ĶL !!"'*04>EDDJIIJJKMMLMMONJLMMNNMLLMMLLKLKNMMNNMLLLLKLMMLMOOLMMNLMMLLKLLLMNMMMKLLLLLKNLLKKLKKILMLMMMLKJKKIIKJIKKIHIIIFGGGGIIHGHFB7*(������������}�|}}��{pcI,l�������������������������yY$"2.*)&"!$'%! "$"^���������������t`3&V`SSRNLJGGHFEIIHFC?><:8&+}�����������������������������}E<::5/*'$!$.t�8"\�������������?J7554355/?9:995975&RE?>>B@:x~~zk]acellmqrtvwvxxy}z|}||}������������������y_@>>>@?>=??A?@AAAA?@ADBDA>=<;;:655358DPZYZ\[[^`abcedglheedfegbnt���vkhiimnorrrrrttuuturrsuuvttutstttsqrpooopoqpnppoqrtqppqrsqppqrrrrqrsnrrqsqqporsqpqrqqpqqqppqqprponooqpoqrprrtsrrstppqpqrrssrnlnpopoqpppqqoprrqqrtvtwtroptsprooppqsstsprsqpqqqqqsrqtsvutusvyxwuwyxzzyyz{|yyzywxx~yzxyxxuwvzzzvyyvuvwzywxzvyuxzyvxxvw{zxuvwuuwzvuvvpsuvtrsusrturssrrrsssrrqsptnsopptnnsmiiXGHIHHGD=61025:AFJJKIGLhgdfgbdbbd_XPLLFFFJJJJFA?9212,18558;:5/100141244389:;<;:8674331124333369:::;<;;<:==CGWjdbdgiihhiuz��|vprpnoonllkjmmmmkf[[YXWWTWYURTRPSVPRVVRUTRQTRPOQPOPPOOMMLKMNMJMNMKMMKJKMNLJKKMLKIGFFIJJKJHHGGGHJGHJEGKJGEHGEEFFGGFGEDECDDC?CEA@CA=>A?=?A<<=;9'
+$'$�����������������������������������������������������������������������������þ���ļ������ŸR !#%',/6@EFLPMLOOOMNMKLJKLKKIJLKKJKLMOMMMMOOMKLNMMMNNOOMKMNLLLKLLMOMLLMLKLNMKLIKLJKLJKLKKLLKJLKJJJKKIKJGIHFGFGHGGGGHFFD>3&%������������~~z}}}}�~qbN-c�������������������������z[& 0+)($!!"&%#  #"U���������������v`3"U^Ta\XSKEGHIGHHECB>;;99/)y�����������������������������}k?:=:1*'$" !(6$t�3!&/+*7d�������������F/T?26:7:?@6J=@BGJIPQRpfh]]hpqz}�������������������������������������~}~|}}||}|uU???@@??=>??@??@>@BA?AB@>?>>:7666366=MZ]]___`cdddfdeefgjiggijmonkojkjkjllloqmmprrpooqpnnppqqqoooopnnnmmmoppnpoopoooqpppooopqpqqqppqqpsqqsstsqpoqpqsusprrqqppoqqqroononprqopnnonkpmonllmnppnprroqtuusqolmknnlljmlnnr�yusqrrqqtpqsqttwuunxxvvxzwuxzyx{}|xz{xy{{zxy{yy{{yxvzyyz|xyxzxxwzzzwzz|{{zzvvyyuy{zxxyytxzwsvxwt{zvvuutvvxvvsutyuwvtsvvursturuursuspqrrnrrrmnrpkmqmkkj[MFHJJHD@41013:EOKHIIFJYgdfeebcf^VNJIFEGHJJJGC?8211,19998:84132012124224889:977764331124444467899<<:;9;<=@HWjcaegkjjhkszzsornmonmnnljlllklfVYXXYWRVWQRUSQSRPPTTRRSSQOPOOPRSPNOPJKKKLLJGLMJLPNIILMMMKJKKKHHHECGIGJIHFHHFFIGGGFJIEGEDFHEEGEDEECCBBCBCBABAAB@?>>A><>==<=9)
+&*' ��������������������������������������������������������������������������������������������V!%))*16?HIOPQRMMKKKKJHIKJKKLLLKMNMLKLNLLLLMMLLMNMJMMMNLJLJMOMMMNMKLMLJKJKLKKKKLMLNNLKKKLKKLLKKJIHGGGGFGGFGGIGFD9-#��������������~z{}||�|reK1`�����������ÿ������������xZ% 0+)&%"!#$$$ !%$!R���������������wb5$V_YYY\WRJKKIFGFEDB?;:897'.{�����������������������������sJ8?D;-)&+(!+149<|�@4Redpw{zwr���Ⱦ���������pnteacjnstutzpssrsvyv{�~�~��������������������������������~�����}|}|}{{{|{{z{nK?>?@??>=>???>>@@@@>AA@?@??<7678754:PT]`cd`cdcdddcdbdfghghhiijmkkjkhjkkmnnnmpqqqpqrrrqqpqrrqpooooonopqknpqppnnrqpnmnppnopqqoopqrpsqpssssrtrssprqrturosstrqsstrrtrsrrrsttrqrqprpttstpqrqsrtrttrtvutonlllmrrrqppnmnnxz{xtxywwy{xwy||z|}}y|~}z|{ywy||z}}{y{{yyz|{zz{x{{zz|yz||{|zzyz{{ywyzwyz{yxxzuwxyrzz{yx{zxy{xwxyxyyxyvutwwuwxvqvwwsuvtrvxsqstrpsusstsootrnoqtqqrrnnomomnldPIIJIHG?31104:DHGGJJEAR\cffbbd_TLJJIHHJJJIEA=8200,2989;;:4011/11.02013457876554431113455356769898898:;<>DZmbbgejjjims{��yolplmonnnmkjljkjkeSVVWXXUTSQQTTSQPRRQSROORRLPQMKNOLLPPIHJLJJJKLKJLMMIILJKKIIKKHFEHFEHIIKIHHHHGCGIGEFIGEFFBDFDDECCDEDC@CCABC@@AC@>?<<@>==<9;=</$'&��������������������������������������������������������������������������������¾����������d!#&),39<CIHIKNNILMKKKLKJJNPNLLLLKLLJIIKKMMKKKKMLKJLMNNLMNMLMLJJJJJJKKKKLMNKKLLLJKLKLKJKKHGJHEFFGFFGGGGD>3%"���������������{{|~~}~raJ2\�������������������������w[' -**'$$ "#$$#"$#!
L���������������yb: $[aUNKOMOMKIGEBCEAA?<=:753>w�����������������������������a:OEJOB22=XYC@ACJW]cmr��t}�������������������������|����������}}}}}����������������������~~~~|z��������������~{{~|{|{{|{{xvtfG>?>>>>>??>>?>AABABB@@??=<;:9898969NYaehiffeddfeegfgiklkkmklmllklllmjjnonnnonpqqrqqssqsspnpqpqrsqqrrrprsrtussuussttstuuuttusuttttvvrtutsuvutvwtssuuuvvursuuussttutvtswvutwuvvwxzxzywxuutvsstuvvwvprvwuvz|xyyyx{~{y}}{|}}z{~{v{~}y{}|z|}~z{zxvz|}z{{~{{{{{|||}{{{~zx|~|x{|zy{{wyzzvxzxy|{yzxzx|y{xyxzzzyy{xyxzyxv|wvxvurvxruxvqtwvstuttvvrrsussqqttssqqqqqpputsorqlmqpmlomcUHHIIHD921134;AFHIJJHFEaegdba^QKJKHGHJJHIDA=:753- 3879;;:4-///00.//01345645764232133335534588889:9:;:9:?C\ldcfehijinu|��xnlolklnnmmnjjlllheSUWWTVVQQSQPRSNRSQORQNNQPILNLMNOLMMOLIILLJJKKJKLKJHFIHJKKLKJIGFGIHGHIEFHEFFIFFHECDHGEGEFEFEDCDCCDDB@CCABA?AAA=<>?????=;;<<;5
+'*$&$���������������������������������������������¿������������������������ú���������������¾��e !"$&),3=EDILKKNKLKKMMKLLLLLKJKJIJJLLJJJJKIIKLMMLKKLMMLLKMKHJLMKKLKLNMMNLJJKLLLJJJIIGGHFGFFFGFGEF>1 "���������������~}~~}~�~qbI!6Z��������ɿ���������������{\($.,-($# "%&$$$$$" L���������������wbC++5^bXOLIKKJGEB<=<9??A9:>?<N_}���������������������������xyz~��|{xtvy}�����������������������������������������������������~������������������}�����~~�|��������������������~|||yxzzxwzz{ywz`E?==>>??=<?AAA@BAA@?@CA?=:;?AJD@?=K[diljillhhigfihimmnpnoopqqrnqrpqtrsttutssvuuwwyxwwwvvvvuuvyywutwutvwvpvvuvusuutsvuuuuuuturuvvvttstrsvtstvvsttuwwwxxuuvtuttstsrstnpprqkpvuvwyxy|xxwxz|zz}{yz~}vx}|y|��z{~{x||z}~|~�~z{~|y|}}}}|||{z}}|{||||~}{}zz{|||||zz|~{yy|zxz}yyx{zzyzxxwzz{yy{|wy{{yyxwtw{zvzzxwwzywx|wwvxuuvxtywuvxvwvutvxvtuvrstuqpvwqrtsoorroqtroqolnqonpppjk]LFHHHA8422248=CGHJKKFBWdib^\QNKKHHHKLIIE@:96771#4:89;=<5//1//.--///.1451344123234433233547::9;>99;;;;ACbiegifhihhlt{��xpopmlnonlonkkllkkcSUWVTTSOPSRPRRRSSRQQQNMOMJLLNMMNMNNMLIILKJKKJHIJKIIGIIKIJMKHHGEDGFDFHFFFEDDGHGFDDAEGEDFFCBEBADDBBCAAAAAB@AB???>=>?=<=;9:<:95&
+)1&$=?51���������������������������������������������¼���������������������������������������������q"$'*/4:DIOLMMMONMLMKJIIJKKKJLJIJJJJJKLKLLILMMLLJKKLLJKJJLLKLMMLLMLMLKJKLKHIFEEGHFFGFFE@9+ ��������������~|}}~}~~r`G'@N��������ɼ��������������~|M#'+,5..)'$#$'((&%''&" #?�������������{{ufQLS^iaXJIBCCC:9<:GIP[_b`fiq~������������������������������������������������������������~~����~�������������}|}|~~~~~��}}{{||~��������������������������������������~}||{{{{{{zyxyxyxywvxq[B@>A??>@>>A@AAB>???@BA>ANZddbRGCJTcknqnlmmllkijmlnpqqsqrrrrruqqrqrrutstuuuuvwwuvwvuxxvvwvwvwvuwwuwwxxyxuxyxwxxwwvxyvxyxzxy{y|zxxy{zzxxxwwyz{xyyx{zyxxxtuxxvuyytrxzwv|{xtxz~zy|~z~|z{}}z}}~{}~|z|}}z}��||z{}}||}~~~zz{{{z|~{}||y||w|}z{{|{{}|zz||{|{{{{}}{{z{{|y{{{z{z{yzy|vz{zvzz{xxzzxyxyxyz{y{xxyyxx{zyyyvwxxuuwwttxytuxurvwustupputsqvvortupopspqrsrpooqponropnnicPMIJF?7322215:=DGIMLFCQif\ZOMKKIJHJKKHF@97445/$5<:;=>=5001/-.../.-.1332332341133123112438989;:8:;:;;>Caiffkijkkimv|�vkpnmnmlnoonklkmijaOTURTSQQQSTQQRSTPRUPNMNMKLKLNLKKMOMIJIFIJGIKJGGJKIHHIKKHHJHGFFEDEFFEFHFCCFEFFGEBEBFIGDEEDBBCBEDBBBACA?CA>?@>>>==<=<;::98;<86)#.%#HVO��������������������������������������������������������������������������������������������{"&'(-1:AIMOMJJLKJJKJIIKJJKIIKKLLLLLILKJJKLLJLLKJKKLLLKKMKMLJMKJIJJHGIFFEDFGHHGC<2#
}������������|}~~~�~��t\F2M[x����������������������zwiD@S\Y;'+)**&%'&(&-6AGCO_i����������xvpntxtsw��{qqopkkijdhimstswz~~���������������������}�������������������������������������~~~��~}~������~}~z|~}{||z|{~~|}~������������������������������������������}�~|~|{y{{wxwwyywxwvvvwoR=@B???A?=?@C@@??>@ABDQgvrtqgebYcmrwxvonnmllkmkllnomnqrsutsstssuttuyzxyz{z{zx|~{{z|}|{|||~����}||}|}}~~{}}{|{|{{}{|{|{yzz{{{}|{yxzxxxzuxxzwvw{xw{|zxx~{z}}~z~zz~~}|�~}z|~}|||}|~|{}}|~�{~~{}~~~}~~~{y~{|}~}}~z|~{z~}{z}~{|||z{~z|{{{|}|}}||||||}|}|~~{{|{y||wyz}z{z{xzyzvzyyz}y{xzyy{{xy|{y{{vvy{xvx{ywxzusuxuuvwuvvwsuwutvuttssuuvttsttssstqpmrsspstqmpronpqikoniihXLFFE>62133458;AHJKLKGG_e]QOJIHKIJKKJE@:6223,$6:99<=<4030120/.//./1112330242221221032348899889<:9;;>Gemigjiimjjnw|��tgpmmmmlnoonmllljkaTSTSSPPSNNQPMOPPOPQNMKNMLNMONLLLKMMKHJGHIJHGHHHJJGDGIIIIIIHHHFFEEFFDDFDBCFEEDECADDGFFEBCDC@ACBBBA>?AA@??==?@>=>=9;=989968;66*
+ +#&Ri��������������������������������������������������������¹����������������������������������� #'-16;CHMOJMKHIKIJKJJILKLIKJKMIKKKKJKKJIJKKKKIIKKKKJKJKJIHFGHFFGFGGFED:-{������������}{~~||~}�~p]E!Onv��������ú�����������mmns}����jae^]`aeimouy}�����������������������������������������������������������������������������������������������������������}}{z}}}}|yzzy}}�~}~��~�~~}~~~�~��������������������������~��~���������}||{{{xy{z|ywyzywvxxxywnL@>=?>=>>?@@???DEEO_ivvwsomopqoru{}||xtsqsrqurtuuvvwyz|}~|yyyz{{{|}||��~~�~��~}|~����������{{{{|}zyy{xxzxwwxyyuuwxvuxwuz|yx{{|z|~|}{}|||}{}�}}|}~~~~~�~}}}{}��}||}{{}~|}||~~{|~|y~~}{z~|}~{}}}{{{{}�}~~z}}||�{||~~}}~|||}}|}}|}~{y|~}}|}|||}{{{~~zz|zvy{{zz{z}{{|}yxy|y{{|y{xyxxzzwy|zwz{wvz{wuwyxwxzstwwvvtwwwsvuutvuusuvttuvsrttrtutqssqkprrpnrqjmpnnppmmnnjklfZLGFF>644445459@DHMPPJG_\SNJACJIJLKKFA:5232(&6878;<;524//1/.///,.0/10111122221323343378779888988:9<Iciigdehlkhmw~��simljnmjjllmmkljii_SQRRSPPQOOONLPONNNNMMJMONLLRLJLLJLMJFFFFFGGGHJJHJFEIIFFIFCDFGEECCEEECDCDDFGDBDDAABDCDEA@B@?AC?>@@??@AA?@?<<@<<<:8:=:8:889:980
+
+
+& *\���������������������������������������������������������������������������������������������% $(-/8?DILOLONKIJJJJLKJKJIKKIJJIIJKKKLLJKMIHHGIIHIHFFEGGGGIHEC@6's�����������}}}�}}{{}|~~r_@+z�����������������������|w������������������������������������������������������������������������������������������������������������������������������}{wxz{z{yvzzz{{{|}zzz{z}||~}}|~|}||||~��������������~�}~~��~����������~�}~~~~~}~||zzz|}|z{||}�~}{{zeD>>>===?AA?BCBHXhuuvwusqotywwvxx{}||z{z{z|~~~}|}~~~�}}~}}}}~}~|}~~����~�}���{y{���������{yxxwzzvvz�|wz||}z|}|z|}}|y�~~��~~}|}�~}}�~�}}��}{}��|}~}z}�}{|~{|~}z}}||}}|}zz|~}}|~�}|}~|||}~~}{~~}~~~~}{{�{}~|{|~�z}�}{{}|{}~z||}~||}}|||}zz~}{||zz{{~{{{~{z~zwx{xy{zwzyxxwxzyy{{{z{yyzywxvxyxwxxyvuvwqvwwsvxtquurstuutwxootvsuutqttsoqruqnrrnnpopmnoolkmkhih]MGJF?966544559=CNQPTJCLRNLFDJJLOOMG?96230'&69989=<54510/./0..,-/011100/132113223434646787888889;?Jiiefb`hijkpx���kjnjkoolknllklljjl_PRTQSSROPRNNOOLPOKLNMIJKMLLNLKJLIIKKGDFGFGIHGGHFGGFHFFHIGDBCEDCEGDDEDDDEBDFC@CE@?@CCDDEA@AA?AA?>>?@=><>A>;;>:;<;9:;:9;:;::982!
+
+
+
+$
+0�����������������������������������������������������������������������������������������û��) %*04=FHFHKIIJIIJJIJHIIIHIHJKJLLKJKIJIHJHHGEFFDEFEDE@;2*n�����������~~~}{{||}~qaB'~��������������������������������������������������������������������������������������������������������������������������������������������������������}wsmqtxyzyx{xz|{|{zz{{{{}~}|~||||~|{|||{|}{|}}}|~~}��������������~~�������}}|~|}��}}z|||{zyyxb>A>?>>@A@CN\gmpqrnllmmsuzyvssvxzzzzzy{zzz{|{||z{zz||{y{|zzyu{{{y{|z|}~~|{yx{y|{x{||z}�}~��{|~�}}~||~~|}}�}~~�����|~{�}~��~|~�}{~{|~~||{~~|~�{|}}|y|}|z{}|~~~|y|}|{~}|}~~}{}�}{~�}}�~|�|~|}||�{y~~z{~~|~}z}~{{{{y|�yz}}}{~}}~~z{{~~z|~z|~~z{|{}|xwx|z{{}yyz{zwvyz|zxxywyzzx{{zy|}zwxzyvxzvxxywuwyusuyrvvvwwtutvvvxutwyvurrruuuuttsrssqosuooprpnmpomnomlmmhjlh`WLHGA:75433258;@KJPOJNUPKHJKKLQULG?96331&'6879:::355131.0/....0///10.-/10321122435658768:8789:;?Dkjfc``ijjkqy���kmnjmqrpmollkmklkm\TSTOQRPMOPLLMLKNNKNKMKKJLLJJMLHKJIHHGDFGFFHIECHHGGFEFFDGGGCACB?BEBBEFCBA>?A@>@BB@@ADCBDB>?@=?A?><?=;<=<<<=<===<:::79888998763%#�����������������������������������������������������������������������¿��������������������+&,/7?@HHHKJHIKIIGJIIJKKJJKKIJKJHEEEDEFCCD?=5)#
h�����������}�~~||}~}sa@'n�������������������������������������������������������������������������������������������������������������������������������������������������������xl`^cimrvwvyz|}~}|yz{{z|~}|z{yyz|||{zz|{zz||�������������~�����������������}~~������~}}}|~~|y~|{�|}}|{\>ABB@CJUgu{zxvtvsponovuwtommpuxvtttqssuurrstrtutqtvwuwuxsrtzzvw{ywz|}z}}}{~|~}~�}|��|�~{��~~~~}��~{}{�~}~�~~~�~y~~~{|~�}{~{{z}|}~|z~}~}~��~~�|}}|z�|~|z|||�~|~|y~�{z{�}y{�~{~�~|}}|{~�}|{}|~}~�}~~~~}}~}z{~}}~}||~}{z~~|||{z{}}yy}~{|~}zz{zz}}x{{}}~|{zyz{zwy||xyzzu{|yw{|xzyyyyxzvuyywxvzxwwyyvsxvwtvxwtwutvyyutvxstvtpsutrqrsprroprsopnsqpnppolpmlmqlhjkieXNIGA:765455548<<DLJSUNJIKKKMOSMH?95021%(687::<;345550.0-/10////0/0/0/-2421121334579768767779:>HghfbZ`kjijpy��~lmnmottrppllkjjllm]PPRPOPPNNNOOMNONLMOKJLLILLIHLKEGJIEEGEGFEFGGEDEGFFBCGECCCC@?@A?@CBCECCBBA??@BAAB@=@B??>A@>@=<=>=:=>=<>=;:;;;==98:857776797665*  ���������������������������������������������������������������������������������������������- !$*1:?CGLIKIIGGJKIHJJIIIHHHFDCFB>71'
c�����������~~��||}~~}r^A$h}�����������������������������������������������������������������������������������������������������������������������������������������������mOOSYaeiqvuz{|}}~~}}}}~}}{}|{|~}|}~}~~������~}|~|~�������������������~�~}���������~|zyz}||�|~~�~}||zv_IFLSckuzxwwvtssrokkhmklmb_Xamononpomkppnlprptuvsuxxvvt{ust|yvx|yxzz{y{||{~}~|~�}}}~}���~�~~�}}��|~|z}�~z{~~{}~~z||}|}}~}}|}z~}�~{{�~|}{|�}�|{}}z}~|}~{y|~yy}�||~}{}|{z{}~y{~~}~|||z{}}{�{y{~||~}z}�}y~}{yz|z}~|z||zy}|z}}|{|||}|}}|~}zx{|}{~}|z|xy{~{{z{yw|}wz{{xzyzw{|ywz{yyxy{zwxxzxvwyuxzxvx{wtvwwruwttxxtsvwtuvwnrvsqqrtprrrqqqpqoorqnrrrminolonmjmnfehlgb^OIGC:46643339:68<HRUMJKKKKLNNMG>:4.21$)88699;;56896222000--//010///0.02023202324564366676789=IjhdcFUjjiipz��}qmikjprpopmllmnligVQPPQPNNONMLNJLNKILMIHIJHJKGGJHDEHJGGHGFEFGDDDBCEDA?BFECBABAAABA>?CCA?@?@AA==>=>?>;@A>><AA=@A=:<<:;?>9;::99:;;:867655796677357/&&������������������������������������������������������������������������������ľ���Ŀ��������; "(-4;DDEIHGHHGEFFFFDBA>94+!
^�����������}~�}||~}}}�s^E!e�����������������������������������������������������������������������������������������������������������������������������������������������������}�{WCDFBFUdntuy{~}�~||}|}|}|}}{|}{}}}||}~��~}~���������������������������~�������������}|zwyxy|}~}}|{{{}|zz{xwvvwnkejkoqtqsppnoqoomjllmnnpoonqorpoorqpntspqssruwxuxyvsxvwruw}yyzzyz{yzz|{}~~{}�~}~������~}~}|~~~}~{}~}}~}~��|�{~|~}~~{{}{|�~||�}{z~||}}}~|~|~~}}~}{z{}{{}}~}|~~||�|}~~~~��|}~|{}}|{�|y{~~{}|z~~zx~~{zz�}}}}||}}|{{{~~~|}}{~}~~|zy}{{}}yxyywx{{vx{}yz|}{{{{||xwy{xx|{xwxww{}xwyzwuuzxxxwvxywsuuvruvuuuuutsttwvvuvssurprqroqtpmopnnrqnoorkimoioonkkkfghkhigfWNG?7677358:;889ATTLJLKKLKMMKF>94/10$);9699::59<;764211.-,---./000/002121003334565556887799<LmiihWYljiiqz��rpjlnqqponqoklnlhf[QKOONMLNKJMMJKLLJKKKJHHJHIIIHGFEDGHGDEDCEECBABEEC@AFFBCBAAB@>?A>AAA@?ABA@A@=??@>????@?=?><;?=9::::;;;;==;:;:879866654663564691!".0(���������������������������������������������������������������������������������������������>&/5:>CDCA@A@;50'
+V�������������~{{}||}}~t]C!`~���������������������������~���������������������������������������������������������������������������������������������������������������������~ztRACHA=?Xmrty{}||~�~~~}~�~~}~~��~�����������������������||~}}~~�~��}~}}~{|}|xvptswsvwywwtwutwyxutuwwxwurtsvvusttutsruuqqopmpppkooqmonponnrpmmqplqrssutuvyvuv{wuu{ywy|xwxzzxxy{z|||z}~~|}~�}~���~~|~{{�}~�}{|}}{|�|~�z~~{}}~}�}yy|}z||}|}}{{{}~~~|}�}}|~}|||{{}{w{~{}~||}|z~�{x{~|{}|y{~zyz||}~~{z}~|}||||{~~|~|z~~~}|~||}~{zz~{zz}|z{~{{{~{{y|{}}|yzy|{zy{z{x|{{xz{|xxzzvuyyvxzwvwzwxz{yzxyvvtzzzvvwxwututvtuuuvstwuoquvqqtuprtrpottmprplmppoopnnmrhompommmkjlihgfhkhhf^LFA864259<==<;>JRMKLLKLLMNKF?83.0/$+98889985<=;8632110/-,,..,.///11331002122234643665469:?Qjjhfbcghkjqz��}nmlmprqrqqpnkklligVPMNKMOKKKLJJJJIKJHHKLFEIFEFDBEEDAEGECDECDDAA@DDBBBBDB?AA@??@;=A@ABABA@@>?AB?==>;==:=?==@;:;;::89::8:<9::75:9878966655796577440##278&���������������������������������������������������������������������������������������������;"*/3;94+ 
+
+R�����������~}}~}|}~}}~t`F#a�����������������������������������������������������������������������������������������������������������������������������������������������������~zw}\9BKM=8Nepty|zx|}}|}{~���|}{~||~}~}{|~~zzzxywwwwxyz{{|yxzz{wyyvyxvxwxuuvwxxuvtwvxywvy{uxw{wy{~{{y||{{|z|}zy|}{ywvyuwwyvuvvsqqsqpoqnmopmlmnnmommooqsnoppprrpqtvrtuwruyzuuvzwvyyvwwzzyxx{{{{{z||{{�~��}z}�}{}�}{}|z||{{~�}z~}}{~||}~|~}||{|~~{~|z{}z|}|zz��~z{|y}|z{|{yz{}}zy}|xy}}}}}|{{{||}zz||{|}~|{z|}{z}~�|||}{{}|{}~{|||{|{|~|{}~||}}zzz|{yz}|}{{{}|~}|z|~{{{|y{{zxzz{yz}{vyzzyxyzwuwxxxyxzvyxzzzyzwwxysxzyuuwytuvtquuttuvppvvpqstqsrtsrttpppqooptqmoqpmoqqlmnqlmomijmifikedhjhhhi]RIC94047:=AC@ALRLLLJJKKMOKE@;3/1/!,89998:86=?>;9753322/-./..0.-031110//11122336446655576=Oljfedejhjnt|��}ppkmmpqpnonnmllkkeVOLLJKOIGJLHGIGFJJHIJIGDFEEDEEECECDFEBACB@A@?@DA@AA@AA@AA?>=>><>???>@A?>==?@?<;=;:<;<:<>=:<:79:9::86698898467876863452153255030%(! !.5A0���������������������������������������������������������������������������������������������E
+L����������~~|}|||�tbC#`���������������������������������������������������������������������������������������������������������������������������������������}��~~}}}{zwttyfH@ETA:L`mvxyzzzzwwxwwxyxxxwtwvursttsspopppopooonmklkkmjghkjkfjolmlmnlmnpmoqtuuuuuwwxvux{|z|~~z|}�{{}{yz}{y}|zy{{xvwwxuwwwttuusssqoppnonomnkomooqmlnponoqpknoqnqssssptrvwxwvvzxyxwvyz{yyyxz}|{z|z{}�z��}}��|{}|}}�{z|~|}|{|~�}|~�{~�z}�|}~{{|~{{|~{yy~z{|}wz|~}yz|xy{{|||{y~~}~|zz{y{~~~||}|{z}~}}{~}{{~}}|z}}yy|~|zz|{{}~{{}}{zz|{z~}|~}|z|~}~}{|}}{{|{z||~z{|}zz}~xz}}wyy|x{zzx{zzx{{yy{xyzywwxxvxxxwx|vvxzyxszwvwurtvxqtvxrwurrusrrstrtutttqrsurrusooqrlmonlmrniopklpokkkljmmlhimiegigghkljeiibUI@932469?FBBQSLMKKMMMMMKE=8301- /89:87975>CA<;:84221..0/.0//.01//001111323332254134446>Rmhfeggkjknsz��xonlnnnnnkkjjkkkjmdSNLLHJLIIJKIGGHKHIIFEGIFEHFBEEB@BBBBB@@=???@??@@BAA?AAA?>=>;;<<=><=>@@@>;=>?>=::<;:==9;=<9;99:98888969996686564465345423233222/'%('+*'&-1/.����������������������������������������������Ŀ���������������������������������������������S
+
+
+H������������~~}|}{|taG$^�����������������������������������������������������������������������������������������������������������������������������������������������~|yyxvvvuurhMCF>CPfnpsuwsqttpnpponmlkjhikhiddhiheacfhhggecfhegehhddeijgghhhgikhgjkmmopsstuvuvuxwu{{{z||}y{{}||~yy}~{{|zz|zyy{wywzvuvusssusqonpononlmmokknolmlnnnnonmmmpqtsrrwsxvzwwv{wxx{xuw|{vxyxw{{xz{}}z{~~|~~|}}~||}}{�~~{}}|{~|{~~y|~}z~}w|}�{{|~x|y~|{y~|yz~|}{}y}|~~}||{}}|~~zz�~{~}zx{~{{}~|z|}{z{||y|~{y{~||~{z||yz{|}|x{|{{~}~~~{}}|}~~}|�||{~}{~�||}}yyz{yy|~{{||{|}|x|{{w{y|{}zyy|z{z|yz||y{{wtvyxtvxwxy{wuvyyxu{wwvvswswvvvvwxtuwxssrrrtwrqutnosxlppqnlopmmqnmornknqnklnmnlkptljmjjjfcdjiejkkfhib\XLA82159=BADQPKLKKKLMMJJE;83./, .89978;87@DC>==9655201./01-/10///.01//0203322453133358=Ujhigfijikns{��ummllllmmmmkjiikjjdSOKJKIIJJFGKHGIHFHHFFEFDDGFBABABAABA>AA>=@AA@@??AA>>?A@?=;;;;;=?><===?><;=<:<<9;<:9:<<;:889;:;98848967885475255443354441230032-( /.&'),2.+21/���������������������������������������������������������������������������������������������X
+
+	
+
+E�����������~~~}}}{z~}tcH'd�������������������������������������������������������������������������������������������������������������������~}|zyzwwz|{z}{����~�~��{yxxvwsnopmjlhdYKJGOU]a``c``e`a]bd`Y^cbbad`bcdbcddb^]dfceffeffhcggjfdggge`ffbbfihdejjjkoopqurtsvswxvwxyz}z{{}}}~{}||~{~~~z{{{vyzyuvyytsutqqrqqpqonopmmjlmokomnmolmmpnnoonppqsprtvptvxquuxstwxwvtwwtvxyy{wwz}}|{|{{{~}|~~}{|~{z||{{|{zz}uw|}zy||zz{{{y{y}}{z}|}y~}}{~}|y{z}|}|~||~~{y{}zx{~z{~|z{{zv{}{zzzyy{zzzz{z{{~|yz}}||{z{{zy}|{{~yx{~|}}�|y}{{|~|z|z{}}{|||z{z{yzy{y||}}}||}~|z{|y{z{xz{|xxz}vyyzwxyywyzwtwxxuvwxzzywwvwxyvxxyuuuvtuxwrvyupstrpsssqstprrrprrslrnprnnqpnopqnqomklpohnopilljgejhgifcdggcgkjdegd``^O@9248<=>DPNLKKIKMKLJJC872./+,88779=:9DECDB?;986333///.--///0.-../00103324533223578<Yjjjkijkikkt|��vmolklmkkmkkkiijji`MKIILIFJIEEIFFGHFHGHHDCCCDEB@ABD@@BC>>=>>AA@>>>=?@=<>@B?>;;;;;<<;=;<<<=::;;9:;;<:8:8:<97997899766464454333453465//321222120132+(
+
+
+
+*56(#'-,1�������������������������������������������������������������������������������������������½c
+
+
+	
+
+G������������}}}||}}}~wcJ-c�����������������������������������������������������������������������������������������������������~���}{|||vxyuspmolkihkjijigglodeejhaced_a^Z]^`^a\`][XZTRSXVYY[]]Z\_\^[c`^`cb`bbb`cecacbcb`cgecffffghidfhicehifecffhfhjfgklhhlpnqqtrssvvuvwxwy||x{|~|~{�y|~}z{|{zz{y{wzzxuwxwuuusswsqrponmnnlmkmmnmsnknqnlnojloonpqpqoruuqtuxqwtxtwwvwwtvwxxxx{}xy}~z{~zy{~zz||zz{}{yy~{{{{|{z|v{}{{|}~|}~z|}}y||}z|~|yz|}z{||xzxzx|x|zz|~zzz|zy{~~yz|{{{{|{||||~{{||zxzxy{{{|}{{y|{zz|{x{~{zyyx|}||||{|z~|{{~}}|~|~�{{}|||}{zz{z|}~||||zz|zy{|y{{{txzywxxzvzxzxxwxy|xwyywxwuvy}xvvxwuuxrvuusstwutuvqtttqpstsrutstrrrorttorqqmosoknqnloqkkpnilnnemnoijkmlfihhiedjbfgkiiffdegd_^RA:558;?BQMJJMKLMKLHI>430/1+169877:7:GGEFF><>:8673000/..-.0--/0//00122013113322455>_kkhkikkjimw|��vlmlkklkkkijkjkljg_QIHJKHGIJIHGFGGGHGFEGCCDCCDB?>@A?BCA=;<=>>A@?;=>=>?>=;??<9998;;:;=;;;:;;9:<:9:<987977866787688756554552242362033/.01/.02101100/.#
+
+
+
+$.8/!#*���������������������������������������������������������������������������������������������f	
+	
+
+
+@������������}}}~}}}~�ydO9V{�����������������������������������������������������������������~|}yy|zxwxsoqsrppqqnsvqvrqniefcfeab^\^aab]``c][^_\^]__^^]^Z\]\\[WZVZ_YXS\[ZY^Y[Z_`[[ZYW[[YUVXZVVX^[WZ\]\\^a^\`a_]`b`adb`[`a`\_bb`beeceffdcggccejhbbbdbacedcfihdikmlqssqsssqsvvvxyxxy|z~{|{~w|~{z|{z{{|y|y{wwwzxvtusrvxqppqpmmnmjllnlllnikmnlpmolopkoppororrttvstsuuvuxstuxssvxvuw{zvy{|yy|~xy{{y|{}yzy}||y}|}{y{|{zy~{yz||{||{y|||y|||x|zyy{{}x{{{x|xzx|z{zxz}}{xz||zz~zxz|~zy{|yy{}ywz{yzy|wyz}xz{|yyy{zyy|zyy|{zy}z{{{z~||{}z|}~{z}~{z|}yz}yzz}zy{~x{z}|zyzxz{}yyy{y{{{y{zyy{yyzzxzzxswzwuvxwwwwtsxzvwvwvuuxswttsvswwuqutvsrqoquurrssqqqrkpstnnpplnpllnojmmmklmkknlmkllljjimmgdhkgdehddikehic_dfa``fWE7467<CRNJLMNNNMLHF7..001+07877697:HIHIIDB@;:7644101/,+.0-..-,...00//10012222446>dojjiikmjjov}��vnmllkkkkkjjkjjjih_QKHGIHIFIJFEFGGEFEBDE?>@BBB@>;>??BA??<<?==>?>;>><=<:;;=;8:;:8:9;=99:9889879:97865587895446535773453367235213212210/21..//-/.+-/-%
+
+
+
+			
+%-2!!���������������������������������������������������������������������������������������������r	
+
+=������������}}|}||~vfM7]}�������������������������������������||xvswurpmnmkofhbbj`adbcc]]a[Y^]Y\Z[\X]Y\SW[XRSSSNQWTRSXWUPQUTSUWVWY[XZY_]\\]\[\\]YZZ][YZ]\YY[^WYZZ\XY\YZ\^\Z]ZYWYYWUUWWSUXXXXZ[\Z\\\[[^\]^`^__b^^_a`^^bbb`bdbcggdefjeccgda`abbbdcdcfghfgllnnqrrqttrqvvuxyzx{~x~}}{}}|z{{zz||xuzy{uuvxussurqstnopoononljllmmojlkpmmmqmnmnokmqpnppqqttvrrrtstpvtwuwstwuuuyxwxzz{zz|}z|{{|}{{{|z|~z{||yyz|zzy~|yxzzz|||{{zzz}||{|z{y{z{z{z|y|xxz|xy{zxx|yvwzzxy|zwz}{xvvzuwzyxxy{yzyzx|y{z{zyy|zyz{yzz}yyy~z{z{yz|�}z{z|}~|z}~{{{~zz{}{|zz{{z|y|yz|}yz{}z{|}zxzzzxx}wxz{xyzxxyzzuwzyvvxywwwuuuxxywvuvtvwyuttuqwvspsttqqppqusnpqrqoprrrrspoonormmommmomlnojjklikmlgjljgjlhbgicbegdcgifggd`ceda_a\ZH<547@NMKMMMMNMLID8.-/22+14777895;FMNHFFEA>>95662010-,..,,-----.100221332123456@dmijgijiiknx��tolkiklmlmllkiijhi`QKGEFFFDGGCDEFFDAAAADBA?@AB?====?@=>>9:=;9<;98<<;989:;:979988769:88:9855689:96685367864545635442443336435333120./.-./.---..-+,--.9J$
+
+
+			
+#-)%'&)&$ ���������������������������������������������������������������������������������������������w 
+
+
+
+
+
+
+
+
+
+
+
+
+4������������}}}}||}||}weK=Sp{zw|��xy��wsnhhfdaeba[]\TWVUPRT_bb[OFFGCD@E?@A<;@BA9>::>HRNOSTNNTTSPKTUTTTUUUWTTSVUSQUSRRUVOQSVRPSTSTVTVVYZ]Y[]^]^^\\Y[[ZYZZZXXZ\ZYXWZWXYYXYWYWY_]ZYZYXUVUTTWWWRUXVVYYWZ[_]\[^\\aaa]``b\^^a]^`c``ac`bdghddeicbbda\\cd`bcccagfdghkilnqmqqrqptvuuwyxuy|{v}|}w{}~zxzzyz|{xx{yzxzwwvtstsrrtrsqoomnmkkmjknpjlloiklmjjklljjnnnooqptrsrsqqrurvvutvswxtvvywwyzxyz{zyz|uxz|x{{{z{{|xyyzyx{|z{x|{yyx{}{w{zzyy{}zz|{wyx|xwz{yzxzvwzzwywzxxwwvyzyx{{yy|{xyyxyx{yvxxyxx|{yy~yyyzyyz}ywzywyz|yy{|uxzywyz{xy|y}{|~|{|x|z~|{||~~ywz}yy{{wxy}xwy{yxz|wvwywww{wyyyxyyzyz{{wxxwwvwyxwutvuvwvsstvstvwutrtnturprqopqppttpppoqsqnpqqnmpolkpqjlpkikokkkmghjkiikmfjlhjjifddedededdehggdccaccc`^\]XL=45>OKKPNLMMMJKF9/./12)36556895;HNNDFIGAA?<8884321//.--,,/.--/10121/011122256Cgmijhihhhkov}��tplkklmmlkjklkkjig^NHGFEEDDDDCEDDBB>?AA@BA?@B@>=;;===;==;:;<9:;;9:;<978:::88777789898888965885586566355543353324444411105A21211.10,,./--.--,,,.,+*+/\yy3
+
+
+	
+			$#+.-+-)'"���������������������������������������������������������������������������������������������� 
+
+
+
+	
+
+
+
+
+
+
+
+
+
+1�����������~}~}}}|}{{}ygNAB:6=>7<:597729DGA9<AEA>CCA@DFE?FPXXOA==>>A???>>?>AA@=;:;BKLMMNPNNQSRPPQSRSTQSTUVTTTURSTUSRRVUQRTTPRVWRTVVUTWWYZ[[]ZZ[ZZXYXXVZZZZ[ZXY\WXXTXXYVWXWW\^Z[]]\XYZUTUXWWV[ZVUYZWY]`Y\_`]^aba\_^^]^\][_a`^_`__`bccadcdabaa`b^bca_cbabcc`ghheklmkppppprstwyxxwyxyx�|zy}|z{{{{}}zxyzwyy|tuwvssttqrstokmnkkmlljkkkglnmimlkhkmjkllnmopoprtrtrtqrsvqtvxurtwuquwwvuvvwyxxwyyztzz{{|{z{|y{{}wy{{y{{|zzw|zwy|yvyxyyz{|xx{{wyy|wx{{y{xxwyyyxywyyzxxw{{wx{yyzzwvxywvy{wswzxwx{zxyzxxvyyzw{zxxxvzzyx{{zx{yww{{|{{z||}yz|}yyz~yyz|zx{}yvx|wwzzxxx{xx{{yyxyxxx{wxvwyzyyyyxy{zyzzwvuvuvxvvtsutuuutvtxuurvwvrututssqoqsporsqmpqmoqpmmqpllnnkjmoklnlllnjkjnlijkliiklkhilhfefcdde`bcccgigbdd`_`b^]]][ZSA7<NKIONKKONKIF933200)35455677?JMLIHKKFB>@>;87621/.-.---/../00/1221/11222356Cjkijgihhjjnv|��rpmklkkmjjjiklklhh]MDEECCBCCABE@@A@>>?@@???@@?=;;:;=<=>:;::<;89:99:<758;99::778767656964754575695554434441342132201010/02[T831/.11-+.0++,+*)+)+-+**.\}��N
+
+
+
+'26.%'(%����������������������������������������������������������������������������������������������!
+
+
+
+
+
+
+
+2������������}~|}}|||~�{fLA=,+24175799969@@?;??@=??@?>>?BGQVTI>;=><>>===><>>@>;9;;CJNMONOQOORRPOSUQRTUQSVWVQSUVRRUUORSSRPPQRRTSSQTUUSTYWXX[ZYYY\YYYZWVU\\[YZXW[ZYWWVXVUXWWVX]\X\[ZWVXYUUWWTUUWTSUWYXZ]ZVYYY[^^_`^^]]\_]]Zb`]_`___ae`ace`bbda_bc_`__`_]`abbdgefhljmkqpoqsrrtwwuxz{yzz�{xz|z{|yxz{zxzxvuxwwststqprrnoromlmkhklikkmkkjqlkjnjjkmjijmmkkppmnqrnrrtqrrqottvsrpstrstvvstwxzxyy{x{x{y{}|zy|~yz{|twyyxxx|zxxzwwyywxzz{{{yzyxxyxxw{xzxyy{xvwyuvxzwxzytvxxwwwwwwxxvwwvvxxyvuuxxxwyywwxwwuyyxwwy|xvw}{wz|ywy{uvu|zzz{xy{{xyz|xy{}xux{ywy{www{xwu{xzyzz|xzzzuvyzvwwwtvxwwxyxuwyywwxvutvwywuuussttsstvsuwwstusorrurrromprompqlmoomnqpmnpommlklmmollkmnklmogjmjfhlifhkgeghfeefacdeaddccfgebccaca`__]]_\XUE<SMKLMJLMLJHC:4311/'15344467?MHIKJHIHA>@@;7662100-,----..0.-.0022121212224Dkkjjhijgihnv�polklljkjljhjkjjhj\LACCBACBBCBCA@?@@><@B>>===<<899:;==;7:::;:98:8:9:85786787678644556843553565686521123430340/320-..1//34[p`>/1--/-++-+)+*++,**)++*)Ww���e 
+
+&2:4( !"����������������������������������������������������������������������������������������¼��Ù)
+		
+
+
+
+
+
+
+/�������������~|~}|{|ygNB<.)01154848889@?=<A?=>@<=>@@AJQSPF=;;;=<:;:=<=>@><:9:>DONLKMNOPOQQQQRQRQRRSRUVUUSUUTSTRQOPTRRSTRQTWRSTVUSTXZYXXZXY][ZYXXYWXYZYYWWWVYXVWVSUSUXVSUWYWV[ZXVVTVUVVTSUVTUWWWYYZ[XX[ZY\]]^__]^`\^\]]_\\_`^^_ab^`cd`abc`^aa__`_`]^`^_begbeikijlommpsqqsuwqwyxxyw|x{z}|{{z{{zzzyyvxxyvwuwutrsssptsmlnpjillhjlnhklohglnjijkhijkjijlmlnormrrsqssrqtsutvqststsvwvsxxxx{z{wxx{xz}{wwz}yyxzxyx|z|z|yyyxwyzwvzzwz}zwyzzxwxysxvxvyxxvutusvxxvxywsvvwwwxwxxwuuwuvwwxyxyuuwywxwyvvwwvwxxuww{wwvywvwzxxyztvu{yyxzxzzzx{yyx{{{{{yz{{xwvzwxxyvxxyxy{zwxxwuwwxuwwuuwxxyxwxwxwwyxwvvutxyurttrrswqqswrqvvststpqsssqrqooqpnonnqnmooonmnlmnkgilnlmmkilliikmcfjighjjeehegffeieddebdddbcfecfgcaac^]\`Z\^[YZYSRNKKLJJKKJFA92110/& 36545774>LHJOLGGHCA@@>:57532/------../--//./0//0103216Fmkiigijiijqy���mnlkjlllkjkkkjkjhiXLDCBBCD@BEB@A?=?@>=>?>;;<=<<;;;=<<<;98:;<77679958637645544555433676632211465343/0111340231/011/0/1./14]ktkE.+,,,+,-,*+,+)*+*)*--*No����z.	'1991#����������������������������������������������������������������������������������������������-	
+
+
+
+
+
+
+
+(������������~}|}}||}}vgPA</(01232526769A===??A@B==ACEKTULC;:99:;999:<=>><;::;>DHLKLMONOPNQQRRSQRSTRSTTTRRUVVVUVSPRTSRSTTQPRSQOTWSQQTUUXVXWW[[WUWWWUWVUYWTUWXXVUVVTTRWUWUXVVYZZYXWWSUUUSRTUTSVYXUYWWXZZZWW][Z\^^]\\Z[Y[\]X^__^^]_^`aac`aa^^_a^]]`^][]]^\aba`fgfdhjlknopmntuurxxxwxwzz}y{|}yxxzyyy{yuwyzvuvwssstpqqspkkmnkijgdhjihlkjhjkljkjjhjkjhlkkllnnqosnoqtrpptpruvqqttsssturwvuxxwyvyyxy{{zxyy{zzxyzzwzzzxwwxvtvxxvvyxvxxutwwywuvxvzwut|xywwuttwusuywttyvuvwxsvxuruustvwtvvwutuxuuuwtwwwstvvvvwvruvvuxwzyxvxwxvyyzzyxzxw|yvy{yxx{xwyzuuvyuvvytvvywxxzvxuvwywuuwuvwwvxywqvzyuuxvtuvsruwsrsurtqursqtttttsvqsrrqssqmqqljmommopkkmolmkmkmkigkjkmlkjjjjjihkgkifjiikhfcehfcdja`da_cea_acbadd```_]]Y^\]\\\\\VRMIKJIIJKKGB6/,..,% 26535543?NIKMJIHGCDB@@<78741.-,,,-,+----/..////0013147Emlkjghhkiioy��knkijlnmllkijikjigXLEC@ABA>@C?==>==>>=<=?=;<=;:;:9::99886677567666376346433433353424224320024322/01///0//1//////0-.-/,--2YgnsxP,+*,,***(*.+*+//17@HM[ht|����D$$*5:<:/#����������������������������������������������������������������������������������������������2
+
+
+
+
+	
+
+
+
+&�������������~}}|~~~}zhOA>2)00343455458>:;;<<???>=BGOTQI?9988999799::>>;78;<AFKJMMNORPPPQQOPSQPQTUSSUUSQSTVSVVURPSTRRRRPOPQQNNQQQRRPSUYVXX\[XXYXVVUWUWXVUUUWWTVYVQUVWRUVXTTYZXXWUUSUSSPSRSRSUVWVXUTWXYXVY\[\Z\\]\]Y]\[[^\_^\\[_^`adbc`a`__b_[[\_]]\][[]aa`chgffkjjkoopqvvquwxuxvwvy{}wyz{wvyxvuxywtwwvrtturorsnooonmkklkjijjlhhkmjihmkkjkhhiljghmkikllloopkqoqonnrnrtspttssttsvvwuuxyvxwzwwz}vwxzw{|zvxxyuxwyxuswtuuxwuuwyyxwvwywywwvwxzttuzuvwwsrtwrrtxsqswtvuvursuttutqtuuuvuutwtuvvvvw|uuvwuvvyxvtvtwwxuxyyuwvxtvxyvuvzwwwzwuwyvwwyxwwyvvtywwuwtvvxxzvwxzuuwyutuvtvwvuvwwruwwtsutuuvtvvvturssuqrsrnstsorsslmrqorrokopmlmolnlnmnnnmnlmjnlkjjikokiknjjjieehhddhhfiifbcgecbfba`^]becbaccbdcbd`^_^Y^_\Y\]XVVSNIHJJIIKIFA2,)*--#06565432<JLJHJLHFDEA@??;963/.,-+,.-,-..--..--././00139Inkjkihhjiioy��~rplkkmnlmlkijjkikhZKCA@A?>?@>><;<=====:;=;9:<:67879988786577557743476235311211220423211322101011-01.--/./.-.-..././-.---1Vfkkz�[8)-+,+,,03499>EKSU]]cgms}����^-#!! %')./*$+89;<82$����������������������������������������������������������������������������������������������6
+
+
+
+
+
+
+	
+
+
+#�����������}��~}~}}|||jPB>4(..343536426;9<9;?@@=>@FMQND=8877998789::<=999:>DGGKLNLNOPNQPOQOQTQNSSRRTUSSQRMQRVTQPQQPSQSPOOQQQRVTSQRSQTVXVXY[XUVXVUVXVQTXWTRSSSQUWSOUTSRSSRSVXUVYVSSUWTTPTSRORUTVWVTVXWYZZ\\[]ZZZ\]^SZ\\Z[]^\Y[[\Z[^a^a`_]^]]ZYZ\]Z[__Z\ba_`cfddeiifgkmnousnrttswtttxyxvzyzutywvvvvwvxvvsussrrpqponmmollmknjhiifhhjehgifgigehhigegiigjkljpnporlpproqqtsopvtprssquvvtvxwtwwywwxytwvww|{xuxxusxvwwvsuuyvtvwvuvxxvtxxtvvvstvwrtvvttttrrsurssvqrrtuwtttvttutsprusruvtqrvtruwttuwrtuustvwustsqtwvtuuwtvtutvuwuwtvvxwwwyuvvxwwwxuxxwswwvrtuutuvwqswyttvvutttsvwuvvvutrtuusrtwutrtttqtppqqmqrpmqoqorqpnqqqppoonnnnnlnooknnlikmmhjljggiggijhghlghggcceebcfgfdfecbeccbcba^__abccabfd_acd^\\^Y^_[X[[WVTQMIHJJJIHEC>3-++-,#!33354422>IJJIHJHFCEDB>>=9432/,,,,-..--.-,./.-.-.//-137Jmhhihggiikr{��}nokklmkkljjkjmkgieXJBA@?<>@=<=;::;:;:;::;:9:8:7576658864544642343345331231/0013300122-%&,0//..0/-//.-**./0/-+-++---*+-)-3P__F\{�m?+.0-/147:=@EJPSVZ_behlru����q8'(,---/01.('=>;;;8-����������������������������������������������������������������������������������������������@
+
+	
+
+
+
+
+
+
+$�����������~���|~||~~|kPC=1',,343547438<<=;>AC@>CJPTMB:776679878;::<;89;;?DIJHMNMLNMPMRQNPTTPQRSTRRVWQSSUQRXXQRSTRPSSRQRRSPOQSQQVSSSTVUSWYYVUUUVVUTTSUUUSTTURTVVTSTSQRUSSVVVUYZYTSUVSTRVTUSTTRUWUTWXYWXZ[YXZYYZZ[[TZZYXZZZYV\X\^^^]]a^^]]Z\WY[_\Z\[\[]]\\_ddbcdfifhjkmmoporqsswstvzxux~zyvxyvvxvuxywuttvrqrrppqrnmmljikhhfgghbgfhbfffghhhhiigffhhikkillpmppqlpqrmpqsonrtrnqsoouuuuwwvvwwxyywxxzxxy}yvt{xuuyvuwxtuwxssvwvtuwvutwvtvvvtuuuuvrtuussssprstrtqrqpsvqqstqqsrporspqrtqorusqrurqsupsstptuutuqstwvttvuvvvtvwwuwwwsvvzwvwxtvvwvwwwuwwvstuvrsttsusuruuvtwutvvrsvwutvttsvrruwrqsusspsrtqrrqppotqqqqoqrspqppnqqnkmnlkmnijmlkjklihkjgijjhhjilkhgiiigifedebcffdfifddedbcb_aaa^_`_]bab_ba]baa_\Y]\^][ZXWZ\VOKIHHJKIHD@;4..//,!#22131331CJFHKHFGE?BBC=<<:344/,,,++----,,--.//.-/..-/24Prjhiighhgksy��|lomklmmkijkllnlhhdVG>?@?<=><=<8;;99<99:78:;;8874563067434324223331132312110//1000//0-(',/.0/./0-.-&,---,,,++,,)***+1DPS;5Qx��N0//01478>@CHLPTY]``cglpv�����Q1,/0000222,!$@C>;<:3����������������������������������������������������������������������������������������������L
+
+
+
+
+
+
+
+
+	
+
+		
+
+
+
+��������������}{}}}|~|iOA>2',-332556327=<;8>??>@IPPJ:75658977888:9<;88;<?FIIJJKLLMQLNOQMMSSTOQSRRRSVQPRQSPSWUNQRSPPSSRQRRONQRMNQSQRRTTVX[WXXWXWUTUTUUTRSSUVVRUVRSQSPPUTSRSUTTURTSQSTPTRQPRSRRRXUSSVUTPVXWWYZYZYXYYX]YXYYXYX\[Y\__^^_`]Z^^\[YZZ[XX[ZY[`Y[^aabcdcdfhihjnnnnprpsuxruvwttx|wvwyuquwtrwvuqqrrpoppmooommmkhhkiffhfdfgegehhfghffhigcdfhffkjgilmjmnmjnmnkprqkorroppqprtttwwuvxwvvxyuvx{xtx{vutwsrtvrtttswwutsstvuuutvvuvuwtvvuqruuppstqqrqnnqqnooqqoorpqpqqrrpqqprprrsqpqrrrpsrrsrssqqptrtuuprtxurtvrttsquvvrtvtqstytuuvuutusutvtutvstsustruvwsttvtuuursuuqsuuqqtstrusqttpssrtunosuropqlmnrnnpqoqqnlnomkopoijlmjijjljkmjjkjigihjhhhgfhlgehjgghhcadfabee_afeccdbaac`c___]]bcb_a`__`c`^^^Y]^\WWYUUX\RMIHGGGJJIE?;5///.)!%210//120DFCFHECEE>??B>=<:5420.,+,-...--.-+,,--.0//..11Tmljighijeirx��{nmiklmomjjjkkmkhheTE??==><;<>97;977:85865788545323+!(3313122232121222/01//././,--.00/*&+//,,/.*+*"(+-,*)***()(),,,7AB54;It��`6-12358;?CFHMSVY[\abginr}����`3,01000143.##5B@<9:7����������������������������������������������������������������������������������������������Q
+
+
+
+
+
+
+	
+
+
+	
+
+
+
+�����������~�~}{}}}}~}kSA?4),/211336307><;8<<>BJQJC85555775458999;;99<=AFJHHLKKKLMONOOONOTRRNPPPOPPPNQTPPSTUPMQPRPRVTSTTRONRSPORRQRTVUXXXTVWVWUSVUOPTRMRSRQPRSQPSPPOPSPPPRSRUUOQUURQQUQQRTRQRSUSTTUTUTYVWXXXVXXXWXWZVVYWWWY\\Y[[_]\_a[Y]][[[[ZYXXXYY[^Y[^_^_ba`cdhigimmkoqpkqrsqttuqxvwtvvwurtusuvtsprqqrqonorpnoomkhjlgfiibaegcdffdfffcdffbceefdfihgklllnnnlonomsrporprnsqoqtrrsvtsvvvuvuvruxyvtuyvvtsrtssrvusrwssuvssswvssvtqutvqsurprssporqmpqqnnooopoqrqmopsopqtqnqroppsppqtomqsqqpsprrtooqrpqtvpppusqrtqsrupusuruvtrtswtusuwwrwvxstvtrrttpttsptuupstusssspsssrsssrqssvssstrqqurortmoqrnlmqmmlnlnopoqqnmnmmnllokhjlkgijlijmlgijiehihdefggfjfdgifeffbadfcdedcccdfbccc_cbdZ]_][`bb]__]^``^\^\W\[XUUVVTUXSMIFEGHJKJE@;1---.*%0/0/...0BFGFGFFDEBC?AA?::7310.+,-++-.,--,+,+,-./..//05Vmmihffhihkr{��zpmijlmnljlmlkkjhfbPA?=;<=:9:;88:886754576565433210'&//012012//0/020./231110/-.//3565- (//,,.,*))#+,()***)()+./17=B?:;@In��n9,2447:>ADEKMRUZ\_^acflpw����o:./11101241():<89;:����������������������������������������������������������������������������������������������T
+
+
+
+
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+y����������~}}||}~|lRA?6)*.212325118>;<<==CLQJ?2/13456645788;:989:<?EGFFIJILMLMNQRPPMORPRQQOQSSPOPTTRRTWQOQSRSRUTSSQQOORPOOQQPMPTSTVWVTUUSQPQSSPQRROUSOOPONORSPPORRPPQQPSVUQRUTRPQSQPQSQRRRRRTSTRSUWTSXUTSXVVTWTVV\XUUXYYVVVWYZ\[\ZXWZXZ[XVWYXWVWWYXWYZ[^^``bdgdehjklonllpqqruuuqyuvtxvutrttvxurrqsnprplmnqomonhggjhbffe`abcceddcgcddddb`dbbdeggijljklommnplnoplmnolpnqolprrrrtuuututuuttvvvvvswwurrsurrsvssssppqstrrtsrosrrwssrsrprspqonnompnmmnlmpqkmprlnoqmmopklqqlmnqmnpqnloroooqoqnppmmqqqrsopprqsqrruptswqrtusqptrsstrtuvossuopstqrprprstqsrtqtsssussrqoqssrrsrpruqqrrmnqrmnpqmqprnonpoplmnomoqqmnomklojgllhhjjghhkiikkfghigheeefehifgffdeefeddabeddaadb^ab_`bb]^_]V\^][^]]^^^^]]__][YZ]YWZUSUWRSRKHHDEFIJHD?80-..,)"0/./--.0<GIFEHEBBDD@?@>:851//++,,++,+,-,,,-,,-.---.-07Wnkghgfhjikt{��plnlllmmmlklllllgiaLA>;99;8888777776665556443311/1/#!*-11100/..001-./21/23112367<>>@>9(&+,-,+,(&"**+./.0257;=>ABGGEAAJOn��z@155799=ABGKMQWY\[^acfhnw}���yI//12133674/#'58:;;����������������������������������������������������������������������������������������������V
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+t�������������~~~�}oUE>8*)-10333622699:;>BMNG91./1456654789:;979;<BEHHGILJKLONLNRQOOOQQOPSSPRSTQPRUURRTXPQSTPRSTSPPPPMOSONNOPOOSRQSWWUVVTQQRSQRQQPRQUQOQPPOQTSQQRQOOQOPQRRPNQRSOPOONPPONMOPPQSRSQTSTSUYVUWXVTUWUWWXUUVYVWWXYXY\^ZYY[VXXZYSSVXVVXWUUVZYY[]\_acbedjghkmmjknrprqtttuyuuuxtstssruuspqrrmnpoknnnkmlkhhghffgeddcabegcdegcdeeccabbadeedfhjikkmllkmjnnmknmnmsnppppprssqtvurtwvstuvsuvwsvwtssttossursssqqqrusrqssoqsuvpqttqqtsoponnpoommmmkkmmjllninoolnmmkmomkmlnlnnnnmloppnpormpqplprtprqroppropqspprsprrrqoosrtrtrsstqrqsqqqrruprqsqqqsotttpprrpoponorrprsqopsprrrpoqqpsppornpoplnoojlnnjmpokmnljlmlhfljihihjhjjjighhfgggbegedfhddfgc`decacb`bbb``b`_ab_`_`_^]\Z[Z]]][[][\^]Z]^\XZZZTTYUSVYRRPIFFDFHIHFB>80./.,'%/--..//0=EHEEFCABBB@?><<:521.*+,+*+++,+,--++,.,+,++-0:]okigfheiknv}��lgmlkllkllkkkmmmigcNB<;;987996745644554424313321020#'-21.-.//00322232257669;<=?AEDCB=-!)++,+&%
+!*,0348:?ACFGHHGIKJDKWSg���K/47769=@CGILOSWX\_`behlrz����Y11322258884(!3558����������������������������������������������������������������������������������������������b
+
+		
+
+
+
+
+
+
+
+
+
+
+
+
+	
+p������������~}}|||~�~kTDA;)).2/334510299>BINQB3.--.134456559;8768<?BEFIHHIJJIMONLNNNNOOPMOOPQPQQSPPSRRNRSSPTPPNQRQQPQOPOPSPOPQPQQTRRSUUUXWRPRSSNPPPOQQRMMOOLKQOOLOPNNMNOLPPROOQRPNTONLNNNNOOPRTURSRUTTTUVTVXXTUVSSWUUSVUVTTVWVWXZYXWVXUUWVVRRWWTT[VTSWYXZ\][Z\`aedfegikkilmpnrprrtqvtxttprtqrrtttqqqpoonnlomllljijhgcdffcbdc`bddcdca`bbcbb`b`cedddcehgjijjkjllnkjjommnqnnoqpoqsrpstrquwtqstustvvsutrttrqqtrtrurosrposspmrsporrqoqsqoonomolllnonmmkljkljknjmlomlmnkjmnkjkolkmnkkopkmppllnplmonkmoqmrqplnnolqprpopqprrpqqprqsqqrsqrrsnquqmpqqmoppnooqlqsspopqqqonqqqqooopqpqorpnnponornmoojmnmjklmjlnokmmnlklkjljkkghijgggiehjheegdbffebdfdcdededdb`cec``baa`ba_aabccab^^__[[^[T\^\ZZ^YY[ZY[Z[XYXVTSXUSUXTSNHBCCEFHIGB>70-..,&",-,,-/-.BEFGEBACC>?B?<:<8331-**+**++*++,,,++,,,,-+--.2bmiiifhijhmv}��njnkjlkjkjjijlllihcM@:;:86567764653255332332221111/#&+..--.00244778568769;>=??CEDDDE?1#'**((&
+	
+)0369=?BEGIKLKJLMNOSX\g���T647779;ACIIKNRTY[_]`cflqx����g;3234489:<91!&+2����������������������������������������������������������������������������������������������h
+
+
+
+
+	
+
+
+
+
+
+
+	
+
+
+
+
+
+
+
+
+
+n�����������~}}}{{z�kTDA8((.314455015;>ENTN=/,,-.-0113577899887=@CDDGGEFJKJINMMLQNJKPOQNSPNNPPQRQRUSSSUSSSTOPRURQQQSNRPSQPQPPNPPQQSQRSTXUNPRQOOPOOMPONLNPPOOROLMRPNMPNNPSRPPRPNOPTNNMMMMNPNMPPTNQOOPQQRQSUUUTWTRRVUUUVVUVUVUVXXVUXXSUUXUTTSRVUUXWSSSUYXZY\^^\`ceedejjjkjmlooqorrupttwtrqutortsrusrpooojlnojjjlgfihfddcba`a`__abcdba\c`bcbab`ccabggeghigihjjlkkijlmkkppllopopstrttqstxvtuvtuuutssvsstsporvqqqsmorqnmpqomqqoqsqpqsqnnonmnnjllmkkkljkjkjijlfgimljllihjlijjmkiklgilmjmonllmmjnknkmnnmrmnmonpnqoqqpnnpsooprnopropqsopqsnqsqlponioopnpopoppqpoopqpnnqpmnppmppmoppmmmpllmolmomjollklklkljmmmijmihkjkgfnfbgiedfhdggededcegeedcdedcdeeabccbdd``ab`_ad][_ba^^`Y[`_XY\[W[[XXW][ZYZY[Z[ZYXWUQSSURUVQLHC>@CEGGEB=60-,,)$#,,+*,,+.GDADD@BBB>>?=::9531/,*)*+,+++*,,+)+,+*--,,../7alggjggiijnu{��plmlmnklkiiiknmifkaJ;8:997677654633342443110/...10-"$/..--.1223766588779:<=?@CDDCEGED9&('(&	
+	*1469=@DGKKMNNNPRTSV^b_y��c<49768<ADGILOPVX]\^bfikqv���xE457:89=??>5& +����������������������������������������������������������������������������������������������o
+	
+
+
+
+
+			
+	
+
+
+
+
+
+
+
+
+
+
+a�����������~~|}|{|~�}mWE@8''-003465015=DNSJ4*')*-/.1/034797788<=AACEGHFEHKKJLNMLNSOLORPPQRQOPRQOOQSSOSSTPQSROQRRNLQQPJPOOMOPNNPPPPPPOQTVVQOUUPPPMLMNPNLLOQPMNONJMOLLKMMLNPOLNPMKLNLMOMMLMLKMOOLOMNNPPSQSSUUUVUUPSTVTTWXVUVXVTUWWTUWVTUVURSUVSTRQURUUUUXXZU[^a]]`cdcdkjhkkljmnonqrtopqwqnqtpnqrpnsromnnlijkmjijjhggffdc`a``_``a`bcf`_`cbaa`]]_aa_addddgfeghiikihhkklkmmnmornoqqppsrosuusrvusstsoqrspqssoosuooopmqrpopnnqqonqtspqsrnmnpllmlgjkjgkligiihhhgifggjjihiijillkikjiijinkkjlkjlnmkkmhklmlmmokkknlmmmknmllqqpmpnpopnporqrpppqornnnrpmnomppromoqmoqompqnlmnmkloononlnmnlnlnmnlkknlkknhgiihkkjfkjjhimhfhikgehghghfedfeedeedbdgedecaabc`bdd_`ca_bca__`_^]b]Z^`][\]Z[\][Y[[YXWVWVX[[WXZ[WZZVUVUQPRSQSTOIIC><BDEEEB<5.+**(##+***,+*0BAACDA@?A@=<<<:7431.+**+,,+))**+*)+,-,+*,--.0<blighgfjjjnw~��onllmlkljjhhjmllgj]E:666;B53443440100/01//0/0.-/0/)$-.,-../2254455678:9:;?@@ABCFGHHD5#%&#		*257<?DGJKNQQSTRUUTUXWNo��jB57869=@DGJLNTWY\\_adfils|����X74;::<==?>7(�����������������������������������������������������������������������½����������������������#
+
+
+
+
+	
+		
+
+
+
+	
+	
+
+
+`�����������~~}}}}��pYE@7%'-/-1332//6EOPG/$$&()+--00035665689>BEBEHHHFGJJJKMMJMPRPNOPOOQQPKRQQNRSRQQRQRRSQOQUTQOOSPPOTPMOPPOPQQPPOONPSTSPPRPNOOMKNNMJKMNMJLLMJLLLJLKKJKOLLKMNMLMNMOMLMMOLNNQQOQPONRRRQRSTSRSSSQSSURTVVSSSTSSTVUTUUTSUUQRTUTRTUWSSRRTTWWWRXZ\[[]``aafffjghhmlmlqrroqqupnrqoqroppspomolkkmjkkljijifdddd_`ca^^_`^`ab\^_a]`__]\^__`bcabefehkiijkegingjmpllmoolnqqoprqprsrprssqrsqqrrsrsqqprrsqsoonrpoppnmnqonqrqoprqllnnkkjkhjjihlhfghefhjfgfhehhjgfijgikjfhikhgjnhhikihkliijlhkllkklmkklmkkkllpmnotonnpnoqrlopqooopnnoskmnollnljmnnlmnqllonmnmlmmqnklmmonllmilmnkklnkjimjiikdhikjljhhkiigiggifhjiddgjeefgbcdecdec`cec`a`_b`aadbc`abaaa_`_^_`a_^\`\]^^Z[\\Z[[YXZYWVVXVVZZUVXXWYVTUVTQPRRSUPMHFB;6ADCDDA=3(&(*)$!**)**,*0ABC?AB@<>@>::;84130.-,*+,-32**,++++++,+),-+,0>bnjiffhjjmov���ponklnnmljjiikmlil^D9665@eS354322.021110..000.,--+("+.-../0132344568;;:<?AA@ABEHIIFE<! "!
+
+
+,348<BDIJNRSTTSTVUURQVWh��wJ7789:=ACGINQTWZ[]_bcgknqz����g:7::;<>@?@<2�����������������������������������������������������������������������������������������������#
+		
+
+	
+
+	
+	
+
+
+
+
+
+
+
+
+
+
+
+
+		
+`�����������~}|}|~�pXE@9#'-0/2120+-8RP=*#!#%&')*,/02465467:=@ACADDEGGHIJIJKLMNNOOQROMOROONUPPQUSRRSTQSTURQQSSQRQQOOQROKNOLNNLLNPNMMQRQPQQNLNPMLLMNLKMNKIJMLKLOJKKJKJKMMLLNNMLLMMMLLKLMMKLNOONPPNJPQMQSTQOORQROROTRTTSRSTRPTTSUVUUSSUSQVVURPSPRRQOPRQUVUTWY^[\]`^cbdfhhegimkmmpppnrqsrqpnprpknqromnlkkjlhijkgghigfc`a\a_][^^_]_``]`]`Ya^_^^`]^aabbddbdghfgfgafeidhklkllmkmknqnlnrrrrpstrsssqptsppstnnprorqrllmrmmoomjmpnnopqopnpmlklkigijkhhjkheefcdfhdefgceehecfffhigchghhghihhhijijjhkkhhljjjkjjjmkjklkkllhlmolmnnklnjhllmlmolkmnpjjlnjkljinlllmnmjjlnlmllljlmkkkklkjklgjlkijhiilgkjjjjhihjikhgjhfiifdfhedfg`bfhccegbdddcddaabbbc`_aa_`bd``ab__ba^``]\]`][[a\Z^^ZXZ\WWZVVXVTWVWVWWWVXVVXVSTVSPPQMORRLJHC>63>ACEB=;1'$'))%")'(('**0>BD>@B@==><=;<:6230.,+,,+(.=>,***+,**,+,,,+,/=cnkkgbgjkhpx���pomllllmljiiilnmicXD7456;v�d5330///.-//.-,--.,,,,+()./0-/111234557:9;;>?AADDDGIKKIIA%!
+	
+)157<BFINRQSUUTUUTTRV__d|��V5988<@CEHKMNTVWZ^bcdgilqw���xA5:;>?@@??=5$�����������������������������������������������������������������������������������������������%
+	
+		
+			
+
+
+	
+
+
+
+
+
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+W����������~�~}{{|~�~nWEA;$&-1/2121*,4F6'"!#$%%')-04444458:=ACACCFEEHIJJKLLHMPPMNMQQNPSROOPPMMPTPPTTRQUSSPPPOMPPOOMMMMMKNONNNOPMOOQQSRRSSQLLMOLLLMKJLMMIJJJJKKJHIHIHHKJJKJKKJLLLJNJIJMLJJJNNNLPPPNPOOPSROOQRQTRSRRQRRTSUWSRSTRTUURRRRQRSQOOOTPQQRPPPRTSRVVY]]Z[_acaaegdcikjjnnnllmrnoqpmlopnjmommnlljkhkhhhhddegdebaaab]]]^\^^b`]]^Z\[^[][\\[[^_]_ba`dddeecebgfheihmkmlkkponpponssqpprrqrttpostpqttmpqpnponlpoqlonkmpnonoonoqnimoliiihgiiicdhhfecdcdddbececdeedddfhjgefjfgghgghhghjigijjfdhlffikfhijhkkliijkhikljkjkjjkikmkmknmllomnklkllkijjnljijjjkigkkkjkkilljjjilkkjkhkjkjjfjmmffiigijhehhjffhiegieeffgfefagdfddcdcfbccea`bb_`c^\`a]\`a^^^_]\^\\__[]]_]]Y\[Z\\XXXYWXWWWWUUVTUWXSTVWTSUTSSTPOOLJORPNMFA<2.:>A@?;6/&$$'+)"('((('(1>BE@A@@@><==;99840.-**)))$#5:+)++++,+*,,++,/Ajlijfchijhpy��qnolklmljjjkklnoldUB7467;_y�s8,0..--,--,,+,.-*,,,+')02///002436789:::=>@ABCEGIKKKLMD*
+			'25:AEHLORTUUUSSUVTW\]Z[x��a>989=?BFHNMQSVY]abaeehkpu�����X=><?>>@>>@;0�����������������������������������������������������������������������������������������������.
+
+	
+		
+
+	
+	
+
+
+
+
+
+
+
+
+
+
+
+
+
+	
+S����������~}~}}~|||}~pZD@9&'-101020)),0&"!!"#&((,0424457;<@DEBCFGFFGHIHJLLGKONKLMNNNQQPQPNNORRRNOSRQRURQRSRPRQQOPONMMMNOOONMOQOMMPQQQQRROJKKMJKIIGIMLIIJJGHJIIGNFGGIJGJKKJKKLKLKNKJKLMLLKNLLKMPNNPNNPPOOPQQNPPPQRNPPQSQSQPPPQSQPORRPOQONMNPRNOMPOLOQPNPTWVZZYY]aa^`bcbcfhiimkkknopmponmlklnmonmooklklhihhfedfeddd_^_`_\\\\[]_]YZYXWYY\[[XZWY[]^_``^aecedecdfiegflgjkmkjkonlnpnoppnoqqoprrqpqrrqsqqpqonnqlknrnnlnkikmlljlmlnnkhjihffhghjghdedecebdffdccdabddaabeadhg`bficeeeedeedfihfihhdbejeghhfiighjgkhiiihkikjjhjkkjjknklkniimnjlmmihkjghhjjigjjjhggjjkkijklkijkjhikjjilghiidhjjccghfiggfhijhgfgfefghgddfecdfcccd^aab_`ca^^aa__a_\]`][_^]^]^`^]Z]]\[a[]\\VW\\XXWXVXXWTUWUTTURPRTRUWWTTTTTQPPRNJKPNNQMEA<3/9=?><84,($"%,*"'&'''&(3?=>A>:=>:8::86430/.,+(()+&.8,**,,,++++++,3Dijkifdefgjow���pjpmlmnmjkiijlnlidR@77:=BM_x�~A-../.,,+**,,+*)*,**$)/10/0023448779:;==>AABCDGKLLLLJF.
+		%06;?EIKPRSTTTSSTVWWV^aVo��h?<::;ADILNPRUWV\aacdhimns|����gA8=?>@@A@@<�����������������������������������������������������������������������������������������������5
+
+
+		
+		
+
+
+
+
+
+
+
+
+
+
+
+
+
+	
+
+
+
+
+O�����������||~||{|}|o[EA:&&-/./140*'&$"$&()+/026889==@B@ACECCFGHGHLILIMONKOMNNORPPRSNOSUQOPUVQQRSPQRSRPSRPOQNNNNNNMMNLKLNLLKPQOPTQPOLKKLJKHIIKMKJIJHGIJJJJMHHGIIHJIJKKKLKLNLKKKLLKLLJIKKMMMMNMORONOOOOONNPPQQOOSSQQPPPPQSPOSTPQQUMNNNOPLOMJLLOQPOQTWUXYYY^_^_bbbcgihhjmgjmnnnmqnmnnkjlnommnmikhiffggccegcbb`^]^\]ZZ[[Z\[[[]XVY\YYZ]ZZZ[[]]`a__dedddc_dfgcfgjfcjlilmmmkmnoromnqspnqrpppqonqsnoqplmmpkjlolmlnjkkmmplkjlmknljgghihghhceeecccbabcb___b``bb^`ad`ceebccdbfedddegehgeehfddfceghfhhjhghjghiigehiehjkfhjigghkikhjhgmmhjjhfhiggffijgfikigikjgjkhghjkhgiiggifhgiehhhghigdhfghicfggfghfcdebaeiebcfbbadbbad^aab_aaa`a__^]``^[]__]]^][]]\YZ_\Z[_[[Z[WVZ[VVWWVWWWVTUVVSTTRQTTTTTVUQSSNLMOMJJLJLNKD@>869;=>=94+'&$"))!%&&%&&%1=<<><9<;89:8665311--,)(*,'$2;,))+,--,,-028Lllkkhgcfhipx���oksnnomljkjjklmllfQ@:<ADGNR\t�~I)+.-+,+)**,*))))))!&01201124577899;=>>BCCEEFJKKKLLLJ5
+		
+ 378>FIMNQRSUVSTUXXX[]]Vi��wL<<9>BGILORSSTZ]bdeiijmorz����zI>?@A@@ABA�����������������������������������������������������������������������������������������������:
+
+	
+
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+		
+	
+			
+
+
+
+	
+
+
+M�����������}|}}}z{|~|qVDA;%&-/.001.)&$ #%)+,./47:;;>?B@A?DDADGFHGJOKLJOMLMSOMNNPOOQRNOQSNORPRPQQQMQROOQQPKMQNNIRPMLNOLLLMKLKPONOSQOMNKIJJKIJIJJHIGFFHJJIIIIGGGGHIIGFGHIJJKJIJKIIILLIHJJJJKNMLLQPLMOQNNPOMOOQPPQSQOPOPNOQNOOPOOOPULLNNONLNLMLMOONPSSTUZX[Z`_^_baaeggchijdklmllknjkljjjjkkjklkghfgeedebeeebb`^\]^[ZZZXYWZXZZ\WUY[VWZ\WWYYZYX\_[]abacb`_fdecffhhfhignlkjmklqrnlnrpnmpqnppollopnppnnononmkmlnklmnlknpkjkplijkjefghdddd`ca`_a``^_]^]]]`^`aa_`abababcbbbdfcadceffhebefb_cgbcdhghggeefgdfhhedggfhhhehgfgighikghhihjijgegjeffhdghddhiffiifcgfefhhffghihhighfffhgfjigeeiffgeaeedcddcbbbadfgdcddbe^accabaa_ba_]`cb\]][\^^YZ\_\\]\Y[Z[W[\ZZZZ[[YYXWWVUVUVWUSTXURUURRTTOPRRPSSQOOPLMKMLIHHIKIGB@<89:;==<60)%%# #!!$$$$$#$0<>>=<;;9998657531/*++))(*(("0;.)+++-./135:Qgkjkgfffgip{���qjpnlmmlkljiklllldQ@<=@DGJKNZn��Q,+,*,*)+)***)(('%!
+'/21012355589;;>>@AABEEGGJKNONMNH7
+	 279>EJKMPSTUUUSTXWWU\^Wb��W9=>?CFHNQRRVZ]_bdfgikmnry�����]>?B??@B@�����������������������������������������������������������������������������������������������=
+	
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+			
+I�����������~�~}||}zy|}|pWF@=%$,..211.*'" !%))+.347=?>>AB@CBFCCEGFFHJLJLKLKLNQOOOMOQSRPPRPPNSWQQSSQQQSRQORSOMORPONRNMMMMLMMMLNMMMMNOOMMMJIHHHGIFGGGGEEFHHHHGEGEFHIHIIFEGIHKJKIIHIHFILKGJLJHILMMLMNMLNPQLONNMNNPNORPMMNNPPNNNNOPMMNOPKMOMNMLNLJJLLMMNPORSWUYW\[[\_^acccbggihjikiljkimlijkjijkljjghgfddbcchedbb]\]]\YYZYXXYYWYYYVVWXUWYYVWXWVXZ]\[^`abc`^bfdbaedfhggfgiighkikoolklpqppoppqpplnponrmmnonnnliklnijkmijlnghkmihijhggedccbaab`_``_a^`\\\_\\]_]]^`_aa`_`cb_`bc`addbccdaadebcdfacbecfeecdecdhffdceefhgffhgfijhhkidfikhijiedgiffehehigehighhfdghgfhfefhhfgigefgcddeccggccdgddfbcedcdebcdc`cfdcbc_abfb_a`^bbbaba^[^a^\\]Z\^^[\]]\\ZZZZYZYZWXZYXZ[VXYVSTWVRTUSOTVSPSRPQRRNOQORUQOOMLMRLLKIGHIJEC@><::;;<==6-&"!"  $##"##&/;>=:8;76676567221.*)*((')((#3;2,,,,-0126=UgkiiY]degipz��rinnmnnljlkhikljjeR<99=@CEFIORl��Y))*)'())*((&&'&%
+
+#0223434567:<===?@ABCFGFGJMPPOOMI>%
+		059>DILORSUXXWSTVXVW[`bbw��ZA??BDGKNPRTW[\_aadfgklnrw}����i@@CA@A@�����������������������������������������������������������������������������������������������D
+	
+
+		
+
+
+	
+
+
+
+
+	
+
+
+
+
+
+
+
+
+
+
+
+
+
+F����������~{~|}}||{y{}~q\F@='%,.-001.+&!!#$&)-136;@?=>?@?ABC@CDDFGGGHKKIKMOOMNPPMQRTSPOQPOPTVPPTTQQSUPOQTROOQOLMNOJIJJLHLJMKMJLJLMOOOMKIHHGFEGDEFHHHIIHGHGFDFGEGGGGGEFIHHIFHGHGFGFIIGFHKIGJIJIIKJLKONLKMMNNMNNNOONNNLMMMNONNOOONNMLLMNJGKKMJIKIJKKMNNSTTRVUWXY\\]`b`adffiiihjlmjihmkihkihikjffggabcdbcccaa`_\]\ZXWZXVUXWWWXWUTUUVVXWWUVVUTXZYZ\]\\``]_ac_```_eghdgfiikjkklomlmmmpqplpqonnmnnlnpikmnkmlghljjkkkkhjklgjjhhjhhgigdbcb_`c`]^`_\][\ZZX]Z\]\W^]__^^]^```^``__baa`bbc``baccabdecccecdfecbdgccfeceedbeefceegceghcjffhhgggeefhffffgedeefdddceffddeabddcbcecdbdaccfccfebcbcdbbadc`bebacc_^cd`abb^a_b_^^a^ab__`^^_^]^_[[[]ZY[]Z[\[WWYXXZ[XTWZWXWZWXXTRTSSQTSVNQRQRRQPOONOOMNQOOMNKHMPIIHGGIJGDC=;::;::<=:4+" $$"#"#$/9=<9786755564311.-,)('('''($!1=5-.--.0149ThjfdPTcffgpz��}piqomllkjkkjjjkkibP;66:>?BEHJLRc��_+'))((()&&''%%%
+
+
+
+ -14555557:;;=>@@CDDDFFGHLNQQRSQRH$
+	+6;>DILPRTWUXVTVWYXY]ddZp��gG?BDDILPSVVX[\_`begihlosv����tG@CACC�����������������������������������������������������������������������������������������������N
+	
+
+	
+
+
+
+		
+
+
+	
+
+
+	
+
+	
+
+
+
+
+
+
+
+
+
+
+=����������|~}|}}{y{}}}rZEA<'#-/.002/+'!!#$&*+05=<<;<?>>@DCBCDEEHIHGIJKGMNQOLOPNJRRPMMPQONPRPNOQROQPOMORQNMPOMKOLKIMLJKLLJKLNJLNPOOPOOLJIKHFGHEDFHFGEEFDFFGDEFDDEFFFFGGGHIHHGFFFHIHFEHIHJKNJHGJJIKLMMLMMKMOMMNNOMLMMLNLLNNNMMKMNMLNKLMKGNMJJJKJKLLKPQSTQRUWZY[[\]_^_adecfghfijiihiigihihfgiheeed`ccdbdba`ba^]b]VW[YUSZXTUXXQSSTRUUWTUSTUSUWUVY\[\]_^_``a_bbbaeefgihgjljhjlkilmlkoqpkopolnmmlnpqlnnnkmkkkmjkkmkjhihggkiiklheefdbababab]]^]\[[YWX\V\ZZZ[[_[^^^\^]`^]_b_]^`]aa_^___^^ab^baba`cc_abb`acc`acdeedcbededfddfgehgiddhieegecdfcabccaabbccccbdefcbccecccfcdcdaaadbbdcdaab]acbabc``bbbab`^abbadb``c\^^]\_`a^^`]Z_a]Z]]ZYZ]YYZZUYZYXWXVYY[YZYXX[VXV[TRUULSSSQUQONNQOPNMKOOMJLNLMOMKJLKHGGFFGGEFD?<;9887:;82+! #"###!"0>9;:87773234321/,,+&'&&%%'&%"1E=,---/15;UljeZNUdgffqz��zpknllmmlkjjkmllkh`I8568<>@CEEGLPd{�i-)*&%')'%&'$##	
+
+
+
+
+ -3476557:::=?@@BCDDEFFIKLORUVUSPK,
+
++7;@DINQRUTUVTXWXXYZ[ae^k��rM@CBFJNQVVV[Y\`bdghiknosw}�����ZFBEF�����������������������������������������������������������������������������������������������V
+
+	
+	
+
+
+	
+
+
+	
+		
+
+
+
+
+
+
+
+
+	
+	
+
+;����������~}{{{z{{y|yq\HB<'$-.,012/+'"!%$'),39><<<>@?@BCBCEFEEGHGGJKLIKLKKMONLIQPNMPNPPQPOPPQQQOTPPQQSPMOQOLLOJLJQNKJLMKNLKIKMNNMNNKJJIGGEEEDEDDCCDDCDGEDDEFDDGFFGFHGHIJIHHHFEGIHGHHHHJJIHGIKHKKKKKKJKJKKKJLMMJLLKJMKKKKMMJIKMLLKJIJIJLKHKKIIJLMMPPPPSRTUXWZY[\]]^abb_fddfiigfhefgiggghhgihfddccb`aa`^_a^Z[]VSVXTRQTSRRSTOSQSPSTTPSSSSUUVVY[Z[^_]_``]__``abdaeggffiifijjhiklijnmonoomlonmnpmnnollllghjlhhjieffhgeegghihegfbbcb`aa`][^_]Y[\WUXZVYXXWX[[XX[[[[[]ZZ^_][]\Y]][[__^]^`b^`]^__a``cacaaa``bacgdaaccaccc^aed`eec`becbacc`bbba``bddabeacbcbdddcbada_acbabb]]`b^^aa__``]_`bcba`a_`bb`^^`^__a^^``[[]^[[]^[]^ZY[]YYZZZYX[YYVXVXWXYYVVXXWZZUTYZVUVVQQSQMPPPPQMMLKNNNKKKMKJJKLLMMLKLHGFGFEDCADCA?<976789:60+#  ! "$#$%$$$4;8::77863101/.--,,+'(''%##%#!!-ED.,./049Vlji]2Ogifgpy��xollklkklkjikllkih]F61347:>@ACDGKP]s�e1%&%&'$#%$$#"
+	
+
+
+
+,3766679::;>A@@CEFEEFHIJLOSUUSSPI)	
+*7=@EJLQTTVXXZXWW[Z[\^f_f��uOGBEFJNSUVXZ\^adfiiklmqtw�����dEHF�����������������������������������������������������������������������������������������������^
+
+
+	
+
+	
+
+
+
+
+
+	
+	
+
+
+
+
+
+
+
+
+
+
+	
+
+
+
+
+
+:�����������}~~|{|z|zz|{q_KC>($+.-1142,'#!%#%+07;::;>>=<?@@?@CCDFFFFHLJJJMLMMONLMPRMNQOPPSUQQSQQPQORPPQTRJMQQMNMNJLPQKHJKKJOJJIMMMNNNMKIJHHHGEEEGEEEEFCCGGEEDDEEEFFDEFHFFHHFGGGEFIHFEFGFDHHGGGGHEIIHJKMLLLMIJKLLMMNNLKMMJKMNLKKLLLJJKKHHKJHIJKFGJLLLOLMPVSUUXWY[]\Z]^_`cde`bcgfdfieeggeeggedeeebbab^_^^^^\]\ZYZVUVSQRRQQRSPQRURRRUTSSUSQRTSSTUUUWZ[\`_]\]]_`aabcgecehhefijijmkkjmninnnmlmmkknnllmljkkjeihjgkihfggjhgghhigfffd`bba^`_]Z\[ZXWXWSUUVVUUUSVYXWXYYXYY[YZ\\[\[[Z]\]_`]]]```_b\]_`^_ad`ab`_^`c`_`a_`_`_c`b_abbcdddbcedcc_bbc``a`_`b_]`c`_ab_^ab`_`b\[_a^a_ca```\b`a``]``a_aba]^`^]_^\]Z]]]]a__]^[[]^\]\\]^[\]Z[Y\YXYZVWXVTTVWUUXUSUVUSUWTSVWUTUSRRRPNOOPOPMNNKMNLHJKJHHIIIKLGGHHECEFCAA@ACDC?:887799741-+,*--((%'+-//5611AGFHHA?>9320110.-.,*()'(&" %%" &!,MK,.101:Zkjib5Bfgffnz��xljklmlkjjkjjklkig\H301236:=?@ADGJN\u�s<$&$%$#"##!	
+
+		-46668::;<?BABDEFEFGGHJKNRTRUWVVP/		(7<@DINRUVXWZYXXZZ\\Z`c_az�zWEGGJKOSTW[]]_bfhikllmprx~�����wMH�����������������������������������������������������������������������������������������������i		
+
+	
+
+
+
+
+	
+	
+	
+
+	
+	
+
+	
+
+	
+
+
+
+
+
+
+
+
+
+
+
+7����|������~||{|{{zz}zs`JC=(#*+.2140,'# !"%+248:899>=:<@@@@BCCEGGFGJNIJLNLMOQQLNOOLQPNLKRPMNTOONPPQNMMPLJONMNMKMLMMGHIJHKJLIKLOPOMNMKIIGEFEFCCEFFECCCACGEACABCEDDDEFDEFEEEEEEFCEDDCEFEDEIFGGHGHFLJGKKKIKJKHJJJJLNLKKLMKKJJIIIJHIIHJHIIHHJIKIHGIHIJLNLMMPNRUWUUVZZY\\]\``^^a_dbbadacdeecefdcdacaa_^\`_]]][[ZZXXTUXRPSQPSSRPOQSLOPPPQOOOQRQPQRTTUVUZ^_\Z]^Z^`b_abgebdgecfifeikhgjkjglmmkkkkjkkjkljijjikkkfhglhfghffdgdcegdcbda]`_][]ZZY[XWUVVSRTTTTSTSSVWVVXVWWXVVXXWYZZXZZ]Y[]\Y\[]\][\ZZ]^[[\_\]\][_____^``b]`aa_aab`acc``ab``c`[^^\\]_\Y^_]]___^`_\_____`a^_`aaf^_``]_``^^`_[[^]Z]_^[[^]]]^\\Z\__\^]^[^]\ZZ[]XY]YWXYXXXZWVYXUVUSSTTVVVVTUWUUUUVUUTUVVRQSQOMNMNOMKJKLLLKHFJJIHHGGGHJHGHGC@DEDB@@ADEMT>66767742,*0GGIMKJIGDDINQTLFUWUVONKHD6//1..---+''(&'# "$" ',$&IG-10/<ZkhgcTRdffhp|��wlilkkkkiikkjiljjk\F4//0247:>>>BDFKOXo�l<&$%##!"" 		(4677899;<>AABDCEGFGIJJLNRSVVWXVP6!		
+&6<@DJOSTXXYZXXXY\][[`eaWs��`KHIJMQSW[]^_acdgjllnqrux{������P�����������������������������������������������������������������������������������������������m
+	
+
+
+
+	
+
+
+	
+
+
+		
+
+
+
+
+
+
+
+
+		
+
+
+
+
+	
+
+
+
+
+
+1����{�����}�|}|{z{|{s`KC=(%,,.203..*$ &.47:<;9;<==>BCABDFDBDEFEHIKGILKJLPMMMOMOOQPQNPRPNRUPOSRPQQPQSPOSONQOMKJNKHKIIHKKIHJJLJKNMLNLHFEGDDACCFDEEACEGDDCDCBBFDDDDEDEFGD@EGDEEEECDDEEEGIFEGIGFEHHFIKJGHGHFHHHGGKJIIKJHHIHGHJHGGIIIGGIHGIIIHGGGFGHKKNNNNNQTTTTXYXZ\[\]`]^_b`bbibgbecbcbccdcb`b`a\]]][[[][YX[VUSTSQPQMNRQMNPPOLNPPPPNMOPPOPSRQSVVUXZ\WY\\Z[^`\`acbabc`beeedghhijiilljmljiikkiginhjllhhjicddjgeefcfefbdddbebba``^]__YXZYWVVVSRRRPRRSRSRTTTTUUTUURTWUTVVVTXZZX[[XW\Z\\_ZZ]\[[[\[^^a]]_b]]b`\]`^\^_^]\__]^a_\^_^^_a_Z[YZ[Y\[\^]]^^]\_]\^_]^_`^^]][\^bX[\\[^^][\^\XZ\\]^[[\Z[\^][\[Y[\\X[]][ZZ\XYZ[UXZXTVWXWTWVWVUUVRQVWTWWUTUUTSUURQSUPTURNNQPLKMMNLKIHJJKLJHHHGHGFDGIHHIIKOKAAAA>=@@=@V�nB555432/,+1?GKLNOKMJGJPRUUNQ_\ZUSPNJB300,---+)(&'%&$!  $+-&%DE0/2<_ohdcaddefjq}��tkhkkjjlkhkkjklkih[D3.../258::<AACEJOVhyj<&$###"!  		%!
+		
+(477:<9:<=@AABCFGFGHJJLOQRTWXYWSO<!		
+%9<>DJLRVXXYYYYYY\[[\`ab[m��kNHLLMPUX[]`aacglnmopqqsv}�����������������������������������������������������������������������������������������������������o						
+
+	
+
+
+
+
+
+
+			
+
+
+
+	
+
+
+
+
+
+
+
+
+
+	
+
+
+&����~����~�z~{zz{{{{{t`K@<'$,-.200/0*%#.368:999<;:;>A@?DECABEEEFHIHGNMJKMOLMOPMPPTONNTTORTTPPRQPPRPMOQPMJMOMLLKKHGIGHGKJIFIFIMMLKJMLIEDGEDCCBBCEDAEEECDDFEBCEDCCDCCDDGBAEFCEFEBBDEDFEFGCEEEDEHEDGIGHIHCGFIJLHJHJJHIHHIIFFHIGFHHHIIGHFFGHFEGFFGIIJKMONNOSTSTWVWVWWX\^\Z\^_^adf`baf`_`b`^^`^\]]\Z\Z[YYYZYXYXTSPPQPOPNPPONOPMMNONNNOMNOQMLPRPORTSRUWXWWXYU[Z]Zbaa_cba_dccfhigilifiligkljijkigggigigeceedaeeffeedcfdccfdbbeb_``]Z[_\YY[YUTSRMMNNOQPOQQQRQTSTUUTRSUVVVXWVWYYWY\WWY[WX\\YX\[XY[\Z[^\Y\]]YZ^\Z[\ZY\[]]]]]]\]_]\[\^^]\Z\Z[\ZV[ZZX[\YYYZXZ\[VY]][[[XYYY]Z\[Y[][[^]XZY[XX\\XX[YWX[ZZ\YWYZ[[][[]][\[ZXYXXX\ZVUV[UVUYUSTTROTURTTPORQRRSSQPQUSSSQOMMLJILNNKJIIGKLKGFHFDDGDCEFEDEGLgrM>=????>=HX��q?43231-++0=@INMPMLNKGLQUXSMU]ZYWTQNL=20,,-+)('$%$#""',)'&AK238angddddegggr}��shhnnflllgjjjjjjii_A2-,+-/3579<>?BDEGLP\m`F$"""!  
+)*)
+	
+%49<;::;=?@AABEFFHIKKLORTRWWYYWWRB"
+	5=@AFMQVXYXXXYVX\^\]\ae\t��yQHKMORUYY^abeehlmopqpqtx{����������������������������������������������������������������������������������������������������{
+						
+			
+
+
+
+
+
+	
+
+
+
+			
+
+		
+
+
+
+
+*����~������||||{yz|z~||wcLA<*!*+,1000.*$%0458;977:<9:=?@>@CDBADDCDFIGIKOHKLMLLNRPNPPOLLNNMNQPNMQMONOOMJPPMKKNIKHKLLHIKFHHHJIHIGJKLMMLLKEEDDBCCAABCCBBCCAADECBAA@BBBBAAECB@ACCBDCBADDDEEEDEFDEEEFEFEJHFGHHDEGHIIJIGHHIJGGGGGFFGGFGEFGGEDFGGGDEGEEGHHGIKKKMOONORTTVWWXUZZZ[]\]]aa_\a`a_[_]_]^^`^`]ZY\ZZYZXWWWVUSVSQRROKLPNLLMMLMMMKLNNLNMNLLOPPQSSTUXWWXWWZZ`[[]ba__a``bf_adhfdghdefhhgjihjjjieggjihfccfedbeedehdcbc___b`aaa`]\][XY[WVWWUSTQONPMNNNONPQQPQRRSTTRRRUSSUVTTUTSUWXUWVVRUWWUUWWTY[YYZZWWYZ[Z][Y[]Y[[][^^^]]]ZY]][Z]^]YYY[XY\WRYZWWZ\VXXZY\ZXXZ[YYYWVZ^ZZZZWYZZUZXXUZYYWX[ZWXZYWXXYZWUUWY`^a^_a_`a`ZVVXWUXWTRUWUPQTSSTSTQQRTQOORPORSPQSSPRSSOOPMIKMKJLLJHIHEIJHDCDDCBDCBADCCBFQ{�xM>=?@><@JY���n>111/-)(/;>@PSPONPPLKMQTWTPW\[YVTSPJ5/+-+)(&'%&%$"#)*+)#GR39aiggedegffiq|�nbhmnjlmkjiikjkije[@1++++-01479;<>@CDDDGQbcI($
+#-./- 
+		
+'6:<:;;>?>@ABEFFJJLMLNQSSTRVZYWUQC!4>AEJNSWXXYYXX[\]]]]^bheo���[HMNNRW[]acfegjkkmnnprvwz���������������������������������������������������������������������������������������������������
+				
+
+
+
+				
+
+
+
+
+
+
+
+
+
+	
+
+	
+
+
+		
+
+	
+
+
+&����~��~�~�{||~zyy{{|}{udKA:*!**-1//.,)&%26448:77:;<9<>@>@BAACDDBADFEFIJKIJMLLLMLLLQMLOOOPOQROMNTNNMONONRMLNNPKLJNIHLMLIHHHFIGIIIJKHJKMIBDCCACCB@@CA@BD@>@CAAAB??@BBAABDBB@CDEDDBBCECCCECBCDCCCCEDCDDEEFFFEFFDDGGHFGJHDDGEEFFDECDFDCGFEEDEEDCFCBDFDEGJKIIKLNLMQQQTVVUWZWX[\[[]`]]\`^_\^\[]^][]^^\YYYWXYWUUXWSPQPNMOQKFHJLKKKJKNLKLMMLJLNJKMOPOQRRPUVVVVUTXYZWXZ][\\^\_aa_cdfedgfcfghhiifhjifehgfffhcacfccbeaabd``_b^b^``a^^__\Z[\ZVUUURRRRNNQPKKKMMKKKKNLKNPQQNOPPQSSRRRRSTTUUTUUTRSSVVVUUVYUWYYWVXXXZY\ZZZYSWYYW[\ZXY[YW[ZZYYXWVWWZXVYWVYZXYWWUWWVWYVWXZXWWVTTUWTVXYUXXWUYXVVXXXXXYYYVWVVTVVVSTONU\]c``a`bbdZRPUUUVVSTVUVRPTTSQQRPMOSQNOSPOPQMNPPLPQOIJKLGKMIGGKIIIHGHGFDECDGGCDB?@AA@DT���u?7:;<;BN`t���f;00/+&'*39=BQRPPPPQLIMQUWQQ[ZWWVTSPC1+,*('&&%&$#!"&+-*!$F\Pciggdefghhiq}�}rhhligkliikihhjhhfX?1-,+,,.02668:;>@@@??BI]aL( '.1/--$
+	
+!2:;;:=>=>ABDEFIJLMNNOPQQQTWXWVVPD&
+
+3>@DEPSUWZZYY[ZZY\^__fjei���]GKOQSV[^_cefhhkklnopqptw|��������������������������������������������������������������������������������������������������%
+			
+		
+	
+
+			
+
+
+	
+
+	
+	
+
+
+
+
+
+
+
+
+
+
+
+$��������|||zyz||{{zydL@>,$)),./0-,)'!'057456669<99:=>=<?@??@CCAAGFFFKJKJMMMMMPMMPQLMNQNNOSVMMPSKLMMMNMRNKMOPJKIKFFHKGHHEFEIGGFJKHJLMKJHFCCDGDA@BEBBCC@BCC@@BAAAADD@CBA>ABDABCC@ABCAADB?@AACCDCDCDDDEGGFDEFEFGFGGGIJHFHHFFFFDEEEFFEGFEDDEDDEGCBEGEFGHIIJLKOMPPOQTTROXXUVY[YY[^YZ[^[[Z[]]\[VY^[YWWVWSWWVRQSSRQOOMNPQLJKKIILKJLMJJKLKJHMLGJJJKJONNOQPSUSUVWWVVYW[[^[`\``aadcbcffbcefcehfdfggddffddefbbbc`abc`aaa`b^_bd`_`a]]]_\WW[XUVVSPPOMNNNLJKJJIJJJKLLMMOONNONNOQOQSRNRUUSRRSUTSVTTUUSVVUTTVVTUWXUXYXVWXWWXWWXZ[[ZZZYZ]XXYXWW[WVXVSUVVTVVURRRTSSUUQQTVUWTTVWTVUVWVVVVVWXTVVVTVWUVWVTVUUQTUUUVKFR^^acababdggXTRTWURTVRRUSNOQPMMNNLMOPPNOOOOOONMMLOMKJKHIKLJKJFIKKIJJLIKLJFGKLKJF>=@A=@Mq���p=7::>BKWeu���c60.*&$$+4;AEPONOOPOKJNRWVPR[ZXYUTPN=,)('&%%&%"! %,.-'?fhjhgdedgghhr}�~rifkjijkjikjihkjheZ@1.-,,-.01455:<<;<:;;=BIYbN,"!  "'**-//,)#
+
+ 289:;=>?BBCCFILLLMOOOQPRSTUWWURPJ*
+	.BCDHOSVXYZ[\[\Z\_aabfkli���kLNQQRW[]`begfhjlnnnnpsvw{�
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm b/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm
new file mode 100644
index 000000000..1b51fc73b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm
@@ -0,0 +1,1278 @@
+P5
+1280 960
+255
+Pemj\`]QJGF<76565324@Wx������dabSEGVaZ99?IOC;9:=BJSUTB5244;EU[ULJRNH7/8BC=8:5554G\ipx|seYK_x������c@VG;88::CQh������y[GffYchbUKM^rokeekofdlw�����vM;OTd����tteZ]agkh^pWFDCDGEKb_ROLIKIERdehkjkmlfaX_jqi`Ol������������������������������������������������������������������������������������������������������������������������������������������}hs��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������u��������xjehjj_-:brvx{~�������������������������������`6FECCA@AIXTMMkw�����������������������������������l9IE=82/?qbRKJC<;?89;;AUK73441}���������������������������������W0$!0u������������ub!?a}����mR7!(Jm�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Rilj`e^QLHN=986553/4?X}������decRDHUfT87?KPB:8:=DKQWW@5445=HW\VPV`]V5/6>?;798479H]jtz}tfXKdx������_HRD;99;<BPe������uRMdf]cjeZLIboqmmmllfblw�����yL:Q[k���wmtb\_chlfdo[EABEFFJb^QNJIMKESjhjnnnroga]gy�xn\n������������������������������������������������������������������������������������������������������������������������������������������q_q������p_~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������yojmnn[)?hty{~��������������������������������T>FEDCBBEGRYat�������������������������������������g>KD>821S�t{k[jPS]RF9==541153.x�������������������������������l1
"Ln����������yn;
+
+
+
+;a}����nT22Y}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Uhoj^b^SOHN>67543005A[�����hgeSCGR\Q26>ORB=<:=ELOVR@5457?HU\UQ[eea=17@@;:68678J]js}~teZTdy������\GOD;99;>EUk������wMIgd[dkgZLFfopmlnppf^kx����uK;S\f���}psf^bflnffoVABADGFLfaQMKKMJDWgdfghhong`[l���{\n������������������������������������������������������������������������������������������������������������������������������������������p^q|�����nVz�����~�����������������������������������������������}oty}���������������������������������������������������������������������������������������������������������������������{ognpq[.?itz}���������������������������������[?EDDCBH\gpzy��xu����������������������������������f>IE?82.Qkr}Lp~jw~a_BXI44134.*fzx}}~������������������������o@
+		
+
+
+
+Du�������}�iP
+
+
+
+	
+
+4]x���{gP.An����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Whnhae[WRQP<56764223C_������~imdP?CR[O78?NNA<<:=BKSWU@5458?JV]XQU]_Y;39AA=;:6879Nais}teXRfz������XHSC;===?EQm������xNNgb[flgZNLhsomhhnmd_py�����xL=W\p����ur`[clotfgkRFDCEFFQg]QNMKLIGXa`acbflmfaZu���}ay������������������������������������������������������������������������������������������������������������������������������������������m`nu�����sPntztuwsmy����������������������������������������������dIGR[]eh{�����������������������������������������������������������������������������������������������������������������xpnqpsZ0=mu}���������������������������������R@GDCDG\|���tl]Su����������������������������������c BOF?;32WFeuJxrolv_hVpS22342*)]qqqpruxz������������������sA
+
+	
+	
+
+!NVg{{y�unb@ 
+	
+
+
+
+
+	
+
+6`{���xcD)3_�������������������������������������������������������������¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ykphac_WSNL>79972325>^������}gofOAESaQ7:ATPB<;<?EMUWR>5577=JW\UNRUNE53;A?;<85558Sals|{reWSe}������WIVC>DFB@FSl������xPNhc]fmfULMgqngfjmjdaqx�����uF:W\r����vsc\eknofehQGDFFIFRl^PONLKHJ]b_aaaeikfa^v���w]z���������������������������������������������������������������������������������������������������������������������������������������y��j_ls�����mMl_VT\gikv�������az~������~�����������������������������bMKOPMNRa�����������������������������������������������������������������������������������������������������������������xroqouX-Gqx}���������������������������������Q@GEEFGm�w^HMhy�����������������������������������\FOGD:20<.K\STQdb\cfZmZ93122-*P^`^`ehjnty}�������������}oQ$
+			
+?SWf[LH%
+
+
+
+
+	
+
+
+7[v���uX7)Mm�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_krk`a_VSQH=8::73323Ac�������kndODHT_N99BPL=:<>AFNTXP=5457?IWZRPYZRH52<C@:::946;Pcks{zqcYShz������LNTGENTG@IWo������wOOpcajqdUKJfqnfdlokdcry�����sD>T_p����rse^hmppagjTIKIHHCRp]PMNOLHH_e_bccgkjd`^w�~xoa����������������������������������������������������������������������������������������������������������������������������������������w��mamv�����iUjUPNHT`jr�����ƑLpploqqqnrx~������������������������{��`NJLHFLP]�����������������������������������������������������������������������������������������������������������������zrosooS0Ipx~����������������������������������QAFGEFJgl]TRcn~������������������������������������["GLIN;20...,4.9CTYmb^d^63221)%GPQNQWZ]bintvxw{�����}yjY8
+
+$3
+	
+
+	2[{��xaJ(6c|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`mrjab]VQQI>8;863314Ce������lnbNBGW[K79ANL@;;;>FNTUP?4448AJX[STcloT33?D=:8::55>Pemu}yp`UNg~������KNQIKXaJCJYr������rKTndaineYNLjtmgfnpldcuz�����n=AU\{���}qvfakopnaikUNVNKJGTm\QOPOLHI[efhkimojb`Zjtrni^����������������������������������������������������������������������������������������������������������������������������������������}��hcmy�����_N^\[UOW^fl�����͎:g]OJLLWipt�����xz���~~���x�����|kt�dPJGFGKP_�����������������������������������������������������������������������������������������������������������������{spspoR1Jpz����������������������������������MEHFFGIRR\uy{q~������������������������������������T&DJOW:3-***)*+-/67>EKOB21211'!:CFGHKNRZ`florsuxzyxqnfYF.			
+
+4b���rZ2 0U}����������������������������������������������������������������������������������ÿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`lrl`^[RNMG<9;754304@f������yjocKBGX`M69BPLC::=>EOUXO=5357?LW]VWixvT84=D?=;:879<Relt|wk^RTj������ORTKN\\JBJXt������rETkcbhldVMOioligmpldbsz����m;@U]}����rtgiourqalkTSZTLKJVo[PNPNMHJ_ghknnrsja^\jwsojZ�������������������������������������������������������������������������������������������������������������������������������������������icmw�����cUcefbc^_bo�����ˋ9dVNJKLYjmu������st}��{tn_s{urt{���xif�dNJGFFJQ\�����������������������������������������������������������������������������������������������������������������xsquqsJ1Pt{����������������������������������HGIHGK`lu��xghW{����������������������������������P)GKQQ;4/+**()*,142646:521322)"7@BCCEHKPX`fkmqpqqome]SB% "$$'()+*+(**+,)(&"
+>q��u`="Kn���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_rsha]WPNKE<;;555427Hj������ygm`HAI^eE5;CPLA9:;?HQVYN<5258@MY\TXemjN44<CA?>;79:=Tcltywj^UUk}������HSQIOdgPCKXx������rBZkbblmcWMRipkgdotnedtz����~n7CX^����~qqhkqtspcieUTYRKLIZnYPNOONJGbfcjilopjb]\elpoeY�������������������������������������������������������������������������������������������������������������������������������������������c_nw�����YWddkpwi`_r�����ˁ9fWLHIIXdgr������lqoi_[VODckljdkw{zp]^�fLMKIDGO_�����������������������������������������������������������������������������������������������������������������wtprqtM0Ss|�����������������������������������HGFDF]���x~mYGSd�����������������������������������H(KLQOH;1,**()+,0114221233443%#9BCCCBDGKR[agjjhjjgaZU<	
+%+.01125565234435676420/+))((%$!	
+.Z��~hJ&?d��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aqpc__XRNJD9:;785027Mn������vcoaIEPbeF4<EQL?::<?HPUZM:5579BO[[ST`liJ44@FDFHD<88@Tcmuwtj_VVk�������BSPDTmlNCL[z������nFWi^blocULSjsnjhnsmfesy�����j9CX_����tspfgoprsalgRRZNJJD[pYPKNNLJKcb[efhilib]W]bjjWS�������������������������������������������������������������������������������������������������������������������������������������������d`py�����_Uafjv~pb]u�ʿ���z;iWLGHKXbbp�����zcmdREHHEDKYe_V^lpoiT^�^KHHGDHP_y�������ö�������������������������������������������������������������������������������������������������������zvrrrqM0St}�����������������������������������IHGEGh��n]YPXp|������������������������������������D*MQSQQG8.**)'(-/113233345983%%<CBB@BDGKQX_dffifcb^Q<	
+
+
+
+$*4489;::9788774566545864223//11020//+'&'%
+
+
+"Ln��oP0.Vx������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dord_`ZOKID:::884424Mp������tik_EDN_eH7;HTJ@<;;?HQUYO<4769CP[[QTbi`D68CGHO_Z@99>Wglorng_TXl�������HTPFThfKDM`x������fB^k\bhlcTMRotohdnpjcbt|�����e7IYa����svucdjnsn`oeQT[QJLE_kWPNNNMLMdedfhfhljb\WYeslUX�������������������������������������������������������������������������������������������������������������������������������������������ebn{�����XZdq~��qc]t������yAjTKHHLV_et�����u`icTGLLB:>M_ZQX`fjnSVnRDABGFHN_v�������ĳ�������������������������������������������������������������������������������������������������������yusqpqF1Vx~����������������������������������CFEEGbtlKP`r���������������������������������������A,PQTV`VE0++)+--111352233:<92%#>FAA@CFGKPU[`ceeda]N4
+	
+				 #*3:<<>>?<;;::978664556645323232/01322302100/.)%!
+
+<k��uZ: Gk���������������������������������������������������������������¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������fopd_bXOIIC<;;77764:Oq������qgi[GAO\\E6=JTJ@98;@GNUZQ>576;CP^]QT]^WC64BGLZnd?::AWgnpsoe`QYp������|HUQHTfaIENc|������d@ai^\dfaULUpqnhirri_fw}�����b3HYa����xxs_bipsnbodPX]PJKKchVOONPPNPdfikhknrkb\Z_nuhSV�������������������������������������������������������������������������������������������������������������������������������������������gdm{�����O^kx��znb[t��¾��o@gSJFEKW]bq�����r]j`RPpaA69I_YRP_`hsGI_KACKSNIPaz�������Ư�������������������������������������������ž���������������������������������������������������������wvstqnF5Zz~����������������������������������|AFEDENRSiy���|o_�����������������������������������?2NSX\acWG7+++,/242231449@?90#'DQPIFEJJKPTX]abcb\N0	
+
+			 */39<>ABB??@<:;9:77875443443202233111011223000234/*(&"
+3]~�~dC#.[~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������foodbbWNJIF<;987855;Pu������qfiXGCQ`^=5<LUMA;9=AHOWWL;788>HU]\TYccYF45BHN]rd<9;@[hqv|{j^S[o������wHTNKXjdCDJa~�����e=ch\[dfaSNWprlghork`fv~����a5IXc����mwn]bhqsmdpeO[`MILLeeXQOLPRMSegjmlmqrja^[eoqfV_������������������������������������������������������������������������������������������������������������������������������������������~d^q������PamrwurmbWz�ʿ���kBeSJFFJX[^r�����n^i^Qa�Z@44FbXV^eXirGL\FBH[bUNQa|�������Ȩ�����������������������������������������ɿ�����������������������������������������������������������yvvsssmA4a}�����������������������������������zDGFCCFNx�ziYMO�����������������������������������97PTYacgj`J71-,/1232247<EF@:3"'DVRKLPOMNTW[^bdfaT9
+
+	
+					
+!(49<<??AA@>>===;:999:9744543220.22231110.02210013431//*)'''&'''%$"%V{��iK)$Ox����������������������������������������������������¿��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������iqubccXNLKC:;<97645;Su������niiXFDSfa:4<JSK?;9:BKQXWK<988@MY^[V`oncG15DHO^rb=89A]jrxyj\S[o������t>WOJZkfEELb������}f:cjX`dc]QJUtrmfimpjbiu}�����a4KXd����pvm_emprmamdNWZGJMMhgXQONQRKPfdfggjoqfb\\gqufUb�������������������������������������������������������������������������������������������������������������������������������������������dbr~�����Mdnosrpl_T}�̿���cE`SIGGJTV_q�����iXc\KOfUB0/H_XXnjZlvKRdOHShgXQO]x�������Ȥ�����������������������w5ICLaq������������������������������������������������������������������������xuttqrk?5ax~����������������������������������tCGFEEF[�~g[ZK\s~�����������������������������������56ORZdegttaQ@002434448@PSJB90"+=EHKNLNPU[b[duzaUC+" !
+ (269<??ACCA@>><;;:;;88766534330121200100010030///13312330..0/.--///0,))'!Ly��rS0"N}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hsrfacVOLKA99;:87459Uy������mkjXEGVjb@8=KXI=9:<BKSWXH;768AOZ^ZWbsr\>.7EHP_r_<:8F^kpx}xj]N^q������n?TMHYngFFMc������|e>fk\bid\RMYtpibfpniciv~�����[4IXb����pvl\kvuslfr`RWWILKKhfUPPOOMHSgdccdhlnf`]\fx}nJ_�������������������������������������������������������������������������������������������������������������������������������������������dkw������Jchpxusk_T}�ɼ���[FcSIFFHRXbr�����i\c[PXrX@.)La[WifZmwK]iTPcid]WTa��������ˢ�����ح�������������k��r,*.26=HUcq}�������������������������������������������������������������������{utsqoh=;gx}����������������������������������p@GFFDFYhPXSg|��������������������������������������48MMXehforwhYQEBC=:79DW]WLC80 #9BFFHJKNSXYOl�zZbXAAEL<-'"" %+259<<?@?CB@??=::989987545434410220/0011/110010-./.10.0440002231.43233103/,)%!
Dl��yY7 )]�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mqoc^_VPNM@9;;:8664=W~������kkkXEIXqh;8>NSG<:;>EJUVWI:459@LV\YXetnM608CJPbtW@<:Eakrz~xh[P_o������hCTLIVj^FEPf�������_@ji`foj`SLYrpigjuqi`kv�����Y1LXf����vykao|yujdq^QYVLKJKieTQQONKERdcceffjkea^]eyxeVg�������������������������������������������������������������������������������������������������������������������������������������������`l|������Ofmu��q^U��Ⱥ���YMdUJEHJRYat�����f\cZPVqW;),O`YUmoctzHkqUYiedc\Wc��������͚�����٭������������|@��p5311653.2698<=<EUn������������������������������������������������������������wttrrql8;ex}���������������������������������l>FEECDII[s{���wo�����������������������������������+8IHXkfbgjotsiaa^SNLS`eaZNB9.  :AGJIIKONL??fqNMMMJRd`NLJDA>;89;7/('$$**.25668::::997876788765522432110210-1011/21/..0-./000-/2212133332576454355321/,'!
7h��|b>%=z������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mtpab_VPLM>89:78524>[�������kkfSEHWi\=7;MTE><=?EMTYWH:468?FR][YesmP:17EJQbmV?;<Bbmt{~|j\Q`r������cFSLIS]PAFQh�������_Fjh^fomcPM[qnkgnsojblv������U5O[k����xxjdqyuh_l[MVTLKJLfbQOONNJDWfaenmjnmd`[[b||aXj�������������������������������������������������������������������������������������������������������������������������������������������]p|������Mow����|^[��̺���UQhUJHGMTW`w�����c[dYNe~W8()OeXS}zr{yIyyNTfghll^e��������Ζ�����֪�������������-��i9<>@C@1&$(,3;<=6?Qw�°��������������������������������������������������������vrtsopg6<iy}���������������������������������g<GDCBBDIo��ym^OP�����������������������������������&=NIYwc\bdecrhgwykcotoc`ZK>8.
":DILMLLMLH97XPJHDGIGIHHKLOQ\ZZ`[TSC4'%)(*-.,.02321100/0244774552211220/1--./101/0/00/0././.00./0/31210334565345676656640-+$.[|��d>5w������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mtndcbTMKI?:<:68636@_�������kidTDHWeU;6=PYF><:?GNRWXI;648@GR\XXcmlY>18FJQcqQ<;<Eblu}~vi]Rau������`BRJGR_VCGQi�������]Ckh`hpl\QN^sonmptqidnx�����}Q7Q[l����uvjhx�xrgdlYOXUKJKOgcTQNNNJEWilz�ypsrc\X\ixvaOj������������������������������������������������������������������������������������������������������������������������������������������]p|������Mtx����zY\��˾���RWeRJHIMRW^v�����_`gWOcvP:')PhYX�~w}vHzLK\kkmpfj��������Ζ�����٧�������������-��g>HIINH0&%'.@O\\GBJj�����������������������������������������������������������stutqqe6>kw|���������������������������������d@HDCBCCJhvnLEEEY�����������������������������������&?PFb�\]aca_YQbvwdjrofa_XL<5. 
%;EQ^]WUSPI86HBFFDEEHJHKJJMJLLNSY^e\2)/10-//-.,..,,+*+*,/-.34334310/13211..00000/..0/00/00../0.01.0/1014224554466678878888741-(%"&Pv�|dS}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������munddcVNKIA:;<89735Bb�������jg`QEIYiZ:6<ORG@;;?FLRZYJ8457=FP\VS^gfT<29EITenP:78I`mt}�wj[Pav������ZIRJHUg\CGTn�������VGlh`knl\PK_vqnmptpfbnx�����Q7SXj����qyik}�wrggiYPVSJILQgcWSPMOKGZio���rurd]SWiuqaNs���������������������������������������������������������������������������������������������������������������������������������������~z�ycsz������Jxv����t^Y�����̿MYdOHHJMSX`u����}_`cWLj�V9%'Ukhq�z}vJ��NINVX\gdo��������˒�ɷ��ס���}���������0��`CMRSOC.&#'0Jh��]G@Z���iy��|z~�������������������������������������������������ttuqqqc6@ly~����������������������������������[AEDBCCCHSPKCADG\����������������������������������x"?LFl�U^^^]]]^lvn`Wcd^a]WM<5/':E`�|srkZI52:;CDDGIHLJMRNOMLIJKMONK#0MLB:2+++(*(&)))+)++*-/..//0./-/1210121000/0/./-..000///..1/.//01210133454256788988:::9;>820-$
!@k�uu�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ovndfcWPMG?;?:97845Ad�������mjaOFJ^m_;9=NQF=;<?DLT[XF5378?IU]VQYfcO83<DKRelH797K`nu�weXScw������XHWIGRaYBHTm������|YJhd\jpj[RJasplhkopgboz������P:TYm����uxff|wteejXMWSKKJTe_VROMOJFYjo��wnrna[SVgxs`P{���������������������������������������������������������������������������������������������������������������������������������������xr}{_tx������Py{����v\\�����ξI^eOJLNOTYbw�}}�~Z_eYO|�L8%)Vnjs{|~uU��RKJIKKW_r��������Ǐ�ȭ��֙���}x��������.��^@V^WK?,$$(0Lt�]IB[�Čg{�yokix}�����������������������������������������������sqspoqc4Dlx~����������������������������������TADCBDBBDDCDABDGb����������������������������������u!?PDo�OYYWXX^l~qeebdb]^WRM;4,&=Hy�����cG7.-7HFEHGILP_d`YXTKIFHJI7"St_S@2.*&#"!!!! 
!!#$%()'*-,-..-.///1310/210/0/.+..0-1/00/./0.,/10101233345468767987;:::=??@?;5)3h|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pwndgbUMMH=<@;87755Df�������nnbPEM^p\5:?PTC:;<?ENV\ZI8557?HU^UMOVRD52<FJNfmJ:87H`kqz}qdWRey������PFUIIVeW?HVm������{XGic^knh\SMarnh^fnofbox������J9T\s����rteex}wrfgkVRZWMKHQf_TPNMNIGZjk��ngkj`[VXhum\Py���������������������������������������������������������������������������������������������������������������������������������������qq�{Xsy������M|y����u__�����͹EcgQOSSQTYau�}|�}WbfWI_fE5%+Xlhrz}|}wY��TLHFILT^r��������Ȍ�Ȳ��Й���~v��������+��`@[dVJ?*%'(2JghcVIE^���ftuni`No�����x�����������������������������������������sosrrq`1Flw~����������������������������������MDGECBABBBBABBCHd����������������������������������n ANIszFJKSPN[��ut�s``_XOLO73-(?L������bC1()8EHHFHLL_���zwhRIEFGI/KgYSF<92+'(%$"   "$)./.+--),//../0./00.0///.,/.-00/..0//11120/2324434567677788:<><@CDFD=2'
;k{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qulcg_TOLI;;<:88746Ii������nrdNBL]mN69BRSD;<>AGPW]^L8658@KU\TJHHE;21<EKWqvL?87JbhosqleZRez������QIRKLao\BHYp������|QJmdaknh^SJbnmicjurhcny�����I>U]t����ovfgv{uodhjXSa[KJHSk^PPONLKK[lq��ukmi^\W\jtk[P{��������������������������������������������������������������������������������������������������������������������������������������|ms�v\w{������N{x����o_d�����ͳ?aeSMTSOU\dx����}WceVNfnF4&)T_`rz}~�yI��QLHEFHQ]r��������Ć�ƿ��ϕ���~t��������*��XG_fe[A*&()5I[^]TIB]�Å[lpnlbRs�����f[t~s}�������������������������������������oorqoo]1Hpw~����������������������������������LDFDCAA@@AACB@BGf����������������������������������l"GMJvxLDFNIES�����k\^ZSLNP43,,?L������Z@2!#8DGHGGJJt������ZIGGGE(,NNG90/-+,+)*)'$ -7.)&''&),+*+,.-....1201.-/.-00//.01/.000/.01323232456887769<?>ABFHJLG=2(
Enw{~}~~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qvl_d]VQJH=<<966546Hh������}qtdPDLalN:9DQOB::;AHQW]^K877;@MZ^SMMMF<20<FLZtoN>89KckmoslcWNg������TKQKOeqT>HUs������{QOgcalph\PJbmnhdnrogbq|������E:VVj���ymtccpsrqcfjWS`VMMIUhZOQMNNJL]kt��srri]]Z_k{qaQ~��������������������������������������������������������������������������������������������������������������������������������������|ow�wUvw���|��Tx}����l`b�����Ϯ;gfUOTQPW[c{����zXieVKjyE1#%MTdw~��~O��NLLGGKQWn��������ŋ�����̌���}u��������/��LOgou]=*&'(6K_jeYK@^��|ZnspoeXu�����ZEkyz{�������������������������������������nouqprY/Inw}����������������������������������OFGDBAA@AB@BCAABo����������������������������������b"HNDxwSNJMJHKf����[TURNISO62(-BQ������_C0#9DFFFEJM{������KHIIIE$&HGA2.-.+))+)&'%$$!27" 
#%'(*,-,++*-.-../01/-//-../0/.00.,023322133466779989;=>>?DFKNOMM>1#+bnpstustww}{|~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rul\\\URMF=<=:88636Jm������}lufOCN`nQ4:BRPA;9<BHRW__J756<CNZ\WV]YL@2/?IMWsmF<9;PgnqutmcVSk~������JKRIOioMAGXu������zRQkacnsjZQMdonkkornd`t|�����~?;TRZs��vrp^]lppmchkSNOLKNM[j]QQOOOJI^gm��fiph\\[bn�uZJ���������������������������������������������������������������������������������������������������������������������������������������|pv�uYvx�����zUx~����k_c�����ͪ;jdQPUOOV[h����x]mdVRwB1!'OZp�����zR�{KMLHHIHOe�������������ʌ���}z�������v6��IG\gjS;)%$'3Nr�{aH<\��{^u}sldPu�����[Bn��sox����~~z���������������������������lmsrpoV2Mox|����������������������������������ICECAAABBBAACEFN�����������������������������������[$ELD�^WTURLFJfuv_UOLOLFXP62'1?Q������TA*&AIGFEDKN�������JKJJH@#'GD?0-.6:4*-)'%##!
+	$5$"%))'(')--***..-//.0...0/-.10/,113111023455569:99:;=<=?DGKOV\WH;*"LZUUXZWTQQRMNNQOPRYW__eklnpmfge]Zabbaemkmoqorstyy}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pykX[XTRLG;:<987769Lm������}oqbLCLcnL6:ASQD<9:AJSY_]H756<CMZ^[_pjbL72CKMXqjE;6>PhmgoqibVUk�������CLPHUjmL?IYu������|HPn`cntjZQQhrqmlrwmeds{�����|:BSUg���yspZ`twrm`hiPJOMLTRYk\SPOOOJH^fq��ggld[[ZcozoXP���������������������������������������������������������������������������������������������������������������������������������������zkw�sZxy�����uX{�����m\h�����ʠ:jeURQNOU\h����wZncRKdmF0!,K]������oK�kFJKIGGKTi�������Ӿ������ǋ���yu�������m=��IOeriM9'&%'3R{�}aK<^��~`��vofQu�����UDq��hbonec`fpw������}su��|~�}������������oosrrkP2Snw|����������������������������������CCDBACBAA?>@BDKn�����������������������������������X(HJG�|ea[YUME>PaYMPLLOOQ]N51'0@Q������T>#&>GGGGGKP�������FKIHG?(C?801>jqh^T3)&&#
+
+ !"%'%&(**),-,-.//..-0100.//111200344545878:9;;;=?BDGNSX]WQ@0 ./.**)&  "%',/18DIPV]gnw{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qxiX\URPMH<<?<98658Lp������}sp_KEMgrK;;ESNC<::@IRX`_D558=DO[a`g{zmN54BJLZjaF85@RfmjuvkbRUk�������HPRK]sqLBL\x������wGSj`cnofXLLhrrpqstkcgq|�����q8EZZ}���rq]e�wp\jiQMWROSLZh\SOPOOIIbiq��jji_Y\YanskUN���������������������������������������������������������������������������������������������������������������������������������������ypy�qY{y�����sWy�����gUe��ż�ĞBkfQMPOQU\h����rWmaQMxpG."-M]~�����lH�kNKJGHLPWi�������Լ������ƍ���yt�������fI��HSp�mM<)$%'2J]dcUF<\��uZ~�po^Py�����OAt��f[id`YZ^ry�������tk����qZz���������}wwzyvnR7Upx|���������������������������������HFEAACDA@?>ABCP������������������������������������O)ILL�rcc^TMHE<FNIGOKMQTZcJ.1%1@Y������P=)AIGDEILO�������IKIHH=)B=7/<r�����M(''% 		%*
+
+!&&&&'(%',-+++,.--/./.031111354466679=;:::>@BDGMQV]]]WH7$					
+	
+	
+
+
+&.8=AJJ_���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uvg]`\UMMH==?<9876:Qu������~rq[JENdpK7:ERNC9:<@HSW_W<659?FPZ`bivwfG33BILYg_E;5?Udow~naTWm�������EQOJZnlQDL]y������xFUlbdpqdWMMjurrusslegr|�����m5D\^����{rn]f��yoZlgSU]UQUM]gZROPPMHJ`fr��llgZW[Y^cigVT����������������������������������������������������������������������������������������������������������������������������������������zz�mV{y�����oY{~����eWe�ſ����?laQQSQRU[g����q[q]RNcS<.#5T^|�����gO�iKIHGKORZj�������Ժ���������xx�������_R��CWq{gK9(%$&2ERRSOB?a��jY{�vnZM{�����MDs��`[jjookjvw������pj�����tDt���~sir����������sfnw|���������������������������������|FFEA@BB??@AAA@Q}�����������������������������������J+IMM�cSYWOEBB:JIDDTHGPSXdJ23! 4BOo�����G:,CHFGGGJO�������?KHIG<)=>94?������V'(&% 	72
+
+
+ #"!"%%%&)++-./.-,/01232355467557;:;;:=>@BGKPU[_`^YN:)	
+							
+J�����xw��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zxe^i^UQME<==99667<Ov������ypnYHCMabD6<HSLC<??CIQV^T=778?FO[`_lyoeK13AJM]j_@94?Xfr{�{m`RRn�������BTMLWklPEO]x������uEXjcgppeXOOkrqnkqtn_et~�����n9J][����uslUf��yn\kfQUZTLUN]dWRRPOMGM_gx�~pljZWYX[_ljZV����������������������������������������������������������������������������������������������������������������������������������������|}�kX|{�����ga|����hVj��Ů���9nbRPRPRV\i����nYl\QIHK@0(Vac������dN�`EHFGMQX\p�������Ӹ�����ֿ����~~�������TU��>YwvhO9)&$'6I[\UO><d��\f��xnZD}�����NHr��bYjpyxyxvo������hg������?{�����dk�������������������������������������������������wAGCAA@@@????@BPq�����������������������������������K+HLQ�TDTSLB<AHWJCHREGKRW`D715CJN���yWH>-EGFFGFHT������y@HGHF=)><85C��y���W()'& 	G+
+
+
+				 !#$%')+))(*-/121344455679999;<<>@BEHLOY^`a`]M;)				
+						
+
+
+[����mZZs���ka`nsy������������������¾��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{zifj^UPPF<<<99955<P|������tolZ@EOZT@7<GTMDDJLJKQW[P<798?HQ[_^ltonK04AGN]r_@87@Wku}�}m`VYq������}GTMGRimKHO^{������u?_jahrmbVMTlqokfnrlbfv������e5GZb����vsoPh��vn_lePVYRLRP`cVPQMMMGJajoxtprnYXYXY\eaY[���������������������������������������������������������������������������������������������������������������������������������������|y~�fX|y�����aa{~����kTk�ƾ����6lbMNPLQV[d����tZl]OJceB-.lif������[S{^IHHLSZ_c{�������ֵ�����׻�������������Rc�~Gr�}pR6($&(6Ux{eO>:g��[m��xlWF������OHv��_Xis}~}|tl�����Ѿbl������<������jf�����zz������������������������������������������w?ECAC@@A@@?>AAJ\�����������������������������������F2NMZ�IEOVQH@BMbGAKJCHNSWbC3/7EHL���TKJ<1GFEFFGIQq�����ZCIIJF:(<<:5D������P%(($ 
+<
+ ! ""$%%()+,-0112465458:=<<=<=?AEIKPX[``a`YOA1%									
+
+	
+^����oWSY{��}A")6AJS`ox���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xvjgf[XPMF>=;98634:S}������rpl\EENZUB7;HVNFMY]MLSX^N9769>HS\_[ksoe@/5CGN\t[>94?\ms|�|k_Q\o������|FPMGLc_FFO`~������o@aiahrlcVMUjnohekpkagw������g7LYa����tupMj�wrk[mdS]_KJPN]bXPOMNLFLcfhmmlokOVWUXWXWW\���������������������������������������������������������������������������������������������������������������������������������������yp}�fUyy��}��agw�����jRp�ŷ����:p`GIIHLW^h�����kSn[OV�f=,-Zbg������VX�dNHJRX[_i��������հ�����׵�������������Mb�LurwkK3'$&*5Z��eJ@>h��Yl��woRB������IF~��]Tfpz}}}qg�����Ѹ^i������=������_[{����rqvqsrsx{����������������������������������t<DEA@>?AAA?@@@BC�����������������������������������D7OKX=?ITTJEBR]DAGABSRRWaB3.7BHCl��ALK8:MGFIHGHLOx���nGIIGHC1.@<;6H������K&)'$	"6JD>8-
+
+	
+
+ "%$&)*+*,./1304689:<=??@BCHKPUY^`__^XNDB1				
+
+
+J����eRQe���J% )4AYbv����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|zjgk`YOMG><;:8856>[~������qlhXECQdaC5;HTNHP]bOLPW^Q>75:@JR\_^mri]@27BIM\r^B96A[ls{�}k^M\q������w@QLDGRICEN`|������pDbidjqmdUMTmonecmpi`jv������a1K^g����otmRg}vqicpaMYZOMQJ^eYSONOKDQadiokihbUWWSVSQTVW���������������������������������������������������������������������������������������������������������������������������������������tm}�e\zz�����]jvz����fMq�ǳ��ʇ>ucMEDHOU_k����lXmVJPo\;, 5Vf������R_�dHIOXY[_l��������Ү�����د�������������Jr�|HYSWTD2(&')8]vjVKA@j��Yo��umNF������KG��[[hr|}~}pe�����жcl�����}:������][|����nicOHOTix~������{~��������������������������n>DC@?<>@A@>?@CJO�����������������������������������=9TJ^w>?GNMCB>PZD?>AIQNKM]<2+;DGJTrZLMJ3ESJGEGFGJRv��{NIIIGGC1,>>>9L������K%(&%		
+
+cnhvpQ
+	
+!#"#&()))+,.012558:;>?ADGKOSX[[ZYZYWJ:#					
+
+
+
+
+0{����zgem���l92AWn|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yxfei_SPNG?=<;9877>Y�������ohcVADQff=7=HWKIS^_LOSZ\T;668?IT_`blkebF38BHP`qb?:9D^ns{~wj]R^r������nASJA?==<EOe}������nAelelplbSNXopibcprk_ju������]1JXb����nsg[h{upido_L]XMOOJbeVSNMMKBRbgovofgeWXUTSQPSQa���������������������������������������������������������������������������������������������������������������������������������������vw��]]|�����[lu�����gQr�Ǯ���~Bv^JEEHQV^l��y~�jZnVMW�e>)7Uk������Sb�\FIQWY[^e��������ӯ�����ة�������������Cv�s@QRNLB3%#%)6LRSOLCBk��]t��vjRI������DE���VZht}~{{he}����Ƶ_h�����wA����ʽS]z����hc\PJPVix|������xw}��}ww{{���y}�������������e=DBA@AA@@?AABMdp�����������������������������������6;PNeu>@CD@978EVDBCBEHIGIW<3(=DIU`e^VQH1#LNHGEGHHHO\��WNIGIIID1-=<?=Q������B&('&
+		$ug��{Y
+	 #$$$')+-.1225677<??CDHMPRRRRPKF>3				
+#W�����}~�����8'7Wl����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zxbge]VRPG?=>;;868@X�������mhaRCCSbX;8<LUMIU_[NNR\[Q878:AJT^^]gqrd@18DIPcv^A9:C`mt}�ti[O`u������hBSI@<<<<CNb������j?cjflrocTMXrpiehpri\jv������\4Q_f����vti\l�uqhco]T\WJLNLdeXQONLKCQdjv|ukliZYTPRRRTTd�������������������������������������������������������������������������������������������������������������������������������������������ab{~�����Tmu~����bRt�ů���{DrYKGFGPW_n��{f\sRMU}[:'9Up������Ql�aIIJNUSVa~�������ҫ��ƽ�٧�������������=��qCT\ZPD0%%&*8JUYXQC@k��bt��vkPL������AL���Z\ku}|~hb���öŹ]n�����kF����ʺL^z���^^ZPKY[kuw����Ѻsc[WZ]egrv��slut}�����������`HLGFCCEEDFFDG_�������������������������������������.AXHgv8=B@:213>DACFGJGHDJW=3'!=ELhqspaPG0*BIHGFGGHILLfgELHJLIHC,-<=@A\������<''%$(uf��r\			 !#(),../.02568:=?BEGJICCCHIC					
+
+
+2b�����������0+Md�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{�����������������������������������������������������������������������������������������������������������������������������{udhe^TQNC=?@?>979?`�������kmbQDESaX?8<NRLKU`[OMSZZJ756;AKU]]\hywX815EIPcsYB:7Ceot~�vf\P`w������i=SF?<;;<FTd~������eDgkdipndTIWmnkbfmpj^gv������V4Q\h����vvh]u�uogbl]RYZLNNNcaWPPPPLFRem~�zqqiZXSPQRSUQa�������������������������������������������������������������������������������������������������������������������������������������������Vf{~�����Njqsyz�xdPu�ĳ���sEs]LFFHPV^p����zgYmSNNt]:& :Wn������Gu�dGJJQ\UOW}�������ҫ�����פ�������������6��mHesf\H.&$&+8VkpiTAAn��^p��riHN������:K��}V]loxxz{hf~�ϼ�̵Xs�����gG��ĿɵG`y���|Z[XOM`^gqw����εq[MGGHYcii_Z[VTTv����������{n`[VSOFUe^WQPTe|������������������������������������)ATJkq6?C=81/39;=BIKKJKFHV:1$#<GMcou}mNE0+FKIIEEHGFP]_`QPEIGHHE+2?=??X������5''$$	,tj��sZ $&(+//257=AACHGGILNP/			
+	
+
+
+
+
+3b~��������r "Di���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|uwzz|}|�������������������������������������������������������������������������������������������������������������������������xshge\VQOC?A@DE?89Cd�������oqfSDGXj`A9?OSLIWc]NOT[aO:557AJT]\^j~{X929EIM^^I?:;Gdqv~�xd[Pbu������bBTG?=><?FUf�������cAhibkpkbSH\nokbbmoi]iw������R7SYj����eukdy�vqf_iYOWRLNOQeaTPQRPKEWgp��vmmf[ZUTTSTVPf�������������������������������������������������������������������������������������������������������������������������������������������Oe�������Bot����xdOz�ǲ���iNvTECEKUY^r����{b_kWP`�Z:% >Yo������I��fOMRhpZSZ��������ѥ�����ՠ�������������7��f?rmgiG-%$%*9SjwiT?Dp��^t�riJQ������:L��|S[gegjnqfi��Ѷ�̱Uv�����dJ�ɴ�ȳBa{���|XWWPNd\foy����ѲiYLFEKX_b]HGJNNUx�����Yhx�|vuuqjgt���|~zwwsr���������������������������������|(DTLnn9@D@6/027<;AGIGHHFKS70#'<GJZcfteMF.*OPFFFEGIM_iji`UHJGFHC+3@>A=N�����q')(%$		6|���u[
+
+ !#&*0759B?EFG>-
+
+
+
+2Xnw������>(Ll������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z�������~�~��xvyy{{y}~�����������������������������������������������������������������������������������������������������������������������ztbgg]WRN?=CDKPH:8Cf�������stgRAG[l_87?PQJLYh\NPW_`R6679@IT^[\l{zT42<CHOXTD=:7Jfot~�wdYL_w������^AQHA?><=ETi�������`Fjecglj]QK]moifhopg]iw������M6VZj����etihw�voe`kXJOMKNMOfaURPPNIBVis��nikfZYWUUTSVUh�������������������������������������������������������������������������������������������������������������������������������������������Nh|������Gpy����~aU{�˰���^VqTIEGKXX_q����~]biUIOxZ7$AYu������=��bNMOgpUSb��������Υ�����ԝ�����x�������5��fDb[_`?,%%&,:HQZ\VEBr��au�{qgIR������6V��yTbfYZZ_c`j��κ�ήVz�����^I�ƫ�̳L_���uNVXOOc]enw����ѫcWJEFLV[^ZJHGKKQv����Zc~��wv}~}ut���������wz����{v����������������������������x#CRFph=@AC:.128@>=?@=CEDMP6/ '>DFLPRZRLE*0cZIFGFGHJcqvumPHHHHHB'0:9>@EVmotr5)-*%#		;�����^
+
+		
+		!##
+	
+			
+					
++;Nsf{vkK-Vx������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yvxy}}{|z{����������������������������������������������������������������������������������������������������������������������}maif[WRKA@EKVUJ>9Cg�������uvmUEL_sa25>OSKM\h\NQX^aS:88:AJT]ZWgusS42;FHO[_N=99KemsyzncWJb}������\AVH?=<>AHSi�������]EifacjdYPI_rpfcgnpf_kv������N3VZt����kvfgxuobbjUENNLNMPf`VTRQQJDVjt��oloe[[WUVTTTPd���������������������������������������������������������������������������������������������������������������������������������������~t}�Ph{������Pqz����}^S}�ʭ���^SrTHDFKSWat����~aalRPq�Z6$@Yv������;��bRKPefNQk��������Ѥ�����֘�����x�������3��cF`icR<-%#&.=NSSZT??x��\ksrmhIV������2X��yLjjWRSY^`j��ϻ�ͣU�����]M�ç�ʰHe����wLWXMNZYckx����ϦcUIEFLUY]VHFFJKQ{�����Ob���qonlnmipu|������|�����~�����������������������������v HTOwa??CFA864<A?::97CHGOS5,(>FGHJMMKJF)DgLGHGGEGMhoqwkSIGFIKE$49:<>ETchgZ73..+'
+	;�����F		
+			
+			
+
+
+
+
+					
+
+
+
+"24I;3"Gn������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���~zwy{|||~{}����������������������������������������������������������������������������������������������������������������������}reke]XQKA@DNZYJ?;Ej�������puoZDMbs^95@ORIN_o`JRW_bT;36;@JV\YYaf[K84<HJSmnK<99Ldnoppl_WNb�������UCUD<;=>AGRl�������]Kgf`fjcZRLdvofbhopealx�����I<Z_u����nzefsyrlcbfVHVUONNPd_VRQOOLEWhq~xlmodYYTWVRUTMf���������������������������������������������������������������������������������������������������������������������������������������}t�~Pk|������Mpw����s_Z|�Ű���XZpTHEGLSXbt����x]dkPIVkU5!"BZy������3��^RLKWZPUg��������Ν�����Ք�����x�������*��\BkqVA-$$#-C[_acTB?w��\ikmmkA_������4Z��zNniTRSU^cl��ͨ�͜U�����PS����̫Kj����uVUTKJNT`gt����Ο[SJGHJRVYSGEGILQy�wz��Qf���h^QJQVZ[duwssmsqt���~|}����������������������������r"KSLz\9EIJJEA=CJ@=:=?FLEML0+(AFGFHJJIID,#BOGGHHFEGQ^knpaPGFEHJF 
+48:=?GXcfhdC632/(		
+4r�rd	
+
+		;>1				
+
+
+
+
+
+
+		
+
+!Ek��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|zwz|{}|||{����������������������������������������������������������������������������������������������������������������������mdie_ZQOBDIOYXJ=;Lm�������txoYJMb_57BMPJQbrZLT[cdN;76:BKS[Z[gfXL63>ILVplK;8;NgrqzxvdXTg�������RGV@<>=>@GSm�������XLogbgoi\RMerne_gnpecmw�����}E:V\l����iyfaflmobcgRQ[YQPNUe^TQQQRLDWgmxtlijcW[WWVUVUSr���������������������������������������������������������������������������������������������������������������������������������������ys�|Qo{������Fqs����w_Z}�ͽ���TYkVIFGLTXbw����{ZgjSOfX3 !DXw������4��aSNK^]TUf��������͚��Ŀ�ѓ����~|�������/��WFm�wR=,$$$,FgqymPAD��Zhllkc@e�����~2Z��xRniTTSU_br��˰�їS������H_�˻�˦Gj����rOUSLKOUdht����Μ[TJHHJQTXTGFFHLV{�����Tm���jTIGFNRBRk]HHHblp�����vaq|��������������������������l$ORLsaPPTVVPMJVaLGGFELMDPK1-+@FGHIJIKIA&$6AGGEDGGGJ[glj^NHFGFGE38:;=ES`dhUA:971*
+	
+,��]
+				
+$ =OP1&&#!!!'%
+				
+#Nw��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ywxzzzz{|}�����������������������������������������������������������������������������������������������������������������������negf`YSN<@JRYWH=:Jr�������vufNALZjT78@QPIN^jVNUYcbH:67;AHT^ZbvyiU42=HLWqhJ;5;Pgqx��wfXRi�������OFPD=;<>AIWk�������UKlgajtlZPKcupiikope`lx�����F7Y[o����jud]afloeglQVZYLSHSe_UQOOPJF[fnxypmmb[ZWZZZYTJr���������������������������������������������������������������������������������������������������������������������������������������ut�|Rq{������Itp����v_[��Ƹ���M_kUHGFJVXcw����|ZhfRK_uM4 %F[�����|6��[SP[rfVTk���ϴ���˔��ÿ�΍����}z�������.��TEdjXN;)$!#+FZfn]MBF���ZglkkdCb�����z0_��vSpeUTSV`ao��˲�ϑQ������Ja�˸�ȣAo����nL\XOLNUciu����͔TPFGIMSTZUGDGKNP|�����Vr���iPCJNLG6LdYJHJ\mn�����~Vlu�|u{z���������������������h"MPMuc\Y]_ea]brgYVROMNJFQK3,->DGIJJKIH@'-CEDDFIFFGZnmoTLGFHHK?28:;;CM\bcL=:973/ 
+	!s�\		
+
+
+	",.(/1*),+,,1?6
+!! 1l��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{}����������|uwx{|z{|~�����������������������������������������������������������������������������������������������������������������������~lbie`ZTK?CIP^WG;;Lt�������omaMCJ[dM68ARQJN\aOMT\c`H767;@HT\]fy}pT04?IM[mlK=6;Rmrz�}r_SOi�������FIRA>==>AHUm������RNifeotjWOIgtrnnqsnc`mv������A:X[}���}mrb]aeiqbhgTU^WKPLWj]SQQPQKG\fnxwolmaYYYZ[[XVP~��������������������������������������������������������������������������������������������������������������������������������������ns�yMqu������Qwu����s_T�������G`jQHJINXZ^z�����\heRK]vN6!I[w�����{7��XLP[nlNQo���˨���ʗ�����͈����~{�������(��MJbor]?)""&,BNRZSMBJ���]jkkkd?d�����v5d��tSogWSRW_`q��̪�ЎX������Jl�Ę�ϟEq����lPZXPMNVckz����̏RQIHHJTW^TGDFJPY~�~���Wv��{fWQ]\J@3L_QIHMZfl������EZYSSXgn}��������������������d)SPAKHPPTZbcaehea_\XUOIHVH3+,@GFHIKKLJ?%+DHDEFGFIQptqkZOIHEGI>9;;<=AN^b`H997942$
+
+
+	Ft(
+
+		
+
+			!13-''*+,,*,5(
+				
+!!"    """&)(&'')'))))/3201110*'&#"!'X�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|x}����������|yz|~}|}~~�����������������������������������������������������������������������������������������������������������������������xogkg^YQIAFNVc[F9:Pw������{jj\ICI\`G69DUPHJ^bSMT[d_H657:AKV_[`uzkF31?HKYllA:8=Okwx��scTQk�������IKOD>=>?DJXn�������POhehrugXOMdswsttsnc]nw�����xC?X[����{rs`\aejobhjRX_PBOJSi^UPRQQLH\fp�zhhg^U[[]\\YWNu��������������������������������������������������������������������������������������������������������������������������������������}nw�zMq{������Kv|����y_U�����ķHdjXPPLQXY[|����SgaQNflH6!N\p�����s0��QJKZlgMWi���ɮ���Ɩ�����ɇ������������*��JPi�f@%!!$.GS[_XLAL���ZjkkldAl�����k3a��rSmcWTTW^cx��Ƥ�͈\������Br����ϗHq����dR[WRNS[en�����ɄSRJGHLSV^UFCEKHT������Oq�~tfOWkXH;-J[QLNP[`f�����x=QSJHJbns��������������������a)RNB80+,-*)2>HRY^__ZVQKL]D2+1AFJKIKKLHA,>GDCCDGM]notpgTHHHIE;2879>AVhheH?87750#				%					04/#%+,//)'%$! !!#&+,)"  $%&&)))()*(**)))*+,,/1/11320348@A=@A@A?=;96420/+&#!
#L������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rm|����������xzy{~}y|}z�����������������������������������������������������������������������������������������������������������������������ynkke]ZRJADMZd]F;;Q{������zpk[KDJdrL97ETRHPfiVPUZcZB578=EMW^Y^u}dG43@ILZunA;:>Xmu|��rbUWm�������FNPA>?>?CJXp�������MSjghoodWNLiruusvvn_]oz�����t4=VWv���~stbbbfln`egQW]RHLHVk^VPRRRML]hs�zcfe^X\Z^]]\XP~������������������������������������������������������������������������������������������������������������������������~�������������~pw�rNvz������Mtz����sZV��ɺ���Dhpkon]_a\]x��}Zh_RKHEB4%O_������q8��WKJTkbL\o���ɞ���ƕ�����ƍ����~������v1��IH_qt[@&##$1I_xtYNDI���[jkknc>n�����d7f��l^naUTWZac|��ǩ�΁\������Au����͎Go����aR]TOR^`fp�����ȃ[RJGHNST[RECFLMZ������Py�ythRVgXG;.P]SOVT]`_�����u>PQGIN]hl�������������������X-TOC:4...+*+*,/6;@COMQQQ_8-&2?DFKKHJLJ?.CGCEGEFK_gswuoRGGIJH?
+39:;@Odgii]G87653"
+	
+!-1&
+
+			-80#(-393+'%7L>641-.'%$$$$%')))*,.,(')+,-+-/1232143114423346699;89==BEGIJJKMLKIGC?>=;9864/*%!J������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~pj�����������|zz{|}}~|�����������������������������������������������������������������������������������������������������������������������vkeke`YQLCGP[g[C=:S~������~skZLELbkH99DUPIRhjVJSZd\C676:CMX]Yasr_I53BJM\xp@99?Xnw|�qaQRn�������HNLA??=>AJWs������|LWifioldWKJkrrppstoa^nz�����u3EY]����voneefkolaidRS`SKNIWi[SRQRSJJ_iw�s`eg^XZ[_\][VH{��������������������������������������������������������������������������������������������������������������������������������������|ku�pLwx������KuszvztlWX�������Iiv���|�j\`v�{|�xXi_OGL`G3 )Nf������c@��SKJ`vbTZn���ˣ���ē�����Ê����y�������r4��EQdigT7'#!$/Jr�yUKCO���_hjmn_?s�����X8j��i[m_WTU[bn~��ŷ�̀a������A~����ʈBq����aS`TLXebho������wXQHGGOTX\QGGHLLY������N{�}q_OXkXF9,J[STZZcZ]�����o<OMHGI\\c|�������������������R0PNC:3/.+)+**/13203555=DF0/%4AEGKKLLLJ;0EFDEEGHJP^nqo`HHHHKH:589:?O[ekfbE86664"		8@E>*						.50%*7IH;.(&"CWIJEFCB>663164.,*('&)-*((),.23439:8776444566659::=>==?ABEKROSSSSSRNLGGGEDCBAA>92-)$ 
J������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{sx�����������{{xz|~}~~�����������������������������������������������������������������������������������������������������������������������hgkf_\SJDGS^fW>9;Y������~woYI?IXW@7:ESOFQ`_PLSZa[B868<BMY^\hnd^O.3AIP`{k>98?Tow|��qaSWp�������ELLC>>@ABIVv������zIWjdglkeWHLptoggnro_]o{�����l6BU]����vtr_fkptp_haQVZPHMK[dZSRSTSJK\it}obfe^X[]^Z[ZUU���������������������������������������������������������������������������������������������������������������������}����������������{ju�oNuz�����zGunpnnnhX[��ȱ�Ǳ@n{�����n\cw}|�uUkaRPqxI3 ,Mf������b:xeIFIfiUPWi���ɘ���Ñ�����ŏ����x�������m6��JVhhcN7&""%2Jp�sVJBR��|bhlml`Dr�����U:l��n]m`VVVZbk}��Ź��~a������A������Kp����]T^SQaj_ir������vYRHFIMUV]QIKMML^������O��paRZkVF7.P_OT\ZaZe�����e<NNKIL^_by�������������������O3VMA92/.-*,)),/223213445400'4CEGLMLLKJ:0AFFEEEGFHWceXMJLJHJH9379;;BQdheU<96432
+ !8;BC$
+				"
+
+			)33	%+JgaG1*$6IJGIKLLHEE=ImY<743/,.1"#"#%-8=@@AGB?:45623222223157:<=??BELMORNQUTRONOOMKMNLKIGEEA:3)$ M������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}y������������|{y{|~}}}~�����������������������������������������������������������������������������������������������������������������������whdfb`[QIDGQ`l`G;?\�������}ymYG=@A;239HSN@Q[ZOMUZ^V?668>EOZ][fj_bP/4AHMa�g>97?Xlu}��m`QVq�������@QMA=>>BCGVw������zHVfgjnohVJOoribcorl_^p|�����j2EV]����vso^invwm]kcPY^MHKH]dZSRRSSHF^gjrhceg^[]]\[[]XU��������������������������������������������������������������������������������������������������������������������������������������ylw�nNtx�����wMsikjjjeXZ��š���:u|��}��l\a{����xWl_PMhbG/,Mf������`/e\DKTac[KUk���Ǵ��Ӿ�����ּ�����v�������b<��GVagjY8&$$&1KfvgVIAO��{ahklneAw�����R9p��k^m_UVWZ_d~��Ⱦ��uc������=������wQt����YR]TRdbYhp������pXPIGIMTV\QPdXKMb��|���P{�}q^O]gQE6)OYQU`U\Xc�����aBOMJHLYWcy�������������������G6RKA940,+++*+,/244122132111'5EFIKJKLKK:3EEEDDFHGHLLNKIHIIGHH<3589:=GY`O:6:8765"%!'(34
+	%
+
+
+		'22	%.Uv{[1(%4GGFGEGJHHG?c�eHJHH=A=((6618GNC@,/4,))&'(''(%)./100478;=>@EMOLMKMMNQRQRRRQRQPNKE?9.' V��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{z~�}}~��~��������������������������������������������������������������������������������������������������������������������yfY]^_YQIEKUdobA8>^�������ziVG<76413:ISLCSccQNV[^P>777>FOZ\]gookI13BJPbwaB<7?[mv~�j_OTt������|AQLA?>=ACIZw������|CalfkopgSKKksnijpskadr������l2ET^����ptnclsywj^pbQPQHHNJ^g[SSSTSHH]dhsidgg^\\Y[ZZ[XW���������������������������������������������������������������������������������������������������������������������������������������~uz�lTrz�����oOo_fnkgaU`��¤���7tvupqusi_h~����tTnXMG^iK+-Nh������Y<sa@[cgm\OUp�������Ծ�����׸�����|�������]9��KZkuwX5'%%%2FNY[QIBV��w`iklpfA}�����OAo��gbt_UUWZad���Ǳ��nf������:�����oQu����[R]SP^e[eq����ʿmZOGIJMVY[Q\sWNOh�����|R���sbTciVF3/T^SW\P[Xi�����[CPOJHMYRg{�������������������?6RKA:3.-*(*+-/1134312112020&9AGJHJLNLH67HEGFFGIJHJJJJJJIHGIG83779;9=>@;7888543!!#		( 
+			
+
+	"13 	&5m��O*&"5MIIIFFIGE@&_�ZFM^\MD5&"!"! "%"!! " !%'(*(&+.314;<<@AFIMLORSTSUWXXWUQJC4*!"e��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}z{|��}~�����������������������������������������������������������������������������������������������������������������������u^WWZ]XRIFLYhm_B7?a�������xeQ@734435;JTLLWgjLMW[bS?978>FQ\]\jsm^?/4AGR`k^<:7?\lv~�zj_LSr������yFPLA?>>?CK\x������xF^keilkeUKQovokmttiY`r������i4FT`����oslcmwvrh^m_MCFHKSL]cYTTRSPFK]elwndef^ZZYYYYZUY����������������������������������������������������������������������������������������������������������������������������������������{|�iQuw�����lPndz{rh^S\�¼����;vtphlnldYg�����nUqVOLmkF./Tm������NB�r<[bYVQOWp���ç��ҹ�����׳�����w����y��W:�~=YipiJ4'#"%2GNKPNF?Q��sbkmnm^F������DBs��fcp\WVW\bc���ª��kf������<������fQy����ZT_TSdk\dr������mYOGFHMTWZNcrNLJn�����vW���reWghUE3-X^NWZT[Xn�����PBNNIGLVW`|�������������������>:OLA84/+)+*+.12023410//2311' ;CGIHJLJJH56GGFGFJLVMOLILKIHHIJH75669:::9989888552 
+ "!#  !
+	)#
+
+		
+					"17
+$.Uk^0+# 8KLOPNMLIG>%DQC?CA><3*)&$"  $&*.1;;=CGJOPSTWX[[ZYWNE8-!"e��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{z|~~���~��������������������������������������������������������������������������������������������������������������������w^SUZ`YRJGN\mt]D9Bb��������xiUB655434<NTMMXecNNW^gUB;89=ER\]]qwocF-4DHPbp[>97A_px�vh]MZq������rDSOGBBBBEL^y������uCajdgmlcTITmvrmnusk[es������a7JW[����otjalqrpi`j]LFEHMONaeZRRQRPFI`fr�pgfd^YUXXXXYU\�������������������������������������������������������������������������������������������������������������������������������������������gNv{�����nVmp���r[Td�����Ɵ@vqg_ffhe[e�����r]rTPOopG-4Tp������OS��@_eQILNUs��Ծ���Զ�����د�����w����}��QB�y;c|shS1&$"'7OcmdRF@U��k_jlnmZK������FHw��cgq]XXY^cg���ì��in������:������fXx����XV]UUwoZdt��ķ��dZNHGIPTW\PeoPLNr�����q^���ufT_gPD20\[OWYU\Wp�����OJPLIHMURa{�������������������8=QJA83/--2301321245430.0211' :DGIJJXSQX84GFEFGNd}{x`JLJIIHIFD5!67::;987987457641"$!#$"""#!	
+*'	
+
+			
+		 .2	%'+-*.-&!:JNZ\[YUUP<#>jJABD@;5.+(('&&!$$#')/559@EHMRVW\^^\ZOC7(  "f��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|zz{}}}���~��������������������������������������������������������������������������������������������������������������������s[SUZ`TLGGQ_pycB7Gf�������~zhTD764234=OVMJUa]LOV^bWB889>GQ[]ats`>05CHK]rTB96Bbrz~�sg]H[r������nFUSNNOMLKN]|������p>cjbafb[QIUovrnouqh\gs������_2ISSiwzmgtjcikpohaj_MGGILKNdfXSPOQNDHagx�pffe^ZWXXXWXU^�������������������������������������������������������������������������������������������������������������������������������������������eSxz�����jYlj���k[Sh�����ϙ7uoc_aceeaj�����oWqRPR�m@*4Rv������EU��=^hbQMMOu�����Ӵ�����֪�����y����x��IJ�r<r{rrR/&%%&6[��qVFA^��i]jlliXD������ACu��^ck`Y[]_bf�������cr�����|=�ǿ���_Yw����UX^TPsrbdt��Ŭ��c[NGIJQUX\MgkPKMp�����l]��|vdJYhOC01\ZL[_V]Vq�����DGOKKKPWPe|�������������������4EQH@84//5LTK=?9776542100202.#!<DFJLQw��?<GFDHKW����nIUWSMGGGI2"768:9896646567454	#% !"!!#"!!
+-+			
+
+
+
+		-0!$&),./+&!<LRjefcbd_;#8lJAC@@A83-'$&('!((# $')+17>BHNUZ^_`YSA5(!&h��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~���������������������|zz|��}}�}z}�������������������������������������������������������������������������������������������������������������������vWTX[^VNGIR^txdA9Dm�������~|iSB734236>PXLJXdbRQV`iZ@79;?GQ[\\u~nN907CGHOYF>97Bbuy��uh`J[m�����[FZ^dgfd[WR`z������mAdjY[[YVPIVttokkstg^ht������[3MPHIIJNdtgbfijmg\m]KHIJNMNjgVQPPQNDIeh}~kgfe^\YVVWWXV]����������������������������������������������������������������������������������������������������������������������������������������z��_Qx�����d[li���i^Ph�����ΓD{tpibejc[k�����kSpTNIml@'8On������BY�?gwp_RPYx��Ҿ���Ա�����٧�����x�������GO�sBonowL,$##(7R\hbPFD^��bbkmliUI������EGw��cenbbkpfad��Ͻ�ſft�����zA�Ⱦ���\Zy����VZ]UXxq^fr������^ZMHIKRVZ]NV[MLQq�����ie�~{tbG[gRB27\\MY[X^^v�����AMTPNMRTRf~�������������������4FTG?8411R}�nLfFFF?F=62110030)$>GGLNt����4<EFFGKJ����lZztmHIIF1$47::9877755566682%$ !$&%###" 		)*
+		
+
+		
+
+,+$%'(,..)%">MUfffejze<%>jHBC;PQI@8&!'*&$-+"!")/49=DMTY[[VG=,!$k��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��}z|{zz~��������������������|zz}~~��{z{�������������������������������������������������������������������������������������������������������������������qVUTYaWMEDQ\nqZA:Gp�������}~lS>754425?QWPLXd]MPXbiV@:::>FS^\cu{mV?,7DEDC@>:86Ffsx�uf\M[hw����wOF^i{��}ufTa|������iBcf[ZXXUOIWuskdgpqg^gt������W0LPFEEHNgsgbfgilh]nYGGGJTPMgbXRPQQNFMdkwsjhje\[XVWWXYU]�������������������������������������������������������������������������������������������������������������������������������������������bVt~�����_clt���n^Om�����Ћ=zvrmimocYm����j]oTNV�m@(9O]kyw��:X�s>dw|bOOVz��ҽ���Ѯ�����أ�����v�������BT�p@bqqaE-%$#&6IJORPFC_��bdjlojOL������AL}��dhqjp��gbf��ѻ�Ǽbu�����nJ�Ȯ�ƽZ^{���{N_cen�tahu�Ĵ���^ZOIJMTW[ZKHWNKNr�}u��ea��uaG^hOA17^ZOVWX_^~�����=PSRTRTUQe��������������������/FQG=74/0G`�bM�]�VhmZ82211341'&<PXTRT����-"<GFFGIV����e|���zJHJE.'59<=:;:<;76766682%" "&'-5)$#"		
+"&	
+				
+
+
+
+			 +*!$'*-//)$  AOUdedemz]:#;QFBCMl`g`M(")*'&0-!  %(/6>CGRWSL@0! (i����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~}}}zyyz{|��������������������~zz{}~~}~���|zz�������������������������������������������������������������������������������������������������������������������oZWU\_XNFFPYggQ=;Hq���������lQ=865656@S[ZU[h^KOW_dUA<=<?GR\Zcsvo\=/7FF@??=999Nft|��{hYM[ht~���qMLbu�����tSb������hHhi`ceb\QJVrrkdhqqf_ju������W1ONGEFHQiufbgijnh]jVJGGHPOJfdXVSRRLEOenqniprh]ZYZY\a_W_�������������������������������������������������������������������������������������������������������������������������������������������`Wv������Xail{zxa\Po�Ƥ��ΆEvsoggjob\l�����iUlSMIrb>%;KShuvw��7_�zHVztZLLT��������ҫ��ɾ�ء�����w�������8W�i@epm`F.&$#&8KVWURH?c��bdmoliNM������:K���]itsw�wdbd��ҽ�ǵ\z�����jN�Ǫ�ƼRby���|Wg}�����nq������ZZOIHLQUZYJagNIUv�����ce���wfLbmN>09`ZPVXY__������?SUPQTVUSi~�������������������*MWI=61./-O�8R�d�V�|b64421333(*Ijgz|u���}*&>HFFGHW�{r�c\��uAHGC,'79CJ@9:::8:896791 !!%&.FXO9&$ 	
+			
+	
+
+
+
+				
+			*(	%'*-//*%%IOVfccaksX9&7FDDDKanhi_8.0.*" *3."!! "" /+' &*.4?IPNIB1%$#*^�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}||zy|||||{}}}�������������������~z{|~�~��xxz�������������������������������������������������������������������������������������������������������������������mSWVZ\ULEHOXe_L=;Hs���������mI;655557ATcd_dsfQOXcfR<@EB@IR\Yatxqc>.8FD?>?=98;Igs|��whZKZhtz{��mGJdx�����rPb�������eDehclql^RIZqskaispd[iv������W6MJDDFHPhvdckqpmebkWIFGJOOMjdWSQRSLDQhqxrnsui\[Y\\dpgWb����������������������������������������������������������������������������������������������������������������������������������������y��_Xw�����Vbfjshf\\So�ĩ���~Gvri[^ch^[n�����h\lRPU�m>&<K[�{w��/u�oJHkkWNJR��������ҩ��ǿ�ԟ�����w�������2b�d>jmiYC.$#"(<Us{gUHAe��^drwphNN������;P��an{~{�zibi��Ͻ�ͱ`}�����bP�Ƭ�ȸNax���|Ui������dt�ƽ�şZZOHHKRU]\LqkLLXv�����_h���v`MfeN>2=bZOVVY``������=XRNOSUUSh�������������������x'OVH<60//.b�!_�V�Q�qF01110332',Qq��v���p')@IEHHLSz���Y_���dDIIE+'9BkymN@:;:<GI?;80	##$(0_}X""
+		
+	
+	
+
+			
+
+		
++)	%(*-00)%(IMYfabcigX8%5BECDNdgblfM@:41$!,51$ !! !#$$+6&!-"  !$(/9?FB<70.-Y�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�}zz|{|y{|z{{{~}~�}~�������������������{}}}�����}sv{�������������������������������������������������������������������������������������������������������������������nTYWXYRKCENZc^L>;Ow�������|�mN>645458BZionot`NQVa`K?HSJBJT[Y\v}xb6-:EC@>A>;:<Lku~��vfYJ\hqtq{lDOhv�����nPc�������`Dgffmrn`SI\sunfottg]jy������R.JHDCEISiuderztmeekWHGGJMNOjaTRQQRLDSkn|rmtvj]\[\`irgPg���������������������������������������������������������������������������������������������������������������������������������������zu��VVw~�����Whjw�toc]Rp������yL{ti^bdif[p�����baiOMX�_;%@N_|}�}�,y�nOJoxfML\���Һ���ӥ�����՛�����z�������/m�dCWQMKB,$##)?`��iSIDj��]p��vmMN������9R���]p���~b`h�����Э_}�����aX�ǫ�˴Pez���yPcb`l��mex��¿ʙ][NJJMTX]YQugOPXv�����Zl���x^Kb_L>0?d\SVU[cd������>ZTPSVTVNj�������������������w #VWF<5/.,*k�&l�I�P�M:72200341%+Qz��y}���l#)BHGFINWz���Z����eHKJD(*:o��v]B;:8Gfj^N@0
+#"!&'/s��E!" 
+%
+	
+
+
+					)'	 %(+,.-'$(FO\eacijg^5$3AEDAIkbjihZF><8'"+61#!"  ##$&&%!0" ),JL##*3:?A>=9b�����������������������������������������������������������������������������������������������������������������������kYr}�����������������������������������������������������������~}}{{{|z{||{{|||}}}~���}{|���������������������{z{}~��}wx{�������������������������������������������������������������������������������������������������������������������mXYYZWTMCFO]haO<:Ny�������{�mL<753358D[hstrp\KPT\YEER]MBGS[WXs{n]809BA@>?><9<Ljv~��veXG[gmpnx}n@Mhw�����mLi�������bEhffntm_MH[ttnnputc[kz������N3KJDBEHOjvbevuocbgVGHFHLMSfbVSQRQKBRdbjgemof[\]]boofJl���������������������������������������������������������������������������������������������������������������������|�����������������wu�T`v}�����Pdg}�w�jZSq������sLyrmhijk`Ts�����ccgSKKqf?#BL]���t�|,z�qNGllRDO]���Ϸ���Ң�����җ�����v�������,o�aCRPPKB-"#"(?WedYOFCn��]u��uiIO������6T��]r�~|�u``j�����Ϩ]������ZW�ţ�ȱLez���yG`[PSh[`iw����ʓ[[MIKNTX^XRugNMXz�����Wp���zZH_aH=+CeZLNOXad������DVSOTWWWVm�������������������p$UWH=5/.-+id"ufO~S�MP:3211241%/Fu�~~����q .DIGGHLZ����^�y~�e?LKE(*A��|�`;8;=Mx��}O2	$##$(5��qE%"!	
+	
+
+		
+	
+
+				'%
+"')./.-'$&BO\dggmnmY2&7BFEBLmolcZg??@9&#,6/"  ! !%&'(($#-$H�]"!#*0<BCEg����������������������������������������������������������������������������������������������������������������������z^AUV\u~�������������������������������������������������~|~~|yyyyyyzxy{~}~~��~~���}~}}��������������������~zz{~}}�~~xvw������������������������������������������������������������������������������������������������������������������iVXY\YSMBJS_k`M8;R~�������skM:864439E[hpqsr^OOU\UCES_I@HT\Y_sqn`9.:EC@>>>;8<Omw~��wcVJ]hptpwjFRf{�����mOi�������[Kngfpxl[MJ`xtppruqaUjy�����G<PKECEHUlrbfv}tndffQGFFJLKTh`USPQSLDSdeknmmnd[]]]`osdQn���������������������������������������������������������������������������������������������������������������������z�����������������wq�Q`v������Negloy�_YPr�ɽ���mRvqjkomk_Zv�����_`dSMX�Y=! BNe}��y�x.y�gLEVaRJR]���д���О�����Ә����|w�������&m�ZBRUWTA-!"$(>IKLPODAm��\v��zhMV�����y0V��~aqul{�wbal�����Ѥ\������U]�Ŝ�ͯHm|���{O`YRWoc`i{��¹��YZMILNTV_YSm\JL[������Vs���yWJ^^J<+EaVJHKWag������EUQPWWVXTn�������������������i*YVKA71...51*SAUbU{g^1212310/$4Y��������l 0DHEHFJf���]����`FJID"+M����H6;;<D����?4	$###*9no�Z&#!
+		!
+	
+
+
+		
+
+
+			
+			##
+#'-=@E4'%&CMW^ihkmqQ1"8BEGFPegiac`8@@=%#-6.$!  """  "$&(&)))"&>D#.+!
+
+
+@x+"&0>Eh����������������������������������������������������������������������������������������������������������������������ycJIFKUa]kx���������������������������������������}~~}yw{{{{{{zxyz{||}|}~�������~������}��������������������~z{{~}}~��xvx������������������������������������������������������������������������������������������������������������������kYX\`ZSNBHP\c\F9;S~������{oxdG:88444;G]immrs`NOV[TADT^H@GU^XYfhlW40;ED?>>=;8<Soy~��ucVG^horrxeFUh�����eJj�������THmhclqiXOHcwurpswo`Ylx�����D:OKDCDHWkr`gxtmbdcRFGHINJTi\RQPORLCSgr|�}|zi\]^_amqeKp���������������������������������������������������������������������������������������������������������������������������������������tq��Paw������FddZcuyZWPs�ʹ���gQwoabffhbax�����V`ePMFRF7"!CLg|�|v�t+��aHDZlRKS\���ϸ���Π�����Ӗ����yy�������#v�YDWhta>*#""(>MY\VRGHt��Xx��tfI]�����w1Y��~`ni]�ubbn�����ϟ]������Q`����˨Hn~���wJ`VP[yb_jw�ǳ���ZZOKLOVYbWMdYNM[������Sy���xXRYXH8-K\RJHMXbg������AVPOUWTRQo�������������������i0ZWSV?4/372-+2/:7<LT=2123332.!4Do���oaw�W4DGEGHNl�r��d����^FIH@+C���xK=C::W�xoO51	$" ")1X��D&+% 
+*#		
+		
+	
+
+				
+
+
+
+!&")?eyk>)%&CMQXagggaH2'7CDFCKUZ]RVG8AB<% *30# ##!#!!',+*)-**"(T][LMM/			&3 !'5Sx���������������������������������������������������������������������������������������������������������������������ygOHHMVXL`low�������������������������������{~~z|zzxyxxxxzyy|||}{|��~����������~��������������������~xx|�|~}~vtx~������������������������������������������������������������������������������������������������������������������kZZ\b^SMEGM[c\E:9U�������zmwaJ;87535<H]gjjty\MQV[TDEQZIAGVZRLOSL?21;A@?>==<;AVrz��scWO]ittu{�hEUh}�����eOj�������TMqgdmphXMGbwuqmsxsaYmz������E:SJCDEJVmr^i��vlbghPGHHINJTn\RQQQQKCVhq{����i]^``bpu`Nu���������������������������������������������������������������������������������������������������������������������������������������|z��Nbu������Ijfl}�z]XPu�ͪ���bOsobba^gd^w�����YchTOHHF;!#BG_xgii�p-��bAFepMJQb����ý��Λ�����Β����zy�������,��XEexy_A)##!'@[{bQFEv��Y}��seE]�����{4Y��|\oha�pebq�����қY������Ob����ΤJn����sNcURaybaj|�Ȱ���]YPOQQWZ_WVkWKL_������I}���wWKONF7+O^TJKQ[ek�����}AWPPUSUOWr�������������������g4\Zp�A1:^XP08HG7<<3764323442-!7[���rc^`hG5HHDHENb����O����`FJJ>*K���wYYki_WVRLA;5'#"$+:x��GNbD
+*'"% 			
+		
+
+		
+
+	
+
+(
+"'U��t<)%'CMPXbdcaVI5'8FDEFDIWXHN>=BC;&".:/$"""$$''%%,1/,,001(Irxskm{=	
+31 &;Yz��������������������������������������������������������������������������������������������������������������������}q\JQW^WNahmsz��������������������������|||{zywuwwutwxxxx{z|~���������~}~~~��������������������������������}yy|~�~~|}|zrtx������������������������������������������������������������������������������������������������������������������eVXZ`^TLCEKUXO?7:X������uis_F977644;K_c_bquXIRWZSFFR[GAIVXMCB>:701=B@A@?=<9@Wpx|��saUQ`ktww~`<Wi�����cLl�������PNoghlkh[LHfxsljoup^\l|�����?9QJDDEJUpp`k��rj`feQGIHHLGTl\RONPPLCXehmrvxue]]`cdnobI{���������������������������������������������������������������������������������������������������������������������������������������~z��Rdv������Mi``ilfWVRw�ή���]Uqqliehibbv����[ceTQTSOC #CKerick�l%��bDIgeKJPb��������˕�����͎����xv�������9��SLblq\@& &Dc�xdSFIw��W|��meG_�����u8\��w]of]}�qfbq�����ѕU������Ij����˞Jp����lG`TN`z_`k}�Ǭ���\\_quh\[_UThYROa������K����uXLUVE7-Q_THRVZch�����wAURRVRVMXr�������������������_4YZ��13s�ji]sjU9]TD;C=653433-":b}v�oaywrN2HGBGGOh����c����MCIIA+Nxs|�g����f��zxH2
+'$#&+E���v�w\)!!%(0+	
+		
+			
+
+
+
+		
+
+
+			
+!&
+#&P��x6(&'AJKRY\^ZOO7%:JFFFIY]]TRJBBC9&"/90%#! $',/.,58702697-Pltvt��80"$4!!+*!)7\���������������������������������������������������������������������������������������������������������������������xgYY^_VPcilsz���������������������}zy|yvwvuttuuttttvxyzz{}���|����������~}|~�������������������������������}yx{~�}~|~}zpry������������������������������������������������������������������������������������������������������������������_[WX[ZWN@BFJH@859Sz������tkx`F;75654:GX[U^ovXNSX\QDDRWDBKVXK@=>=711?B?>?=>;8>Zoy~��pbTK`lv{|��^?[l������eTo�������NQnhhlnhXKCjwqifnwr_[m{�����|<=QHEEELWsm^l��mlagdOHHGJLEXg\SONONKEXccchjnna[^beelreE~�������������������������������������������������������������������������������������������������������������������������������������xt�Me{������Jibekm\VXTz�ɯ���Z[ssqqnpm^by����~X`bP`���m"&EPw�www�i%��]MJ]_PISa���̾���ˑ�����̋����uw�������'��MTv{tc? $G_]oeWEH���Zy��udG_�����n2a��y]kdb��pebt�����АW������Ep����˘Oq����kM]TQe}bbl��ǫ��~_n����oYcSVfWNOb������F����uVSe[E6/U_TWfYZ`g�����pDVRQXUVQ^t�������������������Z9YT��,C�RI~y�|_b�nPTgS?DB954. 8_}��~����W6JFEGMOw���~n����SGHH=/A����[���u����>0#*#"''J����znd(! !!.@6						
+
+
+
+									
+$%
+%*Q��x7)%(AHIMPTSOLL3'>IEFGKY^feYJCED<)#/<4-(&'+0399<BA>==?@B1Srw}u��4 E] @GF=7&*$ &((&#2@x���������������������������������������������������������������������������������������������������������������������qccdeTRillsz���������������}{uvxyvvwuusstsvvtuvvxxy{{|}}����������������~~~�|�����������������������������~yx|��~~�xoox�������������������������������������������������������������������������������������������������������������������VWTUTUZSB@BEB<536Lj������iovYD;85667<HWWL\kkSKPVYPDDNI@DJVYH>=?>600=AA?==>;7=Znx|��ocNMclwy|��\AXk������`Sr������}IYnhhnogWKEhtqifptm_Xn~�����v;>TIDFFKZrpXj��ol\fbMHHHLKG\fZQONNOID\fgillkkaZ^aefswcL~������������������������������������������������������������������������������������������������������������������������{~�������������xw�|Mey������Oh^{�zl`ZU|�Ĵ���V[tz{wuvn[by����~WdbW�����'IOr~�x�]'��VIH]bMJW_���ʼ���ʍ�����ɍ����pz������~+��KXuvyi>#ES[jdZDI���[p��vbCe�����q5b��u\kdd��jcbu�����Έ[������Bt����ʕRs����gN\RNewbbm�ƫ��|d���ɱ�]bRReTOSc������O����uRZkZE72T[RZdQZan�����gEROPWUUK]u�������������������P:WZ��)c�B+�}|wdm�sK�|i[nU>54-<e���l{���Q9FFEHLMz���us����YHFE91Q����k����m���2,%)"#'&Cw��pQlb "$
+			
+
+			
+
+	
+
+
+				"""+Q��v4+&(AHKQUVPLLG.'=DEEHIKNQTQFEEC<'$3?:54449:>HFHKMGAFHHF4!^u}~y�y1%aU5_g`^b<)(0+%)-.;C����������������������������������������������������������������������������������������������������������������������wjfdcVTfjoru�����������~{ywrrmpsrsstsuttrtwvuvw{z|||{||}���������������~�������������������������������{yz|�}}~�yqqy������������������������������������������������������������������������������������������������������������������NPNNLS\]OFDEA=956BOakqsoh]qvZE<;8677=KXWSXcZOLPX\PDCEA@ELVXH?=>>7/1>AB@=?=<8>[pw{�mdIOdmwtx~�_?Zk������^Qt������~J\oheijaQLEiupigoul]Zo�����u7CUGBFEIXpnXi�~kjbf^MGHILMJ_h[RPNNOIC[ilqtstqaZ`bdgwx^S�����������������������������������������������������������������������������������������������������������������������|~�������������sv�zMkx������Jle�~�{^WZ�ɻ���P^z�����r\c}����|XcaV����t'GKu�s�Z+uzPJISTIJT^���ɷ���ʑ�����ʊ����q{������z=��JLcfqj<*JRW_a\BL���X{��scAn�����k6e��vbmfj��`a`u�����·a������B{����̐Sv����eLZTRew_bm��Į��|k����Ǜ[aUftQOOg������P����uS[iWE60V]QS]X]ar�����_DSQNQRWG[w�������������������L<Y[�}\��X;�Z_ys�J;�}w]j;54.;f���}����D8FFEFJKx���st����KFIJ81Y����_����h���Z6,&("#&);m�Qex�c !!$
+
+	
+			
+
+
+		
+
+		
+	
+
+			
+")N��w6+' *CLP^`^[[ZH,)=FIIHGGHGDHFDEC8%%4B>=A?CDFIKLNNXMJMOQJ7#"#`t���y.c>Agiks�B$&.'3S>8AP�����������������������������������������������������������������������������������������������������������}���������}nhebTTjpwy~�������~{zyxvutsqopuprtutvvwuwzzyy{~{}}|z}~~��������������������������������������������������yy{|~�|~�zppv}������������������������������������������������������������������������������������������������������������������PQMPRYckbVOMKJD=>BBJRSQNHWrpWD=<;998?MZZSPRMJMSX\RC@?>=CLUXJ@>@<713?CB?>@=;9C^qwxyqg_SSbmwpv~|[@[o�����VOt������~I[qh]b`UPMEktnddorkZ[n}�����w9DPFADFL[qgVextmi`f^NGGHJMF_i[SPNOPIEZgntwvwuc\_bciyu_Q����������������������������������������������������������������������������������������������������������������������}�~�������������}p~�wMiw������Ho_|��{YY_~��ŽƾI^�����j^c����~SdbXu���`(IQ�|{zx�R'luONHGKHOTd���ʱ���Ì�����Ɖ����r{������r<��JQgux_? 2PPOTZTBL�ċV���oe;q�����f6f��uclb`nm_aa{�����ʂb������C{����ʈMw����]R[UO_e_el��Ǽ��uj����ΕY`WilNNJj�����~S����qL[iTC72Y]NVh[\cp�����WFTPNOPUHZ|�������������������E?ZX��Vhq�FKt~{q3R��dYshH63.;b���v����E9IFDFMN����tu����LIIG6/H��xZ���sV���w=+
++)$#&*)LmS|��X   
+
+
+	
+			
+		
+
+
+
+	
+
+			
+")P��r9+&,FMXlgede`H/,;DHHKFGFGFHECDA:$%4??FIKQNROWUVUVSQPVQO;#%+cw�����/S%(+Qnoqw{<  +5;948'$*;`719Gadhz�������������������������������������������������������������������������������������������������������vw���������qonkcjt|���~{zzyxvuttrsrqqprsqsuxyxxyz|}{zz{{yz|~||~}�~��������������������������������������������������yxy|}~~}{mpu~������������������������������������������������������������������������������������������������������������������]][]_djrog`^^ba[WTVZZY[\\doeNCAB>;=<@O\YRQPNJOTXZQGA@?@DMXWJCBA>933@DA?>A><:Dcqsttoh_PPcoxov�VC\m�����}VRu������zC_rjadc[RKIjvoegptiT\p~�����q4ETGCCDL]pgWg|~qh_f_MJHHKMF_i\SROOOHF\gmqqsuq`Z_`djwi\S������������������������������������������������������������������������������������������������������������������������������������������xLpt������Fkcx{w_Xa��ǳ���N`yyz��nb^f}�����Vfmz����V,HVtw��y�Q+w�YNFEHITXf���ʳ��������������rx������q;��HTt�~YD$5VOLQVRIQ�ɍX���qb@t�����e7g��oanc`nvgca{������zf������@}����ȂPx����ZR\TQSX]dl��ů��pk����ΏZ`U^bLPTn�����{Q����rJQcVC33\`LRdWY_p�����HLSQQUTSN]x�������������������C@YR[TO&Pe`>Ufwa|g^i~tcVhjQ94+ @m�������? 9GFGGLU����R����~HHGG529c{k:Zvnxp_���l5+	"2)"#''J��m|��R !! 				
+	
+	
+	
+	
+	
+
+	#+P��s3+%/MMW_XY_a`L-*8DHP_UQMMNNKMGD9$"3@BJMMSV[Ub^^]]^YZ^ZUB"'+5h}�����2
+
+#'*3VpttxmB&%!"(0MV_VVY4+-D\-*0@GED]QLYy��������������������������������������������������������������������������������������������������}it����������zxyww~}�~{{zywvtuuusssrrttssuttvxzyzyz|{{z{{|zy{{}}}|~~�������������������������������������������������{yz|���{lpt������������������������������������������������������������������������������������������������������������������jihijnpsqmjkjjkjhgiijmklrsr_KCDGEBCCGRYTPTTQNRSWWOIJHCFJNXYOGFEA<34ACA?@A@=9Baqqwvl_IPeqw{��{TC^n�����|SSw������{>btojoleQINnxpimrsfVYr������m2ESFBCDL]pfXk��sibk^IJHFHKH_g[SPPONFE\egklmol]X^`dhha^P�������������������������������������������������������������������������������������������������������������������������������������������tOow�����zGmk��~cZ_�������IbiNMTVY_]d������Yi�����;1IRccvjq�G2��VHCFHHVV_���ʮ����������������ty������f=��FWv�jPB&!6VSY]_VHV�ˈX���o\7u�����h3j��malab}�gcez������xj������?������}Sz����UQ[SMO\^cp������jm����ΉU`PalIRVo�����wQ����mRWkZA24\^MXaWV[n�����CNQQTWVTL^{�������������������E?WMA6-+-./&8IT2nahZp\lYYsL33) Bh~��v|w�|6<GEDDIV��ru]�����EGGC37A_aR=P}���{���g9+	"'/)"#),c�{}���<""$"	
+		
+
+
+
+
+		
+					&-T��m1*$2JMW]ZTRT\J.*:GQp�{vngaP^j`T:#!2CDKQLS[_`d]a``\Z`c_\C&+/5Uk{���p-
+%',6WnsvzkF'% (/;cjnlon6+-BM)*5FPOPXN50\{�������������������������������������������������������������������������������������������������{ew�����������}~}}}xxwwvvvurporstttusrtvtxwwvxyywyxwyxxyyxyzzz{|{|||~�����������������������������{��������������������|w{}������vjow������������������������������������������������������������������������������������������������������������������tutrtuuutrqqpopqrqnqqstuvqeZQKKMLJLNOUWSRUVQQURRUSQVUNNOPZZTPNIE?75?DC?>?>?:Ehutswpj`NTht}���{VB_p�����W\y������vCbvmlqscTKMqxqmotthX_r������k/CPDBDFK[sfZn�pgdh_LGGFJIH`hZQQPNMFG]caeiikkZV\_bdebbQ���������������������������������������������������������������������������������������������������������������������{���������������������sQrv�����uGmh}��l[Z`��û���DgbPFGHQ^^d�����~\o��Y~�J2GRitxgu�B<��QDBFLO[]h���ʬ������̼�׻�����q{�����eJ��ITnqfS<' !&7U_u�xVCX�ˆa���ndA}�����g5k��kajcb~�jdg������ti������A������wZx����TT\TQ\i]cn������gs����̇U_PgnQTWn�����vW����jOZmZB21`]PciVY`h����~@SVPSXVVL`��������������������A@TOA93/-+)***.038=AI@badk:44(!Fu���szt~l0";HDEEHW����v����BHID04W|��qfv~��r���y;*	!&$.-"#',du��u�G!!$& !!	
+
+
+	
+
+
+
+
+			
+
+'.V��p2)%5JPVdfddobH0,:GU������vf�r_9#"1@CNLNX_dkfacfe]^adfcC*/04:MdigqR.!#()+6Znsx~{B%% )2Bbiqt�o6/.<H+*9IRQQ^U:/Fq�������������������������������������������������������������������������������������������������ym�����������|zzxwsstrruttrqrrsstuwwuwwwyyxxwwwwwwwvvvxyxyy|{|{z||~~�������������������������������}n��������������������|x|~������vkov������������������������������������������������������������������������������������������������������������������xyyxwwyyxxvwwuvwwvsqqsqqrngc^[YZYWVXSWXRQURQSTOOPRX[XUVTSZZV^^RPI:7?CBEGFA@<Gjumjkhh^OTht���zMC`r�����}OTz������t@bokjup`TJMswpkoswdX_r������g4GMECCDL^qaSj�umedh^KIHGKIF`dXPONNMFF__\beghi\X]`bdie[U��������������������������������������������������������������������������������������������������������������������}���z��������������|��nFpu�����mLkdgbhXTU`��Ǵ���?gaQJGIRb\l�����~Tn��V���[4HW}�}xz�?;�yH@BNVTbcv���Ǫ��θ�����ڷ�����r}���o��\L��CWv�tY;&!!';Xp��wUCV�Ʉ_���peA}�����\:l��icjdl��fcd�������jj������>������p[v����TU[SShp]co������bx����ɃWbRhmPSVp�����p]����eMS_L@35^\N[\RX`j����|@SUNTVVUOg��������������������5CZOB81.,)))),/11/13584>NB531%!=FLel[����4(>HCEEK[����m���f=GHE+5E����Sh��kY���{;*)&"$((U��oy��@  "%*$!",#
+
+
+
+		
+
+	
+
+	#-b��o/)'3JMUjoqolQF/(>IS�����wg~��`9$"0?DQKPZbklieimkfdfjkkH&.037EY]SLB1 "()*4Qmlv�p,&% !)2Hgltw`20,CR0,:JRRN^P7.=f�������������������������������������������������������������������������������������������������~|����������yuutrsrrsrrsrtssuuuuvxyyyyvwxxwwwwuvywvxxvzzzzxy{zxy{{}����������������������������}xsp��������������������zy{{������ujov~������������������������������������������������������������������������������������������������������������������zyyzzz{{zzy{|z{}zyzxvwwwxuponljhhheba_]YXWUTURPPPPQUVTVWUUSPYZTVT@8BCFRRNJC>Fdllmjdc`HUir}���xMLaq�����|UZ{������sBgtmnrobQGPtwojpvs_V_s������`1FMGDCGM_r`Tbjklfak]MHHHKJGceYSNNONGEccbcfhjjZX`adjzmZZ���������������������������������������������������������������������������������������������������������������������������������������{��nKty�����mRjW`dlc[X`��è���?eaQIHIT_]h�����{]s��n���T6ISyz�~z�>@{jE?DQQK\`p���Ʊ��ҵ�����ٴ�����s}���~��PJ��Gbu�yV8$"$'<[u�~fUE\��~a���qc>}�����X;p��hdker�|dbc�������eo������E�ű���j[y����TYZSUru]es������\w�����~V]SmpLSUt�����eb���zfPKLF?.;[YQ\YUY`i}���w?SSRVYUTJi��������������������.EWNB92.,+**+.121201242235641$#=FGOSh���q**?DCDEDY����e����hAEFE(:a���mY���hx���l:(
+"$$$&0m������,# "%.' ,)"
+
+	
+
+
+
+				
+			
+
+&-h��j,*'3MITefhhZSL.)KKW�oj�tuzc���J6"'6BEJIQ^hoolflromffqopM'-18:DUMHH@4"(),/;HN[bG$%#$+5LfrvzyY605TZ.+9MQOOYM4,>a��������������������������v����������������������������������������������������������������������������������sqpoopssrrtstuuuwwvwyzwxxwxwxwxvxuvwyxvyyxzyzyyxzzyy||}~���������������������������umhkq��������������������|||}�������slns���������������������¿�������������������������������������������������������������������������������������������xzxxzyy{{{{}}{z{z{}zxzyzyywuuurtrolkjggcb]]YWXVUTSRSVTUWSQNHLOPQOE=BDIWURPF>F^aivg^\VHSakv��uIEcq�����xM_|������pBhtonrm`QFTuzsppto`Wcu������c1FKECEGK_p[S_filf]j[IIHGKJGceZSPLMPEE``fehillYW_dgo}qZZ�����������������������������������������������������������������������������������������������������������������������}������������������iLsz�����oQibv��i^Xe�������=f^MFGJR^Wk�����wZx������Y9KRz��v{�:<s_:=CLKFSTx���ī��γ�����ٲ�����q~������HN�}>[\kjV7%#%&;R^oh[RCZ��vf��}oa?������Q?r��ehkbr�zace�����п`s�����yE�Ĝ���h]z����OXYRUsq^eq������\{�����yY\PjfIQRs�����e^���zeMHHF?-<_WP^[WY]cy���kBVSSTUWQIi��������������������)LXL@83/,*)*+-/.1313344455673$&>DHJEm���i*)>IFDEIf����j����m?EII(;d���vb���bz���b6(
+	%(%$'/b�����l?%"!$+&!33$	
+	
+
+	
+	
+
+						
+
+
+
+&,h��Z.'%0FHOWfdcaXI++DB]�hn|Fy\x�rI9 ,?FHIIQcrpogeoxwrjlvvtN(04FU[jXTQG8 
+
+"(),.4;?FB.'&%!%,3Ofqwzu[73=OP/,=JIQSXO4*>c�������������������������iQi��������������������������������������������������������������������������������mmopqsstttuvxuvxvtwyxytvwwxxutwwwwuuyxvzzzz{{y|{zz|z{}~~�������������������}}}��ztphaSbn��������������������{z|�������unou�������������������������������������������������������������������������������������������������������������������wxxvvzy{z{y{{{|zyxyzz{x{zxwsttttqnmonlnkhegedga`_^]^[\^ZWTQNMOPRPJFFINSQMKDAKVW`jbZUPIMX]emurdCGbn�����uYa}������qBhummrm^PGV|}vwwyraVct������_1JLDCCFN`p[U`gjme`iWJIHILIFeeZRONMNEKabfghjliYY`ffrqU]������������������������������������������������������������������������������������������������������������������������|~����������������iNt}�����oQg\cns]^Zh��®���<i_GDFLUa]m�����zZx������J9J]���{~~7;s`=>AKQKTZ����¨��б�����ײ�����o������H_�x;^nttT6$#%)?Tlzi]RC^��gd��vk\@������Q?t��efial�wbeh�����йbx�����tG������cb|����PXWQXpr`dq������a{�����w\\QocISTv�����c^���zbJIFG=/@aWMUXST[ds~�~aBVTPOQROKh�������������������|% KWK?630-++,+,023434545355551$*@FFKN���j*(AGFFHMg����h����bDHFB%:Y����r���nr���\6'	
+&'$&*3|��~~�B! #--!$6:* 
+&0&
+			
+
+	
+
+
+
+
+			
+	
+
+	
+	&.k��W.&%3EMV_nmpi[D++9@[�`aVD�gYr~u\7#!6BEHNMVdkfa\`xzoox~�xU(15\llsjgb^N!%++,/31/75($&'##)-1Ogr~}`176KS,-;HQUUWO3'=c�������������������������dLe}~~~}~��������������������������������������������������������������������������rolqsrsttvwvvwvvwuvxxwyywxvwutuuvywxzyyyz{{}|{}}|}~~~���������������������}{{ywxumiebXR_i�������x~�����������}}|�������sjpv~������������������������������������������������������������������������������������������������������������������xxyuvyvzxxxvuvwwwwvvvttuutqpoorqonnompoollnjllkmjklkjihfdcb][\ZWZWTPVVSSRPNKRUQV][TQOMLNOSVVWLAHZdq{~��hH^������l@gvooql^QGVx{zzxwo_Tev������X3HIEDCFNblXT`ilmdbmUMIHHLIIfcWPNLNLCObbdefhjfZZaeiv�nP\���������������������������������������������������������������������������������������������������������������������������������������yv{~hTu}�����iSiU\`kd^Zf��ì���;j^EFFJQ\]p�����uau������>:HZ���v~z1?z[8@EQRKR^����ŭ��԰�����خ�����r������=f�sCfy�rO4$$#':Z��h]RAa��ady}rkTC������MBv��hghai�yacd�����ζ[|�����mG����ǿTe}����OYVQUwn`es������Y������r[[QrbJSWy�����^a���vaMIIF</B`SN`_TVYdx���cBSROKMQMKm�������������������w!KVI?7334000/.034434454444550 +@FGJM}}jsO$+EDFHIG_����N����RGJG= =e���xv��lw��vR3#	
+)'"$)1����y��,! +.&':?. 4=2
+	
+
+
+
+		
+	$0l�qG/%#3CM]jljlkbD(-BKT}tbU^{aN���T5#$7@DEJQ[`WOQM[}�tiq���{S(/:couwrrsqY"%)*-/30+22($&'$$+/4Leox|�[/1)LQ*,:HRUV]M/):e�������������������������^Kc|{{zxz�������������������������������������������������������������������������polnstuuttvvtuuttuuuvuuvuwvuwxxxwxyzzz{||~}~}~~����������~���~~~|xussnkheb`^XWXb������xm{�����������}|{~�}�����slov�������������������������������������������������������������������������������������������������������������������uwvuwyuwwuvsrsrrrsrqnopooqnmnnqpprstsstturssttuwutwutrsrqpnomlljkigfjje^b__]_YTVXYSUUTPKJJLMNKIKRU\dijbUO]px~����]DlsljkdZODU{{zxxvo_Xfw������X6JIDCBEOcp[Xepqod`iWOGFIMLKfcVPOOQN>QdbccfhidYYac_o}hR]��������������������������������������������������������������������������������������������������������������������������������������yt{�jPt~�����eVhh}��i`Ye��ǭ�ȜBj\LGHKV_Wn�����tcw������G:GUlxpa�|.FwV=DLYLEN\|���Ŷ��ձ�����٫�����q������9u�uAXh[XQ7"$#&;_z`\R<a��c`w}unVI������JCx��hfgbn�tbbj�����в]|�����nM����ɶPj}����PYXU[qg`gs����Ş^������n\[ThYGSYz�����Zc���x_MIHE<-DaUR_TPW^l����iCTPLKNPMJs�������������������v!"RUIA;7Dh3K><B,8=84535344255/-BIIIJb]JNB#/GFEFJGP���uc����^HIGA >l���qq���qic<Q@2#	.($$(6���|���-!! *3'-CG3"-1,
+
+	
+
+	
+			
+
+
+		
+
+
+
+&0Wb[:/&# 2AKSdikmnYA)-Z\[����}xlk���V6##4DECHQZVV_\`mzzjhw����Q(4Bbt{�|{�S %(+-140+2.''&&%$*16=P^ktsB+/(BD**4DQWZZ=&(@i�������������������������UOazzvyvtxz}������������������������������������������������������������������������pppostsrruvtsuvsqrttuuvvvwwuvwyz{{{|y|}~�~~~~~��������~}~~|{~}}ywvsnkihfdc`^][WUP^������vmz�����������|{}~����shlv�������������������������������������������������������������������������������������������������������������������uvuspurqrooponmkjjlmmlnmnqrqqrttuuwwxzwwxwxxxxzzxxyywzyzyxwvuvyuuutvwtvrpqpomiggfd^_^^]RQNHMQTQPQQSXVVUOS[ccgnifbPMbe``c\UNJVxttswul^Xew������S0LJBBEHRfmYWfnqmc`gYKHIKPKLecVQNOPPCRbbeghhjdUZ_``mxiSf��������������������������������������������������������������������������������������������������������������������}������������������}��fTv~�����aXjr��{ih]g��Ɵ�Ŗ?k[KHHKU_]s�����tbz������8=FQSV_\w-M�hBEJPKGOX����Ƹ��Ӭ�����٪�����v����}��4~�s<SYU\m2###&=ZpufVKBd��ch}�xjMG������JE~��cfgbv�taaj�����Ч`�����jN��˰Ll}���~O[[XWac`ho����×\�ƥ���j[[O_\MPUz�����Zg���z[OMKJ?*E]RMNKPW^p����jCUPJLNQKNt�������������������q!$XTG?;7v�[�OlZ:aTF<F88666565/.AHLMLOOMLB /HEEGJKl���yy����[EIH>#9t���vl���oQ`K;610(%%)7x�s��|6!)0)4JK4!)-*	
+	
+	
+
+
+
+	
+
+
+	(/E_`O.)$1BLZnqg^XI@&6�Xa������mV}�~H3"%4AEEKRW]wvvxyzxljy����N*4@dx������U(.$(*,153.41*(&'%$*.5<HOVWQ5,-)7A**0COSPE2)+Cj������������������zk�����XVertqssssux}�����ytrox}���������Ȥ�������������������������������������������������tprprqrqtutqsttsrrrttuvxwyxyyxz|~}|}}~~��~|~|}~�}}��~|{{}|wxtwtoniifda__^Y[YYWRPO]������vl~�����������z|�~���skpx�������������������������������������������������������������������������������������������������������������������pqsrsrrqnjjihjjigfjlnmprstuvvtyyyz{z{||}{}|{|}~||}|~}~}|}}{{|}|{{|}~}{|{yzzyywvxutrponojiea`dd`_]XXXXUVQV]_X\`^YXUV[\Y__ZSPLXkiiqtoe[Ygt������P6NIDEEGPdmYZfoqiccgTJHIKPKRedSONNRQETbegkkkofTZ^``lqdMi����������������������������������������������������������������������������������������������������������������������~��������������������cYt}�����\_x�����Ri�Ƽ����DkZGHGMW\Zt�����s_������(;HRau}on&X�pDFHJMNT\����Ĳ��Ԧ�����ף����~u�������1��s@Yfdwk1%##%<[{�fSK<e��_l��qjQK������EH��`kjf��r_cl�����ѧ_������gW�ǝ�ΪLl���}J]dkl{sdit����Ò[�Ħ���d][Ng\NQ[{�����Um���w\O__eR)H^OIHKQXaq����cBWRJLNRGRt�������������������k(XYF?8.��f�K�N��cSKhCZ=56666/1BKPILNUVQA9IEDFHKy���~j�z��SEFH@%;q{t�ja���ukstjQ4 	+)%'*/���r��h)" &0.8IN7 %),
+
+
+
+
+
+
+
+		
+'.g�~E,($0CLYbZRUTQC"J�Rf����}�oq��^E3#%5CGELWYn�������|rx����J)1@i|������R!?)#&)-4<MJF?1&%&%$*/5@YSVNK=/,%BK)*3QSNK<1-.Ek������������������c������VYgponprttvwy~�{�XA7?ILUZy~rZFi��q\dnvz�������������������������������������������qoqrsqoqrrrrttstvutuvvxxyzyyx{}}|}�~~}}}||}||}||z|}|{||zyxwutsomihiea`_\Z[XVVTRQOONMKZ������w{������������}}~����~qiqz�������������������������������������������������������������������������������������������������������������������ooqoponkhheeehhjjinoprsuuxyyvyyz|{|}||~}}~~}~~}|}�}~}~{|}}|}}~}�~}~~|~}}|ywyyywvtttsrtqmjkdcabbce`WZ]^YZZYXTQVZWVXW[X]djlhaZYemu}�z{qG8LHFDFIShpU[gnto`deSJGHLMITfbQOONSRHTcegilmpeUZ`__hk]Kj������������������������������������������������������������������������������������������������������������������������������������������^Tu������_b}������Pc�ž��ƈDjXIFGLT[Zt�����p\������9!;L\���j�m%Z�qNKKKNQV]����ƹ��Ӣ�����أ����}w�������4��p=UaZ[M+$##%:RbcTPJ?e��Wh��tiNK������BK���`jhc��ncbm�����ѧ\������\Y��̧Kq���}J\elu�rdhv������_������_]^[}ZMRZ������Gp���rY]w~}J(IdPFHKPTbs����XDVPKLNQGRt�������������������d,TRI@7/��W�D�`�?Mdd�j�=66454*1CLQHO[vlU@;HDCFJN~���v_�z��QGHH>#=m|��hr�}�k^���`3	6&&%'?���p{_:  ,16EP7!$%09	
+	
+
+	
+
+	
+
+	
+#)-XgM-,(%0FMW_`a`_Z>"S�:h�p`be�g\vgcK0 2@DGR`ev}�������yx���A*3Biz�����}M!$')-8WvruqG&%&%%+10\��}olU0/$RV(+O�xph]1.Dn�������������������������NampnlpuwvvuwzyxvH1Ahk[UMXOP67b��UORV[QZblz~������������������������������������prsrrrqpprttqrsuuvvtwxzyzzz|||}||}|||~~|~||zy{{{z|{{|{xuvyspnnljgea^_]\YYWUSROKMNMIEJOQNf��������������������}}}~���|nkpy�������������������������������������������������������������������������������������������������������������������nmmpoljgffbccgikmmqstvuwzyzzy{z{~}}}~}}}|~}{}}}��~~�����������~�~����~~~{{{|{{}zyz|zxsrusmjjjifdcac\RMWVUV[[ZWY_cb__\[clfhiid[@>PMKKLOZpm^_fnvn]dfVLHINOERgbWSQNRTEWffiiilmcU]`]`lpaVn�������������������������������������������������������������������������������������������������������������������������}{~���������������_\v���v��Ye������Qc��ʤ�̇KkYKFHLW[[v�����rc��Ӽ���3?Mbuder�j"_�tTNLNPRT]����ŷ��Ѡ�����١����{u�������0��o@RSLLD/"#$';RkmbTI=i��Pq��xjOQ������=O���Zhjh��oc^n�����ӢY������X^����̤Jk����~P[TO\xuhju������\������Z`_byYNP\������Lu���rUY�pR:,LbNHJMSWbq����UGWQILMSNTy�������������������_.XUH>64�iW�I�j�@gvf�l}244574,5FPSR\n�sT?;IFEDIK����{Z����HDGE9	#9u���kd���l����E0
+
+!6)&()E{��H&"$! &+1GS8,*+>A
+		
+	
+
+		
+	
+
+
+
+
+
+!)(Mk_P/(%4GO]dfipn];#Nf?l��|��]Wy�R1!/;BFSeceq��zu|���|{|�{=)1Cj{�����wG
+#&+.:v����F'''##*39������h1.'\Y(-�������/0Bn�������������������������Uhorlkqwxwtwy|zx|rC5n�����g.-*=_��STYY_c]XZ[Zy������������������������~������������{nrrqrsrrqrtutvvwtyyyz}}|}}~~|~~|{||}}||{y{ywyxwvuwvxusrmifdccb_\XXXYVRLKKNKJNKNNPSSW\\]n��������������������~~�����|ojrz�������������������������������������������������������������������������������������������������������������������jfhijiffdddcehlmoqrtuuwyzzzz{||}|~}|}{}|}}~}|||}~�~��~��~~~���������~~�����}|{~~~}~�}~}{zzzvuyxurqophdd]\aaba]ZTZ]^]`_^_cU\_]\XIJSRQTTWfsmc`cfrg]dhYQNJPQHTgcXUQOTPAUedfgilnaU]_[age]Tp�������������������������������������������������������������������������������������������������������������������������z{{���������������Z\u������Qd}������Nl�ǽ���FkUKHHKX[^x�����r`��ş��%"DNz�|�x�a f�uWKNPQPR\����ô��ҟ�����ӟ����{x�������/��i?MMOMB.$#$'=\ws_SH?k��Rr��sgMV������=T��{Uhhi��``_m�����ҟW������P`����ϡMq����xM_bfm�shlx������^������WaZ_sTMP\������Mw���pXcxnlR+N]PDOSRY_h|��zLIVRKLNTJTy�������������������\3^WF;49�Pb�N�ezQypjr]o.53342)4DOVgn�nS9:HFDEJP����|c���qAEGH7	%;����vw��|_zxivV4
+)L+$')@}��H(&#!  &0IO4902L1
+
+			
+
+
+	
+
+
+	
+
+
+
+
+	
+			'7w��d))'2EL]iilqoX:%6DBd������Fv��zM3!.8>?N[WX__QJTf~��~z{o6)3Fp{����l{J
+&().9ds{�~<'(&#")4=������[./%d[&-������x-1Fn�������������������������Ygnphimqttux{|yxzl88������q1)):T��[XXZqqvhXXXq�������������������������������������{nqqprrrtswwwwyyyz}|}~�~}}}~}|~~~{|z{{zz|yzvrqqonmjnlfd`a`\XYWWSQOLNMKIGHJKKRTTZ[_]\`a`dr��������������������}{}������}nfsy�������������������������������������������������������������������������������������������������������������������gffefgdeecbefhmoppqvwvwy{}{{{z{||||{}|||}}{}~}}}}}|}~||~}�}�~���~��}~}}}�~~~}~}~~����~}}|}|~|~{xvwvurqonkfbdddcgeb]PNTXXYYTRTMNQRS_mmhkkej_]ge]\`\XVLQeaYXWPUJBWdegjjlobU[__\`\YJs��������������������������������������������������������������������������������������������������������������������~��~�{~��������������Z]t������Ti~�����}Nj�ɳ���xIfWLIILZ[^u�����na��Į��z7%GLny�u�b$p�rNMNPPOSa���ҿ���ҝ�����ա����{w�������;��hASV_PD2&$$(<Lch^SG@m��Tq��rhIU������<P��t^hf`eh_a_o�����іX������Oh����ХIu����tQj��fxbclz������f������Q^[SkURS^������Gx���p]l���\*R]MQiYUXZ_v|xoBLSQKLOVKX|�������������������R6[RC;69`0^RUq6zY�fwc_y324364)6GQi�n��pS=>HGGHJRvbf�acy�yZEHIK7
+&@~}��h46UbPjx��R3	
+
+?f)%(+O���L&%"  !-FO4$8109(
+
+
+		
+
+
+			
+	
+
+	
+'6���b&'% 9HL_deili]<%/FISw����[?poN4,69=N[TTUF59=Da{��zvh3)3Iju���aoG%&)-6k���~0))($"+0<������M./+pN)3�����n&0Fq�������������������������Wfmlghilprux{{yw{j8:������q0)&4Q��SX[s|��lTXVm������������������������������������{prrvvtvwzyz|yxzz|~|{{|~|~~}~~~~zyxxxwvyvsqmiffec`aeb[ZXVWSPPOLGFFDHFMPQRWY[]__cceecfjioy��������������������|||������|omsz���������������������������������¿��������������������������������������������������������������������������������_bcccdcaa`afhjkoqssuxwxy{||}{z|~|{|||}|}}|}}}|}}}}||{~}|}|~~|~�}}����~���~~~������������{|~}||}zzzzywwsusttqnhaaZWWZ^Y\XNLNOOUfijvxof[^gd_eg`]VFWc^^b_VRHD[egjlmnpbQ^_\[]^[Q{��������������������������������������������������������������������������������������������������������������������|vx���zz��������������_]u������Mf{�����yKk�ǭ���sJnZKGHKWZ\v�����id��Э���5%LRkm�vs�^#v�kJJJLNOV_��������Ϛ�����՛����|w�������C��aEaxrPD.%$%*B_yp]PF>k��Vx��siKV������8U��s]fd`^_`aar�����ϑT������Kj����̢Nu����uP{��htddl������{]������V_XS[IOQa������Fy��~oN`q��G-T\SXfVUZ^euwrfDKSQLLNYKX~�������������������J9ZPE<5.*'1),3'H[zDkMm`833443(8GQc~v��fR7;GIHGJILNOUGEAMIGEHGC5
+(<|j`�c4:>@9a���F1	[p#%'&@���C$$" ,GJ0'=8:>6 	
+	
+	
+			
+
+
+
+		
+
+
+	&:|��Y*'&<FL]`_gljU:#2CETu�{�kRc{��uM2*2=CQYTSK:879ETcw��yyh9,4Kk���xSuwB	'(,1<i���Q&))(#%,49}�����50/(dA-/v�����9&/@s�������������������������Pdkpjgjpuuvxyzxvtb7;������g-'$8X��PTWr��ekWYWv{}��������xy�}or|���xy|}~�����������xsuvyxxxx||z{yz|{{{{|z}|}}}}||}|xuunoppmnhec`^\\ZYXWWUQNMLGAGIFJKOQRTY[\^]aabbdfihkmnnsx|��������������������|{|�������|omq{�������������������������������������������������������������������������������������������������������������������YXU[^_`^_`dghjmpqrqruvxz{||{{{{~}}|}|{~z|}{|||{||}|~�{||}~~}~��~�~~~~�����������������~�������|}�}~{~}{y}}{||{y{srsusoliec_^]^cgeajlka^`ieZ^^^]XTbhbcicYWMH\ddilmoqaUaa]^^_]Pz��������������������������������������������������������������������������������������������������������������������|ty���~x{��������������\ds������Lj}�����xMp�͹���lNmXJHGLTY]y�����db��Ȝ���*+Qo������V!x�cLIMJJOT^���Կ���ї�����Ԛ����zv�������=��[GgueRE-%%&,ChzoYPG?p��Ww��skNW������4T��pXhea_^`bcv�����ґ[������El����̞Ou����qT|��Ekmfk|�����|`�Ŝ���YcXHKGPJd������Gx��|oM_��k3/U]P\ePTY]etysgFLSRNNO[Ia��������������������D<[LA:59A-10,*0603,A9U<132421(:HVo�tj�aR6!<HGDEHKKOPKIJPSKHFGHK1*Cvq��^7::<I����P0
+dq $''V��j%&%"!+JG/$L@YbI"		
+
+	
+
+
+	
+				
+		&:���N*'$:GMYXILM[W9(2DFv������kw�}J, &0CHXXROD8598JPUy��~q<-3Mn���{dqu?&(,09Z��v<*((%$-3B�����\+//*U>-3�����j'),Bv������������������������|Lbq|pjryyz|}~~{wsf7?������a)%!8^��XYZv�a_lVUXwzuvv|�����M#/A43@Obmk_ghktv~���������}wxyyyzzzyyxyyxz{|zzz{{y{{{{yxvuqolifedb_]YXVWTRPPLLIFEFEGJILOQUXZ[\^bbceehilminttvzz}������������������������{|�������ynkr{�������������������������������������������������������������������������������������������������������������������LNQSTUUY[\_dfihiklmnprwxwxxxy||~~{}|y|}{y{zy{|z{|z{~}y|~}~~~~~~~���~~}�~~~~~~~}������~~~~��~�������������}���~����}~{}~yyyxwutronmlhebcdb\\ae[QTX[^\^incafb^[VQ\]^`dfhfYW_``ca`cZw�������������������������������������������������������������������������������������������������������������������}xz��|{{��������������Sbt������Gj~��~��}Lv�ʺ���fReWIFFKTY_y�����gh��ª��m++Rv������N&��VFLNKMPW]��������͔�����Д����xx�������+��XIZa_aM,'&%,DUXXOMEBu��Ww��riHW������/T��lZhfb_]_bdv�����ы]������Gp����ʘMt����iTy�\]�ldi}���¼wd������R_SIGHPPf������Fw��{jQZqrX./Y\QZ]STX\fw|viDOXQLMOTAb��������������������C?YNC:5Ni9TFDG5B;<67404333561%=HVi�ypv`M3%:HEDFIHMVMHKRf[OHGFGK-)?l��yV8788Y����S2	wr$&(312/#(($" .OG31J:XhK"
+		
+											
+
+		&:���V)'#9IO]`\ZZhS9%8IDz������gm��Z<0#'-7DLTVRI;568=PG]�����w?,4Mnt��\ru8))*/A}��{D*''$!$,0C|����g<0.)K8.7v���wJ-/Bx������������������������xTh~ss|~{}���zwh1C������]($%9e��TX]��h�nUU\mrsxxx{}|�m(%)**))22:CIT\_ky��������}vyyxyzyywwyywwxy|zzyzzwvvssonjjdb_]ZZYVUQPPLKKHGGGCBGIPTVYUX\]^acdagggjnoqstzx|����������������������������~{{���}����xnmu|�������������������������������������������������������������������������������������������������������������������FJLLMMQSTWVZ^bbb`ccdgknopqrtuwwxywyxxyxwxwvyyyzyyy{|}{{|{z{|~~~~~~}~}|}}~~~�~}}}�~��~~}}}~~~���������������~���~~}�~�~~{|{yzzzzxuvtstrpqpkggfdea[XRYW\]aeaUZaa[\_Z[]Z[]]_^UXa_bgfeeT|�������������������������������������������������������������������������������������������������������������������{zyy}zyx{���������������Zct������Ok��{��yJs�Ϳ���^TgUKFGLVW_z�����`e��ɴ���(+W|������G.��_LKMNNRWd��������̖�����ϕ����{{������|3��XHXagkJ+'&%.CQWXSMAEx��Ww�oeC`������1\��k\ifba`aefy�����·\������Et����͔Ov����iSjpNcxZck�����of�Ƴ���Q\RHFKPMi������H|��}kM`�tm</Y[KZgYWY^j{��uCNUONLPQHf��������������������=CVNF<;��a�^sKF]U`EEGD@955794&<IVb���_P3&AKGFHIHNVLLVy�cOHGHGF)*<h�aX;59:;[~��E1	9�i#'&'.=0+.56/#  
+/MH3/>@eO5 
+	
+	
+		
+	
+
+					
+		
+
+
+&B���K*'"=HK[ecgpcL9';KG{�txxt�XQg\>@1%,1:DMSWQD6568>CCg�����u>+3Qo��u[sr7!)*+-<u��r;)''&"%-2;g�����H2-'?5,2O�����]+1Jz������������������������yVguwtv~|~����|zd3E������T*&(9n��TX]qw��aWZYfntyy}{|{e)#(+++*+*++.25>Wj{�~~����zuwxyzyxzzxzvuxxwxywsusokifdc^_[YXUTRRNLIDFFBCGGJLORTW[\[]a^_beghklmrruvx|}�����������������������������������~}������yjlu|�������������������������������������������������������������������������������������������������������������������FFHJJGKLMMLQTVSQORTW[]_`ccfghjmmoqoppsqqsssuuvwywx||{{|}{yz{|~}}}}~}}|}||||{}{|~}|{||~}}~}~}}}z|||~~~}~~}~}|}~~|~|{}{}~~}{~{z{}|zyyxuuwwwvtssqqrsqpqpopolljjjggfgffe`\baa^^^[^[SV[]^]Z]b[]dc_^U�������������������������������������������������������������������������������������������������������������������|{zxwwvvx���������������Vcu������Fn�����lMt�ͯ���^TfSKFGJRWb{�����Xk���¶�� 5W~������A3��`NMNMNQSc��������ȓ�����͓����vz������}<��PIX`fhJ*%&%/BZnbXN@Gz��VmrrneFb������/_��gVigabdefdz�����̅`������F|����ύNw����eM`aNdr^aj������jj����ώU]PGGKPJk������Bw��zhKp~��<2]\RbiWUX]kx�|q@NTNMLRPEg��������������������?KYNC;7��n�N�@ep�yrgcm]B74572!=LXq�{�}`N/$BFFGIKKQTIHX��`OFFHHJ&*7894:88999L���t3-	-b�Q%'%(8Q./?S]=$"!
+4KF1/B>=-$		
+		
+			
+
+
+
+		
+
+
+	'?���K+'#"?GHP^ejaRJ:$8HIz�mh_o�XXd\OB1&.6=CKUXQB6778?BMl���}�z@+9UmjxwuXpq5$,+,/:w�y`8*%''"%.4D������8..!=>*;������N,1Iy������������������������uVfqtswzyw{���z^-;`t���w3('*>t�STYbltnPWXWboxw{{{{{}_%"(**+,+)*)*()+4Mt�vvy{��qpvwxwxxxwwxwtvsrqoljfb_]\\YVUTRONKJDD@?B>DHIMQTVVY[[]]^_eehiknorvvwz}~��������������������������������������}|�������xhnu{�������������������������������������������������������������������������������������������������������������������EFDGGFIHIIIIHKKJKKKMQONOQVUWZ\[_a`aadffijkllonruuy|xxyy|zyzz{|{zz{{|{{||}{{}~|{{{z{{||{~|{~~�{|{z|~|{xyzyy{{zyz{|{||zwwxvyxuuqwvuxxxwwsporqtvtrnnoqrtrtttuttssurvussvtrprponmlkjida^`cbdca`ZPW]_[]f��������������������������������������������������������������������������������������������������������������������||{{xwwy}���������������Vev������Kr������mKv�ξ���[UeVJHGLSVay�����Wd��ѷ���$<]������<3��cROPNOSZf��������ɔ�����̑����zy������x>��MGX\bdF)$'%,HgpnWNDG}��Vhmqmd=h������-a��i`ggbbijge{������}`������<}����΅Qy����cR^ZSnx]]m������jn����ЉScQGGJPOp�����{Fy��xfJm{�l/3_^S`iWRX^iuwshEMSNMLSQFc��������������������<HUMA::�yX�N�5dc|�rl�e=67761!$=LWo�s��^J*'CFGGHJKQVJGa��\OGHJIF$,667999:9989Id]85.Ie�H''%(@X-4ath<!0HD-6<2'
+
+
+	
+
+
+
+					
+
+
+&@���K)'"$AJNUY]YSQN6$6CI{������PeytvF2'+19GOXYL@5478>CMg~�}u~n8.8Yr{���Uvq4'.,,2;]m\j>(('&"$.3W������=/-!QG+A������^*4J~������������������������pVgotvy{xw{��}~~xY..7LPSVA&'%(Dz�wSVUTPMMMWXWhsx}�s~|{uX$"()),0/,-,)(&& 0o|nqglumhlpvqtwutrpprlkigc_]YVWTQONOLKJJHEFCCGDHNORX[YXZ\^\]`ceiloprsuwy}~�������������������������������������������~}~������vjpv}�������������������������������������������������������������������������������������������������������������������CBCDCDEFGHGEDGIGGGHIIIJJIKJJMPLPRONPTWYX[UZ_`_bejjjknrrssrtvstvuwxxxyx{zz{y|~|z|{{{yz{|~}||}{}}|||z{yyxvuuvvuttuvvxwvvtrqsrrqpqqqpqrqqrtrppqswxwsrsuvustwxxxwyx{{{}|{|~|z|zywvyyvwvvttrropnkf^\]`[Xcu��������������������������������������������������������������������������������������������������������������������z|zyuvx}|}�������������~Xfw������Gs������kVy��µ��[YiTFGHNWXc}�����Yk�¿���l'O`������~;0��^RRPPQT]i��������Ŕ�����ʑ����y}������qG��OLW[b\B(%$&,FcnjRJ@F���Yhlpnf;l�����~2a��d_hhefllgc|������yf������?������{Uz����\R\VQhs\^k������ij����ˇP_PGGHMQq�����oC{��yePbwrK-:[VNelXUX\eprm`@NRNLLQNJg��������������������4LWKA8:�hQ�Q�3g`jm�OG�H466762%>KYt�s�YI&%?EDEIKKSVRSj��dMDGIID",4789:<:;==@<5267/	3Ufr>%('*BTCNdvf:"4JB**2(%!
+
+
+
+
+	
+	
+
+	
+
+	&F���E*&!&DKT`a``b_Q2&6DFo�����mDk��xD-#)/?KQZ[L;5458=BPt}�y`iX*/9Xt���y[�u0		(.,.2Dgr~~7'&(&!%-2Q������;1+(\;+@������X*5O������������������������kXistv{|vv{����~zU.(/=@D;+(&"(C�uXTSNONQVVUYisz�[o�}zuT!)(*2DIB90.*%!)pngXHQVUWZ`fgrocdcccabb]YUPNNLJJHGFECCBEEGMNMQXSYVWS[^^`^`dgjmqsvxz{}������������������������������������������������}z{~������vnou�������������������������������������������������������������������������������������������������������������������A?@BBBEECDEEDFDDCFGFFFGFEGGHIJKLLKJIIKLKOOOPQTTUUVX[^`acdefghilknqpqqssuuxwxzywywzzy|{{~}}~~||||}{zytrrrsrrrqquuuutrqrrqprqqoooppoqqpqsrrsuwx{{xx{y{{zyyz{z|{}}}��~����~}~~�~~zzzwwtvtonnt��������������������������������������������������������������������������������������������������������������������{||xvzz�|v������������Zev������@q������iZy������UVgNGIJNXRe�����|\m������`+X^�������70��[PQPOQU^j��������Ñ�����ɔ����v{������fS��MNY\`[B&$%'/I^mdTJ?L�ƅWhlnof=r�����y5g��cajlmjqohd������vf������<������sUy����[SWTTnt\`j������eo����΃R^MGGJOMq�����qK���ydNf}r]/;ZXS\^TU[`ltun^@RRNMLRPBk��������������������/!KVJD9>�Mg~U|Y^q:sHL�;246660'AKUs�j~wXI((?DEBHJIQ[gtu�}\NGFIIA,4669@F:;EhcE>7670	"Jenv:&&(+D_idiwe8"$:KA*&+53&					
+	
+
+		
+
+'G���K)&!+GLUccelidL3#9EFY|���sFW��|_<-"2BSVYccS:5678>FNo|�|QUH,.3Ww���`�p2		).,-1M���p3)'($!%-4X������42,&H<-B������I+3R�������������������������iWnvsv{{wtx������zU,'+,//,'%%"-L��qTUVWUQSVYWZiv�|i�||xwR#()-Hkjea_M.'#(jj`OB?=686LQY[LHMKOTSPRVPJHFDDA@@>EIIKNNRVUUUUWWZZ\]affjnmosuwx|�~������������������������������������������������������������wjow�������������������������������������������������������������������������������������������������������������������@?=AA@AA@ABCAAB@ACDCEEDCCCEDDGHIGGJJHHIIIFFFFILLKKKOQPNSSTWWZ]^^^^aeeehklmmrppqruuwwyxy|~}~|{}|}}|{xusrrsprqqprtstqprspqnoqqrrpsssssssstuuxy{|}z{|����|}����������������������������~}}{���~{|�������������������������������������������������������������������������������������������������������������{}zxz|�|w��������������[iy�����Bu�~~x��hS~����ĽLYgSIJMPWXf�����wXl��Ť���2_d������x62��YMQPPNS_r���������������Ŕ����v~������_Z��KJV[]VC)$$'3Lo|sXK;L��Qhknph@r�����u/e��danswwwume�������mk������C������oZ{����XQVSRigYbj������dx�����|V^MIGLQTt�����lJ���x`Pv~zt.<^[RV_WW[aktypV@NNJKKPNCo�������������������}( PSIA68U/VL6sm`Kf7qFU�;15555/(?L\��Yuv[J&->CEEILKSb��cwu\OIFHE?-5579DE=@e�oD8545/
+,Tlst4(((+Hj}cona3"
+(=F>&#9IC+	
+
+				
+	
+%K��zJ(% )JMQYQ[a^_R3$9CGm��vrtfilrj>0*TcrxxyoP66454<GF_{�|WTI,-5\z����[zh-	)1,/4Oyzj5*'(&!&06Z������8/+(Z:.?���|��S*2M�������������������������g]uxvv{zvsy������vV+')*+'&%&&"+L��hRVWUSSUVUU\muxw��|ywvM!()-c�pnuuZ*&#*idXK@97/*"4;<>6357;ACFINE@?A?BBEJKPQTVYXXVWZ\_aacdgjiooswyz|~���������������������������������������������������������������ukpx����������������������������������������������������Ŀ���������¿���������������������������������������������������=?=@@>?@ABAAAA@@CBAABB??AACCACEECCCDDHHEFCDCBCFCDFFGGFHJLMMJKRRQSSTUVZ\]^^abbefhmkmppqswvvwxwy{{~}zywurqpqmooppnnoommonoppqqpqqqsqsssrstvy{{{~�����������������������������������������������������{wx|������������������������������������������������������������������������������������������������������������|��ztuz�{w~������������`lv������Dv������gO����лG`fMJMNPW\e�����wSp��Ö��p!Ib������t38��\PONONS]q�������ܿ������Ö����u|������\]��EJW[\X@'%&(2IjvgQL>L��}VlnpqeEv�����t4l��c_sz~}|pc�������gn������D������jX����WSWSP]b\aj������]{�����zX_MIGJPTr�����iP���w`Sp]zj/>\XPgkUV\boyzqRDOOLIKQMHt�������������������w$#TRLB521*-.)496EF9j7dh:44344-*BO[�n�~UF'->DFHJJNXg��l�pYMGGFEE.7668DF9=i�kG7787+5Om��5'*))Oqv\M[Z0" *<C6$AUH-
+
+
+	
+	
+		
+	
+*B~��D)& +ZOQZXdYRdQ4%:EP�������]j��f=;Qu|���lF56755=JRd��rPQI.16bxz���Vye)
+
+).,/2R���c.)')%!+04Ct�����30-1k?15`�����M+4R�������������������������eh||zy||vpv{z}��rV((/5/-+*'%#-L��eSUXTRUUVST^krw{}ywurJ"'(1WaLZ|rP*&$)m]SJE>9,&-5771./13656?BCFFHIKNPRTU[\[egd`b^chklkpuuwz{}������������������������������������������������������������}~�������ukqw��������������������������������������������������������������������������������������������������������������������==>>?>AA@A@AAAAA@?<=?======>=>?@?AAC@CCCB?@@?@A>=??@AEEFHGIKLJILNMILNQSRSOQTTY[X[__bdghjijmopprsuuutsspnmnllmmklkjllnnnppqrqqqoqrqssqrsw{{{}~�������������������������������������������������������~zx|~�����������������������������������������������������������������������������������������������������������������urpxyy~������������~^kv������Hu������`N����ѹEecPJONOVX`�����wWs�ĵ���V?`v{����s.<��TOPROQSYo�������ں������������ty������U`��;P]]^W?'%#&0ITg_ULCN��xTnmnobAx�����k4k��cdtx~��~pc�������hq������@������fY}����OPVTWghaak������^}�����wW\NHGMOXu�����aN���r]Oe`pY+@_XNjeSV[du~{qWARROLKSMPr�������������������v"#SSL@4/.,+,*(-1556?4EC134454*,CL[jxq�YE#,AFEHJHLZl��z�rVJJHHJ=/744:C@:Js�hH:888*7[���@%'(+RtwRR^\2"  
+(;A.%HYF)		
+
+	
+
+			
+	
+"+F���A('"5[HSidfbegL1&=GP������~Rx��`F<Qgdo�t`=66446>TcivthQWM21:cqy��^Nzd+	
+(++/3Qkt�`,)'&&'.13Q������326=k=+J������J+5U�������������������������iq��yx||smrvy{xyzoN%(?\RG@:'&#*N��gSXYW\_VUVWZkppnttzyxrE#('.Y\=EXcQ,%"*f[OMP>8'!)273.//3:@0.6CW`URMORV[ait�����}zvwwuwx{}|����������������������������������������������������������������~}~��������tkow����������������������������¿��������������������������������¿����������������������������������������������������?>?@>=?A?>>@@?@?>A>=<=8::756577879;<:<=??=><<::989:;>@ABCDDFGFGKJJHKLKLKLMOMNQPRSSSVWYV[Z\_aaccfhikkkkhjggkkjjjjlkjjklnopqpoqpprqpssqrtvwz~|~���������������������������������������������������������{yz~�����������������������}}~����������������������������������������������������������������������������������������zut{�{{}~�����������~^n}�����Hu������YS�����иGdaPJOMOTWb�����uSt�Ȼ���wC_u�����s)E��SPRQRRU\l�������ں�����ھ�����t~������Pk��;ah`]W>($$&2Mfk_YMCS��v[omnpeAy�����j4o��aev}����qc�����ӽet�����|B������f\}����OTXRTppcem������\}�����r[ZLFGLMYy�����`W���q^Rr|~U*EaUKhdWVZdw�}rX@RTLKLSOQr�������������������r $RQJA5/-++,*,/14356676125676)/DO[wui�|[B ,IGDHLKMYj��y�sUHHIJJ=2755;FLZi|�_E:556)&Dh���?&(')VtZhhU."  	+>=(%HWA+
+	
+		
+		
+	
+#)K���?+'#4NKZuighheL/&9DQkwpz|{qZ��q\I222/H^liU965337D^inpqgVRE00<bn`o_8V}b''*+/5Dx�t?,)(()+,06q������7:C=e6(X������D+8Y�������������������������k{��~z{xomt{}zttskH&1j�ytzS,'&.R��cVXXds`XWUW_ossrps{{wrA"&(0_ZBDGgE(&"(h\GQP>5& )340.,.2HT4-6Jb^RSMJOYeq�����������������������������������������������������������������������������������}}���������sknw���������������������������������������¿���������������������������������������������������������������������������?@A@@>@>>???=<>>===;:854420/.0100034457:<<=<;9998899:;=?AAABDCCEFGGFHHHIJLKIIKKMNLNLMMQQRRRTSSUWUZ[]]\[_^accdcfefiggiikjlmoopppnopqrsstuvy{y|~����������������������������������������������������}sxz}~��������������������yxxy}�������������������������������������������������������������������������}������������ys��xuqs~���������z[p}�����yMx��v���Y[�����бFdaMJMLRVWe�����rTv��þ��pH_}�����k$D��YUTQRUW]m�������ٷ�����ڼ�����v~������Jl�>nj_ZU:&$$(4HbkimO;O��s^lmlq`?|�����`:u��cfu�����m`�����Ӽex�����uL����˿_a}����NRXRSwocfj������]����ҿiYYJHGKQW}�����]\���q`Lx��O(FZTQibVW[f|�wXEPPMMNUPKw�������������������l'QPG?50-,,**-.05645754114555'.BMUTVf�wXA 0FFEIKJMZz�w�oUKJHGH;	0856<J`�{ju\E9663,2Lu���5&'',WoqbffT,!.>3,"%BL:)						
+	
+
+	
+
+
+
+$(V���9''&$6KNV]\YW\[I-&:GR|~wxuy}VzzubB0'5@VbN865459Lblorvq[SA,0=bpedI;a�_()+,/5Hszp[G((')*,05u�����v1?A3O.*U������>)5[�������������������������h}���|woqy��{trohD$2�����M('%-Q��aVVTr�pYVVX_rswuuxzxvq;$(*4H\WLZX*%%"0nYEPO=4&)21.,-09b_2//:IKFDABJZhu�����������������������������������������������������������������������������������}}�������qmmv��������������������������������������������������������������������������������������������������������������������BADA?>>===<;;;:;<;;;9873010-,,*,....-134699:98:::;;;<<>?@@@AAAACCEDBDDFGIHFECHHGHLLLILMLNMLNLNOONOQQUUSTVYZZ\YZ[]`bbdeefgijlollmknnooqqrswvwx|~}}������~��������������������������������������������~vxx}~���������������������yuwyx|���������������������������������������������������������������������������������������}��|lkek{���������y^my�����tGy������XZ�����ѫ=ihNLLKRUUi�����oWs������H$Ja~�����k$B��XWWUWUU]r�������ش�����ܻ�����w������Aw�|5ed`\W<'%%(4Qt}��S;Q��l_kmnpZ>������]<u��agv����g`�����Ըay�����pL����̾\c����QTWQVtfadi������^����нi\YKGFIQZ~�����YY���o`^y�qO'G[SRpgZW_m���QBPPMLOVOLy�������������������h*VRG=40,,**+,.02445431124545(1CKTMKr�qV>7JHHJKKN[t�}|�kVIGGGJ:
+1956;G]�qbq\D:777(6P}���1'**0YppXolR+"!2411&(8A9'			
+
+	
+	
+	
+	%)R���B(()"8RMT\^SNZ^H,';GSvu~����e���fD-!-9HYK75457=HTfmpvskaA+.Bcu��}s��\%*.-.4j����T)''''+36v�����q04,,:2([������C*8`�������������������������^v�����{tv{��~uqpmE$5lz|��B&%$4Y��_TV`��m[WXWcsrvtwxywwm7#(+7[oigb@0,&!<sWAVI;4%*10/-..=lV+-,-8A>;9<AEGOgy���������������������������������������������������������������������������������~~��������qlqy���������������������������������������������������������������������¿���������������������������������������������??A?>>:99975565788999994466431.0//..,/212756989:;;:;=<=??@@>@AB@>DCACCCDFEECBGHFHKIJJKKGJIJLLLMLKLMNQPNOPQLNUSRTWUWY[\]`aabcfdeggiiihiikoprutwyx{~�~~�����������������������������������������{wxy}}}��������������������{twww}�����������������������������������������������������������������������������������������~sikkr~���������w[py�����uP}������T\�����ҧ=okPJOLSWVk�����oWq��CHLT3&Ie�����eG��RVUUTRU`w�������د�����ڶ����~w~������8p�w>id`]W7(%#'2St~��OBV��d\jknl\>������^<w��[dw�����kf�����ֶb{�����lT����ʻOd�����PUVOOje_dn������V����ҹ_[YKGGLRY}�����V`���m\]gkkC(IZRStgXV^p��wL@OPMKOVOUz�������������������d.XPG;2/,+)*++.02335521235654'4DLQOUt�gS>9HHGHJKN\r�zh�fRIGHHF7
+2657<Gi�ux{WA:667(7Nn���,&()/Vo{bpkQ( !+&26% -:D:%
+		
+					
+
+
+
+			
+
+	
+$)Q���9('%5TMUdgeiq[H*$<FKEZ����i_z�_E+,9DUH66548>FJcglux|uE(/Bf������Y"!" ./+/6h���J'))&%*37������q23,-=3(d������;*8_�������������������������Vk������~}���}vstpB'4v���{1)&"7Z��YWZg��pYYWWborsrxyyyyj5#(,=juijXKL7&?qPFXE:1&/21/..-;fL,/--7A>9:?C@>:BOd��������������������������������������������������������������������������������~~��������pory��������������������������������������������������������������������������������������������������������������������;<<<;:86541/00//10012566679::;77555534566999;999:::;=<<>=?>>ABB?ABBAABBDDECCCDGGHFEFHHLGHIHHKJJIIIIKMLLNMNMNQPOPQQRRTUTWYXY[[[\]a_``bdfhiijkmnprvxzzz|}~~~}~�}~~�����������������������������������}ywwz|���������������������|xtv{���������������������������������������������������������������������������������������zysnkknt����������v\q{�����nQ{������W[�����Х@jeNLQNRWWk�����lVs��@JSL4%Lh������cU��SUSPQRW`{�������خ�����ڴ����u�������9{�vC_aaZQ1'$#'3Klu�rHCW��c`jmoi[A������R@}�\ew����}je�����ӳb}�����fV����˺Me����KXVQSmi]co����Z����ѷYYYGEHNO]������O_���oYHHHH7)J]MSk]UV_r��{uPDOPMKPUKP|�������������������`3YPF;40-+**))-12245524324653#8FMLLQotZO<8IGFGLOLVo�zj�lTGIJIH2
+ 3659?Kj�{uzYB8496&2H[y��%(''.^zqR_eU(!  $8?$%5GJ;#
+			
+	
+
+			
+
+	
+
+	
+$(W��{.(%!<`HTktuzpXF)&<HJd����uB_v�w?J0"-8CUH6557;@NYjjlt~��],0Bh������}V3#*L'+,+-5ck`^y@&&'#$/68������i02.5D-,\������/,9a�������������������������Og|}{��������yvxnB-9s���J')&%8]��]ZZd��wYWVZdqqsswz|zvk3#(->UM<L]`T4'@kMKSG9/'1450//1;dH-/-.:D<9=ELJ?9;?V���������������{|���������������������������������������������������������������~��������omsz��������������������������������������������������������������������������������������������������������������������===<;;;85532/.,,--..../2447:;<;<===><<>=<<>>?<::=<;<=>?>=>>>=?A@B@BAAB@CECBCDCEDFEEDEGHEEFCDGFHIHHHIIHIKJJLLMMLKLKMMQOOQQRRRTSVUXWWYY\]``baabcfikmprtxvxzzy{~{{|~~~�������������������������������������{z{z~��������������������}xvwz�������������������������������������������������������������������������������������lktpllnoq����������r[q}�����_S{�~����Q`��˼�̠Ae_KINLQVXk�����jSt��CdpL3)Nh������d3d��TVOLXSV\z�������ת�����ٲ����}r�������9��r9Y\\YP3(%%)5JekhZF=X��]akoppTE������QEz�]ey����}if�����Ӯ`������`U����˶Nd����KYVSZ|jYcl������X����ԱQ^ZGEIOOa������Jf���nVEFHD5*M_KUaTSU_n|}|MBPROLPTIY}�������������������W:^RE<60,**)))-01144445434662 9FQIFOMQQM:;FEGHLLOUf�k�mQIJJJI1
+ 4569=Nw�nz�Z@8795%1CEj��"&&(/^lmFWeR*" "/II"!.CG9		
+
+
+
+
+
+		
+
+	
+
+!(_��m3*& >QEXlkmld`H))<HT����lV[OZcKJM1(8?BSC4578?O`innov���o2.@k��fv�{\@)9= )**-7g�l�s*&&%"'/6:x�����S+2,)4+.H�����e!-8a�������������������������Gevwuy��������zwp>,9n��u?((%"4c��WYZh��p\USYestrrv{{yth1%)-<VWWlaTN1'BgQNWK6/&.25.-/1=cF10./?A8:EVdW=78<Y������������z{dZ`hnwzyxurkeh��ʦx�����������������������������������������������}~��������~ljs{��������������������������������������������������������������������������������������������������������������������==>A?>>;::8854310//0-./1112599<>==@?BAAACCBBCC@@BA?A@@AA@@@??>ABA@@@ABBCCDBBCBEEHEEFFFHGEDFFGFGGFFFGGFEIEDDFGGEDDFGHJHKMMNPNPMRRSQOSSTWUXZZXZ\]_bcefhlnoqrqssttvxzz|~��������������������������������������}~}|���������������������|zy{}�������������������������������������������������������������������������������~qlmmronpsts����������n]r������_Xy������N`��ʺ�Ϟ=fbMHLLQT[p�����fVu��CplP2)Ni�����dGb�vVPLX]QU`�������ا�����گ����|q�������5��n;U\]]T7($&)3Ldj]SE>`��X`loooTI������OA~�}]hx����|gh�����ԧZ������][����ͲNf�����KYVP\�f[dl������W~���ՖK]WIFGONb������Fq���nUJIHA3,S^Q[kZSU]hru{yOHPPKLNUH]��������������������V;`PD<60,,+**,-23443344214673#9MpmodKPQL5 9FEGJJKPVWWT|�fTLIHIK3 4469=Jq�s��\B::96$/>I[��"%((1`on[kqO)  $-%&5\N!+CF/"
+	
+
+	
+
+
+
+
+
+	
+'W��X2+'7GHV^Zbgi[B'*?I`���zy�wOopubO4 ,;OMFSE8689D`trwyx{���z61@k���r�}y\1')*.6s�}�M)''&"%349d�����2./%)'.5o���|0*,7g�������������������������Hcsssrz��������zrg8%E���x6(((&5d��YYXk�{g[W[Ybpqrmpw{zxg.%)-:Ymvr=BL4) No[VfP>5$-23.002BiC//02:73:G\dH848>Y������������aU68@EKVZXUNHGLV����Xqur��������������������������������������������}~��������}nlp|��������������������������������������������������������������������������������������������������������������������?@@BBA?>>>A@<<;865763434334688;=<;<=?==>@@@BBCDEEDDEFFFEFEFDBBCB?ABABBCDCFDADEFEFEFGECDGEEDDFFGGFEGFEEEFEDDDEEAAA@?DEDGIIJKLMKLNOOMOPNRQQTTQSSRSUVXYZ]`d_bdegjikmoqrtz}~{|�||~�������������������������������~~����������������������{zy{���������������������������������������������������������������������~�����xoljmlrnsy|z|����������nZt������SV|������Jc��˹�ϖ=i[KLPNSUZs�����bUw�~?ggK1-Pk������SHZ}pNIFW\KNW~�������֤�����ڬ����{t�������5��n<T^^\R9'#$)8SoyXPF@c��WdmompQD������NE~�}Yiy����ei�����Ԣ[������U\����ͮIk����~J[YR_xa]bn������P`����bO\XHHIQPb��|���Fx���mVJGGA3-R_O]l]UV[blpwwCFOOJMPVF^�������������������P<`LE=60-++***,23455412224662#?p���wJOQM4 :HGHIINSWPIX��gSLLIKJ."5789?Sq�lowU>:876$-<JW�x%'(4buoSpuK& $2"(=]H!*LF+$
+	
+		
+
+						
+
+
+
+&<rn[4+'5EKVXOSQYU?&)BMNgz|}��Sh��SH8,6H^RLQJB99;@a���~���y52Cn����ixM%(+-3e}��H*''($)549Jz���O00+&*0Ax�~n\M0,:k�������������������������Fanwurv��������{p^4.A{��a3(''&7e��WWXgtuh\UWX^nqn\lv{{|b+#&->Vzz_O[I-& Rqc`mWQ@%+13237=NlI322,/018@HG;76;?c������������G50,*+09?;8;:;ISy���Z\]_hlmkl{��������������������������������������~~��������}lis~��������������������������������������������������������������������������������������������������������������������ACCCBDBDDBFC@BA><>==<:7987899;<>=;;<;8:<:<<=>>ACEDEFEFIGGFFFFGGFDFGCDCDDDGFBFHGGGEEGDDDFEFDCFEDFFFEGGEFFEEEFGEC@ADDCDDDFFHHGHIJJKMLKNOOQOOQPPRRQSSSRTTTVVYWY[\]_a`dfilmoooorsxy{{~}���������������������������������������������������~v|{}}�����������������������������������������������������������������~�����}qnkhjmst{~x}�����������l[r������X_�������Me��ʷ�ϑFfYLLNKSWXr�����gZz�xFidI0-Po������XG_�qJHIZZDIT�������բ�����ث����|u�������1��j=W]^XN8'#%);^��VOFAf��XgmpooRF������KC��{[gy����|`j�����ҡW������Pc����ΩEm����yHYVS_xa]du�����}XTTekZTU_UJGGPT^�����M{��~lSGGGB1+R]P]lYTV]cnrxrDEPOKNRVCb��������������������K>bQF<60-))*)).33356431354550! =o���tQ_TR4 <IHHJNPTXNN\��fRKKHJG,%8:8<BOn�hfyW@8686#.?Oe�d!&''6cq`EtmG& "%3 )K_C!(B70%
+
+		
+
+
+			
+
+
+		
+
+		
+&]��r9+'6GJX`_\ZfT@%+BIL_��{|{Slvd]DF/$.EcWOMOIA9=>]����~z��u33In����hiK')-03p���F*'(('385;f���kaE2,,/-l������5.=n������������������������{@`n}zuv���������s\95=gqb]7+(&(=f��XXZg|�gYWVWboqg^px|yuW)$'/Imqide^>+$SsfioedB%,274>RLiwPB8/(-/2:99489;=>f������~~����F5/+).8>?;983.8=a���`^behke^Zm�î|������������������������������������������{pis{������������������������������������������������������������������������¿������������������������������������������DCDBFFCEFDGCBBCBBBBA@?=??>==;=>?><==>=;<:9:<=><?@ACCABFGIFEFHHIKKJJHHGGFFGGGJJGIIHIHFFGGFEDDGDDDGFEGGFEEEEFFGHECDHFGGFEGIHFGGJJIHILJLKJLNLMPPQOOQRPNQSQPPSSUTUWY[YYY]\[Z_abdfhimlqrtvtvxzz{}��������������������������������������������|vwxy{~��������������������������������������������������������������}}}~���{somknuwvty������������f]r������Y`�������Mb��ɾ�̌:iZJKJGUWZu�����hWy�uHe^J.-Os������WMf�oMLJ\]DFS}�������Ԝ�����٪����zq�������3��g=XY\[N2%$%*;`zjSND>e��UgmopoNL������II��v^i{����zbj�����ѝY������Jd����ϧEp����wKWUSXfZ\cv�����wUQKIKOTR\SFFJRP]������L���~gNHHGA/-W\PY_RQV]equ{m=FROKMRVIe��������������������ICeVC;60-*)***-22254343555560 !@l���`jkp{;8KJHLLNSXKM`��dMIIIJC)&467:?Oz�hy}[@9875# 0CUt�b#&)(5WT6HunG%"%/"4Z\D"%*26#
+
+
+
+			
+
+		
+
+			*j��p2*%8KJZiolniK@&+BFX������gepldQE#;PXMILMK<?@_��|�ws��u/1Hm|���Yl}G'(*/9z��{F-*()(066E������S0,#3-+{������/->n������������������������xKet~{us~��������x_=/9]r{r4*)&+>e��QXZo��`WVWYenngiqx|zuZ'%*2Nsiid\L@/'!Oddlqss<$,1;?eicy{hZL/)11178669:9;=f������a�����A5.-,2DIGFE>1+/3^��~_afrxyh`_^�ʥZSep{���������������ztt~������������~�������zrls|��������������������������������������������������������������������������������������������������������������������FFFEGGEEFEFEECEEEEEEDCCBBABC?@A@@?DCC@?><<=>=@>??AA@AABCDDBDFGGGHGFHGHIIIIJJLLLLGKLIHIIKKIGHIIHGHFFGHFEEDDEFFEEDDGFFGGGHHGHGGKJJIJKJKJJKJIKKJLLMLOMMORQPPNQSSTSUSTUUQVVUVVUW[XZ_``bcccjhkmooqqtuxyy|��������������������~��������������|vqsw~�����������������������������������������������������}{z}}|yyx{{|||yvtswzxx}�������������gdr������Qc������wFm�����·@n\JMLJWX`y�����dWz�sCfdF.4Ss������QNh�iROLZYGHQ��������Ԙ�����צ����{t�������4��`=VZggQ1&#%+=VidPLD?k��QgrspmLL������DJ��s_l{����z_o��ϻ�єZ������Hi����ΠHq����sFUTQQUVZdt�����qRPLJJORTZQEGLSRd������N���{fPJJG?10RYOKNQUY[`nprj<FPNLKRTBh��������������������CFfQC:41.++,,+-0334444312577."Bb���c����< 9FJILLNTSPJb�|[PJIJKI)&668:@Lb`Fz�V?8886$ 5Lk|�f#')+7U2(PtiC# "#&$>Y[B$$*;:"
+
+
+
+	
+								
+&`��i-($;KMXkrpm_M<'.AFf������lm��v[;(COSLCJNEEH_��pspq���k,3Ml}���\o|D)**/Dq��{?-*&)'+15J������P,+"0,,{�|���{//Br������������������������sRhu~}vq��������e9-P���j3*)',@k�~S__v��hYVXYdltkipx|ys\$$*4MYI?WYOK3(FXcpuxj9%/6?Txvv�qO..5239;:<;==9@p������������@5.--5FKJMOB0+.4^���aes�~la`Z�ʛLIEQT[jx�������zmjlfbagx�����������~��������yqou}��������������������������������������������������������������������������������������������������������������������EGFFFGFGFFIHGDFGGFHFEEFEEDCECDFDABDDCACCBA@?>AA@BC@ABABBBBBAACDCCDDDCDGGGIKKIKLKQNMKMMKMLMHJJMMKKIGGIHGFFEDEEDEDDFGGGFGGGGIHHHJHHJIHIIHKJIKJGHJJKLLNNOMMPOOQRRRPPLQQRUTSSSQRRSUUTSYYTSUZ]_^_abdgijmnoouwxy}}������������������������������}utv{������������������������������������������������}|{xzxxxxwvvuvywxxvxxxyzxv{��������yx|��rqsjmy�wDf�����lGm�����ҀCl\IHILTZ_x�����f]{�nI�lC/0Sy�����~LIi�rVSKQSGNW��������Ҕ�����ۣ����yu�������,��Z=Zk�sN1'$%,@\qXNEBq��Vo��toNO������=M��mZmz����v]o��϶�ѐ[������Hn����ϜDr����lIYTMHOU[es�����mSPLIJOQSYQFFIPSi������L���xiRIIF>,/SWMIINVVXajmne?ITNLLSTCh��������������������AJjNE:211-,++,-1555544512685+#G|��ws~��~6#?FIKMMN_`[TVl_SLJIJLK#%788<DI>5E��V@9::4$%;^z~�c$(*,9L-0Oh_:$!%,$!=T[C-&4H8#
+
+	
+
+					
+
+	
+	'[}�V-&$7FM]illjfV>$-AJl�r�~s�qp�yoW8-RVfUFIRPPT`~�qikz���^+2Nn����ds~@"(*-1?r���=,)(+%%/5L������N-*++.}�|���}--As������������������������p]iu}~xt����������e1)M{��j5''(-?h��`b^w��gWVX\clsifkuzz{W$$)5LZT`qD`T-% DVfquwa:'0<@Yv�|���z=,0438=BCECBGKRv������������@5--.8FJJMPC/).1`���^fr��sk_[N�ċ?EGIFJM[^dXinnhT<A@CFKUbjqvw{{����}��������xoou~��������������������������������������������������������������������������������������������������������������������CEDDDDEFGHHEFGHGHHIHHGHHGFDGGFGEDFFFDCCGECABBCBADEBDCCDCDBA@ACAB@@BCCBFECEFIGFFHHHIILLKKLPNJKKOMLMKKOMMKKJHFEGIGFGGHGEGIGHGJHHJHIIHJIIIJJJIKKJJIHIMLJJLKMMMNONOLPNPPQTSPQSQRQRSSSVWSSVUTSVVVVUUZYY[^``dhigmpprsvx{~||~������������������������yuvz�����������������������������������}}|}}~~�}{{{zxxxuuttvuuttutvvvxvwvwxwwtxz|��������vv{���{lfhjlbTer|����\Fs������nEoZFGHKU[_x�����dZ�mI}dE+1R|�����PSn�mURLUOJVe��������͔�����٠����zv�������)��SFeu�qK0&$%,?b�oYODBu��Yy��skQR������=S��m^n}����v]r��̬�ЊZ������Ir����ϕBs����kGXTOKPU[ct�����lSPMKKOMQ^RGGIPUj�����L���wfRIHG>*2XZJHIOUTYaimpc?GONLMRQMi��������������������<JbPD;40/-,,+-03655433446574+&Jqrw�l��}�6'EIHKMRb���~YPSMJLKJJF&678:@E<9L�T?::85&)Gn���V$(**6;.5KTC,"! '5&#9MWF4!&>K:!				
+
+
+
+
+	
+	
+
+$5dkV.(%5GN[`adjkT?%0BJl���dW�mgsmnP7 Speh\OLRNQX]{~jgpvy��U,5Qr����c}{;$ '+-1D���wA,(),(&/5K������J/,3-,������}..Gu������������������������pcfs|{xx����������g3)V��~f3&)(+Ap��Z[X{��eWWXYbkrj_iwzxuP &*4@_n{WHg:'"!DYjqtsfA)7K>Ko�����h=2147<GQUVOSY\`}�����������z>3,,/9FKJMJ>0+/3b��|^f|�jqh^\S���?DECCHEPWVARSL\>/86479>CLVY[cjs����~��������xoqw~�������������������������������������������������������������¿�����������������������������������������������������CEDBCEEFEEDDDEEDEFGHJHGHHHFHGGGGHGEFFEBGFFDEEDCADDCDDEEDEEDDCDABA@ABABCDBDBEEEADEDDBGDFHGJJGJHLMKLMMOONNPNMQNKKIKKJKKJIHHIIKKKJJJIJHHHIIHIHHIJIHJJKKIJLKMKJLMMMKNKMONMMMOPNRSRQSQSSSTSTRRURSRROSTUVTVVXZZ\ZZY_dedehnopsux{{~�������������������~uqt{|��~�������~~����~}|}|~}}~}|~zzzxyyywvvvutsttssrrtrqrrqrtvvwwvutvtttwwz~���������������vvw{vmhjnmkloaWSn������UQnYIKLPX\d{�����b^��iHfOC,4U~�����|JTg�gRNJPMMYg��������͓�����ם����yv������*��OKdv�rJ/''(+>WecWNCAx��X|��rmMT������:V��oZm}����v[t��ͫ�Ԇ`������Fr����ΌDv����fIWVRORV]cq�����nWQMJINOR[SILKPRk�����rR���weQIIF;*4WVJKMRWWYbilm^<FPMLORPSm��������������������8L`OE93/,,,+*.02554554356664*(Lx���l����3(CKJIMW}����ZU\ROMJKJF&78:9@C7>U�|L>9865'0Rv���Z$*++=cj_A1,%" (9!(#(@Q\Q9$&DJ<!
+			
+
+
+
+	)Npvc/('3ILTWW`ofP>$/CIp���e{�_bzzwU5%'rrgidQH?<AHN_bZblmetQ.3Sq|���`zt; !'*,1G��n=,((+'(/4Q������E--$80/������{,/Iz������������������������i_cq}xwx����������d+(Np}�^,((&*Ey�wLZY{��fVZXZdnnhYry|xuP!&'0<\ztVV@7+$#G[hqvxh=%"AE:9s�����jJ64<;ANV]\`caa_}�����������u;3--.8FJKNK>1+/3d��v]hys}f^[K���>?<86;BOMKHUZST536325765136?HQd|���~���������xkry��������������������������������������������������������������������������������������������������������������������FEDECBDDEDDDCEDCEEDEFEFGEFIJHGGFGFEEFICFEEFGGEDDDCDECDEDEEBDEEDFC@BEDCCEDEEEFGACDDEDBDDECDDDDDCDGGGIIIHIMLMOQNMMONNONMMLNLMNNMNNLKLKKKKJIIHGIHHIIJIIIJJJMLJKMJKJJJKKKKKKMMKOQPNOPOMOOQQQRRPPPROQSSSTSUWTSTQSRPWWTXY[]_acegkkpsttwz~~�~�������~skjltux{|~}|}{yyyyyz{xz{{yxxxwxwvyzxzzvxrutuuuroqoqpqpqrqppqsrqqqnprsrsurtttrstwvx}��������������������|wxxmhmna]_twncbgpF_hc[a^Z[Zdw�����df��fFIID07Y�����{GC[�jLKILLMY_��������ˑ�����՞����vy�������&��OM`s�tI3(&'-BdndVNCH{��Q}��rfIU������3[��l]l|���u_v��ά�Ҁ^������Bw����цHu����_FXUPNSW[ct�����eVPMKJMPV\RP^VQQk�����jX���vdRIIF9,7VVLSWWXY[ckmm\?GNMNPSOHr��������������������2 MbMB84.,,++),/1445444456675*+Kt���i����++GIIKLSu����SrjgjQJMOA	+9789?A;?YzoH=:885*$7Z���_')*,`��zI=6.%"$-=#,.=KX]M;' -FN8"
+
+
+		
+
+
+	
+
+
+	
+
+
+'h��h0(& 5HHKPZox\Q>'0DHf��uP��As���J8'&Dsia^YTL83366;BL]nc[kN-3Xq}���lyr6$ "*,+0GfZxw7*'(*')/7_������C0..;/1������q*/Hz������������������������f^fs~zvw����������^)+Gv�{6+,(&,Dr�rX[]}��dTXWZepoi^qyzxvM &)2SmlgaIQE+#(JYcqx~U3!#B<@3������tQ5<LIELTY`ml\RD~�����������m61,+.6CHLPN@0+.1c��y]et��{c^ZD��A=5/39>FDBHY`YN74421<D=>96356@Yy���~���������wkqv}��������������������������������������������������������������������������������������������������������������������GGFFEFGGCEECCEEDDEEEFCDDDCBCEEFEEFFFGGDFGDFEDCEBACDCBEECDEFDDDFEDBDFEEFDFFFFFEEHGEFFDDGFBBBCCCBBCDBDDDDEHHIIJKKKLMMNMLOOOPOPPOOPRONMNNMNOKKKMLJJIJJJIHIIIIHILIJIHKJHJKLLKLJKMLLMMMJKKLNMLMNOPOQQQRTRRTQQQSRTQNRQRRSQOSTWWVVW[_aadcdilorpnqvyzy{xka`_dfnru{wtturwyzwvvxxytwxvwtvxxuwxwxxuvrtsttqtqrrqpqppqpppstrrprqsssrstqsssvwvvvz}������������������������{}zvuvwyjklgdYjwuhggc`\airwzzuag��\PJJF);]������x5&n�pGMKJNQX`��������ˎ�����Ӡ����wz�������,��QM`v�vT5)%&+<]dYPLCF���R��siLY������/]��jXm|����sZz��α��z[������@|����υJy����cKYUSU^[Ycx�����dVNKJJMOW^SfxXOQj�����h]���xcQJIF:+9WWT`^YXY^emnoW>GQRSTUPIs��������������������/!TbMB93/.++*+-13544334444675)-G^y��a����).HIHJMRg���n_�y�WKPL=
+,789<<<;:QlQ?;9996+)7a����Z#(*.f���hd{R%$$,D#.;KX__N?-#!0GM6$
+
+	
+		
+
+
+
+
+
+
+	
+
+
+%g��c+&$3GGNbujPJM?#1DF\�fBK|vHm��wS91Irt_YMLRK@;458?JWWpUIj�N/2Wt����nzt7-)&0.-0DZ��b2+())&)16^������:1-$-7/1{�����L'/M{������������������������a_hv|wtt}��������{Y*,DyoTO+((-Gp�oYXY|��_WYY^ksoecrx{zsI (,5Qemn_Yj>)#(M[`q}vE-&6BAFwy���n@6FVRLJQWn�aI9<~�����������e5.-,.4<DKLH8-,-.h��q_cpz�rc_\D��|B<504;<?>AF[bRH94327Ok`YTOH=47Nu���~��������wmpu���������������������������������������������������������������������������������������������������������������������GHFEGHHFEFECDFEFEDEDEDCDCDBADEDECCCEEDDBDCDEDECDBCBCCDFGFFGFFGFFEFDEFEFEFEGHHFFHGFHEEDGHEEFFFEDDCDFCAABCEDADEGDEEGGHMIKLLLMMNNOOQQRQPQRQPNNPNPNNNMNLMLLJIHHGGIHHHIIJKMJKKJJKKLLJLLLKJJKJJLMNNLOOPORQPOMONPPRRRPMROMMNOOPPNPQTTVURQY\[[]^]eghidf`TPOKIOZdlnooquw|~~~~}{}}~||~{}|||}|zwyyzyw{yxxwwxwxvuttsrrtttuuwxwwvstsvwwzz}����������������������������|{~�������|yu{�|opqmhdlpjjhcaVdrp`[URI)?^�����u50��vLOORSUVZ��������ɍ�����՛����y}������s&��NJ_u�t[5'$$+?Xc`RMCE���T}��qgEb������.`��hWoy|~}~nW}��ɭ��yc������C������{Ny����]MVTTbi^]bz�����aTOKLKMNV[NjsVOQm����b`���t^NIJE8(:[XV^_[YW[fmnoX>HRSWWVMMr��������������������)!V]MB81.++**-122444354432473),S{��tv���$.GKIKMQl���ir���~VNMK;
+-99:;FLIC<<;<989:6.3P�����Q%(+.^���{��T$#%1J#1AO_b`PC6(!&2EK7"
+
+			
+
+		
+	
+
+
+	
+%s��]+)$ 6CJ[udSRSN:%3DDs�KT���c{��qP98[hJ@B?GLLOI87;>IYNRD9i{I/6Xt}y��j~n81+4-./B}��V2*'(($)27W������51.!*F51p�����M,3M}������������������������_bluvsqs{�������uT-0h����`++*.Kx�oWXY���cXVZ`nrslks}|xoF(+8MQQJ\cF5)"&]d\p�i@*,6IAFTk���M28PVUTTWf�`;8E������������e20-,-/8DII>.*+,/i��g\dottka\ZA��uD=515:=A@FJY]MJ5543Bu�ponrmB21Oy��~}}��������tkpv���������������������������������������������������������������������������������������������������������������������IFIHGIGFFFEEFFFFEFFFFFGFEECBCEBBDEEEBDCBCACEDEACABABCFFCEEEFEGEEFGEGFGIGEGHHKJIGFEEGGECGGGGGGGFFEFEFDBCBBABADDCEEFDDEFGFGHIJJJKLMMMLKMOOPQPSRRPSRQURQQPMMMMLKKJIGIHIJLIKKJKLKJIKLMKKJJJIHJJKJKLKNMMMMONOOONNRRQOOOMMNONPPQRSTRRSTSSSQSTUUVVWPGGA8500.-9BP\dknsv|~|~��������������������������������~����||{zz}{{z|~~||~|}|�����������������������������������������������~~ztuumie`^Yerlb_]XJ7Q[_pu��p6I��nPSUVV[[`��������ƍ�����՛����wy���x��s.��OK]s��f0%#%-FmxlQPBE�ʏQz�~rfAh������0d��h]nwvwxxj^��ȵ��te������>������wQz����^KWRSej]_b~�����_RKJLKMNV[MfkWPPr��|��di���u[PLLE7'@^VUac^VSYdkolRAIOSZ[WLSu�������������������y''Y\LA92.++)(+/11533454433472'1Sw��lsz~�m1MHIKPT����yj��|�NNNP:
+/:7;Jv�zuM7<=;89;81:c���ĪK()*.P��x���M "&4H$4@P_a\PD9-'+5@=*
+
+
+
+
+	
+
+			
+
+
+!v��_+)% 8EL_ed`b\P=#4DGz�N����iw�wxI4%0'#348>COUG:768?HCA?<^tC,7[t{���k�k7'+21/0K���c2+&(&#(14J�����o420>S3:������j,5Q�������������������������ciorqomq�������}rN&1n����U++'-Gy�mXX`��tVVYV]krtmmv|zxsC )+7KUR\qVA6*"+g^Vvd@*,2<955]���s020BRUYcjo�~X87C������������`..,,*.;DGC3+*-,.k��d]_fkne]ZZ<��sHA6.16>GHUNSVLH8575Fzshvy}k924Qy���~��������tmpv���������������������������������������������������������������������������������������������������������������������LNMMLKLKIGJIJIIGHHFGGEDEGCCCEDDDEBDCABCEDCDGCBBDDBABDEEBDCCECCDEFHFGGHIHEHHHIIIHHGGGHGHHKHIIHFFGHGGHGFFEDCDDCCAEFGCFFEDDFEIHFFHHGGHGFIJKKMNOOQQOQQRRRUSSRRRQOOMMLLKLJIJKKIJLKLKKIIKNKJJKKMLKJKMKMJJKLMLMMLLKNNPMMNMLMONOOPPPRTSRUSTSQSRRSPPKKD@;2130-,/6:EHQU]fimosuvy|~��������������������������������������������������������������������������������������������������������������|wqnklld_c`]RP\WLVVbhoe?W��ugb`[^abi��������Î�����Ҙ���ws������j*��MQ`|��])%%&,C_aULQAF�ǅTx��si@b������4d��gbjsstsre\���ȳ��oi������B������qV|����[JWTQfj]`a{�����aSMKKKNLUYKcmUQQt�����[g���v_MVSG8)A^XU`bZTUZbikiK?JPTZ]WMQx�������������������w"(Z[LC92/,*)*,.02456653334681%3Mt���y���t3IIHLQR����wf����POMI:
+-87=i����PCE??;:;:1?g�����A&*+4v�~m|��V!$.B'5AR]_`PC;1,>=1)#
+
+
+
+
+
+
+		
+			
+
+	
+
+
+
+
+%u��W,*&6HJWiong_O9#3DIz~x���iq�~}J0'84357AI?65:<;;999<XuE+6[sw��yt�h.,79/0P��sV2+))'!(17d�����}81+FA.A������g)2P�������������������������^fjjilko���������tF$4gul[wK+)&/J|�gYW_x�s]VWW[hpwmmw{{xs@(*5;61nr3C5'#2fUUz�a@, )28526X���W-2*8KWa�vm|qV87J������������Y0/,-*.5<B8+*+.,3i��d[\Z_dc]][8��yLC:-27BKNPRXYOH5564=fP-Uvsd838Pz��}��������tinx���������������������������������������������������������������������������������������������������������������������RQSQOPQQNOPOMJHJHJHIHFEEFACEGFEEEAACCBCDBCDDCCAEDEDCBCCBFDEFCCFEFIFHGFGEFEFKGFHJHHJGFIJHIJGHKGGHIHGHFIIGGGHHGHFFFGFFFEEHJEGIHHGEEFGEEEGGIHHJIIIJMMMOQRRQQOQQPPQPQQPNMNNLNMNNMMKJLLLLMOLKKKKNLKMLLJIKKLKJJLJIKKMIKOMKMLLMMMLKNPPSVTVTTWUVSSSNNKHFB@@=<=<=?CAHEINQ[\acdfillnnssuv}}~�����������������������������������������������������������������������������������������������������ywtrsqnnifld_`]Z^iX/6j�~{rmtury�������׾������Κ���}y}������Z@��NVo}lJ(%&%,AOOLNQAN���Zz��rfBc�����/k��gbjqrrrqd\���Ǻ��km������<������mY}����ZQZUOdh^_d������dTNJKLMNVYNbgMQSw�����Sk���t^Qg\H8%C\XU_]XVYZblnjL@JPU[[UGWv�������������������u,\XLE82.,***+/33234531225772%4U����n���q3HJJMPR|���vx����HLJL<	0:8;c����AiHjcA:;91;j�����:')*2pr��~��U#%6F(4CPWZ_PB3,AaO-(#		
+	
+
+		
+			
+	
+	
+
+
+!ZvqC,)$8FNcfehc\N6$2CE|���uj�iz��yE/<L81356;<46<?=;6447WrA-9^r����ye(/D>2/VvmfY,())'#*47q������5-)09/;������Q+3P�������������������������a^ccfijm��������v@"5q�qz�-((*2N��dYZ_s�zZZXZ[gpsbjtyyul?(*6?I`�YDT?("2[L^}b<,!'07416Z���R2.-:DSq�odvpV89N������������S22/,,068:4+),-)0j��e[[]bc`]]X:��qHD704;DBBMV\ZTL-563IoY=R]aj:68T}���~}��������qhnx���������������������������������������������������������������������������������������������������������������������MSRRQRUSQRSSRQNNNMLKJHIGEDFHKFGGFDCDACBBDCCDBCDECBBCCCBBDCEDDDGFEGFGFEFDEEFHFFHIHFHHGGIIJIGHLJIIIIHJIIHIJIJIJKIIJJIHGHJIIGGIHIIEFGGEEFEFHIHGFCCFGFIHJLMLKKMMPONNQRPRPRUSQQOOQQRQPPPMRQONNNOOOPMJKMKKJJJJKIFHIKKJLNKJKIKKLMLNNKMQPNRQSSSUSVURSSOPNNPIJJKKLKHJKLLHKPORSQXXWY]`bceiiioprrtxwyzy~}~�������������������������������������������������������������������|{z���������������������}~�}��~|wuqjlolVD<Uuwwupu}uz�������ѷ�����ȹ���{q{������[b��LUkfebK*&$&.?JLNTPBT���^���qgAf�����s5p��ickstsrsb]�������jr������@������gZ}����QQWSQec[_e������_RNJHKKLVXOigNRW|�����Ol���q\Oe]F7"Dc[Va]WTX^irqhK?JPS^\RFWx�������������������o/Z^LB83.+*,,+/43454531034662#9`���kJk��k4IKKNNR{���uj����DMMP<199:U���pZ���uB9;:26j���m�7&)*0n��rs��H!%&DE+6AJMX^NC1'Qg>+)#
+		
+
+		
+	
+			
+
+
+	
+MooQ+'#9KR_UQQ]YL2#4DN{���i��Rn��oI/OX;24348848>>==9114Uj6+;bt����owa+!8S=-/c�fq[/((((&05:o�����}71, 0B25������9+7O�������������������������X^cdgkjq���������qB"6t��T'()*3P��`[]e��pVYYY[juobgszyvk;"&)8O\viR`V9("5SIh{y`:-!&25325_���G/-/<CKfunlru[56K������������T62/-19BEC;0++*'.k��a]]`jib][Z6��qKF9278<;@R[`[TH2455IkPDMIp`637X���}�������rlpz���������������������������������������������������������������������������������������������������������������������QRRRTSSSRSTUQQSRONONNLNIHJIKLJIIHEDHCEGEGEFEEEFFEAAEEDBBBCEECBFCEEDDEDEGFHFFHFFEFEGGFEFGHEGGGFEEFGHJHIILIIJJLLJJHHJMGJEIIIEFJIIHHIJFEHHEEEGHGGHHEEHFGFFFGJIJKKJIMNMMMPQSQQQQSSTSRSRTVUSTSRSQRSQPOOONMLKKLIIJJKKLLLKKJIJKKJKMLJKLKLLLNNMMKPOOQRPPQQSTRRTQQQQROPRQPOOOPNPQSTQOQVWXZ[\]]`bcgiikqqsvxxy{{~~}~���������������������������������������������������������}zyxz������������������������}~��������~��{sllbNNLKP[f��������ʭ~�����mnvuvly������Rv��JTa]_fO)%"$.?IMUVO?T��~\��{ofJl�����s7v��damtvvtsda���ĺ��hr�����|G������dW�����MMXRQ`]Y^e������ZQLIGJLLVWMlgRQW|�����Jq���rYQlbF4)HaXU`ZWSX`lvqjIBLOSYTPDX}�������������������c1YaEA92/,*,+,/3355354313545/#;m���}q���]8KKKMPSm���g�����GOKV?!3<9<U���bm~��r;98708l��Sa�9%)+7f������G!#(GD+8CGIZ\NA+ AXG.'
+		
+
+
+
+	
+										
+g��W*(%"6KU[WRIJXP1#/DHo��Ns��L���lE,#!P^?4;CA>;69=?@@;126HU/->_y���vq|`&!?c3.8avj�R+()((*367g�����d132QW,8x�����M-7U�������������������������^giijiit���������pB%6c|��C'**)4Q~�eZZc{vt^VVYaqtr`ds{{vl;!(+7EQos\PS:)"7U]t}�\;-.4663?t��|G,,.;EMgxdw�Z77M������������N4212DXX\[H:3-*)0i��]]^jsjd^YS4��hB@9667<CKS\`VMF333:B]]D<Nk9133X���~|��������lmp|���������������������������������������������������������������������������������������������������������������������SPONTPSQQSRTOQOONNQQPOOLLNPPONLKKLILKJJJKLJILJJJJJKJJGGFGFFBBCDAEDBCCBDDEGGECCEFDFFECDEFFDDFFEEBCDFFFDDEEGGGLIKIIKKMIJKKJKJKJJIKIJIFGFDHFFGJKJIIHIGHGHGKJJJIDDGGJKGGHLLKMMNNPOQORRQRSUTTTUSQQRSRRQQRPQONONMKKKKKMLLLLKKJJJKLKJIIIJKHJJIJKJIJILLLMMPNPPQQQVQPOQRQPOPOPRPQRUSRRTRRRTSTSVWWYZ\\Z\adbhklnopstvxzz}~}������������������������������������������������}�~zz{�������������������������}~������������}zutokb_]h����������usrvqljjtvqkgq}�����:|��NVZT\_G(#!$->JJOTOAW��|Z��uog@n�����s2w��fanx{|{{ce���ĹϽ^u�����|E������`a����OQYRNY\[_e������[QMHHKNMWWQfeQNR}�����Ex���rUOe^E3+H]WU\VURW`mvrgKBJNPXXRC^��������������������`2[ZIB910.,.-144577345434225. 8a�{�[t���M7NJKMNX���{Z����wIPN\2#3:9<r�}�zc��zz8::4*3o�����4(*,,S������A$#(LA&6AFH\]N="!Ob@,$				
+
+
+
+
+	
+
+
+
+
+	
+
+
+J��Q,&$ 8LV__TMQ[N0%4EKQ]PBUsPNkstP?1+*Zb99Q_[SJ;CCFEF@.17>B,0=b}���yu�b'
+%AI-.9Xm�|O.*((()548Z�����Q221$WX*E������],7]�������������������������`nrrngfy���������p?$5h���G'(+)8O��`TW^v�XWW[fpprbity}yl8"(+58^tdWZX<*"8Vft�X>*.3664W����F3/1BHNm�vm��M;;Q������������H614AabZ[dVE<4,+0h��V^_sxj_[UF��_@>655>CFKU\]QI@1428HcneefF0335Z�������������pnq{�����������������������������������������������������������������������������������¿��������������������������������OQPNNNPOPQOPRQPNPQSSPOONMPQQPPPNMVPORQOOQQOPPNPRQPPSMMOOPONNMMMIIHGFFFFFDCCACBCDCACCCA@BDDABDCDBBBB@@?@>ACBBCEGHKJHJJJHHKKMJJLKKJKLIHIJHFGHIHIIHGIIJHJIIIJLJHJKFJHFFFHHGGFIKJIKLLLNOQQQRRRRTQTUSUQTRRRQQSSQQPOPOQNKMJNNJIJKIGIIFJKJJLIHKLIIIJJJIJGKJKJJMKMLMOQQPOPPNOPPRRRQRQQPRRRQOPPQRQTTVVUXXXZW[^]_aaeghlmlptyyxz{|zz|~���������������������������������������~|{vx������������������������{|}}~�������������~�}{}���~|�{kX_kmkighr{sf`^hlmnsqd4���LBHFKQC(%$&-=HJJOMGY��sWxwrqg?t�����s4{��dbo~��}hg��־�еX{�����tH����оZ_����OUWPP__Z^d������XRKHHLOPVTR\VQQU������C|���mSUhYF3(J[RSVUSSWbnwqeLFMQPTUQB_��������������������\4_\K?60/..12>E9<?8556335344+ =j�ztq{���SJXLJOQ[���k����tHPOO)5=8<h����d����9996*?s���˭*(+,9m��pw�|0#$*R@ 1?CF^\K7#EN3*!	
+		
+		
+	
+
+		
+7��L-'$ ?IUg\N]dfK."7GFJHFGEHFGILDGB//.IJ3J|~rrd]]X\^^M7-1=;).:b~���xv�a#	#73)-2V�\F-((**0B9;v�����n52/-dW/H������J*6]�������������������������fq|{riluy|�������o>-8z��xF)',/7Q��]WXd��qYWXYbmqqkqx{{wg2#)-7[�q`ZcP4( =bcq�t^A(,5<5A�����B/15EJPg�mu�tN?>\������������K629]_IHQ^RF@7/+7k��^\g���k\YSA��XC@427ADDJW]\RL@6514\unnbFC@45a�������������}qlq|��������������������������������������������������������������������������������¿�����������������������������������KNPOMNOOOQOPQQPPQPOPMQRNTRQQSQORQRPQSQQRUSSTTSRTSRRTQQUTUSSTURQPPQPONNMKHGJKJEFEBBCADB@BAA>===><=;<;99::8:;;<>?BDEDFEHGFHIHHJIKHIIIJHHKIGGIIJKIJIHIJIJHHHIIHHKJIJKJGHHHGGDIGIHGHIJKLKMKLNOQQOPSRTSTRQTPRTSQURTUQRSPQSRPNLMLKLKJIKLIJLJJIKJGHJKJJJIIIIHHIKIHIKJJJLKNMNNNONOQRPPQQRPPOQONNORPQSSUTTUUWWVWWWYY[[^_`aeflmnooorstvxxwz}�{}~}~��������}}}{}|}}|�}���������}ywut}�������������������������}yxy|}�����������������������yqhlqpjd]^lpfbYYadfjkdYM���_NH036.%$&%,9BEGHGGg��l^kmnnhAz�����m5��bar����_d��Խ�ҪX�����qR����͹Rh~����KPTORc^Z]i������QOKHGJMOUSLKJMNS������E}���kVViYD0(OaVPVSVVXdovqeMHNNQSSQF]��������������������W5^ZL?510147Ku}VZd@:74312365+:z�������M3o[LLORR����v����sGOJM(7=8@c���{i���{9:<7+>n���ś&&+,:t~{o���<$$*M5
+
+.=@I\YJ8*GZYA/"
+
+
+	
+
+
+
+		
+	
+
+.y�J-($!<HPZQhyc[N0"7FFHFHIIHIIJIEF@1:-052K~yw�yuq{~�whK..42*.<f~���t{�Y$	--*.3@X:C1*()+,=R9>������i042)jZ/I������I/:e�������������������������ct�tlqw�������r9.=p}��?((135U��[RXg��sYXWZdmqspswyxxf2$),:WN)GV@H5( A]`kxva>*)4:/G����t=0,6FHRr~uv�tZ?>X������������J72AS>13GPA@@:3*8k��`]f���h^[[U��\F@76;@CHLY__TRL474:TaTWcZX]M89b���~���������|qgq|���������������������������������������������������������������������������������������������������������������������OOQSLOPOMOQPOPQQPQQPPOQPOPQOLNNMPQPOQPQPRSTTVUTSUTSVTTTTTTRTUUURTSTVSRSQQQQPRLLKLNKGIEDCDC>:::;<966555442357::<=>ABC@DEDGFGIIHIHKIHIIGHIIGIJJJIHHHKJIJJFIIIHIJIHIJIIIJIGMKJHJIGIKIIHIJLIJLMMNOLPPPPLNPPOPQMOTRSWWRURRRRSQRRRQPPPOPNNMKJJFHGHJHHJKIIHIKIJIGGFHKIHJJIJKJJGJLMOONNNQONMOQPQQPMPRSTQPPPSUSTSSSTVVVWVXXWZY^_`aceejljlotwvwvwz}�~{|}~}{{zzy}|~�~}}|}�|uumt{�������������������������}{yuuxxzy{~������������������������~ysjefhcca[JQ[U]YQK|����t4.,%"$#$+479:;CKn��t_fnnk`>z�����i8���abv�����[g�����פU������lP����βJh����JPVPVbWY]e������UNIHHKMNWRHEGKPU������Hz�~{jWYeXB/.O^RNYUVXZhtwwhCJOPQUTT@e��������������������S6]VJ=6123@b�����e864333552)Am{�������MVqMKLNR`����Oc���oHPKL'	
+3=;>Z{���f���z;:8709e�����$)*+<u������9$")C&)6>L^YK<>{�g?9#
+					
+
+
+	
+		
+
+
+(t�N-'$#>IRVd�^W[M0$9BHFEIIGJIIJIHG@3?//64DVRfqqtt~���~R1.1,(/Aj~|��tz~U"	
++..1551332+((+2S[5:}�����c,5/fj*M������D,Il������������������������{Ys}�wnu|��������~j7,8p��=*+358U��]U[g��i[UV[dlsrnqt|zyc.$),:OUKUYRY/%!;XZn�`;)+165Kw���q=2/7CJUnneo�{NA;a������������F64AF714EI52;>4*5p��b`h���f\\XX��`ND:7;<IQQ[ejP`_654:QSI-Ljj[D4@c�������������nir|���������������������������������������������������������������������������������������������������������������������RRPQNNOOOOQQQOMPNNPQQOQPNOLNKLKKILJJNORORTTSUVUUUWVUSRWVTTTUSTUUTTUVUTWTUUQRUWUQQRQPOLMKPNHEFBAB?<<:8865334587:<<?>@??@CCDFIGFIHKIHFHGFHGHJIGHJIFFIJIIJJJIIJJLHHHJJJJLJHJJFJKIHIIJJIJKLKJKJJJKKJKKMMLLKLNMKKPOPQOMSSQPPSQSTSRQPQRSSSRQPNLMMOMLKIIHHGIKHGEIIGIIHIKIHHGHHHEFGHGHHJIIMNMOOQQRSPRRSQRRQRSTQQSTSUSSVUURRVVTXWZZZZY[]`afcfiiilmnstswywvvwwxzzzz|||zzzyxxxywvv���������������������������~}yuuvtvutww{{}����������������������uihpqrnommlkg]ZELz���q;=0/,,*,1647:;^����jZcic]NAu�����U=���cgx�����]k�����סT������mV����̱Oj�����KTVNOTSX]f������RNJHGJMMZTHFGMQY�����zF|��{kOX_VD,-U`RT^YXW\ktwpdJMSTSUVSBg��������������������Q9]WL>5128dx��èʵ^CB7335421'Cn���h����F'FOJJMPUj����an���gJMNI#		5<9:Sx��p����p>:85.Bc�����())/@���9j��-##(7"0>L_ZMCMXF@D?%
+
+
+
+
+	
+	
+		
+						
++h�I)($$AFNYegJT\L.%:CHJIJIIIIJIIHGC:@06MV>769DWjmlszvox>-/1)(3=hz��}uv}T"	*0159;@DG>4,)-4Ub6C������f+6/~n.9^��ľ~,/Q`������������������������wXkv~zry���������|c6/9~��r<),4/:T��TS]h�]YVWYakqihjt{zt[+%(+<`mfdUZ9%$?l[s�wZ6'+3899Ty��r>316ERPPSLv�pRC?e������������;54<A:25?<34?B7%:~��b]i��ze]\\Y��cPE<887PXT_hgYqZ-63=X[^awYHIA5Af������������~jkq|���������������������������������������������������������������������������������������������������������������������WVVUUSROTRRRSONPOPQOLOLNNMMMMLLMKMNNMOQPPQRQTTTSSTTURRSUVTRSUTSRSRSUUTWUWVSPQTUVQTURURQTSUQQQMMMJHFFEDA?>>==;;;==@?=?@BAADCCBDFDFFFGGGFGFHIHFGGGFHIGIGGLGGIIGIKKNKMJJKKKJLJIIJIKIKIHJKKMLIIJKJKJKJKMJKJJKLKJKLNMMMNNMMOQPPTTQRPOQQRTSTTRSPRROOPPNNONPLHJIHLJIJHFDHIIIHFGGEEFGFHFFGJHHKJKJLNMRPNPRPSPPPSQSVUTSSUTTTUSSSUUVUTVVUWXWYZYZ]^^`^bedbfijjjlnpqtuwxwxvvxwvwwy|����������������������������}|xuturtstwxttux{}z|~~���������������nmtvyzyyzxyvnlcgbgsi_^^[QNLNRX\^bl��ƼÝdh_YNNGObjr��>Lw�jZ_{�����^l�����ԠY������d\����ЯMi����GSTLILQZ`i�����yRMHHIMPO[UFHJPQZ�����nA~��~nOS\VE10Y^SZ^ZXV[lvxpfIMSQWXUTFf��������������������L?`WI=5004g��ħ�ˤv�A8544762(!Cl���k}y��?"*DKJNONy���~l����]JOMK!
+:;6Du���Qtu~�c<;85+Ed�����"),1L��t���)%&)8"1@Q_]PHKP?88D@D 	
+
+
+
+		
+
+	
+		
+
+@klC+(#%CFDPXRQXVI,$9HKQJIJFKMHSILIJ0CA[zr^=54=arnss`NGF1,01,3<@ht~~ypz�R#
+1204>GS]XJ<2.16XZ8Bfz��±Z04.1�a-C��ò�{B0Ca������������������������qLgw|ytw���������va9/@||s=(,4.=Z��[X]`|tbYVVYckkfejv{yqY+&'+8c�hVG7#$%!PnXs|sR6' .6;12@}��o81/8KPLH@Fs|vWBDl������������6439><98:836EK7&@���]]l��yc^`aY��XKIC:;C_][ghcZtL,66?Ninvk?EQA4Bl������������ykkr}��������������������������������������������������������������������������������������������������¿�����������������`bba`]]\]ZVWXVROSRRLPQOOPPMQNNOMLNMLJMMMMMNPPPQPQQQPNMPPRSNNPQPONNORSQTUWUSQPQSURWSRUUURUVSSSQTRPPSQNNMMMKJGEDGEEDHCDECCACCABEDEDFFFDFGEEEFFHGFFIHGHIGFIHDHHJGLJIHKIJKLLLJJJMLKKLKLKJIKLJIIIKHKGILKLJLLJJLKIKKJLNKKLLMKMMMMNMOOMOQPQRSQQQQPRQQSSPRTRQSQPSQQNMMLJIHGHIGGHJIIHHGGGIIGHIHIHJGIHKHKKJKMMONNOQQOQQQRRQPTSRRTSTTRSTSRQSRSSSSTVVYW[\[YX\\\abafklqtttwwvwyy{}������������������������������{ywvsrqqvwxvtvwyzzz|}�����������������|vx}|~��~{wqquvuuqpnpusqnkoty||�����dabOROIMY_ac^[[G\bi^Ziy�����[m�����؝V������\`����ΩIn����}KVTLIMT^_i�����uVOIJLNMQ^RFEGOS\��~��mI}��}iQU]YC00V^SY`[XV_ny|xjBLSSXXTQGf��������������������D?YUE;63/09w�������a/3534575($Hn~��y����=/IKMNQNt�uqn[����]HPOJ
+><:Cny�}mz���f9<82.An�x�ґ$**.Q��zr��p$"$)6	!-DW`[NGCC[[LZYP
+
+
+	
+	
+-jur?-(#$CGBHLMNLPF,&;IRyW\zcWSm����]/H_k|{lX=7Cr����Z;95-*-/1DBBcdm{ur|~J 	!7/-06EQWTJ:4336L<5:W�����K55.2�b*P�����9,9d������������������������jMitxxrt���������{f4,C_Z�y0'-0,7S��YXYc}zhXWUZ_jmgakw{ztS)&),7aU2Y^@*%%! NaSozwN:&!07945G���h80-:MKHJABk�UC=l������������:306?CC@;99>LH5*A���]_l�d_aX_��JGHD<?VlwkfdeYaS/45?Ck|nFDWg94@n���~��������xkhr{���������������������������������������������������������������������������������������������������������������������vtuupopmkigdgda_^YXSTVTWVTSSROPOPQOQPNNNPNMMLMNNONMMMMMMNMJLPMLOOMKKMONQPQNQMPSRQRQUTSXWSWVUUSUTTTUTSTSUSUURROPNNPRKMLKKIIGHGHFHGDDEEDCDBADFEGDEFFHHGFFIIEGFKIJJHGIHHIIKOJLLLMKKJIKKJJQNMLLKLHJHIKKKKJKKKIJKHJJKKIJKJKLKKKJJLMLJJKMNJLMONLKNQMPPPRSQSQQQPTRPRSPPPOMMLKLKJJIHGIKLLJIGIHJHIHIIIHIIIJIJIJJMMKLLONLONPPMNOPOPQQOQORSTSSRSUPOQVVTUVVYZZZXWX\]]dhjoopqsxz|��������������������������������~}{xwvvuwvwxuxzxz|}��������������������yxz�����~~xty}||}zwx||~|zxxxusja_X\[[\X^[YVXZ\aZZ^WV_jtzb^ku{{}tbp�����ѐV������Xc����ЦLq����zIXXMKMU]]l�����rTMJOXTMO\RHEGOP]�����iP���ziPR^[C.1_`S\bVWUbp��|jCKRQSSUUHo��������������������@DZSG<63/-)Cz��Ȧ�d.65354660&$Fm���O[���2:SOKMQT~����u����`MNNF
+>=8>U���yw���o<=:3/?k���ԍ *)/W~pan��j&$!'4.EV_[QHMt�cRajI
+
+	
+
+
+7��yE*($)CGCINONMNF+%>Iw�r��Xd�����Y:Zdhq~zk[DEms|�vX<85-+*./A87EJUieiw}9  ++,,089;A=40/2/?I6H������X/5,/�_.<�����{*-8g������������������������dKWfptsr���������za0,8`��c/**+-;V��Z[^s��kWWVW_lqh[kxywtU(%(,?]8R�_C*)&!OYZy�xV9) &28<43K���e51/:HKJMEGk�jSCEr������������;2-.;MONKIEEG>1+@���\_m��z\]`L^��KDEFDN^t�icbe`wX/34F]moifdkJ02An���~��������zmjq{��������������������������������������������������������������������������������������������������������������������������~}~�zz|{wsutnnighb^^][[WUTOSTRVQSLSOMOQOLPPONNMJMMONNLRMNNOLKJLJKMLKKLIGFFHKGLMMNPSSTTSSQURUUTUUSTTTSTTSUTRRTQSSRRSSQQROOOMKKIGIJHHIFFGDDDEEHHFFFEGEGHHFGHHHGHGIIJMLJJIKJKIIJJIJJJKKKKLLMLMNMJKJKKKJKJIGIJJKJJIGGHHHHJJIIHJJIJIIIJJHHGJIGLLKLMNLLMOQPOPRPPQQSRSQPQNNNMNOONLLLJKJIHHHGKJKHHIGIIGIHHHIJIJJKJJLMJKJLLLOLONOQQRSQPNNMQQPPNRRSVVVVWVVWWVX[[[\`bdgklruy~}|���}��������������������|zxvxvwvxxxxy|���������������������~y{|}}{vy|���|ww{���~~~{ywxxywutqolomggfda]\ju�uchv�vkoce[oz�����gX}�����De����ΤNs����uHWUJGKS[`k�����nSMMckTKM^PHGGORe�����bO~��ycOZfY>.5[^OX_Y[Ybq��|qGMQNNOROFl��������������������< C\XF<61...)P��ԡR/565454661$(Gx��ys���m-"CSKLOPS~����p����cHMLF	<;;D����W@r��j:<93-De����|!)+.Y�xw���q&#$),-AW_YMKRzoeXcdD"		
+
+
+
+
+F��xF,(#0CFIPSSPONE)$<G����|Xg�����Q5Znsux��yommiinhNBA9.)++*36586?R@Hp_3 +)+,/1.176./.02XS4>|�����F48'-�[*b�����x9-9k������������������������W(?Sbigq���~�����y\618���g4*,/48Y��VYYi��dUVWY^jpg\lz|wv`%%(-@PN�t\@*&&$O`fz�uW:)"%2<94:T���c415AJNOODHlkYGBGx�����������y72,/ATVUTRKB<8-+B���]`m��w^[T?W��PDGJDIcvofdcac{P.69Fv�qphYPB96Gp���}��������ymkqz�����������������������������������������������������������������������������������������������������������������������������������������~}z}ysqqlmihjcc_\^[Z[WXWWTTSRSQPMNOPPNPNOQMLQPONOOOMNLMJJIKKHGGHHJJFKJMORQOSQQRSUTRTVSUVURQTVTSUSTTTTUVUTQQSRRRRQNONNLMLJKKKJKFIIFFEDGFGFEDDEFEIGHIHIKIIJIHIJIHHIJHIKKIIJMNKKKJJKOLJKJIJJLKIJLHIJHIIGGHHHGHIJHIFEHGGGHHEDEEEFFGGIHHIIJKJLJIJLLMNNNQQPOPPNMORQPQQOONLGIIIHKJHIGIIHJGGEIHIJIJJFDJIGGGIJKJJKLPLNMJIKLIIIKNMNPPPQPSQRSQRQQRQSVWWXWXZ\_beiklsvssvwwzz{|}~}}����������~ywwvtuvuvvwy}������������������������~{wz|~|zvvy|����}yxz{�����������������������|{{wutqqsvtqrt}dfg`ceu|~{tja\dkwsrwn@n����ʊKr����uHYVJJMU]ak�����mSNMdpRMOYRHGIOLh��x��[N~��ycL]i[<+5X[P]\YYSbp|�|lGPRKJKVOHr��������������������6F[TH:4/,-./7q��T2866547454.#+T�{�dWel[%0HLKOOPz���~i����XOPQ=8;<I����qs���c5==72Lu����Z#)*-e������m%%%)/,BTa^SHUspkZskH"	
+			
+
+
+F��}?+'"1?HNW[ZZQJG)'>B�����nWm�����J#Cgzx{�����scidhf_SLH6,.==457BSCJkJ.,+).31,.:?==:1<\84?������914(3�Q(r������?,<m������������������������H.=PTYgy�~|�����w_93@���d1),58Av��VUYr��fUUUV[onidm{}y{c#&(,;Rm�J]7(%$!!Onqz�{_<)$*6:<3Bj���]45:ILPMMHIOGEACAy�����������u80-/7AGGHJC831*+E���\^s��r^\[@c��GBK[YfmvywjbagxG155L^MDN]XHG=6Gt���~�������vhis|��������������������������������������������������������������������������������������������������������������������������������������������������~�~~wxwttqokffgc\a\^[[ZXSUUTTTSQUSTSRQQPPPOPOQPNNLNNMLMLMMKMJLKKHKKJKNMOOORQPRROSRSTSUSRUSUUVWUTVUUTTTUURSTSPRRRRTPOSRONOMKLLJGHEFFEEFGFFGEEIHGGHGGIIJHEHHJIHGJIJJJKIIKMKIKJJGJJJKMLKIIKIIJJKHGHHHHGGHFHHHHFHHGGHHGHFEFHGDEF@CEECCCFFGFFHIJLJIKLMKMQMOSQPONOLRNLMLGKJKIFGCFGFHHIJKIIJJIGJHGFHHHGHJIHHHILJIHFHGHHHHIKJJJKLLLNOMMOPQROPPSPSVVWXZ^[_`ceijlorsttvwy{{|}}{{}|{urrrprtuttuy|�������������������������|wyxyuvy~������|yy��������������������������������{{xqkfijghhmqjcgcfjmxrqlaZSj|����SKkz���jN^ZRJPU\_o�����mRPOhoTNT\RGEJPJi�����_X���ybP\iW=,7X[O]YVYT`u��fIQQNKIZMKw��������������������3!GaXE83/..0=e}�r^P>55543344-$+Lx|��Zw��o,-LLJMPKu���mt�~��RRQO>=:=J����_����W9<=32Iv��~o@'++/c}��z��`$%$,=	,JVbaTKX��k_ubF
+		
+	
+
+
+D���=((".BIPbjrzWNI,(?Ao����i^g�����E!:cUf|������q}����zxwmD0>W<797FU?A^C.
+	 /,,/12-2C[gkK,.237S������N26%3�E(e������4-=r������������������������F".9C<HWo~�|�����v]B>T��sY3(,86A{��SX\z�~dWVVT\npibqwyyu\ '''.KuU<>'&%$ "e~q{��Z9)#+8<=5b{���lD=DPNOQQLIED@?@E������������u82./6@C>7=A4.-)%M��zZ\s��qc[Z=o��M@Wps{u���mabcgA143OZQ@7\POU=7Nx�������������sgit|�����������������������������������������������������������������������������������������������������������������������������������������������������������������}{vuqrmifbed_ZZ^Z[ZYWUTRUUTTQNOPRRRSRQOPPRONOONOPPOONPQNLLMKOPOOKPPPQQRTTQRTTUWXUTWVUVXUTTTRSPRSTTTTVUSTSSQRTTTRONLKKJMLJJHFGGIHBEGHGHFFGGHIHHIHHGHIIJIHIIIHHHJIIIJKKOLIIGHJHJKIIIHGIJIGGGIHJIIIGFGHHHHJIEGGIGEEDDBCDDEEFEDFFDDFFGFIIHHJKJIKKOOMQRKNNONLLKIHIIHFICIJJKKIJIFFFGIJFGGGGGGHGHHIHGGGHIFFHHFFHGFILKJIJKLLMMOOOOQPSSSSOOSSY\^acdegjjlnpsvuwvvvurmlnnkoqssrux}��������������������������~z{|z~��������{yz��������������������������������������zxvvtqjilmjfhjjibcbT\ahnjaTUBReqtsrRSgddcZZ_`r�����fSPNqiPMUaPGHINNo�����TZ���v\NWcN>'9ZYRXXWXWf���|iDQSLIHTMOv��������������������3#HdUE:41,,/H����oA54346565-".J����q����(3OMLNPU����xy���TQNM=
+;;;F��ngb~���\=>;46Ks����N)*+,\���ru�c&#'3N&)@QbaTJT��^eeYE
+		
+
+	
+
+	N��q:)' 2GK^r{~sONG+*?F�����qQm�����I,@MK\r������ozw������|I1IN8C@7HTVewC/	",+./22.1U|wwG+&)39_������U.3!7�F*W�����\'0Et������������������������C%3>C;:Eo������~t]I;Q�naY+*1;6B��OXWaqoaWVWX_kpmmuzyyrR ())+11,*(&&&"$gjj{��P6* *9@<?r����uF>IWSJJRTRPJGGIN������������n63,,8INKIMH600+'L��wbau��u_\Y3N��A?f�yux���j`a[\=77:Q\_dsq8he53Ly�������������qikr~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������~z|vtrooihaaddZVXZSSSVYVWVURTTQRROQRQQSTSQRQQOPMNMLNLOLNNMMNLLNPQRPSTTRRUUTVUTTUWWUVUSTVWVVTSUTTSUQQTTUUSRQRQRONQRNJMLIIGGGHGFHGGFCGGGIIHIIHIEHIHHIHHGHFGEIKJIJJKLJJLJHIGHJIHJIGIIHHJIGHGGHGGGGEFHHHFGDGEBDDECAA??ABA@AB@ADBAEGFGGGIIHKKLJLMONOONNOMLKOPMLIFKJIEGIFGHGGGGGGFBEFFEFDEEGHDDCEEFDFEFDEBDFFEGGHIMKIJIJJKMNLNPRQRQRQTVSX\]`]a`ehklkhfggghgklnonpsx|�����������������������������������������|wz�������������������������������������������~}xxwuutnjkgfdbdcd_\^[[^bdbh`]Rju}yxibceo�����]WTQgZPPUaUJJINSn�{���E^���p_OTZJ@*<[VOZZXWTh{}zf>RRJHIQLGw��������������������-(PaRD:42/,/=�|��{l514445554."0Nu}��]���}#7ILMOQY����lX��ÒKQPN<	
+!<=?J����}����_<?<74R����S)+*,_���vu�f"$&6H%!<WfcYJcylpkki6
+		
+	
+
+
+KmrM/)'!8IKWlz�vVPE&)BH�����zT{�����C,(9HR`swuu|ytk`mons|zm2-;26K@8F]mys>-
+(-+./450/Po{yD.&(29Q������?,/A|F,^�����Z-3Ht������������������������='5>B=<Eo��w|���|t\EC\�g�^-)3<7Ey��UY[\gi_WVVYcmrsptz{wwL '(+*('%$&%%$$(U^cx�vC8()<@GUw����s>:WqaGO[^]\XVRVS������������h;3--:TZ]]Z?385)(P��qZ_v��u\XX1j�y<EYdsyhr��a]_XP<865CTlsuJGhB04Py������~�����ndls}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{yurpkkhdda`^W\XXXYXWUTSSUTTTRSRSQSPOOOPNMNONMLLLJKLMNOKMMMNPRTUVWTVXVTVWUTVWVUTTUVWUSTRTUSUVTVUTRSTSUUQRQPNNNONMLLKIJHHIGFHHFFFFFIIHIGHHGEGHHIHHJJHIKIHHHJIJHHIJIIIIHHGGFIFGHGGIHGDGGGHGGIGEEFDEECA??>;<:;;<><>@@ADCBDGFFFFHGHIHJLNKJMMMNQNOPRMLOPMKJKHIKHHHHGIIDDFEDCCCDCCDDDECEDDDDCBCDDC@BCFCBD@BDEGJLLKONNMNRSPMRTOTTVVXX\^_]^_``abcdghikmsvz|~�����������������������������������~}zwz�����������������������������������������������������{|yvwtrongfgfghfebd[RYr�{qodc`j|}yurm[Z^a`\WT\gVOMKPWt�����Fa���p^JIIG<-?`XQ\XWVWenmqsYCPQJGHUIS|��������������������+#Q_QC:41/,/Ctrqwve302443664.#2Ok���z���_:ONKORN����ee���rLONL;		%>;;K����}����`9>=83X�����K&)*4m��pU��I&%'4C+<VdeZIcq~llM)	
+
+%-*,.,'''$=KQbx�fSNE$-CG�����rMx�����C!&9?WT_lwsv{yo[KIKMTR@*+(+4C>8Fjw�r>-
+'.-58:AFC]��u=,$(13P�����t-0.!>{B)|�����r50Dw������������������������?)5@@>>Cg�cm|���zrXHK[k��N0)398Dc��WWQb|eXWXZbmrngnx~ytD'))))('$%%%%#+S\e��jF9(*8F>Y{����d3;h�jSYcmongaadg������������]=7/1@W_c`TD==7+*L��q^`w��tc[Z0��t?Rat�qa��}OYZVX@789BHvxbMT=755Rz�����������pgkt~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|xronjfida___]UVXYTRSRPSTRRTQRRQQNOMJLNLKLLMLKIJHKIQQUTSTUTSSSSUWWVVUUVWVWYVVVVTUVTVWUTUTSUUSVVWTSSSUURRRQPOOPMJIKIJIFHHIGGEGGFEGFDEGGHGEGIGHHEGGGFGIIHHFFGIEDDFGIIIGKIIDGGHJHHJGFFFEECBB@>>;:8899:9<=?@BCCCEDABEDDEEEEFGFDFHGJHHIHKLMQPPPRRMLLLKNLJLLFEIGFEEEIEDDFCCBABA@>>>=;:;:;<>>@?<?>BCEFGFIJJJILOLNSQPQRSTURUVSNOVWWYZ\]_aacgnmrsuy{||~������������������������������{zwsz~����������������������������������������������������������~~}{vwyvsomlgeecjdac^_^q�oka^_[_ilhaXTcf]\WQTXt�����Fe���l\KHHE9,A_XNKKVUVdlmmmZELLIGHUHWz�������������������|'%Q[QB92.,,4i�����e5./2345780$6Wx��bmnAW@ >KLKMOW����qc~�x_MNML5(;9:H����g�}��R9>;54\���ò:)*,/h��xm�r1&&)3B&
+6Uke[O]��|WZ6,'###"%%*++0.)*-&>MT[n�{eXPE',CK�����nTt�����B!$DmhYTcotp|���udND@3.,+()3C99Ikw�n:)	%/7NUV`eS[�m<)#)36R�����o830%;}E-���ɣ��31Fx������������������������1&,79:=Fq_[ct���snRURUz�jH+(091Ck�RY^{��\TWW[boqcXpyzyr> '))**()(%&$#",VXa~}eD8()8D9Wo|���O4Iy�eZar||njqrj������������TB?/2FWTWVSH>>5)/L��kX_t��o`]X6��qYXd��^p��mUXVWg?856LhliifEHQ84V���~��������neku���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xtrwrnhghba[\ZVZUUQWTSQQSRQPSRPPPONLLMMNNPPRRQSRQRRSRSTWVWUUVVUUWWVVVUUWUVVUTUVXVVVVTUTRTRSURTWUTTTTPNPSQPPLMLKJJJHJJJGGHFEEBBEFIFGIGFEEGHGGGHGIGEDCCCBFGFGHFGGIHHJHJKIFIIGHHGDCAA@>=<<;>?@@AABCDGFCCDEDEFEBCEDBBBCBDCCEEFFHGIGHIJKJIKNPMMMLLNLLJHJGHGGFFECDBBAA?<8944532259:>=>?@CDCCDGGHGHGIJKLKLLMNPPRQRSQPSTPSSRSUVWY\]_cefmpnqstx{|{~���������������������~zxvtz��������������������������������������������������������������������~~ytpifg`dceb]d^a]Z^]fec_SQYdjie\XUXgos}yfLe��}i]HGJH:.DbWKJMVTUiyyxy_DOMHHKWMW�������������������y%&TZP@82/++2fy}��^2011234870"5GEJWNYYOM<@MKKMO]�|��lZzyynROON26>::E����^~v~�V8<;48W�����,))*.o��cEOJ-'%&7A% 		-Omi]Um��w`YB6+%&(+)&&('&5F>3382)#$"#()*)/-(,,#?ORg���r[SE*+DO�����tS�����n64Pqna]_nsmn�����ufS?30+&(27:?Um{�k9''2Ky����We��l<+"*49l�����=07'>�C-e���{�v%.H{������������������������)(0469CUVle_h�f\CaNCkRJ3'(075F}�}OXZx��^UXXYconYZqyzwm?!(')+))*,)'%$#+P\g�c>6( 
,:<9Rhy���L4J}�b_n����uy~�n������������^WA.1=FHIKKA9:2(,I��f]a|�wj_YV;{�m^`y�fdw��~RQRan>979Umpro_HmL38Z����~~��������mfms������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}zzuroglhfbaYYXTYRRTUTSQQQPQTSQPRPOPRPSRQRRRRTSTTQUUUTUUTUWUUUUVUVVVTXVTWXVURTTSSSRTUWUUTVRRSSRSURSTPPOMNNPKKJKKIKJGFHIHGHGDHFFFFFHHGGGGFFDBEEGCCBFGGFHGIGHIIIHGHHIIGIIGEFECCBDEGFGFFFGGHFGFEFGGGFGHHDBA??@A@CDCEGDFEDEFGFEDBEHIHIKKILKILKJHHHFFCAA@BCA?><866333578<=<==>ABBEECDEFDFGGGHHHJKMNOPRPMQQPNQRPRPPPRRSRVVXXY\abbhjllnprpruv{{|~|~}}}|}~�~|zvuuu{�����������������������������������������������������������������������~}{wusqnomljffhhebgeb_WZZ`e[[[SX[lnlg]]Uz��|hWLKKH<0JfXMOMWUXp}���^HPMGHNUMT~�������������������p ,WZNA91/,,,9>IP[\B./0024574- 5FKMPMOOOJ;=KLMPPW����a�����LNOS/
+0:=;;Q{���dv}��O6<:4;V�����+*))9~��}oyI$#(>D)#	#Kki^Wp���xyTM:3, %/6:65:<8;K`NDCD?4&+155.$!#%&'',&%'(**/,())%ESTn���s^VI%-EH�����kV���y�[4&c�vablvqogx������sT:2,'+37BQ]oz�a8*	*9[�����Vh~yg>,%-55t��ê��@48&?�D1?x�\e�@'1L|������������������������&*1344>Rm�W_��sPB7eC0528.+*268G��sNY_~�|]UVYXdoraasyyxj=#()*)(+<A9,%%#,Yeo}�d@4(0<=<Mdy��}G9Ntxg_�����~���m������������aW;.3;?==:>?<<5*-S��`^`m}~i]\S7z�paanjm|��}VNNa[570;HMWY^^[[@35Y���}}��������kfms����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{ywolmlefa`Z]\\VVXYWWUSSRORQPOQRQQSRQUQPSSSSQRTUQUSRSSTUURUTTUWWVTSSRRRSUUTTUTUVVVRTSVPSQRSRQRSTTQOOPRQONNNMLLKJKJHGGIGGFGIGHEEFEFFIFFEFIHGHHIHGHHGFGIGHIKKJIGHGGHIIGGIHGHIJGHFLHJIIIHFIGHFDDBDDBBCEEDFFEEDAGECEDFFFFCDEEEGHHKJJHIJHEDAB@A@??>>;;<<<::;>@>??@?@CBCABCCBBDEFHGGGGGHJKLLLILLJKLJJLMKLNLQPQQPOLNTTTVWUX\\b_dgijkkporuuxvx{yz{|xvuqptw|�����������������������������������������������������������������������������~|yyxwuttssqppponnjc`abb^cbboibb]_f���x]TUSPK<<\fca^XZVZm}���YJRNKIOVJY��������������������l"2Y\M@91.--,,+-/382/--/245670 7FMNMOPPNI< ?NMNOQ_����b|���HNPL-/2=;;L~���Q\���H6:848`����k)*)+3y������I&%(GC($
+Cond\t���y}vtJKB5$ ,:BHDDFGEHUlTGLLH<+7;;=;3+*-374770&'&+,/-'())GURlw}}n\UC%.GP�����]f~�����<!H{|jcs�qome������yV;0+-4Bcaaltv\>-	6AJZeq��QjvoV9,'158c���~��03='F�8,2?O:GJ+$/M~�����������������������~&#-69229N�uRt��oA6;Z>3>EE:2.355H��sQUb��}\TUVYfprahu{xtj=")''(*HpgS<'%$1jiv��[C2(1>=9Ff���F9Jjsqw�}�����~b������������_N8/2>HJIDHB9;4'/^��Y\dy�}g\\O4|�vbXnu������aSQ`U694=UbYVW_TFA69\���~���������}kemt���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~�tqrnie`a`[[ZUWUTTTTSQTSRQMPMOQOMQTRQRRRRQRSRPPRRUURTUTSTUUUVVVVTTSTUVTVTUUTSTTTUSSVVRPSRSUTRQSTSPPPNONLLLKIKLJIJFGHHGGHGGGGHHHHFGHHHIIHGFGHIJKHHHHGHIHHGHJJIIHIKMHHHHFGGIIHHIHGGEFFEFFEEFGFDGFFEFDDFFFCBBBCDCCDCDEDEFFGEECCCA?<<<:999:::<=ACCCEEDDCCECCCDDBCBCEDDEEEDFIKHFEEEFEFGEDGIIJHHEHKJIJLIKOQRQSSTSSUSX[]^__``dgiklnnpqonnllrx{|~~~~������������������������������������������������������������������������������~~|zwwwvwwwxutrruqoomjfi^dcedy��fX[[VRLDRhovsm`]Y]iv{vgNLXSSPQUHa��������������������e!6\]N?6/,-----02130---.03677/<IKKLMOONH8 ?TMMMPUv���Y����gLNOK*#-<:<?���ucy��e97;818a����\('*+.p��s���7%$#:4'$
+
+Aspiax���|��}_bXO,%5GKPJJHNNM[nXKLMOA5==>@>:57;@@??=;#')*.+/-'*,-FXYhlqobVUB(/DK�����wc������<5etk]dhpth_s��������_<.+5Trmfiem[>/::3?gp��PlYJK81*145Dr�yk�V-6EV�5,3G[XM7,(/N������������������������|"'3<<89CsyY_}��rD7=N66DWK=40425G��rVUe��u\XWU[foncjuzyth<$)())>��hVG)%"7kek|zZC2*2=<5<t����B9?Zt{��~�~{��nc������������QD700CRW[YQ<6:2&,W�qY[g��xc\ZPD��rVRo��t���t^[RWK.85?KLNTnoIF=7;_�������������|kamu����������������������������������������������������������������������������������������¿��������������������������������������������������������������������������������������������������������������������������������������������������������������}|snpkgdbc^\[[[TNSRSNSRORQNOQPRRRPOPPSQQTRQQNSSRRSTUVUTRUTVVVUTTTUUSTWVWVTUUTQSRUTTQRTSTSSSTRPQRPQRQRPOOMLLKKLKJGGIIGFGGHIFFHIHHGIGGIHHHIHIHGGGGIJHIJIIGHIIHIHIHIHIIIIIHGGHGIFDFGGGFGFEECEDFEECAABBABDEDCCCCBABAAA@ABA@@=<?=<<=>?@AAACEFEEFBGEEEEEFGD@BDCABEDCEDFEEADDCDBCDDCDDABABDDEEFFHJLMLNNNPSQRSRSVUUTRVXY[\\^`cedddeiorsuuxx|}~�������������������������������������������������������������������������������|{zyxwvyvx{yyxxxwwvwvpirwmgneb`\TQ^nuong]_Z`kif^XJXheeaWWLg��������������������a 7]^P@50,,+++-123541.-/02578/<JKJIILNMI6DRMOPSg����Y�mOjCLMNH',><:Q���z_TcbP9;963?Y���|])'(*4c����r\#&$%03*%
+Avunj�������qidaa-*?QTTPQONPO^{ZQNMNC:AAAEFA>?CECA@>:%'+0:74.'**/IXXlssoeXQB*3GQ�����}v������0-MijZPMp~ki���������T3.8Rzxch{YQJ/231e���zi~{tnj>(9<7:@MTU]704;Tn2-3VvqX6)'1L������������������������v$)7AA??Jppieqo�uH986*.4<3///208G��pVWf��yVYXSYhpmgju|zwi7$(**+Ne:B`M)$  5TVa|�aH5'"6@>8Z����sB<B`������~}��pt������������RH7/5E[ba\M>::2)1Z~m\Zcy�wh^YVX��oS\r��j��~ve`PMA463<>50C�]5C:89b������������|kalv�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~zvspljge_^ZXTTURSPOROQPOPQQNPPPPPLMPRPQRRTQQQQRTUUUTSRTTTQUVTUSSSUTUSVTTVTTTTVURSSQTSRUTRSTUQSRQOORPNJLMKJJIJGJIHIHHFIHFIIHGHJIGFEGHIJGFGFIFDEHIIEFJFFHIHGJIHHHGHJHFGGGEGHFBDDFEEDDDDDCCBACDBCDCCBABBABA?BCBBA@AAB@CDCCDDFFFGGIHCEEECEDEFFEFFEFDECCADCCDBB@BCBBCBACCCDDCBBCEDBCFDEGFEHJKKKKLMNNQQPMNPQQPNNTRTWZZY\]bcejklortvwwyz{~���������������������������������������������������������������������������|{|z{xywuwxzy{{}}|~~}|yvtpmljfcghedd_bb][TUUm�usg]YUk��������������������]5WWK=61.,*)+.013311/.//348:0<GJJIHKMMH3$HOLNRQYmuxfMqULMMPQOI$.A=<Vr��\r��|J<:63;^��YcY('()/N��zV%(&%$(74%'	Aqurv��������oed`-0?O[ZPOQNROfw\SPPN@5??FMIEABEEDA?>9(*/BkkS1)+'+PZ`��|v_RC#3JP��x���������c&$@dm\MY�whx����������pE18cyle��qg�yK062b���jv����y80X@6:;Sd]O;01.Jb1,2Zmp\5**2L���������t��������������n'4?EEDFUowsff�eG>82&')*)(')--8H��jSVk��pWYXYXlrjdfuzzvm9%()*1NN<;^:&%"
.IQk��_F3=>F�os��q@?Jr��~�~���~��n}������������VK5-8O[WYVOD=;2&2\{tb^g��|i_\[c��fUVu�~[x�{h^OLA651<BIT}z>DVF9>e�������������|icnv����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|zzvnhfifd]YYUWUSTQQQNOOROPQPQRRQPQQRQSRQQRONPOPLPTRRSTUSUUTTRSUTTTSUUQRRQUUQRRTTSVUUSRUSSRQORTSQOOQLNNNMLKJLJIJHHGHIIGHFFHHJIIGHIGGFHGGEEEGFEEFEGFDGGFHHHGHHFDFFGFGGEBEDEEEEGFGDCDBEEBBBAA=@ABABAAAACBACDECDCDFFEGGFHJIIHIFIIHHHGGHHHGGGDCEBBCCDACAA@BCBECAEFECDCDCGEEBAFFFEDFCEEGGHHIIJGJJJKLLMNNNQOOPQSUUVYY]`bdiiilnqqrxxz}|�����������������������������������������������������������������������������~|zzx{zzx{{{}|~�����~}|tolllnrl_YSU[X]c}lg^[XRw��������������������[9WVF<60--*(*,134422//1033680<HKIHHMNLJ3.NPKOQQNMMJOKTTPLLOULG$-<9<I����p����F>>72;d��GZS&(')8o�uH-*(%&'*7%"		Ewwtv��s����njia,3IUa_STQUUVfw^TSRO@!9@EJJIEA?CCA>?@9!*,4p��f1++&)QXg�}~�s_]E 9SO������������}3"<fq^OXthfy~���������L?DQa`k�wZ��xP=</d��tw|y{m6Hg:4:Ks�s]A01-Ge6-3VilP3,*6T��������}���������������j:CGLGCH[�yUb��ZRD8-&''('&('+)6O��`RYkvscXUXZ`lpkckx|zwg4#()).DIPXK*'&"
2M]w�}`C3'"49?OdQw��qAC]�����w������x������������RH3.6GQSPPPF=<4*6^~wa\h��~d_\YH��aSP}�xl���{cXOU@074=P[g|_`d\@4Bi������������yfcnw���������������������������������������������������������������������������������������������¿����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{|yqojgea`[WXUTRXSMRQLPOPPNPSQNOQNLNOONQPPPNOLOQQQQQQQSRSSSRPPRSTSTSRSTVUTTSRTRQRQQSSSRSSPQSQRQQOOOOLMMMLKJJJHIIHEIHHGHGFHEGGHHGFEEEFEABDBCDBBEEDDFDCDEEIHIFFIGGFEFHEEDEFDDEECC?B?BA@@@??>@@??ABCBBCBCDC?CCCDDCEEGEHFGHFHHIHGHEEEEDFEEDBBCDEEDEEBDDEDAACDDGGGFDCCFEFDFEFEEEEEDFCEFGEFEGIIKLLLLKMOMQRMQNVUWTY[\``bddhklpsuvx}}}|~���������������������������������������������������������������������~|{}|{|{||}}}{{zyxwwyzxsrqnkljcchb_X\Xc���������������������U<YWH=61.-+*+,342334300.1123.!@KJIFJLMMK17SPJQUQQSOOOOPPNKLOSLK	0<8<P|���d����@<;7.<d��V]K(((&-213('''%'&')
+F|xuxy�������pmi^.7P[k`XVSUWYkz^USVRB8AFECB>:8:==@FF9)+2���\-*)%0RZkyw}{i^_D"BX^�������x����y-%$#2YndPP[Zfkjv���������]Xjecds{i���SS A26s���hgxux�m8HK:7:P}��g>13.Qf11:NZR70-*4S������������������������qNFKLFDIYym]R}�ykXD4)%(()-/-'(&6S��bVV`dmfZUVZbjqoapx|zub.#(()):cpc9('%!!9Ra}�{_?4%9:FAFj���mBFc��~��s�����wmr������������P?3/6A@<=BOG;9/)6\{k\Zg���ia[O4��`KR��vr���q[TKJ:264;LSek^PVW:3Ek����~��������zdgoz���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}ogkic]]YYWSXRRQLMMMNNPMNPPPOPLNLJNNONOLONOONOONNPQRRUSRRTSVUSSTTTRTRRSTTSUURUUUUSSSRQSRTQOMNPONMNONIMNKJKIHIHKHHGIGGEEFEEFGECDECDBCCDCDECCCDFEFFFEFFEFDEEFFEDEFEDBEEEDBBAB@AA?A@ABACDCBCDBBDCD@BCECDDDABCDCCDEEFEFEFFGHGHHEFGHGFGGGIGGFEECGFGFEDDDDEEFFFFFGFFHEDGFEDGEEDEGFFGHIJJKLKNPNOOQQPPSUVVSSWVVZ^``bcfjmonrwsx}|~��������������������������������������������������������������������������������|}}|{}zzyy{|~~~{xrqmklghx���������������������QB[UE;40//+(*-142334321.0135-%>HIIGIJLNI/=SNJSSQRQMNPOPQOMNPQMI/<9;Ch���o�r{Z5;;70@e�|_dJ*(((**'((''&&&$%'	K~zx{��������onla08M^o^VVTUX\lx^WRTSA5@DABD?63168@EF;+.1���\0**&3RZi����raX= JS\������������o01721OgcZZ`agejw���������m�������|��Tcg,-7ay~�h^����H31557:U}�{^:/0.KV/;`dZMH>.,3T������������������������qAEIGFFJ]i`YS|�~kQ@1)''(-7=4(&'8]��^UX[n�rUUVY`jrmasw|zt`-%)*+*Iyvf4'&$ !=[i}��aG5&
<@XEdv���i;D]�������z�m[Xo������������L@5/5>:<>Z]@84,&7Zqf[^n���maZKA��UFQ���v���tZPNJ=566;2>imhFM[D3Eo���}�������{een|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vqqki`]YYYRONLPIHKMNKONNMMNMNMMMNLMMMMNNOONPPNQSSSSTQTTRSUSTTTQSTSSRSTTSSTSSRUSPQPRRQPRRQQQPPOLNNLIMLKJKJIHGHHHHGGEFFEDDEECEBDDECBBCEBABACABCDDDDDCCCFFEEFDCFHFCBCCDDCCCECDCBBCCBDEEGEDDEEDDDCBBBCDDABDCCDDEFFFGGHGHIGHKKJKKJJJJHIJIGHGFEHGGGHGEFFEFDDDEFDDFDEDCGFEEGGGGHIIJJKJLLJJLKMPPQPQQPQUSSSWWY[^^^`bdijioqruwwz}~~���������������������������������������������������������������������������~~yzy||~�����~}zzvvrx���������������������L I]J=6/-..,)*,.2233422103457.%BJIHFFHLMH-#?UOLPMPQPQRRRPOMMNNOMG 1?:<V~��ghi8=>::<71=g�fQPD%()+)*()*'''%'&),
+Lyyy{�������}rona, 6L^pbVUWU[[f|\ZVSSC3<@@BFB92259<AA8$,+2���W0*(#/TYm����q`X=%GL\�����}w�����t,8O<.Fjmcehgjhp~���������r������y���kvA)/>z���qj���n2,'147;YnukN:20'DT1]��yyyR2+5[������������������������eRRHLLKO`id]]vuk_M;--)&)6LR2(&(8]��\SYk��rVTVX`jrnjqy{xp]+#))*-V^k`:)&# &Khm{��gM5*':P^>k����j>BV~�������vuNHLv������������MD3,/16EcjI234.&9^i^[[j��zoc\Tc��LBMy~r���sTRS`>865:;w�j^ZeU?8Fs�����������ygemy���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~zrqjkf\^WXSPSRNMJIKLOKLPNNMNONNOMMNNKMOPPRRPPOMOPQRQRPSSQUTSSRSUTTSTVSQRSSSRSSSSUTQQRRQPRQSPONNONOKKLLJIIHHGGFEEHFDGGFEEGEEFEB@@@A@>AABCBCCDDDDFFECDEEEEGEDFEBCFFDDCCDCCDDEEBDDDFEFFDDDCCCDAACB?AADFCBDEFBDFEFJHIIKJKKNNJLKLNKLLKJJJHGFHGFHEGGGFDFFEEECGFFDEFFFFGFFHGEIIGFIHGLKKJKKKMQPQPQQQSTSQUTTS\Z_`a_cgikmputwvwz{|~����������������������������������������������������������������������~|~|}~}}|}~�~{|yvnl���������������������N$A@4.+)))*&')+.0233413245579/'DLJHGHILNH-"DWQNNOOPPPWZ[TQNOOMPNE	2?:;@MNQBBD8<=<>=5/?i�I#BE')))))+())'&&&%)2Mwxz|����t|�zrrme*5M^maZUVX[Zo}\ZWVTG29=?BDB=:89;;?>5',+5���O/*'$;e[mwnjxs[V9!<M`�����pj��ȶ�{-?T0,Kdb`Z\fkm����������^Wz��}}����fC-.1:w���s~��wE,**566:PZ_VI;3/"Qc5n�����I2-2Y������������������������p_MSYPT]ltl^aecYC502('+7JN/)&(7Y��^VXp��gSTWW`jvofo}}xq[)"&(+.Kf~[6($$ 
&Xoep�aL7,,;QX;w����qRJ[�������m\JMNz������������T>0,.8MjhF0296,&:]gb^^r���qd_\n��SFSw�zs���x\[]gB9769ZzT]^WVJ;5Gu������������w_cnz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|vvrkeda]ZSVWVULNMNQONLOPOMLLILKHGLKJJHFGGJIHNNONNPRRTTTTQUSOOQRSSRTTTTTSQVRTSRTRRTQQQRPPQPNPPNLKMLMJHHIHGKIHFFGFEEEEBDDEBADBFBCBBCCCDDADDCDCCDEFCDEEEEFFGFFDDECECCCCBBCDEDDCCGGCFCBCCCDEFDDDBC@@ACBBCCFDFDEEFIIJIIJLMNMLJJJJIJHIJJIIHHGFGFEHHGCGFEEFGDEECCDCCDDDDEDCGFGFGIGILLLLLMNMONRSPOUQUVURWXWT[^^bbefgknqquvyzz{{}�������������������������������������������������������������������}zy|}|zyyysj���������������������H",##"$" ! !#%),-01101243588-)DJJKJILMLF,(BQONPPONPWhmj_QOOMMOM>4=99;979<=<;;:<=84/Bl�DJH((()).1.0-)('&&*/Qyyxz����ztyvrqoe,!6N]nd[[XX[_}�^YVVTB179;<?>;===:;A@8+/,8���P/+'$EwVk~���sUT7!>Kb�����nU�����s)%,&3EISYPT_iy�����������V���sZ~����S2#40A���eb[ZU6,(.77:MhWYSL:3/!
][)a���|H,,2[�����w������������������b>G]aZ[fx�s]tjnS>016*'+:QJ.'%)8b��\SZr��cUUWW`nrmci{zxrW$#&'),Hsk\8)%%!'WdS\mRF6+.<OYN�����vUG]�������w\UPRQ{�����������>4.-1Fb`@84991()>Zng^^s��~jci\d��UETipzt|��j`[ef?265<ZM';WRFN91Iv���}��������r^gp|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ytsslfd\_[RWSNNLMMLKJD@A==C@>==???AEEGGILONNPQQQSRPPNQSSSSQSSSTRRRSTUVRQSTUTTQSVRQSTSQQQQQPONPNMMMKJKIHHGGFEEFGEACFE@GCCDECDECDDBCDCCCFDDDDFFEEEGGHGEFEEFFDCCCDDCCDBEDBFDDDDEFEDFEDBDBAAA?@@@DBB@CCDDDEFFFDHJIIIHKKIIJNMLJLMLKJKKJIGJIGHGGGIHFEDEECCECCCDB@DCDEDEFEFGGGFHJIJKLLLMMONNOQQSTRTTSUSVVVXZ_]acfiiknrtvxyy{}}~}����������������������������������������������������������������~}}{z|tm���������������������P?HE@81*'&! !!!$%')*+/0388,/HNNMMLNNNH-)DUNNOOPNP_u~udVOONQON@	
+;>88;==;:;;<;===73.Ey�=(cR+(&+2?:=<51,'$%,3	S�yx{����|yvutqqd*":N`me\\\Y\\��^ZYWVB1666788;>?>99;=71..;���Q.)(&>bUf����lYS7!?J`�����\`�����W) .?@=N^][_h������������n��KJn��tw�P*#9.Fy���XeqphF0(+44@wytzmR730!J[0u�����N,-5]������������������������F%Fbb_\dy��iWrs�N<-0-*(+:PG,(%'9o��ZU[g��gXXVWamvlcjyzwlO$'')-H^dc<*&&"(N[OXpWKD8%.?QWXl^~��hFAb�������lRTQUS}�����������x53./8KXMDEC;2/*+>]�p`^q��ogehM\��TEWi|�l|�wib[_V;363BTXRP]\UU66Ot���~��������scgq{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{xrkjje[YSQNGDA=95:6766<;<=?@DFIJJLNLNOMNLMNMMPQSQPQPQPRRSUQQTUTRRRSSTPQRRSRSTQQRQRRPQPONNQNNLKMKJIJIIHHHHIGEEFFDDFDEEDCFDCDGEEEEEDBDDECBDCDFCFDDFDCCCDCEDDBABAAACCEFEECCDCCCB@@??>@?>AECBCCEFCCBDDFFFFGEFEFHIJJJKKLKMOMJJJJJIKIIIGGHGFGDEFCCFECBCDCBCCDCCCDBBCDEDFCFEHIJHHJLLOOONMMMOPKNOPRORSUVWVV[]bddgjmqquww{}|}}���������������������������������������������������������}{|vu���������������������`k{wplf`VLE@:1.)!! "%(-12*4JNMMOOQPQG++EUQOOMQRUeqxqbVNQQQPN<	9=8;<==<=<?=<9;<860I��E^�K)*(-9>AJJ>6-&#%.7
+Q|zz{����{wvutqob'"<Obnd]\\\\\~}U\YWUB.4234469;>?:87935/.@���M/)(&A]Whz���p`T5$?Js�����\^�����j++C=77F[_ZX|������������nd_]`{~}���S%
+)?,E��k[x��o8.$)45K����vP61.#T_0s����m=,+8a������������������������A'G`f_Xas�k]i�|K5--,*)-=H=,'$&>r��WY]x��eWWVXalti[mz|tjL$')*.Tkte4&&& (NWLU^RIE5(-@LOLH:k�wB=De�������gYWWWS������������k43.-5IWVWXI91/**Cs�g__l�~efc[Ds��VG_x�|mv}sjaXRO8451@dkjgd[_A44Mx�����������regpz�����������������������������������������������������������������������������������������¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||ytlheaWRIDB<@8>>CACECFGIJIJKIKMMNMLMMNMLNNNOSQRSQRPPRQQPOOPPPPPRRSRRSSRSSRSSSQORRQRRQPONNMLJKJKJJHHHHFEFEDCCCCBEEFECDDDCCEDCEEBCEEABABDCBABEEDBCCDCDCCCDBFCCDDECDDDBA@?AAA@DDB@CDDCBABACDCBCCCCABCEEDFFGIJIKJHJJJLJNJKKMKJHGGHIJGHHFEEDDDCCDACCBBCCCDCABDEDEEGEFGFEIHHIIIGGHHHJKJKMLMNOPQQQSTTWZX]^bdgkmmnrtwwy|}}}~���������������������������������������������������xz���������������������h��������{vqf[UMF?60*$#$#(,/)ANOPOPQQRPD(/MUNOQQRST\bgg\SLOOQPJ9	4?::;<=;CCC@::9::6/N�c��B(*(+8@OUJ=9,&#%07V~{x����{wwvsro_)"@Qetd^\Z^^`wxX][XW?,1033777:<=9558171.C���I/*'&@dUp���{u_R2%@Hw�����`Rs����d%'?>367<[a[`�������������TY�{}zs����Q%/J3Ikotti�aC/,&,27Lv���nB30.'
e?0t���[B3+-7e������������������������<,I]g\Sa{�{n^p�]A1/26)(,9=3)("%Cv��RX^x�z`WWVXantj^nzzuoJ $'')1XihS-&&$ .OSMPVKHF7(0?JPFECh�_@@Gg�������g_\\XW������������g63./:PQORNA863)+En}i`al��od`[@��ZP^�}ov���lYSIL9444<m�v`TB=115Ox������������qaiqz�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zrnka`[Z[TPMIKIGHIKJIHKMKJLKLLMLLMNLLLNMNLNMMKLMLOMLOOOPQQRRRPQONQQQQPOOOPPOONMLLLKIIJHEFGGEEGDBDCBCEBCDCEDCDDCDDDDBCBCCAAAABAAABABECAADECFEDECABBBCACCB@CBACBAAABCBACFCDCBCA@CBBABCCCCBCC@ADCDDFDDEDEFFIGGHIJIIJIIKKKKIIHHFEEGGFFFBAABCCCDCCACCC@DBDCDDCBCDEDAA?>?ABDDECEFFFFHFHKJKIMPNOSRPTTUY\_afdhloprsuwy|{{~����������������������������������������������vy���������������������b����������������xsoh[ULF@9@E?1Ag_WSNRQQRPG(<VZUSSRSTRQUSQSPOPNSPI6	3>99:>?JSSKA:;::;62G�����?()*,9H\ZP?8(%$%,,Xz{z����zvvvtrqa)"ASgrc[\\]_`{za\[XV>*./01578:<>9777-6..B��y:-*)&A`Mt����}\O2(@Jz�����Yd�����[*)278=;9Tegepv|����������Ca��tpyy{yH(=L.@muz�feSDZF+%*16J}���L=404*306n���D02--7h������������������������;4IZgXK^lruhQ_WL233C:)))4=/30#)?m��OR\u��]VVVW`pskgqyyvmG
%((+9`f_E*''$!2RTMNQLMG5(2BV_>EE^iQ=AKf�������rnkb\Z������������h72-08AAED?<895./Ir�g^`t��rbaX@~��YUW``s���bMKBM:5779YcCGYL1//0R{������������q_iq{�������������������������������������������������������������������������������������������������������������������������~zxvttzz����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yvqnkid_\VSQONOPQMJJKOMHLMKKNNLLLNLJJKMMNMMMMLMMNNMMOJHIMKKJKJIJKKJKIJHIIHEDC><<<;;:;:899::;;;>>BDEEEEEEFFDDBBCCCCDDBBDBBCDCBCCCCABBCCBAACBABA@CBACBD@CCC?CB@CAA??@?BA?AACCBCDDEDACCCCBCCBACCDDDBBDECDDGIHJJIIHIHMIJIIJKIFEEDFCDDDEFEEDCDDBCDBBBBA@??<<8766798;:=@><=>@BEEEGIIKLJKOMLNNPSTOOVXY\_bdghloqpuxvxxy{�~�������������������������������������y~���������������������b�����������������������}vursvu��yqkd^]\WVH3Urth\YYZUTUTRUTRPORSQN5	8<8:==I`mhTC=<=<;62N����|;()*.<P]]SF9*%%'+$\~{|����yxwvutqg*&AUhre\^]__^~y`\[ZU>./0./3569?@8686*2,-5f�xH.+,)CVS}����s[T3'@Fy�����RZ�����a&$6HURCE`yysv{z}��������@]oglsp}~��E-BG.@���]]f��D*"'38S���pJ@12>',5:s���7-3-*8j�����~������������������,2DWhVGXcpxnB;84244E<104CCCL:*1C���PU^v�\TWVYbssprxzzumF()++6\`[R2''$ 
3RXMRWPNI7$6Ch_?D@IYI>CRe����������ytf������������a40,0;HKH:49893//L��l`_x��ue^XC}��[UPet{���ZNGBI7887BmK,mvS<301V�����������mahr|��������������������������������������������������������������������������������������������������������������������������|ttpkiighm�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{ssnjca^YUWWPMKNPNPKPJNNLMNLNLMKKJHLKKIJJIKHEHIDEDGEEGGFGHEGDCEDB@?<7886565532222454468<?ACFHGGBEFDEFDDEDBBBCCEEAAACCBBBBA@DB?>@@A@AAB@A@@BBCB@@BBAAA@?>>?@@@?>AA@BACCEBCDDCCDDECDBCCBBBDEBB@DEDEDEEDEEGHHGHIKIHHKIJGGHFGGFFEFDCBDDBCBCA@??<97201-02569:::;:==??ABCDEGHHKHJKLLKNMMONMOORVVVX[\^bgefllntsuxvwzzz}����������������������������~v~���������������������^�������������������������������������~|wsj^���znafdY[YWVWXUTTUVTR7		BA:<>>TdjgVF?;==:5/L��m{p7''*-CS`d]S>*''&*!`~|{{}yyxuuvvvtpe*#DVjrgaa__`bz~^\]ZQ? /-///1257>>8654(/-,G���E/--'GUYwz���t]R/%>Hy�����We�����Z'+LdmcRc|����������������<a{t�um����F(?91Ppk~�Z_��k/)'38V���oNB317%185m��r6/2,+:l������������������������/.G`lYQXnwze:332341CIIELKLpT;.1I��|PW_��tbQVWXerttx|{zvnC'))'=jjgR.'%!
4[]IR\QLI7!8GhXCE?EWG>GTh����������~pd������������W5//2FTVI97=:94.2N~�e]b���uf]O1y��]Vb�{|����_UF?K1888IdFN�lR;139X������������icis}���������������������������������������������������������������������������������������������������������������������������|vpje`^^cz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|xrrrhab^ZVXMTTOOROMNNLNOONNONLKKLKHIJIHGGGGGFFGEFFECCCCC@@??>>;=;9:87767655569>BCGFIIJIIHIJGGFEEFDEEDCCBCCDCCBBCDDDBABAA@BCB?A?A@A@A@@BDAABB?=?AABB@?>@@AB@ABAACCBBCDCEECBCDEDCECCCCCBBBCAABBDEDCDFHGHHIHIHLLLKLHLHKIHGHIHGGFECCC@==;98588889;==<<<<>=?=>??@BABCFEGKFIKKMLMPPNLOQQPRTTVXVZ\Z^^egfklqorsx{zz|�����������������}zk��������������������Z�����������������������������������������������~}�smjigeeda]___[YG
+#FA:;=AS_b^P@><<<931Q��ami4((),EVhihR8*('''	c}zz{zvvuuxuuvtr`''@Zorf`^_`a`~{_]\]X5!,/1/13678:::843'1-,M���B,-0%H[^z����u`S0(BJx�»��]c�����U,9f}qmqxrx�������������=k���kk����?*#Hm>4./FYy��Gt��kB+")68Z���qHB511&;46t��q=14.,9n������������������������69Zmpd\[[Zo[7267710P^`\NQd_<<+,I��nRW]x�cVYY[ipvv{}{ywl@$((,=ICV;(&%" 8a^JXcLKH:(8Je_EEBSYE@FQs����������~se������������W;3.1BPJ?;C?8<6).Ly�e_c���ja^Q3���UQelu�����^YLAM9;61DUW}xT]523;[�������������jbiq���������������������������������������������������������������������������������������������������������������������������~xogb^][^t������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zyzrkgdee]WTSVMKKNHFIMOONNLMMLMMLLIIHIHGGHIFDDCEDCDCDCBCB?A@??AB?>??>?BDDGFHHGIEKILKIHGHGGGHEDDDDCDCCABBCBABAA@@AABDBAAAA@@A??BDA@AA@@BAAACAABD@ABBCABBBBA?BBACDDDCBDBBEDDABDEDCCADC@ACCB@CDDEDECCDGHGHHGJIIIHIIKJIJIFEEEECCAAA@@?>??=@@>==><>>==@?>===>@A@ACDDFGHKLJJJJKKMMMKKQRPQQROUUVWY[`acginmortuwz{{||~~��~~~||zv[���������������������}Z�����������������������������������������������������}y{xrllllluc!5UG:@?>A?>CB=>=<<930S��Pac6*().?PX\ZA7,'$')$
+d~|}{ywuutvvwvvrb''B]npf`]_`_^�{a[YX\8 -02345:;<<96665*5.+O���>-++$I\`{����r^S*)EGk�����Xc�����K/Qy�~}vdSTlw�}~���������>cqdbYh����64X��F,-->n��_g���|>*!7A9]��gJD736'2/;v��y27:26<q�����������������������:Khorlf`UL^G545730/K]VXMPWD<1)1L��iQW`��z[XYXZhsxx}|zzvl<%'*,234C.('&#"?[YO\\OOJ;.:Lo]HFEZ]GCHPy�����������rc������������ZB6,0;?49IQ@<@4*2R��n^^r�wfa]V+���KF[h��s���d\QRQ5:80?Xg�V[U3127[��������������fciu}���������������������������������������������������������������������������������������������������������������������������}ukb]\[Z`u���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}xuqijgd`ZU[XVRPQPOLOOMMNNMLKONLMJIJHGHIHFGGEFFFECEEFHFGGGGEEIIIIJKLKJLJIJJKJJIKLJHHIGGIGFFCDDDBACCBAAB@ABB@ABBAAAACECBB@@A?A@A@CCAEEBCDBBBCCEA@@BDBCCBCABBA@ACB@ACCDCABCDBCBBA@BBCCBBBA@AABEDDEDDEDEGGFGGIHIHIIGGFFEFFBCFDCDCCBA??@>??>>=>;=<<<;<=>?@CBDDDEFFFGGGIJJOMMLOMMNNPTSRRUVYZXZ_bdfglmoqrtvwxz{{|||}|{zt_���������������������|a�����������������������������������������������������������������i2+$6iv_MQICC><@B>?=:=:3/Q�mAe�P,(+-:ELNG=6+'&(3&	f}||zxvtqtxwxxvsa&+G\lqc`_^_bg�|b__[Z9"015::?ACC>95:;:,"40*P���;-*('J^^u����eWR$*DK��z��`]q�����A2_z~z�~pYLVfjbfw{{������:ivgbip���y69���B ,./U�pTch���p7-5cN:Rz��_LC64?&/78s��g7MXTI?t�����������������������|ETbkqkgf_TP:356510/QMSfHCRF:)&,M��_RW]k~oVVSWYjsx��|{vj8
&'*+*;TI8*'&$$B]\W_YKRO;*;Px[HEB\ZDCISv�����������p\������������_F2-3<:5RgS>=;3-2_��rW]l~{lc[R+��yEEf���g���`^SWN29623Qve<T61139^��������������nccs}��������������������������������������������������������������������������������������������������������������������������~xrib\[WW]s������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}}rnkgdY\YSTVOQSPQSRQOONNMNMKJLKLJHHGEFHHIJIHHIIGFGIEFHHJJJLNRONMMLLKLLLKLJJIJIHKIIIIHHGGGEDCBCBE@ABA@ACA@@@AA?AAA@?@@@B@AACBAACBBAAA@@BB@CDBBBAAC@?A@BBAAB@?@AAAA@BBAAAA@B@DC@@@@A@ABC@BBBAABBBEGHGGIHHFFFGGHFIIGFEGGDDDCAAA?@>>>=<;<;<====?@?>@?@BABBDEGDCGIGIIJIIKMKLNNLPQQTUXYYZ_^acceikmprsutuxz{yrm�­������������������{\������������������������������������������������������������������rigw��skg`XPMMTRMLG=FA7)8U5C��j3,-/579;870*)(*/!	#f~|}{yuortvvwyur]%(G]lqb`_]`ah�u]_b^U5"048<>B@><759A?C-#7/.O���5-*((H[d��{��`UL*,DNbMCkiN[_�����M7Wkvzz~�sWIYccfppl�����}9p�{~ms���y=7���; 110Mqsyyl�u[:0Ir@8S���XLC87:!
=5:z��iIu{M?w�����������������������WUZendad\SH=9:74650;@VNANYC-''/R��_UVZdp_XUVX_ou|��}~{u^1'&(*8kgNA-'(%&AjbW]RKTN;.;S|[GIKfXDEHP`o����������yi������������Y<0+19?SuW:>?93.4a��mWcz��seZO6��tBLr��zn��}d_SPJ8:5306=6272133=c���~������~kbhq|������������������������������������������������������������������������������������������������������������������������}wqmha]YTU[q�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{qnljcaa^XYTUWVQURQPPPQPNLNLKKJJKIIJKGEHJIGHHGGFDGIHLJKLPMMNNOMMOMMLLLNLLMNNLLKMKIGIJKIECCFDBC>@B?A@>??=??A?>=?@??>???@==>>???@>ACBBBBCDCBBCDCBCC@@AA?????@?>@@?@??@@DA?AA@BA??@AAA@ABACCDEBFEEDDCDEFHGHHGGGGHFDECCBDABA@A?====<===<<;>@>>>??@?ABCCCDBADFDEFFHHKGIKLNPQPQTUTXXZ\`bdfhgjhknsyz��®������������������tY}�������������������������������������������������������������������������|qnfhrke_[_d`L #8|��wT=BB621300,*)(-.%%i||zywssrtvyxvsa%+G^oof`_^a`g�y_a`]Y5#49=??><;:46<=>>. 
6-/S���7,*)%L]bxv��whXL,+DMKGLMENOy�����C28euzx~���]IH`dhsrr�����|<jyts]x���v1A���5"541i���|nkpwm8.?P67a���cOH@75$$D8Bv|�lS���7D|��������{���������������QNSeo^VSPMJCB@87=82;HOMBII>7-'6\��`VTVg]QTVXWapv}��~}zua."(()*BY[aG+)'$)Dn`ZcUQVM8->\z]LLXiRDFCDSd�����������n������������QB1,2=J]R32==;4/5_��f`l���qd\U@��oAQ{��v{���c`TUM794///13?=7332>d���}}�������|idjs�����������������������������������������������������������������������������������������������������������������������}{tpjgd^\WRT[r���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wtsojecba[XQVQUSTSQOPOMLLNOOLKKLLJJIHFE>DGEGFFFHIIJKLLLOMMLLMLPOOQOLMNMMMPOPNLMLJKLIKJJGFFDDC?>?=>==<><;<<>?>><<=<:;=>>??@ACBABBBABBCCCBCBBDBAACA@?>@??>>>=?>>=>?>@??>=????@BBA@@CAC@BCDBBBCBBBBBDDFFDCCCDDDCBBBB@@<>>=>><=><=<=;<<>?=>??@?@<?BA@?EBCEFCFGGKKKJLLOOPSSRTV[__^behkv�������������������������y\v��������������������������������������������������������������������������������}ytwzywY./?a���zgbofVI>=<71,*,.:6't{|{ywqnltwxyww^ 
*B`nne``_`be�x\]^\Z4*<=?>=?==<:=;9;</%5,.O��r3**)(R^dq��~xkXM*+CJJKKKJNHZ_u�vS;*/ewxqt���r^FN]jxtx���q>aQ>>Tz|x~k-Y���-'733c��sk���r6.,26=g���g`hY98&%KB?es�rC|�e(I~�����������������������rAJRiuWOHHGGCE=7<B938AKREHLM:+(8W��OTUVV\ZQVXTcpu|���}yt^+"((+)9Yoy>*(%#(Km\Z\[TSJ9,AbxTLQZ\TIFCBNv����������pi������������RB-*09FB4,47783/4h��k_i��{ibZME��jAWm{wm���~daZ[O5820010Sn[F721Ah���~���������{gbjr����������������������������������������������������������������������������������������������������������������������lleef^[[TTRPT]u������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|zynmgc]X\YWRRPTTLLQPPONNNMKGGHGGEABCB@@?BDFGIIKMKJLMMMNOQQOMNMOMONOPMNPNNNRPJLMKJIHGECBA?>>>??<>@A@>>>>><;;=<=@?@@@A@@BA?BCBADCBABDCCCB@?ABA@AA;@=:;<<<>?>>;>@@>?@@?AACAACBAAA@B@?A?@?@@@AA@B@@AACAAB@@B@BDABA??BA>=>=;<=><;=<<====@==?@=>AAACDCFEEHHIKJLMIGFCTacjmu������ľ�������������������z]oy�������������������������������������������������������������������������������������~n^dnw|tqpl~�yk`abbZPHB?BeL		
)r}|}{xtqsxyyzvr\$,J_psbac`acj�i\_]^X7/FC@@@?>>@@?:99;/%2,*;WrF0-**/S^^q�udvaQH)-NMKJKKJLNNCQMAD=,G_ro[bn{��t[GLpwuu|����a?E@68[zq�d2U���&$2-4_]R[jj|��o1+).3?a��z�\?@&,lE9f��dI��Q/S�����������������������f:DQruPKEHHGEF@8=A;8:HYW>FQX:'&5R��OUUYdmeVUX[ant|��|zq[*#)*+,<jwfE,'%#+Je^_e\UVK:0@_kSMPX]SIFEIa�����������on������������H<1/.01,*+/113/-;u��xak~�|lc[S_��hFZkqjdly�yac]fL774341Q�z]M<45Dg������������zccku�����������������������������������������������������������������������������������������������������������������������a]VMOLGFCDDKQ]t���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zyyukmie__\ZVUSTTSQNOLOGNLKJFEC?<?@@BAEDDEFIJHJJJLMLMMLKLLMNPONOIMNOOLNOMMONNMKJHFECCDCCCACBCB@A@?;;9;;=<?>==>=???BA@@@BBAADDBCCAAAAB@@A>?><>?><==<<<=<>??>=<>@B?>@?>>?AB@>?>>>??=>>??>:=:<?@>==A?>?ADDBBDBA@@@A@@=@><;:;;<;;999:::<<=>?AB?=>?A@?AB@:5Cr|x|�����������������������������wbpsw{����������������������������������������������������������������������������������������{nmkgfajknkpswpjggfq�A	
+)o�{|}yxvsxzzvxvs\$+Karsc_acbbgrca__^[:8DC@>><<?AACA<9<1(6.+-,2//,*,/NZ\`^ZcaROH&/OJIJJMNMOLJMKKF?09Z|eRO`x��kTGbtsmy��wvZ=588;Z|z��\6G���$$//3FHCiQCm��V-+).5;c���t��G>@ 2yA:|�uIX��B0Q������������������������`3AVuiJFCEEHKIF:8::9?B`L5IVY9)'4R��ZSYczucUVXYYit��xwpV*$()*.>Tcd?*'%#)Jfd_k\VVI94A^kSKIWaWKEFKd����������vl������������A91.//*)((**+.,0<z��o[n��~mc]Vj��gK[s{rlo��sdgciC5:3344x�c_Z<22Ck�����������w]bku~����������������������������������������������������������������������������������������������������������������������kh_ZUOKHC?@HQZo�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{|tojid`d[]VVSUUNLMMMNKKGIIHEEFEGIIGGGGGHIIKIJJKIJLKLOMMNMKMPOOMOOOPMMLKLIJHFFIHFGCCDCDFB?=<><<?:<===>=>>>@???A@@CCBBBBBB@??@@@@@AA@?A@@@?=??@??>>==>>;=>><<<==>>>=>=>>>=<>A=;=<::;999:;;<=@?>?ACA?@?ABACAA@>;==<>>;:::;;::9:;>;9789<=;;@A@AX����������������������������������tR_ehnqt{~���������������������������������������������������������������������������������������||ywrigfdggknourtl&/r~{~|{{yyy{{zzwsY/Fbuuf_`ccbac_`b`^\72CD@>>:8=AEB?=<<,*?,*+.-0/*,+,P\_a^_`WQRI$2HJKMEP]__]WULJG;.+\kcVTbpr|}l\JDmsqw}xqi[2065<]s��]/D��%023248=.Q��[I3+)-6;m���~�s;:;4uA3Tht]apf26Z������������������������\;GVoZC;?@CLQMB87=<:AQ]FHQYX6*%9V��YVW`yt\TWVW^lv~���{wtS(
$())+-/HL2+)$#,UxhandYVN87F`qYQMXdZJFFFd�����������nn������������B;0.//+))(,,-.-+A~��m_n���nfcZ^��`Og���n|��zkj`_<78424=�n_�h428Ho������������w]dls�����������������������������������������������������������������������������������������������������������������������yuql^XTWOLJLRZt��������������������������������}����}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|ztnkic`\[[VSSRRSPNNNKLMMMLLMJIKJIIIIIHHJHILJHHMLJNMNNMNOPNMPOOOOPPNLKKIJHGIHIIIGGGEEDC@?@@=@@?=?>>>>@@@@@@@A?@@???@@A??@A@>??@?@AA???@?@>??><>;<=<:<:;;::;;;;:;:;==:<=;<:9788;;99:::;::;;=>>>=??@AB???@A>=?<=<<<;9=??:;<=<=@=<;:=>]����������������������������������mFRRQT[acgiktw}�����������������������������������������������������������������������������������������}zsrpmlinWA?>6)$!
1y�}|{{z{||{|{ywv[,Ifyqebbaaa_`bbba^`68FD@@<;<=><:9==<)3?,(+--0.+,'(Q]aa^_]VSTH!1GIJKKimksm^]LFH;#/Vel_akedqid]LGZlrvxsngG1169=[x���[,C{��$390.1764c�hhW0+(.5?v��|��e484<|C4e��pgS7,;Y���������}��������������S4HSdI9<<<BOWI>87>88?UOJ>?QR5%$4Z��RTYfys`VXVQaqw}��|zvjS'!&((+./461.'$#,XvbdscVWP:8F_jYRKZjZHFEBb�����������Zn������������D=1062----8=<3-+8���sZl���lhhYc��ZRn���x���xhjgV777436Szs��=.36Fr������������v`clv�����������������������������������������������������������������������������������������������������������������������}wun_WNMIHJPU\y�������������������������������}}�~~{vphjihikljjs{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}swurjib`\XZZWVTPRPPQRPNNNMMKKJIIJKKGGIHIIJJJMNNPNMPPMNNNNNPPONNNMMLLMLNLJJJKKIKKGGFFECBAAB@?@?>?@@?<<>=>@???@A??@?@@A=??@@?@>?>ABA>?@?=>?=:;;;;9::9899989:78:;9:;788:989898:9:989::;;;:<;>=<>??==>@==;<?@HGB@AB?>=<::7582]����������������������������������pLQPPRSTSTXZ\Z`beinsuz������������������������������������������������������������������������������������������}}�|yog]UOHP�~|yzyvz{zyz{zvt["*Ncvtifccddbdedba\`5;DDBA=:@EB=7:?@@+=?.(,/-//,+)-R_cknkjm^UE%3JIJJRramt]a`IGD8
4Lcwaajq\UQTUJFN^mlkif`;.168;[~y��W6G���&L=/.245:mvo�b2-*/6Dnwyx~�Z43)Fz6K���{iV+*4N������������������������R5HRV<688:AQUA969@86?WQF=L]O4'#<p��UQVbw`WVTRerv���~{xpP$"')'*3Qc`V8(%%0Vh_gpb\]R?<IbrWQN^m\LIE>T����������e|�����������zCF68NE6/.?[ZQ7/,@�¬}cr��ykhcWx��YPm���{���xhggS8551023Xvy\1336Jw�������������scfmu�����������������������������������������������������������������������������������������������������������������������woniaVBCA@DRZ_z������������������������������~v~����|rkhdb\\^\X[[WVax�����~~zuz|ywvwuyvz|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zxztiddbc^_ZXUWSPOPONPOPOMMLLMIKHIGHIJJKJJKKLKKKKJMPONQPRRQNOMLOMLJLOLNNKJILKJHIHGGECCABA?>>=?>?@=<>?@?=>=>?<>??>=?@;>=?A?AC@>>?A@;?>=<9;;9888:97866656675576667787998897788998989:9876::;?>=>@AADDB@@@?=;:99965;h����������������������������������mPORQRRNOQPSQTTSMRXY\[adkkmsxz}������������������������������������������������������������������������������������������������}{yyxxuxvuvspU",Mewzhedddcbcdcc``V.4?@BC??NXQD=;?CE,C>/+-.,.-*+')Xbl~�|��gUD&3NKJJPbWj`Nb[EKE6+;O[XUqb7<8DIAABKdd`a`Z>14779Xm}��Y.L�ƃ$G1.0121:{���X.-'.4=YmpnmnE03(@`/`�kHk�g+*/P������������������������L7GHQG978=BOH;56?F:4>97=FYYG-&'C}��QRXi�w[VTUXdpv���~zxjN$
(-*)+O~�~d0$%&0Vddikc``O>;Khx\UOblZMIC?d�����������cy�����������qVR7MaX@23UvrV5//K�¬}cz��phd^Iv��RTr���wr��nhfbP:66312=clbB2234Mw���~~��������sUfk|����������������������������������������������������������������������������������������������������������ÿ�����������e^a^[TLNOPSY^av�����������������������������xju�����|qjghgeb_XZZWSMNer�����}xvtstqnkmkjghfejkoruwz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}zsnpkebb\VWUTQPRPQOOLKMMHMKLKJKKKIFIIKJJLLLMNNNNOOPNNOMMMNNMMMMKNMKIKLKJKJJLJECEEEDCCCBB@?AB?<<==;<;<<=<<<==>@?>>=>@<?@???>=<=<=<;<<:89866547634455445547868789988888775576:=<?@@>=;<=;:<::6<9=<;?CGHVu����������������������������������nQVUSSSSRUVTSSTUUTSRUUUSXZXY[[_hmlqv{}~����������������������������������������������������������������������������������������������}{qwvulQ*@Sm��rfhfgfdddddcb\22?DGE@HacVI@>AA<)>7-+,.-.,++&,[`n�����fZC&4JKILS^fskjiZIIG5"(0:A>?\E(04?B=<9<S^\Z]b>0467<Vp��vG-W��w#6,-000.5clx�V0.*/59VxwqeX@15/2Z=O�fV��D*+/S������������������������J6AN]J;79<CIE75:CF=52.,3=UTG0'*Ah��ZUZjwmYVUUXenu��|ywlP! 
/-'*2[���o:('&3^d^gnecaQ><Iv�cVQkpXKJA:d�����������e~�����������q]J4]�fA/7QenG0/-G�̮mhw��uha\CZ��QQt���bd��cjfbS;44333;WccN8226Q}��}���������uXgmy�����������������������������������������������������������������������������������������������������������������������^SLLPMMTXZ\__bs����������������������������tkhz����phb\Z[XUUVXW\ZTPG@^���������}{utrpnljg`bc```^b_`f�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~|sqopef_\ZVUQUPROQQQNLMNLLKJKMJKIKIKKIIKLKLLMNKMLNMKLLKNMLKKKKKJJIIJIIIHIHIIJIHFFECDCBB@><>>==<::;:;:;<;;<<:;>?>>;<=<<;<<:;;;::;989:7554343424476565655455665456:<>@;:898865698998?FEMNSQTOMSt����������������������������������hLNPPQSTTUWVUVWVUSTURSSTSTQRVVZVU\Z]`efflpuy�����������������������������������������������������������������������������������������������}{k``h|��vnonlighjkkkruF1FKJFENa]TICBB?:):5.+,.-/.,-'1Uds�}�~jZD&8KKJJSdvt{lfYGJE8&!!-55>N3-18HOG>68Ob^U^c>1255<X`ei]6-?��s&3..00/.4e��}023-17G{��{cP>2B*Ct>F�st�X-,+3V������������������������F69NVB98;;AG?47AGK:31+),<VRJ0&)>]��US]euvYUUVXbmw���~zwoN
!,**+1[���^0('$5Wdbfsnf^L7>J��]WQcjVMJA:c�����������^������������x^=9]sV5,4IZQ;1/)F�agv��rh_YC]��NXn���v���ynjbV:520028cqsU6227V|��~|���������s`dlu�����������������������������������������������������������������������������������������������������������������������sm^XYYVY]_____n���������������������������tbfj{�����ukic\XUMGMGJFJN@<a�������~}wyytsomimjkhhdgeb^YXu������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wtrojdcc`\[XUUSQSPRQOLKMLLLNKIHILMKJILLLMLLLLNMKJKLMMKIJIIKLKJJIKKKKLIKJLMIIIIEFEGDB@?A>?===<<<;=99;;;<<<<;;:9;89::99:;9877768747677987655432234777778:<88:86532222:@CFPXYZVTOLGHIGJs����������������������������������bHLKLOOOOPPPPSUTTZZWVVUTTVUWUTVUUWYXXY[ZZ]\`_gjprvw|}�����������������������������������������������������������������������������������������������{|{wupnqrqqv��L&AQMKGIOUKHAABC@>)=40/02233.*(8Zcp{}}�yhVB$=JIIJQjjacdkXIKH:-848@.09QddVE89R|hT_\<0236<YXSbO5/C��g%0,-/1002AYZ9"21,>9Qwz��[M:24 Nf(V���a+-++2[������������������������D39?G?899<B@73=GJJ/4/)*.AIHB1%+Al��PS]m�v^SUXZ[oy���~|xmH
&)(,2]qkbM0(&'3Wcckwkd_J?@O�{VXTafWNJA@r�����������h������������pL>=X\B/*4IXM80-+M���Yc���wi^X@r��MXi���p���pf^Q330.22FnokO524<Z���~��������qeeoz�����������������������������������������������������������������������������������������������������������������������{wk]]]YZ_cdb_`b{��������������������������hh`f���ĥ�|wrmkd_[__\[[[UA=g��z{xokcVNJRQTSTUYY\__dedjgYTg���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}vvxpiged][ZVWVSOJQSMMLLOLLIHJKILKIGHHIHIKLLLKGJIJKLKJIGHHIILIILMLKOMJIIJMJHHFHFGHEC@A@>><<><:;;9::89::998976699877776668776777635524688878777564432221/35?SRPUVRSNIHHEABDDHx�������������õ�������������������YHMNMLNKKLMKLLMLSRSTTUU[YVWWZVXVVXWTTWXXYXYYZ[[Y[`[affjps{|����������������������������������������������������������������������������������������������~|uuuw~zB9^\UNHIG?@>=@BA?;)D:0<?@>=6/*(:_dip|��lXTB#<MMKLTe^^aaiOJJF<1956512>R_^W?2:WoWRYN/-277;a]VbI8/I��^%0+.-00/03-(%&..@U5Ju��nQJ93/!<B%Srw�\)*)+:`������������������������2.9BG:9:7;>939BHF;$0,)(+AWQC0&)@k��SV[p�q^WWRVbow����xl@
&(()*>_eeF'%%)7Wbgmtmd]P8>S{sRXYcjXMI=<[muo�������r\������������rQAA_M<0,2CXR6.+,J���]bt��rh_W?���TVd���w���~rd_L/3312/?\jnS314>_�����������laeoz�����������������������������������������������������������������������������������������������������������������������wwkWVRRX`deba^Uq���������������½����������wh`�����|ytqkfdb_Z]\^^`XD@p�������}|tmlgeaYZTNMHGE>NIRYYOj�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z|xnjffgb[\ZSVWQNRNLMNMOMKJIIHGJGIIFGGJJJHIIGFGGECBCEDHIHHNLMMLLMMNKKJJJHIIGHHGDCAA@B@>>>>;:;8665786365665565554444436555769;99:9875555412458;>AFXVRNLHGCAABDEDDFEP|�������������ĵ�������������������^GKMLKLLLKILMKLMMKNNPQPRRTTTVYYWXWX\Z\YWYXWXZYZWZXWXXV[^\Y^`ehltyx�����������������������������������������������������������������������������������������������xk|�wjdaWTNOIGHGDCA,%GBEmcejfG/,+8ebdu��w_RU?%H[IMLUdU\[^iXLIE81614445;KTUM:/@[Q@S^O4157;Oi[ZeJ51R��N'2+//03101+)&)/4j]7Ks��ZQG945'7C2Xqx�Y+*(*8`������������������������05=FC76468765?CEA3).-('/APQF1%*Ff��OT]jvlWUUVVerw���~}rmB
&')+2hzkQ3(%(-;\hjsxgd_N=>V�tYWXgoYMI<9[tvy�����tvdZ������������nT?:DHP91>`kS1,**O��_d{��th_ZI���YRj��{{���|pg`G03312-GpkhM/14=]|������������h`fo|�����������������������������������������������������������������������������������������������������������������������ptohaZ\bghgdb[Lt����������������¿����������fZ���Ęyvsnjda^\W\XXY]XFAn������zxxtqqpopormjfeedcbVZ][Jd��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}xpopkaa`_WSTQLORPNNONKKKKIJJIGHIKHFGFDA@=CCBCDDGGJMOLMMNNKMJJKHJKKIKIGFFHHECBBB@?=;9889944454323211210333477:;878976897757<?FB@CABDB?AHDD@@BB@ABB@DGKRRV~�������������õ�������������������WGIIIGIJKJKLKKKLLKMLMOOOONOQPRRRPSUWVXVVXWUVUTVUVSSSRPRTRSSUVUVXZX`bgltvw|~���������������������������������������������������������������������������������������������|wrmgfaYOROO51ct�����pC01)9g`g��{lc]?([oOKKSajjhbnSKLG5,5/10236HQTK84@B9;Vh\:06:CavkwxS80K��H,/+-.16521-))*/:iT0L��bRF95@"GT4>bp�K))))6`������������������������08DE?;543448>DEA6,*44*(/HZTF-$+Di��PR^gvcWTTTYhsy���}|sg=
%))+9hidY1'%(+Blukopjg`M>Bq�pZSVhhUPM=:s����������wc������������kU<:GaU65B[kQ4-*1O��p]d���vh_[<{��OLg��yh|��wngYC.5631-C_igC124;\���~��������fcem~�����������������������������������������������������������������������������������������������������������������������jhijd__eijihb[Ry����������������������������bV���Śxwrligb^^YZXRW^YB@t����zwtsonmkigghehedhhhkgop\R\�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vtsplecaXYWUQQSMKOOOONLNPLLJHFFHGGECCD?CGIHKJJLNMKLKMMKLFGLKKIIJIHHEGFEDCBCB??>=;:6664222246646745666668758689=@EMPY[NKHB@?<878:999<===BCGNQPLJO��������������´�������������������REGHGFGFGHHHJKKKKIJJKMMLLMLNNNORNQPPRTRTTUTMKQQTRONJNIIIJJLLKILMMRPSTUTWWZ_bdknsux{�������������������������������������������������������������������������������������������zuvoTf������vf=6@JMkcs���}tf^A+~{KKMXjihkgtOKJG2(0//--/7KRUJ3153.7Lp[506?Nsz|�yS5/]��B*/,+-2A?9<4)).6>gR;P��}^MH;2,VG1b~��B*))+5]�����������������������.8IFG=53524=BDA80,-C;)).CNO7)#+Co��XS\jui\XWS_ltz���}|sh=
%)(,9[ejM,'&((HwoirpjgaP=CoztUT[gfXRL9>~����������u]������������dUE6Jh[6=Eb]E3412T��tai���rf_^@���OJp��lk���pjcW;/65121FP_jG346>\�����������~fbfn~�����������������������������������������������������������������������������������������������������������������������a^YVONQakligbZS{����������������������������c[���ęuvqkhgdc_[XYWZb[JIv����zyurpnmjihfecddba`]]`io_Pa���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzxrmidb\YUTURSSPSOKLMKJKIHIJHGHJIIGDJKIJKKMOJKKKMKJKKJIFHHIGGEFFDEAA?@?;:8685567:886766554555799?AHOSSW[RQMGB?;9:985786:;:>>>@@A?>?@AP��������������ó�������������������UGEFGGGFDFFGGFGGHIFHHHLJJLKMLONNOOOMPPMNQQQQMSKIGIKIJEFIGFEEGEFFIJIJHGHHIMIHLLGLQV\cejntuvs{}����������������������������������������������������������������������������������������������{lgTVo_^�vsslnoj_Y=1�yJLLOZ__df^HIHH1)002/*,9FIMC2+--,1<I?/19AVmyy�{P/8x��B-3,-->WOIX@-)/<AeF8X~�WPJ;2)=95NWaZ2)*((0a�����������������������{/?KIX<42337??>62.*0O8$)-=IJ9*#0Gk��SVaykVTVU^nr}���}|td;
&)*,6LYiO-'''-Jwqmstkf^R@N^�vWX[lfYSJ4@z����������pb������������cbR7VwU=GCMVH7=63V��u`e��kd\Z7���LIbnkao���if`S1,41143CXqx?024<e������������e_er~�����������������������������������������������������������������������������������������������������������������������mjf[ONSamljfb\T{�����������������¿���������fc���˝ytqmkhea_\ZYWX^[HMv����~{zuronmlihfefdcba`^]`ks\Nb������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zyztnkmib_XWYUROPMNLLNNLMLJIJJJIJJJJJJIJJLKKLJIIHHHFFFEDEFGBB?>;:78:<::99898887532664;BHTRUZXYTOOGDA@>=<<<<:>@DFBB?><<;9:;;:;:I��������������ñ�������������������MJHGGFFFDEFFGGDEDGDEFHIGGJKJIJHKMMLMMMMMOPOOOPORQPQQNMLOKJIJJEGCFFEGEDDDCCFFCBEGD@BFJGJQNV[bffhmpuwy|~�������������������������������������������������������������������������������������������m���|ukfdha`PD�wDLKLILOOQKKKJH/%6C50.+,9EBG=.+,..1445879=Wkw{�yI2<���;04,-2JaV`gB/(.7;UC8Zy�s\RK;0*87020:?-)+**2e�����������������������x.@BNX712258<:50...3H5)(.KXL9+&0Km�}OWdt}mZWVVant}���~zse8
&*)+1XulL-&'&/QvsosthjeRARa�rYU^ndWSJ3J�����������ve������������lyI4bzNGL8E_MGS6:Y���`b��lc\R8���JDZ]_]t��|dbYK4/83234<VzZ4112@h�������������{dait�����������������������������������������������������������������������������������������������������������������������}zuk_W^glnkfa\V�����������������¾���������ee���ĕ}vrligca^[ZYXW`\LOv���|}|ysqqnjljgfedaacb_^_^msYT^���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|vrokgf`_[\WSSQLROPOMNNLMLKKMKKMLJJIKIJIIFEEECBAA=><==?>=><9884466333248?EKSUYTSPKIEB@<;=><>??AFEHGJIDA@@=::=<<::<;;Q��������������ð������������������}HHFEFFFFFGFEEGEDFFDDDGGEGHHGFGEFIIIFIJJJLMLLLMONNOONPQQRQQQOPKLKLJJJHFFFDCECDABA?@AABCDEBGEKOLTT[_`fhllpsv{���������������������������������������������������������������������������������������������~{wsw^P�pPPMJOQGMLMKMJC0 $(Oc3--+-//58/)*...001:?66:H_x�|b:36���;(01*-5JYTW^C.'+23NJ=Wy|dVQL90,
6?2--35-*.,,9h�����������������������q3QQLE3-/237<84/,.,2@.()/?<73)&.E�QW`vhWWWXgpsz���|zq]7!()*-Do�uF-&'&,QvpmuthhaR?Sd�pZV_hcVRJ5D~����������of������������qxHBoo?BAAdsGJK19Z��|Zg|��jb^M=��~FBPW[\r��wcbTG0/61025@W[:21/1Eg�������������zeaiu�������������������������������������������������������������������������������������������������������������������������ypf^eiklkgc\S�����������������������������ie�����}vqljgbc_]\Z[Za[GL|����|zwsrqonlkhfddeaa`]^_bnt]Qg�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{srqomfcb^ZYYTRSRNPSRSONMMLLKJJJGCB@<===;:AQXJGC>?=;4+-2214;@KMMQTPQMHECDB?>>>>>?AEHGHPGEDCB?@AA@@@A????=<@V����������������������������������|KIGFFEGHGGGGFFFGEFGFFDEDEFEFEGFGGGGFEHIHIIILKKLLKMMNQOPPPPPPQQTRSRPOQNMLIIJIIFFECACCB@?AAADHIIMOSUV[]`_aa_innlvz��������������������������������������������������������������������������������������������x�xkjia^a_[TQMQOD6$#+}w80.,./-,,+*,/1233036069=]��iS729���6-30+.7N[UXV?,)+.3^E9HVWWRNO73/GY2--/7/06+->m�����������������������ta��I?1--139AA=4-,)-6)(),3//+&'6k��{SY_s�`TWWYlqt{���xxs^5
$(***CltmR,(((.Rxnjurih`S=Mx�rWW_naVRH8R�����������ri������������n~EGu^>63=ghFK?-5\��nZj��rb`[ME��}JIMV\cn�oh_VK5/42/350ATD7202Df�������������y`ckv�������������������������������������������������������������������������������������������������������������¿���������}womjhijiihgb\Q�����������������������������gj�����wtnkige`_^][Z^d\IPy���{}ywsqoonlkhfeccaa`__^_mr[Tg���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yvuqpmkic[__^ZRVUTOMMMMMG<::7;DNU[dwvYTSNMMF4$-;=CGJMLJIIFDBB@>>>?@>BCDGFIDFEB@@A@@AAB@@BA@ABBABAC@\����������������������������������yIHHHHGFFHGFGGCDFDFFEEEECFFFFEFIFEFFGFGHHHGJJIIHHHLKJKKLNMOOOOPPPPPPPTRPPOOPQOOKMLJIFCDECDDFGHGIJLOQTTVYYY[_badghljoqruw}��������������������������������������������������������������������������������������������|zwoja[\TPG%(D��C84012.--,--377761.0347Ao��iS;0Dy��:441,/2?T]\M6)'(-6_I6?GNYXRS94/
ib1,,2GD:/*.;n���������������������������=B30258?GGB6-+*+.)'((**(&%!I���sRT`}{bZYVUjnt~���{zs_2"))*-;n�rN*&'(0\yqlwmgj`RDUz�lXVckcXQC3P�����������fk������������qAHxV6-/Bf^HA1&0`��e\i{zic`ZH@��wRIJT\dgurnpe\O3153122EmYI9106Di�������������wadlv������������������������������������������������������������������������������������������������������������������������kcb`_``c]abcb\X�����������������������������kn�����xsnjigc``a_ZW\f[MKy���~|zwuropmnlihffeb_`a`_bns^RMu�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||{urmdcc`[ZTSPJ=@ACTkmijnzwd\``_]U3)@PNQNLKE@@?=<:;<<==AGHIFFFCAAAA?@CA?A@@BAAAACABDDDBDC_����������������������������������w??AABBDFFDEDGEDEGFFEEGFDGGFFGGHGGGGHHIIGFIJIHIJHGIHIJIKGJKLKLLNNNONMNPPNONOOQSQRRRQONNMMMMKLKKKKKJLNMPRSRUX[[[[^_`^__aacfjossvz}~�����������������������������������������������������������������������������������������zwnsYGL��jJDAB>:6311139==;824965:Fs�rcJ91@x��5:64./1;enX@2)(*-NrG6?HUliYO;6+pX,+2Ij`=-).?r�������������������������Ƌ:eD79;=BIJ>1-.-.,)()()'&&%$^���nMZf~cVXXW]ksy���xzq]/ 
***.AzylL.%$'1krkomkkcT@Rm�cXWeleXRG4U���}�������dn������������q{>I`I5-0@X]K0+(9a��iaduvhjaYF=��nTMMMS^egfsvygR814322:]l`U9117Hs�������������tcakx�������������������������������������������������������������������������������������������������������������¿���������][TLNMNOJLY_b][�����������������������������ih������xqjifc``_ZZY^k[JK|���}}zxvpnnlikjjefgb`a_]]ampZO5l�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{ywupojd\STbwqolpyte_bbc_Y30HZYWRKKHF:30149>BCDCDEABBA@??@@@AAACBBCBBBCCDBBCDEDEFb����������������������������������u;9=;<<=>@AAABBEDFDEEEHFEHGGFHHIGGHHHGJHFGIJIIIJIIKIKLKJIJJJEHIIKKLLLKKMLLNOMOOQPPPPOPQPOOQPPNOMLNMLMPLQOMQQTSRRUUXWVXY[ZYZ`a\bcggkmqrw|���������������������������������������������������������������������������������������������slc^\UNF@9>?ACFHG@:GF57;?Ye[QC:/G��Y6;;;-,/Dnn\F4)(*+W|E7@Je�mZJ64(!bK+.:^�f;0(-Bu�������������������������ˆc�F;<>>EKC5.)-.,*'&(()&&$#$<S��nV]b~}eUVUWcor|���|zo^1!)*+0NrmjE)%$(3j�qotolkaNBVq�]W\incUUE0RbQ[��������ct������������px9RWJ7.1=V_?,+(;i��wjhtujjbVC<��lXRKBQ^[]ay{~qZ5043028KZsZ8215Kx�������������t\dmv������������������������������������������������������������������������������������������������������������������������hhb\XRPPJPV^b][�����������������������������lm������}tmkifaab]]\^cXKK����|yuomllkjkigfccab`_^blm[L3t�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������|zcUjwwtoszsaacbbcW.1N\[[[^e\F+#&-6<>@>>@A@AA@A???@A?BACCBCDECBEFFCCFDEGHGe�������������¿�������������������s<9;;::;:=>=<>==@CBBBBECDFEEDGFHIFFIHGGHIGHHJKJIHIKIILKJIHKJIMKIHHIIHKJJJIKKILKMMLLMLMNONQQPPOQONQPOOQMRPNROOOPRQPRPQSMQTTUWXY[[\[^]]c`eejlosvy}�����������������������������������������������������������������������������������������|{yqk`]YZZUVURYi_GDDDT\IE@9/Hsn"=AFA0-1IcbRC2'&*2YqB;BLn�jXG5/*Rc13>]x`=/*.Lz�������������������������ǃw�G=@?ADC82/01-((&&&'*''%#%3N��gaXe~{eXTUYgqt��{vm^.#'(*-Ffss@(('(8x�pp{ngkcPFWot`X^is`WR:5EMFJ_^iw���{Ot������������yy>CSV</18U\@.,,<p���rlpzujeWAA��n`YOFS][ceh{�tc7262020X{yY:237Tt�������������u^enu������������������������������������������������������������������������������������������������������������������������ljhc]YXXUVZ`a^\�����������������������������gf���~yxqnkige`_aba`b^VJK}������~{utronlihfdfcecbaaakf[I/x��������������������­y����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}siw|zxvv}sbabacbV14O]____bbB,'+.1146799;<>>=>>?>>AAABBA@BCCCEEEFDDFEIJIJl�������������ÿ�������������������n37776787:=<:<<8;<:===?=>ABBCCBDFFEHGHHIFEGHIIIHHIIJJKIHGFIJHHHIHHJHGJKKGHGDDFGGGHIJIHIJJNOMMMOPNPOOQQQRROOMPPONPPONLNPQRRPRSTUUUVYXXXX[Y\_^_cafigmsrw�����������������������������������������������������������������������������������������|xxrnr|~obde^d`NSO=.I7HOVO8016?D;7-&%*0YsD<CNs�hVA3/+
$xk-06o{]?0*2W~���������������������������@<>;?>623246/()()(((''&%$7R��lfdiyz]UWUYgqu����{wqZ+#'(),Eo~mB+('+>v}rt{rmmcREXmxbZ^gk\WN>:GMGKKMJOTVXUR}������������zx9D_a=/3BYZ9)+,:t���jm{�wleXBA�~jeaXENYX^^Sk}tb:453/3;ZhjQ:45;Yx������������rZcmw������������������������������������������������������������������������������������������������������������������������[XXROMNMONT]_[^������������������¾���������eb���{wsomkgdb``]\][[VRKOt������}|vwuqoolljjihgcfcbbjgYH5y��������������������¦m�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{z|~}ywx}rfcccb_T33Q[^__abcF53202001354358779;;:<>=>???BCCCEEEEGDGGFHIJKm�������������ƿ�������������������k234224358888:;:;99:;;<<;:==>>@?AA@ACDFEBDFGGFHHGIHIHFHHHIHIHGHHFGGHHHIIGHFEFFFEEDEFEEFFEGIJIJIJLJKLNNOOONPNOOPPTSQQQOOOPPPQQPQRQUVUWXXXYYY[[[[\][\]``_efpuw{~����������������������������������������������������������������������������������������������syndldLPl,3dbyrR>67??64/)*/2e|A:J[��k\A545(.�a02Gqs\L4)1X���������������������������|yi?=<8785456982,,'))'))''$%9Y��wkbc}yZWZW[gqw���~zvlZ("'(+/Hw�kA,(()Bx�uz|oomdOJY{�\WZbc\XO=<LKJLORSXWVWT_������������~yw4HZ]:.3Ec]:-)*>ūj`n�mhcYJ6rzhij^JOTW^\S]su^7231118;RhD546;Zz�������������qbfmx������������������������������������������������������������������������������������������������������������������������a]XSLACEDHT^^W_�����������������������������bk����|xrlihea`_\XXVYWPIMx��~{{vvvqspommkkilljjilihgibVH:}��������������������ær������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������~}ywzqgeedb_T-4Q^_a``b\CCF@<<96754444666667779988:<>@AACCEFFFGGHFHJIm�������������Ž�������������������f145333333543667998:89::99:<<>=<<>>=?=>>>?@CDBCDEEFFEEEFGGHGGFFGGIHGHGHIJIGGFGECEFBBBBCDCDDEECEEFHGHIJKLKLLNMNPORSQRPNNOQRQRRRRRSTTUVXXXZYWZXYZX\ZZZYXX[Y\`Z_dhjnptuxz�������������������������������������������������������������������������������������������}t~j7F`x��{fWLGPK?>9624<w|@?Sm�}sfB39A06�[,1ZljfS,*5Y���������������������������wiY;<:65<A>7:;852-))*(**'$!&:]��jf\i�t[UVSYhqv���}yumT'"'(+3Muvg@+('*O��rzxpnldJKZ�y[YXhi[WR?;LLKMPW[\ZX\\_������������y{x.?nk6-3MoZ8+)*D��~Y`q�ifbXN4u�lnrgPOTXa]UZdeI/0402366BF6545>[~�������������q_dmz������������������������������������������������������������������������������������������������������������������������nifh^[TNKOYabZ^�����������������������������aq����wqkigc`[ZYXXXZVPHN�����}ywtpqnmjjiggghfffffdba\VC5��������������������¤y��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~yx|�uddeeccR+8Xaaa_adb]klb][XNOIBA><;79;::9;::899;<>??ABDDDCCGGHHJKr�������������Ľ�������������������f87976544553123367877889976:<;;<;<==<===<<=>@>==@AACCBCDFFEEGECFEGGJHGFGIIIHIHGHJJDDECABDCCDCBDCFEEFCFHGEFGLKKJKLKMMMMMNPQPOSRQPNPRQSUUUVXYZYYYY\\[[YYZ[[YZY\]^[Z_a_bgjjkquwy~}��������������������������������������������������������������������������������������}x����~{qg_`i^XYVSTWY�}DATeswt^D3;M;H�e.19;<=4&)2X���������������������������rWM>;548S_OF?>>:52+)*+.+(&#(>d��W[[l�nZVUV[htx~�~}xslR$#&)+/H`ok;)'(+\��xzvrknbNJ]}q]Z]rnXWU:>LKKML^f^ZZ]_^������������w}u7Pn
H1.<\dK5-,'J��mYft��iibZRE��kpujZPW]d\QW[R:/363224FMBUB54=^��������������oafp{���������������������������������������������������������������������������������������������������������������¿�������\X\YQPPKLPT]`\_�����������������������������cq����ztnigda^YYXWWYXTPFN�����|wwsqnlkhhigffdbbc```_^[T>7�����������������������{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~{y|�tfeebebO-:\`bbbdheh{}x}}unskcc^YWONKKHGFDCDA@@A@??AA@@AACBGGHIIx�������������ƽ�������������������dAA@>;:8876444436767766765689::;<;>><>>><<?=<<;<<=<>?>=?@@AAEBAFDDEGFGGFHIGHIHHHHFEEFEDEFDCCECFFFDDDCCEDDECFHFFFFFDGIIIJKLNLPPONMPOPPPPRSUTWWXYXVXZYZZYXZZYY]^^[]]^^[]\]^[]bcilmppsxz}������������������������������������������������������������������������������������|zqtrutonmlqnvh�{LEOJXX]YG:?aTr�m0../,+)()3S������������������������~��nKO<825Orx^QCED;:2)+/85,)'%*=\��[WZj{r[XVXZjsvy{~||tjQ
$(+*.LfnW1*)*+f��wyxnlndPEZxp^\_og[WP8BIKLLZmmc`_^`X������������x�p:PC4,,5N^T8.+)L��_gw�|jh_`ZW��bkrn`XX]cYOUPC;013221@bTvpI55<b��������������l`grz������������������������������������������������������������������������������������������������������������������������[XUMPMEDKLN[^^d�����������������������������cr����wsqji`_]ZYVUSRSQQJQ����|xtspnnkhhfddbaa``^_][\\[P;9���������������������¦|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}zy}tgefdffM.;Yccccfgcjx{||zwx|wsw{wmnqjg`_]UUTPNLGJHDCDCCCCBCEEBGMz�������������Ƽ�������������������eHFECBA?>===;<<8888777867466979::89<>=>?@@@AB@???>>>==<=???><>=?>AADDGGEEEGIHGHIGFGIHFFGEEFFFGGGECDDDBAADEBADCCDCCCDFGHFGGIKLKIKLMLMKLNPQQRRTUUVTUSTVUUWXY[Z\\]]\\^^]]^]\]]\\\`[^]]_aijlmrvz}�������������������������������������������������������������������������������������yzywxx|��u[\\X`__bZWWsp��o521.+*))+7[��������������������������jNL=1/7YomeKCI=78+)0EZK.(('.@[��]T^n�t\XVX\lqvx{|{zuoP
&)*+.=NK;,)()4u��xwujsrkLH]vr^\bqe]YN5@JKNTcikfb_`bc������������n}c/3/,*.5QhY6.*-P���cdozng`oTL��YeqpgaX[_WQRLG?212223HZy�hI58Bf��������������i]hqz������������������������������������������������������������������������������������������������������������������������jed`XVWZXX]a`^a������������������¾���������\r�����xrogea]\YYVUSQQIV���~{ytoolkgeffda^]_^]]^[YYXXL<<���������������������¨~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}y}sedeffdM)<[ddcdgiel{}}}~zvyywttvvrtxtmhpheef^[[TRRMMGJKKHFHHH|�������������Ź�������������������`BEDCFEGHFCDBBB@@?=;99:587879976665668<=?@A@BACBA@@@?@@@?@A?>?;=>?AA@BBBADFDEEEHHEEFHHFEGGFGHIGFEDEDFECCDCDBCDBCBACDEEFDEEEFFGGGGHIHGJJJNNNNNNMQQPNPRRSUTUVUVWYZZZ]][Z\]\\]]\]_^[[Z\[]Z^``a_ejimmsvz|~����������������������������������������������������������������������������������������y{wqs}vsq�|��i?>;851,+.5b��������������������������\AA90/7MbbZEGF30,),7fsJ/((*/Aa��UNZn�iYWVUYlstvx{zwuoI
(**+,+,0/(**,8|��sttrurdKOd�z][aiaYYM9BKIKO\fib]^``f������������pzZ/,+++.7\oW3.+1P�ċado�ngatF\��O`onhhX\\WQONND334323Af�{Y725Bj�������������i\hp{��������������������������������������������������������������������������������������������������������������¿��������[[[ZQV]fhebd`[h�����������������������������`q�������ytnjgdbcaa]]^TJZ������~xvvolkggfa_^^]\\\ZYXXVJ9=���������Ļ������������{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{�qfdefeaL,<_efffhifn{���}{|{{xxxwutwvqruuoouwmjnkkfea_]`^VQVTU��������������ƹ�������������������aEGFCFFGFDDDDGFFFDCCB@@=>==;<<:897655789;99:<>?>??@A@@?@@CCBCC=@A@A????@A@@BAAB@BECCFFFEDDDEFGHHGHGFHGFFHFEEDEDFCCDDCCDCECDEDECEEGGFEFGHIJJIJJJMMKLONONNQPQPNPVUTUUWXXY[ZZ][[\\^[[[][[XZ[YWXWZXZ[^]dehintuy}�����������������������������������������������������������������������������������������������m`WTRPJA<7Jr������������������������z��Y9<3/17M\\UGH>0*()/=]iH1(),/>b��VV[hlWXWWV[putwy{{ytmG
!'*())**)'&()0>���ururssfNQf�~\ZZca[YK<DKIIH[fdZVZ\[b������������nrS+++,+-:hsE3-*0W�ĉ`fv��nkiiA���Wnsigf`dg`UTPI>+45221=|^J025Ip�������������|iagp}������������������������������������������������������������������������������������������������������������������������XWQPEJWhmlhgcYe����������ÿ�����������������ds�����w{zumiggdcba`bbUG\���������}zysnnmgfdbb``aa_\\WM9>���������ó������������w�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|~�qfebddhE,?_eghhjjdl�����}|��|z|{yvwzwsvwtrqtropqpnnsrpuurnmkgl��������������ƹ�������������������ZHJIHIHHGFFEFJIIFEEEEEDDDCABB>@?>>=;:9<;:756679999<???@ABCGCDEEBDABB@BACB@@BDAA?????CB??@?@ADEFEFFHFHHJHHHHFGGEFEEEEDFEEEDEDDDBBCEFEDEDEEGFFEFGGIHIIHKNKLLLNNMPPQPOQSTTUTTTUVWZXYZXWTVSSTRQRRRTUVXWVVX[]_`aiemqssvz|~����������������������������������������������������������������������������������������z}zmc]Zv{������������������������x��R95//47P]ZSBF:/*++,BhfI5)+./Dh��VVVWVUYUWX_rutxy||zrgA 
(+,+++*'%&))0@���vtvrpsiKOi�wZ\^dd_WK8EJIHJYdaWRTTU_������������lkO++,-.0F\M,,*,.]�ł_hz��rkg\L���pypghklpulTYRF9,25335IigWB128Kq�������������}jcjr}������������������������������������������������������������������������������������������������������������������������kd[XVW^ipolkd]d����������¾�����������������a{����x~�|rmjhec]\]``VJ_�����{z|zywtspmhghfgikgjikd]O<@���������Ķ������������q���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yy���������������������������������������������������������������������������������������������������������������������������������������~|~pffcecjB)@afghhijex���������~{|~{xw|zxxzxtvxwqqtsqrsqrsvuuvz{v}��������������ù�������������������\IKKKJHGIKIIJJIJIGIGGFEJGGIHHDEDCDDCA?A??<:98988787:<>>@@BCA@CEDCCDCDEFGFDCBBBAAAA@AAA@?@?>=>@??BBCCCBDEFFGEHFEFGGGFFGGFFCEDECCDDEFDDEEEEFEFEDDEFFHFGFJHLJIJKJMMMLMNNOPRPPORTRUUTSPQNKHLKGIKJJOUYZZ\]]^__a`aadehdimnpptqvwz|}���������������������������������������������������������������������������������������������������������������������D83136:BKMI?D8/-704WtcJ;+.//Fq��[WUTUUXXVW_suvw|~}{rd: 
'++,++*)''((.=���rwwtuufKQj�x_\`gd^WJAFHFGN^cbVILHOb������������liD,./25:<<7/*+-3f��|bh��vlc[`���qqkedjfoymRZTG9/55231GZmh;327Iu�������������{hair|������������������������������������������������������������������������������������������������������������������������ce\XXYajsspme^f�����������������������������]z����zy��|wtroljc\[^bXH[����zzzyxwuvsnllhgedccdcggjpdR<B���������ö����������Ęw������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������skiov������̷���������������������������������������������������������������������������������������������������������������������������������~}|}�ogedgfcF,C\eihhjjcy���������~~~{z{}zy{zwvzyutuvutvvtvxwwyzyy{���������������÷�������������������\JLKJLJJKKKKJJJJJIIIJIHLHIJIGGHHGGHGGFFFDDDFCA>?=><;===???AAADEDCCCBCDGGGEFEDCDDDDBDCCBCCDC???A@@@@@>>>?ABCCDDDGFGFHIHGFGGHHGDGGHFFEEEFEDEFFFFDEFFGFFDEGHFEEGHJJJJKKLLMNOPNQPPOPSRPRPQMMMLJJJLOUY\[Z^^_`abdccdfgdhggdcbdgeikjiqu{y}��������������������������������������������������������������������������������������������������������������k9:86:77<BDB>G926=2;baUJ5*++2Hs��ZTVUVUSUTWcuvvu}~|ypb6 
()**++**)()(,8���qvvswseGKm�sb[^cb][J:BEBBJ_edVGFHNd������������ae@/6>GPQGCA;3/19f��}cn���yuhW]���_cjd^\PdtdZ\VI8043123Gr�d:23:Pw������������yf`jq������������������������������������������������������������������������������������������������������������������������YVPHGMWmvvqlh^n�����������������¿����������`|����vy|}yxvsnlme][abWKb����{xxvuw~�|rpnknofa``_b_gyhN2>���������������������Ô|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xrijkcghfr������Ϸ���������������������������������������������������������������������������������������������������������������������������������~}}~�mgdefe`E)E^ejijkgdw����������}|~~z}|xwxzwuuvvuwxuwy{x{}}z|����������������µ�������������������TNPQMLJKKKMLMNMLKKIIIJIHGJJIIHKJJHJJHFIIEFGHHHFHHDEECABCBACCCDEFFECEEDDCCEHFFEEEFDEFEEDEFFGDCBBBBAAB@@>?>?@?>@@A@BCEFDEEGGFGFFHGHHGEHFFGFFFGHGDEFFHGFFDFGCDEFEFIGEEHIIIHIJIMLLMNNMNPPQPQQPOOPQRVVWXXZY[^]^_^becceeeeddefedeccdfdhhjmpqsvz|~����������������������������������������������������������������������������������������������������e[7BMNHA<88;>DCO;7AF738;97+()'4Ho��VTVUSVTWWZfxvuy~�}vnc7
')++-3740*)*,8���wvssutgFPo�m_]\`c^ZD3ADCDJX]ZMACDId������������`dC=IZmr`ZZVYI734e��zZo���}yiUP���X^fbWQGZlc\_WN2.21123Rv{c:45;Qv���~~�������x`ajp�������������������������������������������������������������������������������������������������������������������������ia]RRS^nusojg_o�����������������������������_z����|wuusqnliifa\[ddSL_����}yxvwx����~ypuwmd`__``gwiS49���������³���������Ƽ�w����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~obababdcffgq������Ŧ���������������������������������������������������������������������������������������������������������������������������������~~�kecdfe\C+E_eijknigy������������}|~}{|�}xy|{y{xxzyzxuxyxw~yz����������������ŷ�������������������`b^YYUVVROLQLPPONKJJHJKKIKLLJJNKHJJLJJJGFHIKJKIIHGHGGGIGEEGFFFFFFFEFEFECDDFGFGFFFEFFECCEFEFDFFFFEDDCCBACBCB@>B@A@AABA@BADCBDDEFEEHFEGGDFFEFGGGCEEEFDFFFDEEEGEFEEDBCFDEEEEFCEFEGKJIKMMONMNOOOPRRSSRRRRTWWUWXXX]]^_^__bbbcbcca`bccbdbaeccfeikllopv{}�������������������������������������������������������������������������������������������g:6Okuj^]WJDDFGIM=;K]G.---*)'()4Jx��VWUWUVVWVZkuuv{�{voa2 
()+,5LcSF3*).A���wyusvrfLTp~e[]_cd]Y>4:>?@DJQL?==>Dk������������]_CQcs�hQhhhn\F72`��wao���{saUO��|Y\`]SVZ`pe_bYF-052036T{~]7537Tz��~{~������y_cju�������������������������������������������������������������������������������������������������������������¿����������ulaZUV^krqmif`m�����������������������������`{����~yy{vplkeca^]^aaUOb����{yxyxx�����wpz{qgaa``^eweN4>���������´������������x�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wg`WX\_]]^__bchu�����Ư����������������������������������������������������������������������������������������������������������������������������������~}gedddd]?,Fdgijjkhdz�������������~{|~~{{|{y{}zz{{yvz}zy{�~~�~���������������ĵ�������������������zzytsokhec^\WWTSTOPQMIMMLNLKLJIJGHHIILIHIIIIGIHHGIJJJIIHGGFEFHIGGHHIIHGHFGGFEFFFFDDDDDDFEDCDFEFFEEGHGEGFFFCABDDCDCC@@>A@B@AAABBBBDDDDDDDEEGDFDCFDFEDFFDFFFFFEDEDCCCDCDEFDDEGDFCEGFGHJIIJKJKLLMNOOPPPQQRTRRTVSVWVWXXY\\\]]\^__aa``ab_bdbaaaedddbdgknpryvv������������������������������������������������������������������������������������~w}���~xxrb]YUPWTBAYub4,-.-*+-38P���UUWWVVVWXZhprty�ywpd-"'(,/F}�zm3)*.D���zzvvyteITtwd][]cbYY>0;>=>>?RP=;:;En������������V]Xcq~nBendp}q[>:e��sbs���pm`WP��zVW\QWeltuidgW>+/54236^|qT844;T{���~~������vedkv�������������������������������������������������������������������������������������������ÿ����������������������������i[MNJLYhpolif`m�����������������������������a�����y{��zsrsmeb__^`_WGb���|xy{|{|ztuvqjpnje``aaajthL4A���������³����������{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tc[XVVWZ]^ZY]^^_afu���ռ�������������������������������������������������������������������������������
�����������������������������������������������������}y~~hffgdca>+Hbghijiicy�������������~z�{z}~|}}|yy{zyy|{{|~��������������������ó������������������������z}}zuvroliefc\WYXTQSOMNMKMJJIHGKHHIHIHGHHFFHHJJIIIIHHGGHGGHJGIGFIIIIHFFGFFEEFEEEEFFDCDGCDEEDFGHFHHHGIHFGGGGFDDDDCBCBBBABA@ABA?@AA@BBCBCBDDBDDCDBADDEFEDDCDEEDDCBDFEFECDHGFDDDGFEDIGGGFGHJKKKMMMIMPONOOOPSQQRRRSTUWXWXYZZ\^]^_^bb`__`bb`````bdcbghgihosvy~�����������������������������������������������������������������������������������������}xtlni[X{�iM8530/0>E9S���OSVVXTWWXYbprrx~}zwn^-#)+,.Y���{1,+/T���{{wvzv]KZrtg^[]b_[Y70=><<?RygD<:;Do������������W_^n~j=;ipfn~~mF7h��obr���tm\QL��yVPPN`qxxjd]M<115333<el[N834;S���~}������reckw�������������������������������������������������������������������������������������������������������������������������WTMTW\bjoplif]n�����������������������������c�����zz��{xwtqjhg^]c`VLd����}zz����ysrupjjhdada_bcjrhM6;���������ù������������x�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������s_PRYTWZY][\[X[[\\Z]dp��⾒������������������������������������������������������������������������������������������������������������������������������������{z|ffffdcd:.M_eiimnkh�����������~���{�|~}|~~zyzzz{||{y|}~�������������������Ĳ�������������������������������~��zyxtopiigaa\]XRSRRROLMKJJFHFEEEEEFEFFHGGGGGGHHFHJIFHGGIJGGJGGIJIHIJIEFGGEFDFFEEGEDDCFEFFEFIHGHHIGEFGGEFFFGECBCDBAA?A@AA@@??A?>A@@@@?AB@ABCDCBECEFFFEDCEFEEFDFGHGIHEFEFHGEEDEGHHGGFHHHFHLJKJLKLMOOOQROPQRTUSSRSSXXX[\\\\]]^^^_``abcbbbcbacdedefggmlpstv|����������������������������������������������������������������������������������������}��~fVRMKDGfZ?k���`[ZRXYYZZXdvttw||xuk_2
#*,--U���m3,+1W���z|xx{q`QZrzb[[^__\Z67>?;=Jy�xH=<>Dv������������V\asa@9Gnubb{~qQ@m��mds��|ui[TY��tQMKM[r{�rb_[L=614336>TS_R733<[����~�������m^dlx�������������������������������������������������������������������������������������������������������������������������mlhhefjmnnkjg_m�����������������������������^�����{|}{wvsmkiig^]_]QKh����}zz�������{wrmmkhieaaajseM2C���������Ľ��������������Ҟ����uqsx}w���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ve\enku{y~xuspieg`YTUX[g��ס��������������������������������������������������������������������������������������������������������������������������������������|{zbbefede6-Sdgjjlnih��������������{}~~~~|{~|zz|{{|}}|~}~���������������������±�������������������������������������~~�{{zwqqqlgfgb\]WVSTPNNKKGFGFCFFFCEEEEDFHEGIIHIIIJIFIKIIIIGKHJIGHHHHIIJIIHHEFJHGFIGHGFEEEHHFAEHFDEHHHFEEGFFCDDDEEAA@??@A@>>??><?A@?@@ABDEDDGFDEFGGEGGHHFFHHHGGHHHHHEFEEDGHHGFGEGGFGGHHGIKJKKKLMNPPNOQQNPOPSQQTVUWVWYWYZ^\\^^^^`ac`abcc_bbbbeafefbajpuwx|~���������������������������������������������������������������������������������������||upn�dn����wstmonbe_Xsvvx}{wune1
$)+.2S���P/++/Q��}x}zy{teO\wxb\]]`_ZU>F@?>B[��{_G>@Fx������������TW_W@::AXdNY|znI:s��gdx��xsj\L]��pNMKLShxn[`^KK714347>YahZ803?\����~�������{l]dmw�������������������������������������������������������������������������������������������������������������������������~}yqkhhimmkjf[s�����������������������������f�������|wrqkfdd`^^\bbWLi����|{}�������}vsvvtme`\`hpaK5J���������ĸ���������������{vq^GBD7AJLWhrw}|���z}��~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{ccjw����������������v`MNQTa�Π��������������������������������������������������������������������������������������������������������������������������������������z{�{\_cebd_5/Paikkkkfe}������������}~~}~�~~|}{{zy{y{|{z|�~������������|����������������������������������������������������������y{~{uurnmie`a`][WTRPMJMKGIIFDCGGBEHJJHJJJJHJJJJKHHJIIIIIHHHIIIJIIJHHJKIHJIIDGFFGGGHEGHFFGHGEFFEFGHGFEFGECDDCCACAABA@@B@@BBB??AADCBDEDBDFHEDEHGFFFFFFFGFHGFGHGGGGGGFGGFIHGEGHGFEFHIIHGFIKJHIKKMLNKNMPOONQRSRORQPSUWXWXY\`]^_^^`abaaeeeddcdddgihklmoqrvz~~��������������������������������������������������������������������������������������~������������|vy��utx|ywutl*
%(*-3Leb`B-)*-K���x|yxxrbC^so^]^_c]YWQI>=?Hz���uKABO~�����������zQQG;89:;DG8YxvjA7~��^gw��yrg[LZ��kOMMMPbsw^W_]KI114425BglkS724;^����z��������}l[ckw�������������������������������������������������������������������������������������������������������������������������|}xtnijmlljf^s�����������������������������c�������yurpkhecfbaecWLh�����~��}}zwywrpooomia]^_fo`L6F��������������������Ƚ����jORAE`ZC6('*02/8DGIFEPd[]lvx�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������u��������������������pCFGN\v�ͦ�������������������������������������������������������������������������������������������������������������������������������������{|�y\_cced\53Qcijlllkj~�����������|z}{|�~~~}{{{|z}{{{{|}}���������������������ò����������������������������������������������������~}��~yuxxrpmljeb^[WWVSRPPLMLILLJIIJJIHGIIIHIGIJIIIJHIIIJHHJJJIIGIIIJIJHIHHJHIKHGGGGFFD@GHGGIHHGEGFHFFFEEEHGEFCACCBBCDDB@AACBB@@BABCCCACBCEEDDECCDCFDHGDEEEDDFEGEDFFDFHGEIGEHHFGGFGFEEGHHIGIHIIJJJJMMOPKNTONONORSTTUWXYZYZX\_^ab_``dbcddccceefefggjklpsuvy|~�����������������������������������������������������������������������������������������������zvvxzxxuxe%
(,-+1=EGG9,)*0J���xzwxys^J^no`]\_`^[[T4:;AY����qFDFP������������yVK549<=>DB<Vvp`;:w��hcs��vme]Q]��eNNPLL\hm^W[aQ@.13207B_jpN3449^�������������k\dly�������������������������������������������������������������������������������������������������������������������������y{ywtqkjiiigd`t���������þ������������������_�����{{vupnjhgecdegdcXLi�������z{xwrsoklljigfbabbho_P0C��������������������������lTREBjzm[@0(''(-498655CJRWS:w���������¹�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uFB?BLs�Ω�������������������������������������������������������������������������������������������������������������������������������������}|zv]]`bdd[64Schijkljh�����������}�|{}~z}}{z{~z||�|}}||{������������������������������������������������������������������������������������~}~}|wvwqnkmhedb^\ZVSSQLMMMLKIIJIIJIHGIJJIHIIIJGGKJJJHIJHJHHHIIIIJKKJHHIJIHHIJHHJJIIGGHJHFEFGHGFIIIGGGFFGFDEEECCCCCDDCC@AABA?AACACAAA@@BBBCDDCBDCBCDCCBCDDEEECCGEFHEFFFHFFFFGGFDFHEFFEGGEIHHHHIFIJHKKKKMMLJLNQSTTSVXXZZ[Z^]]^`ababb`bcdabeeefhifjmrsuxyy{||}����������������������������������������������������������������������������������������|xxu^'
#%<=3027<>:0))-3G��tvvwzs[M_ua\YZ^WXP4/8:=V����kFCCK������������qSE235<DMSVIVpjJ+6x��ecr��pkd`dx��^MRTHMX`g_Y^\P=+25110=`nqK715=b�������������~j^dny�������������������������������������������������������������������������������������������������������������������������kklligebdceed\u�����������������������������X�����x��xvoibdb\]_a`VIj����~}zwwqrqojjjhjhghfgegkp_M3I���������½���������������nVRF38b��fR5'&+6@FF@96BMPRN9t����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tB558Am�Х�������������������������������������������������������������������������������������������������������������������������������������||~s[\\bffX42Ucgklmmig�������������{}}~~{{z|}{|}{x|}{}}�����������������������������������������������������������������������������������������������|}�{xvrsmiiga`\ZVPOROPNKJIGGHHIKJHGIIGHKKJJIIJHLIHJJKKKKIHIHIIIIIIKJJIJLJJIJIJKJKJIHIKKIHHJIEGGHHHHHGFGFFEFDDCCBBDABCEBCA@AA@@?>?@??=?@AAB@@@A@??@CBBCCCEEFEGGEEDDEEGEFGGEDFEFDCFEACDACFCEDDFFGFFIIHJKMLMMNKOQRQVUVWW\[\[[\^_^aacbbefadhgggfijgjolqrqtuvy|}������������������������������������������������������������������������������������}gJFGUcVF?=77/0/,.0:P��}trrwztZJa�q\XWYa[YM-07:@[����hFCEO������������rQA0/5ALZdaITbK6-?z��aft��pjdlq�Ʀ^TPNJPYejaZ\_T?*04332>n|oJ225=d�������������~kbeny����������������������������������������������������������������������������������������������������������������������¿�SNMLSNNJMPY`c_w�����������������������������_�����{����|vpghf_]Za_TKk���~||zyzzuuqljicbaabeb`abil`J/I��������ƿ����������ǿ����lUTC2/V���zA(+7BHGGGA<EQQRI9v����������������������������˸�it�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������f3--1:g�Ӣ�����������������������������������������������������������������������������
��������������������������������������������������������}|}�uZY[ceeV55Tcflklkjl������������}|�~~~{yy|{{}}{{~|z{~����������������������������������������������������������������������������������������������������~~���{{ywtrnmifc_`\YYVNMHJKKKKIJHFIJIIIJKLJKKLHKKKIMLIKKKIJKIHIJJHJIJHJJJJKKKKJJKJIGIHHIGFIIHJJIHFHGFFFFGHFEDFDDEDFFCBBB@@@@@@@?=>?>=?>?>>???@??=>?@ABCCDFDDFFEFHIHIIHIHEFCDFECDCBCC@@A?>@?@@AABA>@CEFCCFFHGGGIJLMLNOPSSVWYY[[]]_a`a`acadccccdeeecfglmoqstwy|{|������������������������������������������������������������������������������~vw��sjb`WRNMA7<@EP���zvwz|tVHd~q\XWZ\YWG,/69?Z����f@?BN������������jM=1-0;G]k]IH?/-,:y��bgx��njayc�ˠdVKKLW`ltd]frcG124456DhonF432>e��������������hafn{���������������������������������������������������������������������������������������������������������������¿�����¿�dYUKOKMHCGOadax�����������������������������b�����|����}yqhklc_^caTOm����yx{����|zskhdghhgd___agqbK1H��������ǿ����������ɿ����kTRD8S|���W/(1?B;==DJAJUSOH8w����������������������������ϳ\#R�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������P%&',6f�͞�������������������������������������������������������������������������������������������������������������������������������������~}|�t]Y^`deT20Xdeilllhe������������|�|}�~|{}z|||{zz~�{z}��������������������ÿ��������������������������������������������������������������������������������������}~}zxxwromkhha\[VTXWTPMMPMMLJJMNLKKIMLKIKIJLLKILLIJLJIJKJIIIIKJJHHIIIKJIIJJIIJJKLHIKHHIGHGGFEFFFEFHFEFFGFFDDB@CDDA?BBA@>?=@@@==>?????=?>=>>@@>BB@@BCDFGFGHHJGEFFFFGECDDCDDBA@@A@?A@>>;;<><>>:;AAA>>?@C?AAEEGEGILMNLRTVTVY\]^_ab`abbabdcbde``gdcfijhkorrvx����������������������������������������������������������������������������������}uohhhfin���~|{ymRFf|oYXVXXVVA$/57;\����a>?AW������������hM90,,4ITKBHA.,**C���bev�|plbb`�Ŕ`QQQYainrmt�iN757534=\oj@744@k�������������{h`hq|���������������������������
����������������������������������������������������������������������������������������������sng]VZa_ZWZbd_y�����������������������������b�����{�vzxtlikh__^c_RNm���{|yy�����ulrsrrpojb_^`jr]J4G��������������������ɿ���fWRGW�dE5*)*2@?994<GDLVVLD9s����������������������������ˮLS������������������������taysSGJKISivrv�������������������������������������������������������������������������������������������������������������������������������������|��������F##%*3e�ʠ�������������������������������������������������������������������������������������������������������������������������������������~~~�t[U\_dbR11[deikkmhl�����������}�||}~}}~{{|||{|~z{|}��������������������ľ��������������������������������������������������������������������������������������������~}~|~yvxxttqljidca`]XXURLPQHLOKMMMLMLLKJKKKIJKJKKJJIJKJHKIIGHIIKIIJIIIILKJJKKLHGJIGGFFGFFFHGFGFFFGIGFEBEEGFBBDDCCB@BAAA>@==>?>>?======>?@AA@BCAACEDEEGECBBDDDDDC@@A@AA@@BB??A????>>>?>==<?A@==@AAB@@B@BC@D@?C@?CFGMRTVVWY[^^`__c`^adabbcaccbc]^^bfheiilswx{�������������������������������������������������������������������������������������������~|qZOi}s[XUZXVR?+289=V|yvoN@@BV������������cI9,,,7ONSUT<-*)+J���Zis�}rjegq���`WZY]bglsx��sbM267315B`kI9326Dl�������������|g`go{��������������������������������������������������������������������������������������������������������¿���������������lje^Z`eaZVX_dZy����������������������������]��������|rqqkhgc\_\c^SMn���}{zz�����yuxxvwxrmb`^_io_J1C��������������������ȼ���`XQJJR<.-,0,*3@GFC6=GDMVXRE=~����������������������������ˮI^�����������������������z@#;@;&"#"*'(13;I]W[\o�����������������������������������������������������������������������������������������������������������������������������x��������; 
$%'2^�ȡ�������������������������������������������������������������������������������������������������������������������������������������|}~q_\]`caO5:]fgkmmmcl������������|}z{~}{}y�|{}~x|�~|}��������������������ƽ��������������������������������������������������������������������������������������������������~}~~~�}{|{w{yqotrjggb_]\\XTRRPPPLLKMKJJKKJKJKLLJMMKKJKJIJJJJJJJIIGIJGHJJIIHIKIIJHJIIIIIHIIIHGGFFGFDFHDEGGHGEDFBCBB@CB@???>==?==<>;=><>=>AA?A@@ABBC@?=?@A??==<=;9;>>>><;>>>@BDEBB@ABCABDDCDDDDCCECCCDDDBBCCDDCEIMKLNNSRRWVXZZ\]]\_``aaa`cbbba__ca`cb[bejlqpv|��������������������������������������������������������������������������������������~po|�zmf^b]ZV@+66:>AGHVOC=AD^������������UFB.)-5MPUYU;/,+0G���`jy�~rkdjm���^ab[Z_biwtnia[I16511366@<5224Hp�������������{cags~�������������������������������������������������������������������������������������������������������������������������ihgeghf_YSZbdaz�����������������������������b���������xtqmjigggcf^TQo���|zyxz��~zxrolopqrpja_^bmrdG6K��������ž����������Ƚ����dWSFZurjgcU<*1>IMJ7<C?LXVSH<�����������������������������ήG_�����������������������|:49+(&#"" !!!"%$! 
%'2HTp����wy��������������������������������������������������������������������������������������������������������������������|��������8#$"$1]�Š�������������������������������������������������������������������������������������������������������������������������������������}z}~odb_`a_M/7Ycfiklkfk}����������z|~}�~}|~yx~{z{}~|{}}}���������������������ƺ���������������������������������������������������������������������������������������������������������~�|}�|~�zwz{xutrojieeec_[YYWUTMMLMMLKKKKKLJEGJIIHHIJIJLKJIIIIGIJHIIHIIIJHJIJLJJHHJIHHIJIFGGHHHFFGIJGFGEGGFDCHGEDB@@??><=;:<<:8:;;<:;<=?>>@>>=<>>==:997889<<=<;;::=@BEFFDABBEFEDHHGGDHIHHGFGFDEDDGHGHIIIHKJKHMMKNOPSPSVWXXXY[]^^^`aa`cca`b`_^__\Ybc`acimrvw{}��������������������������������������������������������������������������������������ztoneVFUIIE;?;;A??AA`������������^_D,,/8OXXVK<,,*/=���]gy�{njioi���chgbZ`_bhdca^XD(5423355643336Is������������zdafs}������������������������������������������������������������������������������������������������¿��������������¿�������jlhbeeeedbbdcd}����������������������������{h������|��wttrlliknlo`SOs���vzyvvvqopoljfgjfcdcbb`bjn`G1M��������ſ����������Ⱥ���`XUNv�{}��|I%.<FHC?A=9L[YOC<�����������Ľ����������������˩Ic�����������������������v71:,-)""  !  
"$$#!##X�rK-3BNRVbhaejou���������������������������������������������������������������������������������������������������������v��������3"/b����������������������������������������������������������������������������������������������������������������������������������������}{}|jdca\acN.7]gijklkgo�����������~~~~�~~~yz}}}~~{{~}}����������������������Ǻ���������������������������������������������������������������������������������������������������������������{|~|{�}z{�ztzxtqtnllgca`[ZWYWQPRMOLKKJIHHHIJHFGIIKIGHGIHGGHGGGGIIHHIJHIHGJIJJJJJJIHKIJHHIHGGHJFIHFGGGIHGFEEEDC???@?>;:8:;;8899;;<;;;<===><;9988;==<==<:<;=?@CDEEDCDGGFFGIHGFIJIFFEFFDEEEFGHHGHIJHIIIHJJKKLKJMLILPPQRUWWY\[[\\ab``_]`a`^\_a]a_[_\^_[ddemhmtxtwzxxtuv{���������������������������������������������������������������������������{ytngb]SOMH@?CHi������������ljG,,-9DIIKP7,,)/H���`hvzsljgnc���`jtlbda`bdmpcXK.4423357964126O{�������������xhais}���������������������������������������������������������������������������������������������������������������¾��������YSOSWY]hkigfeb|���������������������¿�����~g��������}ppsllljhknj]JNt���zxzwwvtolljhhed_]^`a__bkm^G-M��������ſ����������Ǻw��\VUKeh8,>YtJ$2AIJOIFCBPZWOF:����������ǿ�����������������ХCh�����������������������w548-/)"   !!! 
Ji>#%)(&('"%'(09>FN`jclyy~�~tvv�����������������������������������������������������������������������������������������w~�������o+%4\����������������������������������������������������������������������������������������������������������������������������������������~{|{jdfc_`_J/;^fhjkllgr����������}}||~||~{{{~}|~|}~|~��������������������ƺ��������������������������������������������������������������������������������������������������������������������~~~��}}}}}�z|}vvuvljnjfec`\[RPTRPNMHHICGFHIFCEEDEFGIIFEFFGIIIIHIJIFFHGIIJGHHHJJJHHIJIIIIGJHFHIJIIKIFFFGDEGEDDCA?@?@>>=<::88;89:=<=;:?=<:9=><:;:99;<<==>@ACCDGHIIHGGHHHFIHGGFGFFGFEECCDBECDEGHFFGEGIIIHIHJKKMJJOPOOKJPSUXXZ\\]^[\__`ba]_a_]_\\\\WTWSQGTZ[[TPWY`hr~�������������������������������������������������������������������������������ywtonlechx������������kjF1-.5DECED.)*)-D���^eprnpofl\�ŎYhvnkhdeehloiaK.420428HOI=24:T�������������yc`js~������������������������������������������������������������������������������������������������������������������¿�����^ZUOKJNgmmjggd}����������������������������{l�����|zysnlhgfgfddee]TOv����}zyvurpnkkihfbcc`__^adkt`H3M��������������������Żx��`VSIgm@)/FsE'4CEFPLIIDP[XQH<�������������Zv��\�����������ϣ>"j�����������������������v268-1(   !  
"  IS(#&&&%)(')&  -<(0G1 %2>EOO[lx��������������������������������������������������������������������������������y~�������o/ %3Y��������������������������������������������������������������������������������������������������������������������������������������}{}~kdcba_bH0<Xcfinomhs���������~�~}}~|}~~zzzzzy~{{}||}}����������������������Ƹ�������������������������������������������������������������������������������������������������������������������������������������|�~|{}zy|{tusmjjf`^a]YYVTOLLJIIIHD@DBCAADDDDEGGGIHHIJHGGJIEFJHHJIJIHGHHHGHGGJHJKIIIHHJJIJJHGFGFDGEEFEDCDB@???=<=>>=<><>@>==;:8688556869:<=?A?AEFFEGJIGIHGGGFGGGFFEDEEDCCDCCDEEDGFGEFCFHFGFJJIJIFMIKLNLNNNMONRSUUUUXZZXY_\\]^^^`a^]\XTRMKONQMGJJKS[gos|��������������������������������������������������������������������������������������������������ioE.01037681)()),E���_jnplkkflw�͇T`monnea_dhkjhB+33131LtumM448Q��������������xcdlt������������������������������������������������������������������������������������������������������������¿�����¿�����pmiibX[fnniiec|����������������������������we�����xtplhgc`[]^X\`cZPOv���~{{wvtronmkifeca^____b_kq\G0K���������º���������Ǻz��_SRFWuX=?g{7'2=9=HC@DBQ\ZUI;��������������¨8T�����������̤>#k�����������������������q/7>-/' ! !!!" 
"#FU&"#%(')'*)%" !!""  !#"30%""!'(++.7B���������������������������������������������������������������������������������~}�������t1 )U������������������������������������������������������������������������������������������������������������������������������������x|��{~}i[bc__^D1?Ydgijmjeq����������}|�||~~}{{zzzzzy{z{{{|}~��������������������ɷ������������������������������������������������������������������������������������������������������������������������������������������~���~}{~~z|zyuwsoljjhfc\ZYUQSNGIFEFFDDDBAAABBDEGFEGHHIIHGHIJHJJHGFEHHGFDFFHGHGHIHHIIIIKKHFIHHGEEFGFEEEEAEBCBCCABAABA@@=;:::::7786569::;;;>?@BBEEEEGHFIFFGGFFFEFFGGGEDEEEGFEFEGFEGIFDFGEGHGEFHHGJIHKJKLLKLKKNNNQPPRSTVWXXY\Z\]a_[\]ZWZ[XWYWXXXYWY_^hlswx~��������������������������������������������������������������������������������������������wvVAA;8960-/+*+)1L��{ajprlkkgq���~VY]kmla\`fadcV>,44358n���D25:X�������������sffmu��������������������������������������������������������������������������������������������������������������������������cd_^ZSUflnkhfb�����������������������������zb�����xtpkigcc___ZY_c]OLu���~|zyxuponlkidee```____`mt^G3P��������������������ȴw��`SQD@kpn~�M#%-75:G=:@?R^ZUD;�����������´��Z<j�����������˩C!p�����������������������y0>>*-&   ! !! !HR%%$&%%'&($# !"$$! "##!
5/&#$#''*,,4;���������������������������������������������������������������������������������~~�������x,)V�����������������������������������������������������������������������������������������������������������������������������������r{��~|{{f`be`_^G-=]ghlnojdq��������~��}z|}}{~�{{~}||{{~|{z|{{}��������������������Ʒ�����������������������������������������������������������������������������������������������������������������������������������������������~~}}~|||{yx}{||zvvurmphfb_\VVPOKKGIHGECDEBCCDEEDEIFHIGIJJJLHHGJGJJJIHIHFFGHIJIHIHHHHIGHIHGGIHJHHFFFFDCDDCDCCCBCAAB@@A?>==;;<=<<;9:;<<<==>?@AACBDADFEEFGFFEEHGGDFEHHIJIFHGGGFEFGFCEEEGEEFGIGCFHIIGGHIHJIIJLMMLMMOPRRRSUWWWYZ\[Y^^[]^][\[]`aaa^`cdehlkqqx}~���������������������������������������������������������������������������������������ukhc]TOLC?=6727X��|djssonll����|XUWbhfbdpeT\][A/6616<s���E65;\��������������tcflv��������������������������������������������������������������������������������������������������������������������������_SNMLLWhmkhgea���������������������¿�����ze�����utnijfea]][Y[_c^TNv���yywvtrpnmihfdc_`a][`aajo]E2T���������ù���������Ȳy��XSOB4C_kjH&&%,7;?E>=<:OWUQC:�����������­��}�������������ɢ8%o�����������������������u+A:+/&!!" FD'%%%&$%'&"! 
!!!$#""$%%#70&$%)))'+-5B�����������������������Ǽ�r=?e��;NOUenosyy����������������������������������������}�������o#*R�����������������������������������������������������������������������������������������������������������������������������������wax��{|}zhbbbb_XA+>\diklnj_k����������}}|}�}{|}|{|{|z|}zyz~{~���������������������Ĵ�����������������������������������������������������������������������������������������������������������������������������������������������������}~}}~~�}{~�}�zx{vqomihiec\XRQRPPLKHGGFEGHEEFEIHIIIJKJJJMNKJKMKIHKKJKKHGGFIIIGFHGGIIFHHGGHHILIFEFFGFFFDDDDCDDCCB@ABBAA??=>=<==<:>=;=;;?=?@@@@@BGECCFDGGFDGHHIJGFIJGEGIHHGFFGGHGEEEHEEGEDDGFFGFFFHGHHHIIJKLKMMNONNPQTVUUUVW[\ZZ[[_a_``a_]a\`]bcb`ehmsvw{������������������������������������������������������������������������������������|{qpkg_]X]q��ystuwuto{����|\XWSYbgnvbS\\W809736>l��pC78=\��������������r_dkw������������������������������������������������Ŀ������������������������������������������������������������������������mgfb\`fjmligfc~����������������������������uj�����tsnlifca^[ZYZ]baOKw���~ywuxvsoomjiedbbc`_]^`_hp\F.T��������������������Ʋy��]TOFIbe[SFE6',:DIPOIE@NXXQC>�����������õ����������������ț5#q�����������������������t095-0(!   
:8%$&%&%$#$#  ##$#$''%5-$#9F@4,+-3;������������������������Ͻc"P|U$))*'(*25EZ[Yg}������������������������������������}������m!,Q�����������������������������������������������������������������������������������������������������������������������������������oYy��||}za__ad_W?-?^dhkmnk]n����������~|~�~~��{|}|{{}|yy{{zx|||}����������������������Ķ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������~��|~}|w{xsqpjfhb`^\WRWROQMIIJKIIJHGGHHJIIHJKIJINKILNLJJIHHJGJJFGIJGHEHIHIIHHIHHGHGFFGFDEDEEDEFDDFFDEBDDC@AAA@@CA<>>=<><==>=>@@?A@ACDCACDEFFGGFFGJHHHHIHJIFGIEEFHIGGFEFGHGFGDDFGFEGGHHIHHHGHHKLJJJMNNNPRTTUUWWYZ[Z[]]]]^`fc`c`abccc`chghnqswz|��������������������������������������������������������������������������������������������������������|fa[NQ^gptibgbP20<757BkywV;89?]��������������taenw��������������������������������������������������������������������������������������������������������������������������ke_b__fkmkhgfe�����������������������������ri�����stnmifda_\[[Y]e]NKv���yxwuvuronkjhdcbbcb_]\\amnYA+V��������ƿ����������Ŵ{��[TOI^|~��|zF%)3;JVPLI@PXWT@@�����������������������������ɛ5$t�����������������������o+;6-5) :6!#"$%$"##" " "$#$%$$%#2,%Dzt[O=1.6?������������������������Ѿe-VpO'(++)'&'')9*!4b{�w?DMUUZbiqu|����������������������������f 
+W�����������������������������������������������������������������������������������������������������������������������������������kXx�|{}za\^cf^Z=1A^egkkki^u��������}|}~|}~|{|~{{|}}|{|zxy}|}~����������������������µ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����}�yvtsyrkhjf^_[[ZVSPNOKKKKIJIJJKJKMKJJKLKKKKLLJKLKJIIHGFIHHGHGGFGGEHHGHHGEHFEFGGEEEFEFGGGFFEDCEDBFBCAA@>A@??@????>A??AAA?@AACBDCBDFFFHIGGHHFFFHHHGJGGGFGFHHGEGEFIGEEFEFFEEGGHGHHHHHHJKKJLKOMOLNOORSSSUXY[[[\]]^____b`adcabba]edfiljoouy}�~����������������������������������������������������������������������������������������������wlheY\egqxlmxdJ66?844;MVYH<68Bd��������������safmx��������������������������������������������������������������������������������������������������������������������������URFILOYgmljkib�����������������������������tm�����yunljfeb^]\]Z\d[PQw���wzyvuurpnmjheec`a`]]]^ajn[B+U��������������������Ƕ���ZWPC;=h�uhyD")1>GQOJH?QYXUA>�����������������������������Ś4-t�����������������������k)A6&6( ;=#$%$$#$$$#"&&$$*'#"""$#6+&GI>TWQE44=��������̗����������������Y,QyA&*2:*&'''.5+'+;i�l 
)'%&"!,-2<Q���������������������������c&(M�����������������������������������������������������������������������������������������������������������������������������������um��}{|v^\_decV<0@\cejlkg\y�����~~�~}}}|}}y{}}{|z{zy|}}~~��������������������õ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|�~xy}sqsllhdaa\ZRSROMKFLKJMLKKHJJJLJIIKLKKLKJIIJHHJHIIHGHFEFGGIGEGGGGHGHIIHGFIIHEGEGGGEFGDEFFECCBBCDCBA@ABBABBA?@?=>>>?@A@A@CCCECECDEFGFFIHHIHGFKIDFHHHIGFFFGEEEDEEEEFFEFFGGHHHGIHGKJLIKMOLMPRRSQSTVXXZ\`]_\_`^abaecddaca_dcdggejkmoswz}����������������������������������������������������������������������������������������|vvnqsu{{x}�eT5:KB65;DMI;767Cf��������������qcgnx���������������������������������������������������������������������������������������������������������������¿���������nfbZWSYemlklja�����������������������������tr�����wsnjifc`_]\\Y\d[KNw���z{zvwvqookeheddd_a`_]_`jkW@+T��������������������Ƿ���YWPA?]��qvw7$-;KHIKK>:QYYRDA�����������������������������Ė3+z�����������������������o'D5$,' DA%"#%&%#"""&/2'%&$!!"#$#8,'6F;)B]RC:@��������˪����������������V-Pb3)6e^(*&((.5+$(4c�h$&'$%%$%())+;uimxqssvuz~��}zs�����������] 
&N�����������������������������������������������������������������������������������������������������������������������������������~}��|z}t]^`adfU;,CZfghjje]w����~��~|{}~}|}}zx~}yw||{z|zy{}}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��|}zwxurpmidb`_]XVVUORQLLLJIIHJGFIKGGIIJKKKMKHKIIGGHHGGHHGFGFFFFIIGHHHHHEHGIIJHHFFGHGFFEEFFEFECDDEGFEDBBA@@@@>?B?>?>?>AA?@A@B@CDEDFGJGEGIIHHHGGIGFGGHIKHHHFEFEEEECCEEEFEEFEFGGGIJLKJJLMLKJNPOORRQOUXXVXZ[]]\_`^`_`bb`bc`^````bdebhnoltx���������������������������������������������������������������������������������������������rh9[pVE>>;<<7565Dg��������������jego{��������������������������������������������������������������������������������������������������������������������������lge_c[[fllmlhc�����������������������������ok�����zsnkjhd`_]\[YYe\PMy���{{zwutsopmihgfbdbcda```kp[B.U���������±���������ʸ���\XQKj�napq=&&1AE<8;625SZYU@A�����������������������������ș2-~�����������������������n%@2#'# J?!#%'$$$"!#8RA'$%$""""$' 
5*')SV@6KYI>?��������ε����������������U/S`4*B}ug_^I(0<-"%3h�d!%"""#%$$'),>|oqxtqrrqkmqgggLl����������d!'R��������������������������������������������������������������������������������������������������������������������������������������~yx}qa__bcgV7/E[igiiid_y~�������~~~|~�z}~~|z~||{~|{||zxz|}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�}z}yutqkmniddaXZXWPOQPKKJHIHJJHKLJMJIJIHIJJKJIHHHIHFFGFFFFFEFGGFGFHIIIJHHHHHIHFFGGFFFFHGGGEEDDGEDEDCCBBA@????>?@?>?@?@@@ABCCEBDFFFGFFFHHHHGIIHJIHGDFFECEEDEEDDFFEGHHFEFFGFHGIIIGGILIIKMKMMNQRPTUSVVUUXY[Z^_]__^^`a__]]_\^`]^b^_gjkptwz|�������������������������������������������������������������������������������������uw�zbZXQJCBDC?;Rj��������������gbfp|��������������������������������������������������������������������������������������������������������������������������^WSOQLUfklljfc�����������������������������po�����wtmkifdc^\\\[\b[PPz���yywwwrqsomkjhgfdcb__^_bmoY@,R���������²���������ɻ���[WQHYG)+/.(&&/;AA?90-9QZYUAA�����������������������������ɕ.)~�����������������������l >0!    
I=""%'&%&$"%;U5$#$$!"!!#&5,'2XtYTHKOEB���������ɝ�����������̻R.OX5*5}����[$1B*"%&c�c &$$$&((&((.@�novsprx~��|nogEp����������`*R���������������������������������������������������������������������������������������������������������������������������������������|yz{n_\[abdX9-EZdhijlg]v���������}|~�}z~~|{}zv{}{xy{|{}}|}~���������������������°������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�||~yurpklfgec_\\YTVQONNLGNHJJJHGHGIKGHHIIHHHFFGDGGGFFGFEFFHJEFIGHIIHGHIGHGIIGHFEHFGFGEFFFEFHECCCA@AB@@A@@>>@@A@?@@?=@?A?@BCCCCEEGDDEGHHHIHFFHHFHGHGGFGHEFGFGDDDFEDFFGGFFFGHGHIJJJJLMLMOLPNONQQPSWWWXZXXZ\[]Z]]^[^^]^]_^][Z`didffjnswz����������������������������������������������������������������������������������}yvtneb`^TQ[o�������������lahr|��������������������������������������������������������������������������������������������������������������������������dca^XW_knmljfd�����������������������������pv�����xtoihfdb^\]]\_bXMW{���|{zwyvtspmjjgddcca`____mmVA/X��������������������ʸ���TVPCQa^TPKE1&*1=FJF;6>T[XSCC�����������������������������ǔ,.������������������������j#'G4  " !! ! N;"!$%&'&#"&7=/$#($!#! 
"# 4)%&Ek`KWMT>?��������ү��������������͹N0NW5)9����u:%0A*$&f�\ %%'*AL8*)).C�oswto����~mmh=s����������b!'R�����������rt���oqpsz{xx���������������������������������������������������������������������������������������������������������������~zy|n][^aacS7*D_eiijkcXw�����}~~~~�{z~|z}z{{|xw{|zz|z{{�������������������ñ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~}zwuvoklldgd^]ZUQQNQJMIKJHJKJIIHIIIKIIIHHIFGFEFGIHDCFDGIIFGIHIIGJHGFGGIGGEGFDDFFEEFFEDECCDDEBDECACABA@BC@?@@@>ABB@@?@@BAACDDCCFEGGFGHGGGGHHGGFFEFGFFFFGIIGGGGGFEFHEEGHGFGEFHJIIIJLKLKLOPPOQSRSTTWUUYYWYXYW[\\[]_^`_]a^`c^gdbdhmprrswz������������������������������������������������������������������������������~wvq~��������������kbiqz��������������������������������������������������������������������������������������������������������������������������kmnmgefmolljfe�����������������������������nt�����wupkifda`_]\[_f\LOy���||yzyttsokhgecaaca`_^`akkVB,_��������������������ɹ���NYPL������s>&')8LWUJ@>SYXT?G�����������������������������ɒ,3������������������������g /V6!&+&&!(*$#!!""#D3 "$#%%##&9G8$"'%##! 
$& 4'$!(Qbb^VG6D��������Ǎ��������������͹H/R^6*H���h3((26* "d�W$%(GwvO-)*+H�muxts�����{qmdAo����������[",X����������mWby}_[ZXZ[Z_a`gffmmprtyyy�����������љ����������������������������������������������������������������������������������}|zyzk\[^b`cS40F\ceihjd^������}}~}z|}}xy}{z{~{x|zyy{{xxyzz{{~~~�������������������²�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��}zzvrurmnide__YZWVSQLLILKMLKIJIIIJHJGGIHGFFFGGFHHIGFFHHHGGGFGHHHHGGHGGGGGFHFFEFEDEGHEDDECCCDDCDEBBC@A@BBAABBA@?><A@ABAACCCBEEDEEFFEDDEEEFFHFFGFGFFHFFFGGFGGGGGFEEGFEFGGDHHFGIGIJJLNMLMLMLNORRSSSSRTVVVYZ[\]]]``]]]___]ae`a]^afjmotwuvx~�������������������������������������������������������������������������������������hckq|��������������������������������������������������������������������������������������������������������������������������WZZY[Z`jomlie_�����������������������������ps����~vuqmgdc`][\YY`hZMX|���~|yzwussomjgfda`a____]`ljU>(f��������Ż����������ɸ���SXSW~kTxsx>&'1=PZVL>=P[YR?H�����������������������������ʔ,0������������������������c!/W2)JZSXPaR,-'/30'%!8.
!#$"$$"!'9K;'"&%""""$&!0*%-Tm^Sh]O;D��������ʺ���ż���������θE5_^5)?���g&('.4+ "%j�Y!%$&Pi]?%))+H�nsyut{����tplcCg����������^ ,V����������mZamn]YVXZYVYYXZ[ZYTZ]S]^`du����������Ԃq{x{|�{}��������������������������������������������������������������������������~}{wxxk_^`_deR10J]ddghhba~�~��}|~~}}y{{||{wzwwxz{zzzzzz}|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|{zwwsqohgcb]_\UXWSOOLONNKJIHHIIGHGGIGFFGIGHHIGHGGGGGEGFFEHHGGHIFHIHHIGGGGGEFGGDDDFGHEDEEFEDEECCBCBAAAAB@>@BAABBC@ABABB@CEDCCDEDEIHEGGFGGFEGHGIIIIGFGHHFFFFGIJHGHGGJGGHJLKKIJIIGILKJKLKLLMNNOQSQUTVUVZWZY[^^^]^`^[^_``cddcagimonpqttvzy|�~~~{|~�����������������������������������������������������������������������hbjq}��������������������������������������������������������������������������������������������������������������������������VUVWWT^lnokidd�����������������������������ps����vunkheca__\[Z]aWJZ����||xwvvsqoliffcbbbb`_``^khT@*g��������Ż����������ȸ���TVSTsNFoDQj6&,9CMJC:6=RXXQBI�����������������������������ˏ,2������������������������],G+0nwz���Y778FF</#('A/!"%$" !*ET5&"&% !"$( 1*&,ahD&5J?=G��������˟��������������Ͳ=5[T2(E���N%'',4$"$g�Z&%&$*9D7(&)+M�j|�zvqnxwfhlk^=b����������` 
,U����������nZcjh[WTUVWVVWWTVVWZZWUZYXZary��������׈uzyvwwurqtxwxwt}�|����������������������������������������������������������������~}|xwwj_\__ccU34HZceghhcaz}}�||~�~~{y|}}{}{{z{xyxzyy{}zyyyx|�������������������Ľ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|}{|zurpmkfec`a_ZXSPPLONJHGHBHGHHGEFGGIGIEFJHGGIHGHHGGHDGEBEGGGGGGFGEFGFFHFHHHFFGGFEEEDFFFEECCDBBDCAABC@@C@?@ABA@BAAAACB@@BACDDDDEEEEGHHHHIKJJHJIGHKIIJIIJIHHGFGGHIIGGEFFHIFHFHGGHHHHFIKLNLMNOSRPRUUWXYZ\^\]_^]]_`[_b_`\\]^bccgjkljloqvxw{~�������������������������������������������������������������������iber��������������������������������������������������������������������������������������������������������������������������hfhbbchlmnkhce����������������������¾�����or����|urljhea``^\\[^dZJX���}~xvutsoqljdeeebbcb``bakjX>,g��������ƽ����������ȹ���UTPKT9?P5LT+&.;><:732<SZYPAJ�����¿���ö�����������������ˆ)6������������������������e(C2*;/Y^BoO=AFF;>4-51B4  !$#"! 
&8=/$$%# "%(3*&=m_8-(,1;O��������ǚ��������������̲<8YX0)D��v_0)'*,%!'p�T$$#EvoD)&'*K��������������xJj����������] ,[����������j[bji[WVSUTSTVUSTUVWWWVVTUR>Bfdghtt���ׇqwutwtuvpV( 2V72;BIQXVYbpskv������������������������������������������������������|z|wvui^_`_`eP.2GZ`dghfaby~��~}~}}}~zy||w|}z|{zxz||{yz|zz{|{}��������������������ż���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|�zyyvsqrlhfec^[[ZWSRNMJJKIIFGHFIFFGGGGIHHHHIIHHIHFGFGGEHHGHFHHGCHGFGGFHIHGFEFFFGFEEEFDEDEDEDDEDEFCBDDDCBB?CBA@?@@@>???BBABBBCEFFDGHIJIIIHILKKJIHKIJJJKIHGHFEGEAEDCDDCCCEEEDCEGFGIIJJKLLLLPOPQTUVUVYYY[[_`]``^`_]]__]]_[]VYacfilkmprsvy{{{~������������������������������������������������������������~gejs��������������������������������������������������������������������������������������������������������������¾�����������^[[\^`ejmkhda`����������������������¿�����ou����{wrmjheb``^[\Z^i\MX|���z{yvvtroomkdeeebba_^_`bljX=,j��������ǹ����������ǵ���XUL@DL42AL=%&0<>?@=<8<P\XPAQ����¿����´��lx�������������ʄ%=������������������������],H3,82D8.N04AC5+69=V0I- !%$"!!
'>?-##%"!&) "2*#9R]J6+,06L��������л��������������α>6`]1-J�O`p*'(./& %o�I!&&+Ojf>(()-L������������ü�mm����������\-]����������eS`jhVRURQSRRTTTSTSQUVWUTTR=Bgbhcohu���m_jhhklpokF-P*&))(&&')((+3f�����������������������������������������������������~|{zwute]`]\^`N02HZbcdfd`cw~�~}~||~}|z|}}{|{zx}zwwz|yyxyyy{{{|��������������������ƻ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}}zxysutolkg_c\TZWWQPIJKGDCFIGGEHIHIJIHJGGGHIHFGHHHFHFEDFDCHGEFGGHFFHJHHHHHGHFFGFFFEDFDDEFEFGECEDCDBCBB@A@A@@A@@?@@@ABAABDDDFHGFGHHHJJIIKIHGIKIHGGFGIHHGFDFEDCBCDCBCCCCCDFFFHIGIGIJMMNNNNQQRTUVWYZ\]]__^__]]\^^^\^^^_\`ab`^ahhjknqsw{}�������������������������������������������������������{gfls�����������������������������������������������������������������������������������������������������������������������¿��URRRMO]gjjfdb_�����������������������������px����}tpmjgeba_[[[Z_cZHR����{xyxvusrqmlhgdfbbb``__amiV>+j��������ǽ����������ȷ���XUONva4f�uZ)'8ECBJIC>?RYXP>S�������������vd��������������ͅ*@������������������������a,C1/VyrZ?"$-88326<]t(M0 #&%$"  &77-##%"!"%*!&5)#&A^h<*,08R��������ɣ��������������ί4:f^.,?sf�k5-&-1&!&t�K$&'0EM6((+0Q���������������os����������]
-]����������dT`ihWRRPPQRSTTTUSTTTUVSSRR@J[heofrb���E6A>AA?GNJ)(D-')(('%%$$#")Pgs������������������������������������������������~}||}xvutd\`^]_]N06O\adcdc`cx�}z|~~z|}{{}}||}yx{|zx|{{yyxxxy{yy|}~�������������������Ż������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���|y|{zwrqmkihb_[WZSQMJLLKJJIKGHIEFGIIJHJJIJJKHGGEEFHGFGHFEFHIHFGHIJHGGGHHHGFEFGGGFEDGFFIFFEEFDFDCCDCCBBCBAA@@@?@A@@??BBBDCCEDFGGHHGFIIHJJIGHIIGGFFFDEDCDCCBDCCABCCBCFGCEFEFGIKIILMLNOORQTUVWYYZ[[\Z]^^^_^a^_`^____a_b]]adedglmpruxy|~�������������������������������������������������xcenv�����������������������������������������������������������������������������������������������������������������������¿��hfb``bggfheebc����������������������¿�����nz����ytqmkigc__]]\[`eZK[����yzyxvtsqpolifffcaaaba`bjgT<+m��������Ƽ����������ȷ���TQOWtWY�ole.*;A:8<<87?SXWM@T����������ú�����������������Ƀ%;������������������������[6U0>��}xN"(6B@@C=<dd#Y3!"%%$"! 
&<8*#$$ !!!&*)=*$*]xf6(+/9Q��������ǭ��������������ͱ,?h[,,H����}?*,/%#&t�M"&%':grG))+.S���������������kn����������X0d����������cW^fhRPRPPQSUWVWXQTQRRSRQQR>=^lfnivj���8 ('2F8/'&'@,'&(''#!###"$&*MOGWe`ZW[fvtlrw{{���������������������������������}}zzyvrrpd[]^\\^J/4I`bcabc_d~�}}}|{}~{|{z{}{wy}ywy~|xzzzxy{zz{}~��~����������������ƻ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|{}zwyxrqmkikd]^ZRSRSPKJKJJJGKIIHIJJJHHIJIHHHGHGHGEGHFFFGCFEEFDDFIIHFEGGHHGFHHFGGFEGGEFFEGGFEEDBBBBA?@>@@>>>?@@????BACCCDDEFEDGGHIIHFFFEDEFDDEEDDCCBBBABACBCCDCCFEFDEGGGJKIJKLMLNPPPPRSUWXVX[\]^_`]]^`a`_`aabbccbccdbdghkmonqssuwz{}|}~���������������������������������������uainu���������������������������������������������������������������������������������������������������������������������������ddecimkfccab`c�����������������������������j|����}upnnkhea_^]\\`dXIX����|}zxvqtqpnkigfecccc``abjhT9-m��������ź����������ǳ���PRMLvs�i0fg,,8;0.32.0?PWUN9T��������������ſ��������������$C�����������������������V=_,8Ycb�R!*6;;;:34YO "j- #%%#"! "*+%"$#   "&.#)=)%-V_G.*,05R�������������˿���������Ϩ(>eS+*P����j.*,/$ #,t�G#&%(S��F((-0U���������������nl����������W$3f����������fX_edRPQQQSWiofpk\dWYPSQPNL<?keogskt���5*<p�vJ#!*G-'()(($!!  !
I8*./.,-28;529DCFPW[p��גpw{{}�������������������}~||{zwvqtqbY\]\_aG-2G[^`bbc\e}}{~||~}}{w{|zyyz|xx{|xvyzyy{|y{~}~��������������������ǻ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~�}|}{yyurpmjd__a]ZYXQQNKOJIHFEHHJJHIIGGGIHHIIHHHIGHIGFDEDEDDEFEFHIHFHFFGGFEEFFEFFGFGFFEFEDDCBCAA@??@@A@>>??BA@BA?@AAAACBCEFECCBDEBCDDDDDDDBCDBBAB?ABCADEDHFFFIHHIJJHHLMJKNMMMNPSSVUVXYVX\[[[]____dbaadeecfegfhghikkiijlklonjkmptwy{~~��������������������������������vdglv���������������������������������������������������������������������������������������������������������������������������WNURNR^`ZRZ``^�����������������������������k{�����|vsqkgdb`^]]_bbVHZ����~{zywvusqomkgfedbaa``aaieR;,l��������ż����������Ȳ���KSO;XlZ20_A&*4:69<744?TWUO:T������������������������������} @������������������������T7]&*25]d((4:;9720C?!#T* %&%#! (/'"$$! !#%-%,<)#&?NB1++.6X��������˶��������������ϥ.>gS,*T�kjP*))-0% 'y�D#&%&;QJ4&(+4Z���������������eq����������R$2g����������fQ]ebQOOQQUq����w�|kPNOOOI=Qbfimion���3!)Dz�vC##+@.((())$!!!!"
 N9/-++-///,*,++-/222Bb��filhijortvvwx{}�������{{yyyxtpqp\X[Z\^^G+2N]]^ba`Ue{|||{y|}|{|}xu|{y{}zyyz{|z{zyyzzyy||{}��������������������Ź�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{yyvwwrroifg_^aXWVTRPNPLGLHFFBEIGHHHIHIHHIHGEFEEBADDCACBCDEEDEGFFDCEFEFDEFEEFFDDDDDFEECBDDEECCCABDCCBABA@ABBCCA@@@A@>?@>?A@@BCB@?ABAA@A@CBCDDCEFHJGGIIJKJIJLLMNLKMPMOQQRRRUVVXXUWZ[\]]]b]]]`aacccghighhikjjklkijiifhijjjkkruwwy{|}}������������������������vfejy���������������������������������������������������������������������������������������������������������������������������hfaXLS\`WJ[_\`�����������������������������jy�����}vsokjfdcbbbecWL_�������~{xvttnmljhfecbcbaadjgR:,m��������Ĺ����������ȯ���QUO@]T,5UU8'*8ABILFB>ARXWP4R�����������²}d|��������������{$G������������������������Q;\*-FDC<- &4?BA@</*+  D,!$""# ":>-"##   !#&,$&8)$@__`>-*/7^��������ő��������������ϡ+DhZ0*EJ)F9.('+4' !({�A#%$%7^bH(&)/[�������������ӽix����������S2m����������fRZe_LMNPOR|������nQPOLMG;F^shtex^���/")<s��>"")=+%(&'(%&%$$" 
I9/..,..//,,--.0//35=X��~jonnmmnnjllnknpjw�xwxxzsvurrvrqok\[\YZ\]E.4N]]aab`Uby}}|||~~{z~{y{zz{|zvwywz{|yuyxxxz|y{�������������������Ÿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~|{yzwyspokkfda_\XYWTTPQPJJJHKIHGHKHGGFGFECCCDCBBDCBBCBBBC?>?@AA@ABAAFCCDDDBEFFEDFFEFEDDEFGDEEDFEEEDDDCBA@@A?>>=;<===@?==<>?>??>@BCACDEDEFGHJJJLMNNNLLPNNMPPPPQOQQPRRRTSUWWXWWZ\Z[]_]]^a^^cbddeffiijjjkijmmkkkljkjjkmlklknpqqutx~~��~x�������������ucdnz���������������������������������������������������������������������������������������������������������������������������{zzk^_cd_]``Y^�����������������������������fs���zvttpnljfbcdcedcaUIX���������}{yurpojjjgeffc`fkhU;*j��������ĺ����������į����NROUP8v�rS,,;D@AFIHBCPWWM9Z�����������¦z|���������������sH������������������������PCX'9z�{sK!$0869BH-#"!L- "!"$! (CI/!#%! !#(+:+(EZ`b<,-18b������������������������ϡ&EjY.)/2.*)+*'.<( ")}�D"%%(R��F&'*.X�������������Ѿjy����������R.q����������dQZf^MKTXTS^`ciqhiozRNNPLKE8/VT[Y[aj��*!*C���9$"'?*%(&(),<K='!
 N9.-,,,,/.-,,-,.//069X��~jomoomikinnnqpoksl7?KEV[SRTZ[__Z^YXU[`[h_`A4:N[\]_d`Xdu{{z|~}|~}}{{{{{zyywwwwyzytw{wz{zzx|��}������������������Ŷ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�{||z}xywtqplmhgda^[[VTUTRNMIGIKFJHGFDFFDCCDBBBD?@CC?=?==>?@?=???@BBBEECCBCCABEFEEEDFDFGFHFHGFEDEGEBABAA?>==<=<<<<<;<;<89:;:<>AAB@BDFIKIJMLNPRPORPSSRRRSRRRTTQRSTTUVTVSUX[WXZZZZ\^]\__`aa`b_bdefgfggfhkkjkmlmkjfccbcc`]`adbhkopsty�������������pair{����������������������������������������������������������������������������������������������������������¿���������������wtgWT`cddeba[^���������������������¿������er��xuspnnjigd`]]]`]ZXRHVz������~{wxvwtqpqrolijkjlnqcT<+i��������ƾ����������ƪ��ߑPRP[v`l�`ne,+8>500;EDCPYVN:]�����������Ʒ�����������������t!K������������������������S?T$,D2@k: &7@BEFD,!"!P-!$#$%"  %:@("&'!  %+ +8*&2JN:%'-28c������������������������ΛCc^-*>^cZVU7'2:' $'}�=%&(D^P/((*/Z�������������ѽhz����������P1z����������dQXa[P[�wmrqd]_WXXPPVXPVTNE1)3.87CDT��, )@��z4"!0H+')(),T��tD!
#P;.,+,,-000.-..,/105;Z��|ipju����rmonnnporQ"&%'0.****-2/0029>LWZYLF;9;NWORYc]Udxyrruz{||z||}|~yxzzwy{zvuyywwxvwx{}}~������������������µ��������������������}}ytuvzu{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~}�~{|zw|xttqkkffda_`XWVUVQLNMLKIHFDGGEEDCCE@BB?@A>>?><===>?>@@A>@AA@@BBCBDCDCDHGHFGGGGGHKJHEGFFCBAA@AA?@?A???>;;;::;:<<=??A@BEFHJJKMNPPSSUVWVSUVWUVVSUUWUVXTWWXWZVWXXZYZ[\]^^__`]]aa``abaacacdcdfegfeeZ[cd^[ZXQZcdgfhllq��������������rdkr{���������������������������������������������������������������������������������������������������������������������������vdIM^dddefc`\d���������������������¿������fx���|xrmliifba^^]^\[WQETx��~zywvssnolplkmkiigefghjhe_U;(n��������������������Ĩ��ސNTQOry�V2h`(*6=5./:KCBS[XR9_�����������ȼ�����������������p"L������������������������MBX$,@?BL!&088<E?,!#&V4#%#"!  +@8%"'("   %+/;'$<rZO<0/28d������������ƿ����������͖GeX,/\�����?$/2$ "$��? %%#3IO7(')2_�������������Ѷez����������N1{����������`LWaVN{�����������xx�urwt[E.&+++-2;?��*"(B�a:"6G,&''*J���xN#(O;.+,--/14742....015<\��zkox��¾�ynrrnjommL(%),+**()('&&&$#$(+!
),+**+2237;;@ALX[Z\]gv}������}}�{yxvvxz{ztwywxyyxx{{z|}����������������Ĵ�������������������UGNU^obRQY]fo|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||~||||zzxwystropihec`_YWWSURNOMLHKKHIHHGEEECBCB@BB@>><==>=?=?@?==>>=?A?@DCCDEEDFEIJIGFGGFGGDFDEEECDEFB@@?@@>>><=><>>?@@BDCEFFGHKMPOQRTTRTWYY[[[YZZXY\[]\\\\[ZZZ[Z[YZZ[YZY][\\Z[][]Y\^__]_^^```][\djib^\\Wkmpprrrv}��������������peiq{���������������������������������������������������������������������������������������������������������������������������WOTjqkdefgc`]c���������������������¿������d}���~wtmjfdda`^_][Z\YVFT����{yyxutqnmnkjjjfecbdba_a`^S;/l��������������������ɨ��݌OTL;JVD,6W6$(3><<?EJ=@T[ZO;b�����������¤���������������̼n#O������������������������NZ`"@{zp\8 *;CAAG<%" .]5!"##"""'65( %&"  !"&,1:((@U_dUE836i���������ԥjo{����������ГHbW,/Jfz��o.&//#!"&��:"&$(U�v:'(+0`�������������Ӹfz����������M9|����������aQU_UKw�~����������������aB1'**+,2;E�߻)#+@WAa<!
?M*&').LZ"$PD#'I7-++,/18DMJA1.//028;\��{mp����ÝlprponnlpH%$(++*.5/)'%&$&&&('$%%'''%$%'$$&&&'&*+0Jdz�������������{{yyxxxxwxyxyy|{}}~����������������ô������������������w40Ja��nb]ZUT\q�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|}~}|xz|xx{vutspmigfa_^ZZVSSRRQOKJIIIGGFFEFEDBCB@ADBBA?>?@@@A@>=<AA@CEFFFHIIIHGHHFGGGIFEFHFFIEDDCBCABABAC>@?AC@DFEFFFEDFHHILNNPQPTUXYZ^[[Z[\^^`]__ZZYZYWWYZ\\[V[\[[[ZZ\]^]^`abd_cddbijfhjsvtnjegnvvwxxxy{���������������rdho}���������������������������������������������������������������������������������������������������������������������������Tdppf^aghgc__g�����������������������������cx��wsqlhdca__\YZYYZVPCQ����}zyutrnmkhghhgebcab``_`_\O7+n��������������������Ǩ��܊NRJ6/2),.-($%-6@GJJA7=SYUK:e����������ƿ����������������ʹh"M������������������������KMK 5^Zl~A )8;:;6'  5X," "!!#" -CH-""$ !$(.4)%1PVY_[I:5j��������ԩb�������������͑JdV,*0@bl8@*'.," &��4!$$+FYA*&)-2c�������������ӷb|����������F4{����������`OU\UJOYPR`_X]cagps{����~P@.%)(+-39F��&#+Cn��:"
+CI)%'),?H7"AI&H6,*++/4?ZfbP6,,-117=_��uir�Ļ����������poE%%(**,Ki>*,)&&&'&)* %$'&'*.,,,,++*&%&$'5Lx��������ú����~��~}}|zxvxwxvvyxrsuw����������������������������������u18Po��omleeefp�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~�}}}zzzxxwuyqoqmidda`^\WVXPRQPMJIKJLIHIGGGIFEFFFFGGEEDDCGEEGGFHHGGJEGHGGGHGFGGGIJHEGGEFFDEBCFDEEDCBBFEGIIKKLIHIJJKKLLJNNPQSSSTVTUXXXXZVY[XYX\\[Z\[^`^\bb`_bcegggjjlopmnpqstvuvvw{|ynopqvyzz{zzz|��������������qcip}���������������������������������������������������������������������������������������������������������������������������u~ymhdfghhf`^h�����������������������������ez��ytrnifc`\]]ZYXXWRLFU��|zwstpnmkigedbeba`a_^]^]\ZN:,m��������������������ȧ��ׇQQJC[cYTUN;%#*18@A922CVVTL;l����������ǿ����������������̹c$V������������������������FGH'Ftpx�9$-7:85) 6I+"! !""" #.CC*#"# "$49(&;q^KDY[I8q��������ǎ����һ��������ː LgX-.Z�����?$*+ !(��5##$(5LU@('*3f�������������Դa~����������F0{����������^NS\SDBCCDBFEFFFEFFQTNJTPG=+$(+/06<D��)#,O��z5!
+<B'&''*>ed\w[&&H5,+,-.3EchgU6++-/28<f��too|����������ǝpoE#&)*.:ywD7802,&%%(&%&'&)9EBBAAC@:,&%%)3Ds���������ɠ���{~������|wsicZW]XPPYXj����������������������������������t0<Ps��oonjikfq���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}}{{}yzyxwtrstqpnhidb_]]]\ZXSSQQPMNMLONKMNNOLMLKNLMMNLLONKLNLMJGHIIJIIIIIHHIHGGFFGFDCFEFFFHHFJKMNOOONOQPPPPNKKLMOOOLJJILLLRQTTUQUZZ^`cdhgikmlpooqqrsuuwvwyzz{{|~}}~������}urtuv|}}~~|}~��������������}mclt���������������������������������������������������������������������������������������������������������������������������nqjdeddghjhc]b�����������������������������i������~vqnjedb_\XXVURNFX��|}{urnhjghfgeab]\]]\[ZZZZXN8+q��������������������š��ۊKUKRrxww�wJ%%,7981/,1BTYVN=m�����������½�|�������������εb Q�����������������������EBJ#8TRju<!+DK@=<2" $1F-"  !!#! "*;?,""$!! $%89&%B[[O7;YD:t��������µ��������������ɏ%NgQ(.g�����0)-- "*��5$#%,Q�D'(+1d�������������ն]y����������?
)7�¢�������_NRZQCDABBCDBAFFCCCDDDBDDC<*$'/IIEIG��%#'J�p[3 @B*%$',Y����f"&I5,--+.3H[bbI1,+,.28;j��nppez�����������ksC &))-=s{��z{y3%$%&' %'()1HVUY_cfjW1&$$(2@v���������Ǔ���vy{||z{}zqO<>:>JVev�������������������������������������m(;Sv��nnmijkjt�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|~}|{~zz{z|zxwvtvpomkkihgd`_\[ZZYXXVSQSSRRQPQPSSTSPRSSTSQLOMKJLJILNMMLLNLJHGDEFEDEFDFFGHGGHMNPOQPQUSSSUUQOUSSTTRRTPPWYYZ\Z_adgjnpprttvwyyzz{}�����������������������vsuvy~~}~}|~���������������}ncmv����������������������������������������������������������������������������������������������������������������������������`ZSRUNVbghigaa�����������������������������g�������{wssnjgdggd^WPI`���������xtromjjhfda``\^^]]ZXK7*r��������Ľ����������Ƥ��܈LTJ9AC3ZkfB"%0<@B:3/2@SWVN<s�����������è��������������ͷa [������������������������F?D#B{x�s'!5D=2/@:% ).E0" !!!" ! %<@%!"#!" 
!&(23'!E`XY3*284u����������������ɿ������ˎ$QlU,,j���xe$*2- !,��4$$%*FeW/')+1g�������������Դ[�����������Q\i#=�Ģ�������~]FPXP@C@AB@@@?CB@CAABAACBA;)%0[���xP��&#%G�{9"A?)%&(*Gq��s7)J6,,-+-4BUXM2+,+-.16=p��lnt��ο�������hoorA$&&(-:g�����+&'&'&!"$(*7OefhgnhhQ,$%%(1?x���������ō��{wz{{z}yyxgJVgr�����������������½�����������������������f&=V|��nnnrplmw����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~}�{�~z~|}ywwzyxvuuvuxtqqnmkighehcba`Z]]ZWXWXWYXZZZ]_b\VPOMNKJOOPRPNLLGIHEHECBFFDFCDFHGJMOQQSUOVWW[\^abbcfdehihhikoklputuyzzz|}~�������������������������������������ywyz|��~~~��������������~khqw����������������������������������������������������������������������������������������������������������������������¿���ihdZ`\[ahjkjef�����������������������������c��������}zvspjjhikkg^QF[����������}{ywrsolhgiikljhe]N<.t��������ž����������Š��كPSJB\WTv{mR*%(3AOKB<7?SUUL=s�����������į���������������ǲXY�xu~y������������������~A;B)FZD>,;=<=D: !5I.! # !#   !''#"$$ "(*97%.`eOA,+/57|����������������ǽ������ɉ#PiN)P����t8&*1* !(��3&'&&*$"'()-5m�������������ϳZ�����������h�i+B����������~`GNXO=?===>>?>A?>A>@@@AA@@:'%4cw}�tF��%!%S��l+ :9('''-]��e!
+Q4**,,-5Fc`K1,.,,-078m��jrs��ȿ�����iomo<"%&*+-07HlvuB%%%&(% $$',;Wd_ZWUQK<+&&$)1?}���Ɵ������{xxxzz{zy}y�����������������������ã���������������������^*@Z}��ip��wmny������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~�}~{{{yzzvxzxxxxvvuwwstqsqppokjhfggffhihfgjie[PNKIGFIIMPPNOPNOKHFC@?DGEGGFFIKPSVXYZ]_`cdgjjlpqrstvvvvwxz|||�����������������������������������������������xx{{~�~�~~���������������lhnv����������������������������������������������������������������������������������������������������������������������������~}|z{trnnpokgl����������������������¿�����f���������zxtqimkklmkaRG`������������~|zxvsnmmoqttsuteS9/w��������ö����������Ğ���RRJHvqyySub*%$,@UTPG<GRTSJ:x����������������������������ĭVGW<568=CJIORXabeoppxx|{{o4>F7]YRE'&4;?A6'4D-!"!!#%)(*)$&)''! $),9;((V^^Y8-068���������Ư�����ԯ�������Ɇ&OeT-p���}R(',,!$/��2%%'(+//,)(-9k�������������ҰV�����������c�U+@�����������bILWN?><<;=>>=?>=??>>===??9'&-/Mk�mB��&#$Fd[n5!=7()))/\�h:#! 
,S4--,*-7G\aL3+,---39=r��jqk�ü������Џlpqn<#&(*+,/_���n?'%&()&$'(+2CNUROJC?2'%&&*2?���Ѫ���ν���|zxyz|zy~�������������������������Βz��������������������V'Bb���s���uloy��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}~}||}|||}}~{|}|{{{}|z|}xyzwy|~{wutsrmZPMGFB>GPVVWZ[]^\SOJEA:GKMRPQLLPSZbceeghmoqsuxy||~~~���������������������������������������������������������yz}~�����~}���������������{lhlv��������������������������������������������������������������������������������������������������������������������������������~|wssspnii�����������������������������e���������}ytplkiimllcPI[�����������~�~{zutrrqrqprttpdQ:-y��������Ÿ����������ơ���~RPF8Idynd�Z)&)4EQRLA6CSVQJ7x����������������������������Ʋ\ AS1(**-,-)++.0/247638>?>7>@ [�pra1&4<<;4*6G,   !$&4JLJGA<<:2%#)0;8%#AisX-./5;���������ͤ����Ϸ��������ɀ$SmR,c����S"(&.3"$/��+%#&,IeR2))*5m�������������ҭV�����������k�@#?�����������^DLXM@<9;;:;=>>=<<<<===<==6&%,\q|�`B��#!%Ca��)>5"&)(2_�v_- /S/+,*(+3FZ\H9/-.-.28@v��dqlm�������Ħpflpm8!%%(+/O����vM-'((*(
#'&+3Qa^aRC/('%&&&*3?����Ü���̼���{yxwyyyw{�������������������������τs��������������������R(Ic���x���ngmy�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~�~~�������������������weXYVSIAVaabffjimh]VMG@>NSWWVTROSYbgklqtwx{}~������������������������������������������������������������������zz}~������~~~��������������zignx�����������������������������������������������������������������������������������������������¿�������������������������������}wvtrpnjm�����������������������������l���������|wvqlkjiomjaQJe�������������~}yvusqrqrqsutrdQ=.y��������ĸ����������Š��ڀPQD62=VgoY*%&1@GKA6/2DWWRK;y�����������è���������������ɲZEU6*)')**(*)*****,)*'(''#E> `m?ax5+;@KSA*>J*!"!#-Ly|zusnnbC& !)073%$)>5&56068��������������������������~(TnK'[����@()(10!$1��0%%'3Y]A*((*4p�������������өZ�����������l�8C�Ǧ��������ZBIXJ<:99;:;@HDDD@=<>?><<:2&'4q�{nUE�� !%/t�S!;2(&'),c��`#! 
/M/**,,.2DY]WH6,-,028Aw��cqr{�����������zmo6$$$'*5X\."7RT-%'(('#%(,:PXW`WB*$$&'%&)3@����˯���м}��|ytnoqvuliy{y���������������Ĺ�����|v��������������������Q%Kd���w��wbclw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ukiebYaggljikmnrk_ZTI@CRYZZXTSRX`jmuy}������������������������������������������������������������������������y{{}�����������������������ydgrz�����������������������������������������������������������������������������������������������������������������������������}{zxtsqmih�����������������������������j���������{vtpnlklnlj`SH_������������|wvusrrrqsuuuqhM;,|��������ķ����������Ɲ���zMPD2+..0+&%$'3;5,**+1CWWSK>y����������ȿ����������������˴THS0*)&''&'''()((('('&&$$ QD a���i#'0=OE0#AG*!!" #1Rqyv|�|{xJ%!   (+38&$%<<>M>279����������û��������������w)TgF(r����<()'.* !,��,&&&(,3/+&(+4r�������������ԪX����������i2-4H�ȡ�������~U=HWE:87788<Jq}yspY9=;99:91&)3`�pi?H��$J�o]. 
;1'$&&-[}z`# !2K.,+,,.4Obj^D0*++.07=߿dks�������������nh0#$&(+-Xgnmj}`'$$')&$%%+9KLNZY;)&%&%$%*3B���վ����ѻ}��{wgUS^kv]Pwsqtlqrtv|�������Ёs�����}x��������������������I1Qi���es^]\iqy������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ypmhfegghkllloqum_ZQO?DW[\`YTQT[fnu{~������������������������������������������������������������������������y{|~�����������������������yihpy������������������������������������������������������������������������������������������������������������������������������}|{zwssokhk��������������������ÿ�������l���������|uollklmmmj_QB[����������}}ywxurprstvuutpcM8,���������Ŷ����������Þ���vLPE6;<72,,'$(/1,++)+0CXWSJ?�����������Ƚ����������������ϲQEO1(''('%%%&**(%'((&&$!! $dL$CC'0'",>NM>7*9G+#  $*9CJXn{jezH%! 
"#(&>8(&KcNBG=88?��������Ըm���������������s*ReD.q����L,'$** "1��1%&%-OcP1'),2r�������������Ө^����������~q~=Ooh_�Ý�������}P<FTA678667?k�����w3:766882'(6o�f7J��# $Cm�q-=2'%&'5p��c!""5O/*+*+,3I\cK3*++,/28@��aop����������çtog/!$&',/T�����0%$"&)&!$#%*5JPPUO@<92($%%+4C���Գ����Ϲ|��{wbK@Oi}[f�quzvrtysqz������Љ������|~��������������������OH]l���`idb_^ihhpvheimtu}��|~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xmihhhgfijlnkottj^YRMCJX\]]ZYTX^ir{�������������������������������������������������������������������������}����������������������yijpz��������������������������������������������������������������������������������������������������������������������������¿�}}wvvwtqoljgj�����������������������������f���������}sgffklllkh_PI]���������|vwzupqrmmnqrututtp`N6.~��������ö����������ę���xMRJLpqpgc^A'(14154221FXWRHC�����������ɸ�z��������������ͭNEN2)(((()*((+*()*)'&&#"! #]]!)RW`hL'&/5749,5E,!$#!""4eoJTqG!  ""'+D7(+]{pUNME67��������Ҷ����������������o,]nH-_����?!'&+/!!3�($$%)4LH))+.6t�������������Ц\����������|oyF{��j����������}Q?FRA557647<n�����u06665671$&0=AfH<P��%J��T(C5'%%)-:Sud>9"4O1))*+-2DZW;**++-02:A��\pw��������ϴwiind.
"%%,23fryY(+)##%*$!"$&,=W\YZ[Y]XE+$'),3H����¸���ѷ}��yxuOKQjwWy�j����zvv������͜������y��������������������cZ^l���jsnlkmkK286+48?D4fiMWfouprvty�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zolkighiikmmmnssg`ZUOBIZ^b_[WRXalu|���������������������������������������������������������������������������~������������������������whgn{����������������������������������������������������������������������������������������������������������������������������rpnlmlnjighjdh�����������������������������i���������~ogdflmlkki_O@b���������rqpromnkihglptssutpeN66���������ò����������Û���tNTJPky��~S(*7@CGHEA=HZWPK<�����������������������������αMLR.)()**1212.1..0**''$## )��Si�u��_"" CK)!  " "8lsBWtE#  !()F:$$8X[e]eX:>���������ü���������������m.]iH,?ox��F1%&2.#$0�}-&%%,Rsn5*+/:u�������������ѥV�����������svA���j����������}T>DJ@424437?r�~���s1555664. 
&-1??;BO��"2=+# C3)&&)0f����\#:N-*))),3G^WB.++,,.39B�ߵ[mky���������goqoe.#"'',:]��{z�m%%$')%#"$(,8NULW^[XW9'#$'-5L����ţ���еz��zvbESSrySeqr������zvu������Σ������~���������������������lbXn��xdpnlkkeE61/4GK:.#36'/.146=;EQ]`bn�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������znlmihihjmpokmvujc^WPFL[\a`]UU\dpw~���������������������������������������������������������������������������������������������������vgis~����������������������������������������������������������������������������������������������������������������������������PJDJELHDNUbifn�����������������������������h���������}pifeknnkjj_SAd���������urnonkkkihfhnstvutpcP75���������ö���������������tKQE4+:\YUzY'->DDMPMMAGTROI@�����������Ⱥ����������������ѯMHP/'('-8WaVCYWKFKD3*'%#%!3��>(AA FF)" 5qoB\uC"#,*Q?$$Df\]_=9=��������ּ���������������d/XjC)/m���fE($,) #1�)%$%4o�m/'&,7w�������������ӡX����������}uo>��l����������}T;@F;211335=t�~{p�m,312442-#'4]wpwnV��!!!&R.
=2)''(-d���j+=Q+)(&(+5L__B.,+,,.3:C�߶[nr���ľ�����|lnlb&#%(',;������_!$$&)#!#%(-9PG@[VC0-'##$'+4N���Ӹhb��αy��zvcYY`vuY]pt������wtu��������������p���������������������lb\h��vdorljidH611NoTF,".9-+,+-7B77/,00@p�������½���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������znmljijijkopprurha\WOEK[^ab\WV\cmy���������������������������������������������������������������������������~������������������������vghr�����������������������������������������������������������������������������������������������������������������������������bYSKJQVUS\chel���������������������¾������f���������|riggjlkjii^QCd���������xsrolkljhhhgnsuuvuocR<=���������·����������Ô���xTRB1+5X\s�G$0=>@JJII?HWWQH=�����������ȼ���������������ѰC!FL0***0Q��ja�wq|{jJ+##$" +hUFQcpJ$!$#',(  JI-" "7ti>[rE""+(!bN)#9rmVK;79���������Ű���������������e/ZiC%2k���n(&$-,!#8�y+%&&,CVC)('+6w�������������ϡV�����������xrA���f����������zP;AF9101335:v�Q5d�\1222330+ 'A����aR��!'n+! 
7/&%&(3u�k<?V.+*((*3IYX7-,,+,.29=��cpg��������ѻxipp_(#$&),2SPPlaf;!#$%( %&(/AW`mj[?&##"#%'+8V���Ҵ����έx��z~fPMYpr][qz��u���pox��������������o���������������������k`Yr��neoplkicC208eh^_/"8;,,,,9Xmld8/.15Z��������������Ļ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ypnmkjjlklpqpqxukb[XOBQ_`cb\WU]diy����������������������������������������������������������������������������|}�����������������������shit�����������������������������������������������������������������������������������������������������������������������������g^VJR^eedgkjdm�����������������������������g���������|pgfgjiijji`Q?i���������wqpplklhhihjptsrtsnaM8:���������ú����������Ŕ���sUTA/.1FevT$"-;>EQKKF=HWVTG?�����������ǹ�~��������������ϫFLI1)(+7T�}jmotyyv}O*'&"!!!@2&h��yO'/00BF9)LE,  !! $':^Q8bm=" )(%zF&$>TE[bSA;>������������������������Ͽ].^kN%5s���B!&&.-!
$:�w&%$&(EWT8*),8y�������������КS����������{tl;���e����������yI;?A81.11139[Y74[v5121022/+");cU-<:V�� #$>) :0'&$&.XA\O BO-))'()0CVR=,*))+/26B��Zln����¾��Թwjos\+"$'(,3[}��kuV"%&%' &&&->R]UMR6# !"$$'+6S���ҽ����ͪ{��xrK;PexrY^oz�wo���vwx������������ܿg���������������������h^^q��oiopnkhbA1/>xbqf-96*,,/D��m4/11:^�����������������˄nwwwy{}zy{������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ypnmklkklnqooszwkc[VMAT`aa`[WU\dp{������������������������������������������������������������������������������������������������������ngnt��������������������������������������������������������������������������������������������������������������������������¿PSOVdiigffhkho�����������������������������i���������|riegjjhhie]MAh���������uqonljlkjhdhpvvutrmaP;;���������º���������������pPQA5?TRY\I5&'4@GKD@92JYVTGB�����������������������������өFLO0-5B@Kbfc^puhuys9-+($"I87V) (48@H66.
G?+" "%1=<@@4=ee7$"('$Q4#"%;E9IJ.4=��������Ѭ��������������пZ6dzQ).RbpN)')0-"";�r &$%<q}o8*),9}�������������МZ����������xvo<���c����������xH:=A90.0//.315<8243211/10.*"';jeKC=\��!#+N]X-?-$$&&-Gk�C!!IX*+)(*-4G[VA/**++/35D��bmo��������˞omouZ("$&).=������["%#$'" "#&*4OWRWH/$$#!"$',7U���е����Ψw�~ypYZiovvZbpw�e[���{z|������������ڸi���������������������g^aw��kfqpnmgc>10AjT{N*$52)*,/Dw��i.,.18c������������������{ptssontmgqurs����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqommnmlmlqqrswumd\YNEU^`bb]WX^er|������������������������������������������������������������������������������������������������������ojnt~����������������������������������������������������������������������������������������������������������������������������hlotuxumgfhgam�����������������������������i���������|oiegkjghjg^O;e���������srponkijjgginuvuuvo`O9=��������������������Ǿ����sSTDJ��{}�t?$$.<?<:679IXXQD?������������ɾ���������������Ϧ>HN,4XxkV\dVH\WNYhH:?<2'# J;*:&.:@3'-'DB*" !"-Unijhdfgd6 "))?0'$&FY\DGLJ?��������ѵ������ɮ������кU9lyK*0r���;&&&/, "8�t#%$$-DW<%'(,<z�������������ϙ\����������w|h9���c����������vD7>B5-,./0/5FrtpheM./////-($,@p|wL;Y��"&\��v)=+#%&&2a��?  
LQ*)()*,3=SVC4*')+/4:J��]lh_]agcaglthlpqq\)!#').8`XKXlo>#"$&% !$(,>_cc\L4&$"!!"%+5U���Ժp`��΢x�|zr[OU]pwV_nt�qp���vwz������������ܸb���������������������g]^s��{pqrrroc<//7UYn6) -.)*,0I|��b/./4:m����������������Ǽ}rtsrmoqchpomi��z~����������yruyzy{�{|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vrpnonnnoqqqptxvlb_XICX_`bc^VX_gs}�����������������������������������������������������������������������������������������������������pjpw�����������������������������������������������������������������������������������������������������������������������������}�}{{zxsmkig_q���������������������¿������g���������~jeeejkkklhaP=h���������wpqonnlihfggntussunbR1G��������������������ǽ����sSUPo�L4<D?*#&4?DNJGDAKWUNHB�����������������������������ҩ<"MM/<r�}v{urv{}]n~jhibL(#!bT+9-$&43,)(#UE)  ";l�|}~w~j9!
!)-L4((\zbTZmfK?������������������������ϽQ=inF*3t��u5#%',+ $@�w$%&&(=JF3'(-:u||������������yS����������w{c3���a����������~M5<?2.-/..1:x�����N,.//...'%6`��uB<\��!!B}�b%<+%%%&1f�J,NO(('')+4E]_L6+)*,.2:K��YklklmnnpplkooooqY& #'*-3OMCBGXD%'&'&"$'*/DW_abV5$$"!##&)5\���Ѳ����˝x�zwdSQ[vsT_mt{v|�yqw�������������۲f���������������������b[^{���ty|vh9/./LaH1(!,-+++,E��|O,/04>v����������������ǽsvtslllhpplrl�������������nI?CJRW\adhrqqu����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xrroonnoppssqu{wkbbXLEYaced^TS`jt|�������������������������������������������������������������������������������������������������������qioy�����������������������������������������������������������������������������������������������������������������������������sxwspssqnmjgap�����������������������������j���������~pfdfimmmlf\K<k���������yqqomnkgihhipuvxvsp_M6E��������������������Ⱦ����nUTNqe."$&)&"%/7FYSOICIXVQGD�����������̹����������������Ϣ6#NK1>k��sqq�{�sf}�n~��T'" )wD)ZnO30?<9;7#UA) ""!6kuget{qqh2"!(("O2$*N\T)%O>;C��������Խ��������������μJ;_eE%4i��xC$&).*!A�r(%&&>p�i3''+3rV>oknnkpmtxv|~KQ����������w|cC���e����������}K4<?5-,,+-/>�~vh/,,+.-,,'&Xx�}gB:]��!&[��\# >.%#$%$%PU(()()-5G\YF1*+)*04:J���blhilmonpmporonopV( "'(,;ui_ew�P!$%&&"$&',?Z`caP/"!!!#$&)6\���Ҳ}���˛w�xsZMV^rqQ_mjfdcdorv�������������جf��������������������d]cw���s����ug8/07KNB8'
+-,,+.M��}X-,/3B{����������������ǻ{twssnlnnqqmnj��������������f52525526<=>DHNk�������xz}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vqqnnooqqrtrquzvkc`WIG[cbcg]VZ`kv}������������������������������������������������������������������������������������������������������qjow�����������������������������������������������������������������������������������������������������������������������������jrn][kpopokg^p����������������������¾�����h���������|phfgjllmkg]K<l���������usrrpmkiihijqtvwwsobM9B���������·���������ɽ����qQRHffK4-/.)"#(5FXJJGAIUWUII�����������˺����������������̡6"MM/8Ua\Dgo_bplewrkzvtI'$ $GBLl�{cU/#18748/Q=(! #7ho=CnlJdi7  )')F1&EaNDC<07N��������О��������������ϼG9`h@&3o��f='(+1(#?�q&%''5QE7&&'+6uXBxwxxuwwutuumGQ����������|}_B���e����������~J6<>3+++*+,;��<(+*+)+,..+%)Um�swL>e��"(M`cC#":,%$%%(<QA?;'UN*')(*+/7==3+*)'*.4:J�ݗcmjljmpnnprpppnpuT&#$'))1:,7~��2"$&&'#%%+:UZZZL0#!!""$'-8Z���Ϫ����͙w��yjCGQRuuUcmkfbchnsv|�������������׫e��������������������}c]^��������wb:11<JOJ:'
)++,,0S���M/,/4@�������ä��������Ȼ|ousrnlqqprplo�������������g:434554365676=Z��Е��|nssutw~�������}z{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vsnnpqrsqrtsqt{uld]XDA\aeff]YW`jw�������������������������������������������������������������������������������������������������������}njpw�����������������������������������������������������������������������������������������������������������������������������mmfX]inrrokfbq�����������������������������g���������xidegjmlljh\K>l���������wrpnmkkhgghintwwvvqcN9@��������������������ʿ����rOQDRpvsed[7#&3AHXJKB9JWWUDD������������»��������������ȝ2"QL**.**+44.71:>JQIKDK2##!<Yy�uI�{5",6776,Q?'$6gj;Bq`:ek0 *''D/',\bbYI94;I������������������������εC;]sE%+EN>=,*))0($;�o"'&((.AF0'&(2tYDuuvuttuwwsuo@O����������u}]A���e����������zC5:=1,++),.9��=*-.-+*+*+,+&)A_o�}>:d�� !@��i*!1)&$'(?�����6PD%%'()+.1773,)))),1:L��Xjfk������yvolnsO%%&')+-5m�ȓ1%%#%'&$$$.Jc`_cW7#  !#$',8_���˳����͖z�~xpU^VWypMenidbcgotv|�������������ئc��������������������{]]g���������c:11;GJJ9' 
*,*+,0X��i?0..6@������̴���������Ƶ{vtsomprpppnnp����~~�����~a8344444356568;^��Ȗ��zla]\[`egs���ǂbgiotw}}}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tqonnprsrsurru}ukd[THE^beff^ZZaly�������������������������������������������������������������������������������������������������������|lioy�����������������������������������������������������������������������������������������������������������������������������ga]^^cgqrnjf]n�����������������������������m���������wjgcehkmmldZKBp���������vppnmkhihfdhnsxyyvscK2I��������������������������oOP@:q���g2$'49;F@728JXWSEG������������η{��������������ɛ.$XJ.**(')()()),*+-*()+'#!":-0Eo�{B 0>DMI3S>(%=on6@nZ>jd3#**'C/'*@L_\[LB;N������������������������ҷF@hvD&'+-1)/.)+-) #9�l"%$'3ZpX0&&)8oWFxwwwuvtuutulBO����������u{]C��]����������yC5<?3+*,,*-:��-,,,*+))(*++%(6?x|fI@i��$&b��@#1(%!%)@����m%D@'&&'(*0<OM@0))+*-3:O�ގcnl������������nrN#!%'),8v��kAT@%$$%&
!$&)/D\_ad]5!!!"#"$,:h���ԣp���͐|�}woUSG[|mVdid``cglnt~�������������ܦa��������������������u^[i���������b6/07CII2$#--+,,2Y��sK../4C������Ѿ�¤������ƴzvtrrlmqpmnljw��������������^6234565545579>c��ș��xeLGHKIQUt����nV[Xahnqqruu{lcdkpnsxwz�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tponopsssvwsrw}wkd^TJJ]bffe_X[`my�������������������������������������������������������������������������������������������������������}nkp{�����������������������������������������������������������������������������������������������������������������������������VLKOKZfpqnje[p���������������������ÿ�����~i��������wlhgggimkifZI<l���������xvtqoolkiifiquxzxvraO6J���������ó���������Ⱦ����jOOCD|�vXT<1$%,1:F;504JWVREJ�����������������������������͙1%TI)'*((&'((((('(('''(%%$%E*2[P,'1FQ9&"T<&&>ii3Cu^@k`5!
 ()%=.%!RgTYVZUCO��������͠��������������д@AftB'(.BLHFB0-2)B�l #%'*0HB$&')8qTDxuwwrsstvvvhGU����������yz[C���\����������xC3;?1'())*,/UC!,-.**)()**)'5=;WbkD?k��|#'Quua+#1&%%&(2WH1A_$CB+'&(+-7Qe]J6*()+.3<P�ڈfo���¿�����Ȼ�yrK"!#&)+7`jVP`d2""$&&"!%(0DUX]^I*#"" !"$+8l���Д}���ɑ}�}zvaOTsykWae_\cfhikr~�������������՞`��������������������u]Sg���������|`3007@F=-%$/,+++1Z���A/-/4E������ϧ��d������ȴuusqmekpomopnz�������������a4135556543479;e��ŗ��whOHHLNORt����w`[X\akqlkllhI6=@FHSUX\_dn��~txvvz|{~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������snmoopqrtutsqw~ukc]TEK]aggf[VZcoz�������������������������������������������������������������������������������������������������������|nkpz�����������������������������������������������������������������������������������������������������������������������������romeckrromjc[p����������������������������q��������~xsnjhihikkf]N?n������������|xtsqoolltwxwuwpbM5C���������·���������ǽ����cTQLb��w��rE$%0=GPG?:;LXVRJP���������������������������Η/&UI(()'&'''()'&&')+**+)('!'E*%n�d\D%$1CJ@7-(^>&"=ec3CnS;je2!
!*($;/(.R`bbZ\`?R��������з��������������Ӱ6AgmA*.Fn{rlcE73)C�f%%'2WcV/')+8sUQtvvtihjhpwri>R����������w~YE���e����������uC0:</&&(**,&4UUSPH4))(())*,B?8`xyK=k��y"(i��v& #2)%%$'2w���bD@(''((.=Wa]U:*()+/5;S�هglxſ���Ș�����wjC# #&%(*'%$&-6%"!#%%!$#&+27686-%%&#" !#*<l���ȟ����Ǎ~�|yvZLZlulTdhijlkikou��������������؜e��������������������uXXl��������~_71/4AXD.##1-,*,1]��o=..07I������Ϭ���������ƭqtsoknuxqoonm�����������Z313445655457:=i����xjULMLKPRw��ڻ|g]VW`immnlhbB=;8;=9<;><BU��WDKQZ]_dijpw���~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tqpqpprrtutqrv~vjd\TDK[bdfb[W\doz�������������������������������������������������������������������������������������������������������{mgp|�����������������������������������������������������������������������������������������������������������������������������yuqknqrmlje[l����������������������������{o��������|xsplhjgimlg]KCk�������������}ywvtupsuuvwwvqaO5M���������ü���������Ǻ����]RPD<<[wqiuC%+9?;<@D@=MXXSGJ�����������ɲ����������������̘.)SD)'''%&&%&('&')...,,*'& 'H(:|WBb[G0'3956,$S:&"<aS-/;.5fY+"*'$>/(#GUX^]WP6V���������ر�������������Ь8CglA*?l~qhbfVF<-!F�f"'&(?hjO'&&'5qQPzxsob^``osqlCT����������wzVG���b����������wD0:@.)'('(+4x�����@((''(((&86Hy|tECn��y $`��[  &5*%$$'.o��s&KB%'((&+:N]\N2('(+06<T�ԁ_l}�ÿ�ɸ|���ͧnmA"#$&))**/::,'$#!!%&!##$)1:<965544,$ 
""+=p���Ȭ����Ɖ~�|vh5DXevmZgjoporrqtu��������������ؔc��������������������rIQm�������z|~\40/<h�X0#$3-,+,1[��tF--07I������Э{~�������Ʃurspp}��vpooi��������}~���|X2113445446777;m������|mUMJHFMR}��Թ|maZU^hppnlg^A<;9;=::;;;@N��=485446:;?I_w�icionot}x{~~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}sqppppstttsnqv~vjb[RBJ]befa\W\fo{�������������������������������������������������������������������������������{~����������������������xkhov�����������������������������������������������������������������������������������������������������������������������������]^ZWS_gnomicXm���������������������¿�����wq��������~xupkjkjkmkf]HBp�������������|ywssrrrstvxupcM8G��������������������ȸ����dSO@4K}�etw0%0;6-02BH?LYXQHQ�����������ʼ����������������͖*&UI)(''%&'%&))*,./.*'&%%$0Q%A�fb�tJ&#.432'%O6$!-KK>914>SC%#+(%A.$`mULS^XFY��������֞a�������������Ы8Cdi?1Yy_=8EQYVL4D�a"%$&)7B9$%%)9tQOxutodkrpstuhAT����������xxSL��}^����������vE0:?/)((&'*5q����r0)''&&)'!#,4Xuxacu��t$`zxa)+;&$#$(D���R;Q@''('&*6J[V?+)))+/58X��~fpd�����������nhk<##$&*+-Ar��tF(# !%& 
#%&.K^[^]\\YO8%!!#+9t���ϭi���ă�|weN\M\xlWgmoroqsrvu��������������Րc��������������������i3Qp�������uyyY411@|�P*"$2*,.-2]qvw>/.05M������ѱ|�Ĺ�����ƥtrswz��~zrpnl��������}}���|V3223553147765:n�о���lULJJKNS���ָ�rf]W\hrqmgfb?;;;;GQH>::@M��=8675654678Ehd=DAAHOURV^bit}���}u{||��~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|tqpooqsuvusoqy}uhaZRFOacefdZV[gq{������������������������������������������������������������������������������|w{����������������������wfls|�����������������������������������������������������������������������������������������������������������������������������baWLHVdlmlhb\l���������������������ÿ�����xp��������{wuplkkkllicYGEo�������������{yvttrpstuuuuobL6G���������û���������Ȼ���_ROG^�x^koH%%.:4+.4@IBKVVPFP�����������̿��¿������������͒'*[E*'()(***,..-*('(&&$#!"5O&=re`U"!7B<=8-'Q5!!3]pmikkb`L)#--1aI./Q[XG5X];Z��������ɤ��������������ϩ4CioA6ng>*',6Unf=( ?�]!&%&6fkR/$%)@vPSzxvrkmpprvuj<V�}��������ywSJ��{d����������tC09;,')(%&&+)*212I,&&''))&(H����^r��q,h��]%0:&$##$=��mJ'M@)&&&',6L\L5)'))+/3<]��{ho����Ŀ�����chon>##%&'+8�����X1#!"%%
!&&2U`aabcdh`;""!#-?y���ґu���Á��}whULBf{hSjmnorqsvvt��������������Վd��������������������c0Rn�������trtS424>trK*%(0*,+-.A\ZL0.-05Q������ʹ���������Šlrtv}�t}|rpnn��������~������S2343355456767;q�к���mTGJKJPV���ڶwl`WZdoqmig`=:::Htq]D::>W��8546676467:Ce[><<;<;==<<:=Qw�ǏKT[]^_dhnnrw��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{sppqpprsuvrpr{~thaXN@P]aed`YZZgpz�����������������������������������������������������������������������������}upy����������������������ucnu���������������������������������������������������������������������������������������������������������������������������¿{zmTR\gkljga\r����������������������������vk��������zuqomlklnlkdYHAl�������������zwtssqrsrtvvvrcO5K��������������������ǻ���[PRK^O*'4-! !,8978:DG;LWSOEX������������ɽ���������������ϒ(,_A+*++-/0/-*(&%''(''%# !/G"-6)+7'*?;.076%
+,W3 =l{vrx}ws[( +(Y�iRGOM]>*2:8a�������������ղ���������ά4GomF>mR7,(+0YxyO0"C�^'&)BruX*%%&?qLQwwtpfghforrj7\����������xwSL��w^����������pC49;*$(%%&&)**++X�D'&&'''%'L�skM>t��o%Sm�V27'$$#$L�fT5J<*'&')0@U`R5)%()+.2<b��thm���������ϔeopq<!$%'(.RuUPK[T3$""%$!%%1ETY\]][_Y5!#%'.C}��Կ������z��zxvYNYqxfNkmnqqqsvuw��������������Ԉf��������������������^1Xm�������{utR621JieL,#(.+++.4OkaL6/.18U�������Ȍg�������isttz|w�zonon��������������}R345347:=97568?x�Ե���jPIJLNRX���ز{xqeXValnmkmd=8<@l�s]F:9:_��05699CC7667BfY@::;<<<?==<@Bw��b089=><<@EFPj~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yopppoprsturps{}qg`XNDP]\ad]ZY]hqz�����������������������������������������������������������������������������yoiz����������������������vjnw��������������������������������������������������������������������������������������������������������������������������������s_Xbhnnkhb\p����������������������������sq�������xsqomljgiikeWEBs������������}|xutssqptssuvraN/K����������õ��������ƺ���ZNQ@0@]_Q>)!!(4:?EFE<4NTRNEX�����������������������������Е.%[D),//0/*(($$&%&'(&&#!#!
,B.t�~�{=*89:167.W0Akn[SX\Y^D#)%Iqfi[ZdR5+157d��������ξ��������������ͪ+HnrI@bO6-(*1`rL-H�Z %$%,=<-%%&&?tLSvvupfdffpurl9Y����������uvNI��y[����������w<19:-'($%&'*1=:D��A%'''&&$*@Ldr?<w��l"?r^"06&$#"$;F@^9'"N>)(&(*/@S^K/(())+.6;_��vglg```i����Ɏiqpj=!##%(1Ol�g<r0""$&$$&%,?V]dfea`L*#$&).?���������пw��zykNQ\jvdPgjghlnopuvu�������������΁i��������������������V1Xm�������wwqL440KkxI*#&-,)*-6q��n;.-3:[��������ufr������đnuvsx���wlnng��������������zK12466?[\F:76:B}�ճ���~kRKMOQQV���جwzvl_[Zhkmkl`@8:EiQOaG<:6f��/67:PpcF657Eg\?8::=DHE??>@F|��X48:::99:::7[|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wqpproptuttrqv|}nc^UJCQ\`bd^ZW\fsz�����������������������������������������������������������������������������yim�����������������������tdmw������������������������������������������������������������������������������������������������������������������������������|xpicchmnkic\s����������������������������sl������yvrrpjigedgghbTC;o�����������~zwwusrrqpnqsqtsr^M0O��������������������ƺ���ZROAK��q_@&!#)4AOH<59LTSMDW�����������������������������э*,\C011/*('%((&''&&&#$#""!-B%9bYWnQ!0::./#3R1 Afd.$**-3'!($0IQZept[E:47:f���������ǭ�y�����������Φ&JpvG7UM=/*+?o|^<#!M�V!&$&*@NO,#$(BwGWuwtrlkjkrwql8[����������yuMM��v]����������y9/98-(&%')&1`�~���7#(''&'#)@t�i=;w��n5BO&36$%$! 
6i�od+M9((&&).<SZC.())'*.6;f��mfkjt}{����Ĵtipoi9!"#%)/J��I-lm$"#%'&$%%(8SY[YKC9+#!"%).B���������оx��yvW<K[myeNieZ\[]ajsxw���������Ǽ��ƀm��������������������N8Zj�������}xqL742J��J,#&-*)*.>v��f0.-26^������Τ^��������Ɠtwws}���smlmj��������������wG3247<k�aO;769E��Ӵ���}lTKQRROY���حz|yrg^]gpomo^?9;EiY[lI:;:f��,45C|~aL965Dm[?;;>Deo^C=@AI���V68:;;<<996;T}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vqoqplmruvustw{|od]WL@Vb`bb\[U]iqz����������������������������������������������������������������������������}u{�����������������������thmw~�����������������������������������������������������������������������������������������������������������������������������[\[UQQ[fihhbYp����������������������½����pg���|{wsqnmjedda``^]YQH>m��{}zywwywurpmmljjjjhjklmmmh\N.L��������Ƚ����������Ŷ���WVOKr�F5L_O/!!)1><<8;OVSNFZ�����������������������������Ї!/`G2,()'(''%(''('('&$#!  1N%>\DE -741.$0P1!Ag^* ")$.>:GA>YbRMFA?i���������Ը�������������Ф!Oqt@.DOPF7<g~oF,L�V!%%&4dqO($%)AwJXuvuodc[_quth;^����������xwDL��u\����������t</87,(%'((%@������7"&(&$%#'<cab@=v��j%R��t".2%%# $E����UI5'''&)/=Z_N9.(''*069f��jgn������������~jg3!!$')+*I;'3O- #$&&$%%/Oe`d\D-$"! 
!#(-D���������Թu��zua\mow{\PgeYXOPZduyw��������׵����}p��������������������L6Xi�������}vtJ422S��D-#*.**+0<p�we4-.38\������ǜ���������ďsrrrs��qnmmh��������������vG2346FlRR[:57;D��Ѳ���}lPNTTTR_���٩{|{vnielrros[:;;Hmo|rR;=;i��046OiJYT976HmX=;;Ae�q`F?@AJ�ĲK4:<=JUP=78;Yy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uponlhhpuuvvvxzym]WXL@R^`ca\YW^hq{�������������������������������������������������������������������������������������������������������rinv������������������������������������������������������������������������������������������������������������������������������babXQLV`deeaXr����������������������¿����mp����~ysqkkjhee``_]YWQD4f��}{yyvtsqpookkkjgffeedbcc^`YK4O��������Ž����������Ʒ���\VQQs_'1Yf:! %0;FFC9;OVSNF[�����������������������������ˇ)4aB)(')('(&&&&&&((%&%$"" 6L"8��sd7$4<CF:*2U.!@n[*"-'2=)"=]YPfdS?j��������ύz�������������СMlq=-;Wge_m�uP8'  Q�U!$&())-+''&'@wJUvuuogZWdsutd8_����������y{BQ��w[����������r<.89,''''&(3GIVc��4$%&$&%$(>dwnNM}��i&VvuX.3$#"#G�xX-H5&''')1@^cXG4)))+/6;f��ein��������ƹ���mg4""#)*-Ejog`X[. 
!$&% $%'2MRJZ_H,$" !!#&/E���Ԭ����Ӷp��yxf\jjst]Tgf^]XX^kuyy��������������tq��������������������G=Uh�������~wtG342U�xA-&.0+)).;s��j8/.07h������ɩ���������Êrpooy���tnnng�������������tG2345FT9TW9559E��ϯ����nYaYTRS^���ԣ{}|zz}tprpjq\=::Fy��sC>>@p��157LXN_Y;68JkT<:;Bk]TcI>@@I�íM:9;IxoYB77:Ypz������|{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������trpngbeottuuvwzwl_ZUIEP[`ba[VX`it{�������������������������������������������������������������������������������������������������������mgnv��������������������������������������������������������������������������������������������������������������������������������}yrdbdffd_[u����������������������������tr����~xtqkjhfec`]\[YWP?:j����}zxutroonkjiigedefb`aa`^ZL3N��������Ľ����������Ǻ���\QORl\%'+Vu= $1COQC6,8PYSOD]�����������������������������̄'4Y=(''(''&&''&'&''$&&$## 4M"MxNTr: *3D@1".Q,"?o\' !-&0;)$4hjSCB\R6o����������ÿ����������ʛJiv?'2MhqsyvM66%U�R%&&),)('%'*BtG[vvsnaRahpuse2b����������uyAS��tX����������x>276*&&%%$&''#$.��1'%&$%&$(X~�iWB~��g"Bk{c 35#""Gzc! L7''((*1AX`\H1))+*05;i��gmmi�����������mof2 "%'*1q������*!!%'$#$'1@ECWT<*,($  
"$0A���Ծ����Ѵm��xv_LIKjx]Pljdbb[_nyyw��������������lr��������������������A<Uj���r|��zrnG434U�vA-(/3*)*/<u�h5//38i������оwd�������onpr|��~pnonk�������������}oD2444>SZsO846<H��ͭ���uylcUTU_���Ң{}|���zokgfqY<:=Go�l?:<<;t��166Tjkv\>69LmR<:;FgYYhI>?BN�ĩF;9<Yo]YF:69DRmz��znjiintrvwy{{{{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rrqnd]dosstwxxzyk]WSE?S[_a_[VXais{�������������������������������������������������������������������������������������������������������phov�������������������������������������������������������������������������������������������¿�����������������������������������}|qnhjlhc^Yz����������������������������ri���zxsokghfd`]\[XYXTN>:o����}ywtronmkijhffcbdcbb```_[K/P���������ö���������ƺ}��XQOKiX+(/ak-!%3AGJ<12:RYTPFa�����������������������������Ǆ 9U=((*)*)'(()(''))'&'$#!!:O!]�{|x,#3AH>4'4M.">jT'!$0$-9)$16@lJ-/;;s��������˺��������������˛!
MlqB&-@bhebXD61#!S�P%#+OMGD<=.*IwN[vusqhdeepvug5_����������swAS��s\����������s6196)&'%%$&&*4.8t_&&$$&&%# +Mn�\?=���c&j��S56##"V�mc'!L6&&()*2AXaU;+()),/4=l��ako��Ҿ���ù��jore/""$&)1]wx���g%!"''"#$&2?GEOJABB=,"!#$1H���Զ����Ӳo��xvfZJSj{_Vold`NIbtwxy���������Ǭ���in��������������������A8Un��h���|tmG422X�xA+!55,,,-Bw�|i2-.4;p������аkh�������Ærrstv���pnoqk�����������~m@1436Emy{D756;J��Ц�����}dcZVXd���ѡy|~���ynibhrW9:<It�g?:;>>z�u/5:X~��W568MmO<<<Ghp{qL?<@N�ɥA:9<YXDaC87500Ud�~E:==FKLOSXm}kiootrtz{~��������������~}�������������������������������������������������������������������������������������������������������������������������������������������������������������qqqkcZepssuvvxzzhYTTH?U]__`\WV_ks{��������������������������������������������������������������������������������������~����������������pknw������������������������������������������������������������������������������������������������������������������������������hgbaY^ejkg`\Tx����������������������ÿ����le����~xslffeca_ZZTVUPO<:q��|}sttrnliihhgcab_____]\\][WH/P��������Ǿ����������Ÿ}��XSPCKM/5@VD !'1?GHB=?PWTQGd�����������������������������Ł!7Y<(*,10&),(),(*++*(&$#!!@I GXDF4",1447'8L+#<gT90/-+))#,!0:(%(@\s>.177r��������ȗ��������������͙"KlmE%0SxsntpdJ6$"V�S &1s������H'ItEZwvuuqkiisxtd4f����������s|CU��p_����������{6/79(%%#%&&->dELU= $$$%&%"(L~}fGB���]"Yp�X64%# AK`g!!K2$$'(,2B]aP9.())+/6=p��`kp�̼��ֿ��Əcqr_." #'*/?s��b~o! 
"%&"!#%3CNTZ]bhkO.!"$'0K���־����ӯq��zwskcesvXZjjcK:Lepwyz��������ز����fh��������������������;=Us��uj���|wqE721Wyo?)$@6**+,Dx��h1-/3;w������Ɨ{����������stru����qoppk�������������p>0146CbxmA7578K��Ф����~\ZeXUVd���՞x|��|{lfdfpT6:=Nxyj;::<>~�m169Ru}Y9337OqO<<>Lt��pG@>@S�ȝ?99:\adiJ:71-7P_wh254242174L��hAEVQRafdqqm}�{qlkuuxysvqlli__ceks���yv~���~����������������������������������������������������������������������������������������������������������������������������������������������qopkaYkrutuvuwwteWRSHCW]_a^WVZ`ks{����������������������������������������������������������������������������������������������������njmw������������������������������������������������������������������������������������������������������������������������������VNMHEMaigfb\Ww����������������������¾����go�������|vsmifb`a^\YSN=>r������~xokkjgefea``^YXYXZZZXRG1S��������ɿ����������Ƶ���]TQFPjige[G+ %-;EMPB@TZVRDe�����������Ĵ�����������������}:\9(*3MW<;=B0;/14++)%'"#BM!:F'#@dj`abYYXD##/"0?'#!3NA).26<u��������Ǜ��������������͔!JnzQ'3Wklswn^D%!Y�M'7�����Ň1'LyC[wuunacdcovsg1l����������{|@V��q]�����������3/:7)&$%''*<��z��n*$%$%&%!)S��qGA���\(a��;;3#!Jx�s$!F/&'(),2D[_ZE4*(+,18Au��^ji�ÿ�ˤ����zfoq^-! 
#',6x�����Y!"!%#!&(3M\ghjhd`@'!!"&0N���ҹ����Ԫp��wzzttsztYZjj\AKYcryxw��������Ҡ��¾_k��������������������:<Vx��sy���zznB624Mpi;*%9/)()/Fz��e++.4<v������Ȥ��������ÿutqr���}kopmn�����}�������n=1468CgxlJ8578P��Ρ����uajeYVVg���Ԙ{|~���{iefgrO7:<Nk�hA=<;8��g/77X{qH/468UpN>=?Gs�vE=?>AU�ɚ;:9>crrK862..Mczx95798869O���{U8;41FD:HGK�vB?CLXZ\UdtXELNA;<>@O��hTbophedfllnxz��~��������������������������������������������������������������������������������������������������������������������������������������pqolhgoqtsuutturaTPOBDX\\^ZWV[ajr|�����������������������������������������������������������������������������������������������������mhqz������������������������������������������������������������������������������������������������������������������������������qkbTPV`ggea]Uu����������������������������[s��������{xsnmjjjjgb\R??{���������|wuuqmmjihccbde``a[SF.U��������������������ȵ��UWQEYeq�zdE)!$1=GOF?6:V[XTDe������������������������������~;X9'0m��~���tht{aQ]KK "KS4?($Jozz|xxwrS!%-6=*$.G<3.46<|������������������������΍!On�Z&0EY^pq_imH(\�R",<������/$+Nv@\tutkdjfjtwsb4j����������x~?Y��qZ�����������82:4*'$$%&+G������.%$$$%&"(=ANK:B���]$i�|SA3"V��s$#@0&&&(*3BV]YD1))*-18?uۿ]ojv���������nwso`( $&*1YV37aQ ""!&#!&(2N^b^WNHF6( 
$,K����ŋq��ӥs��wwurqtxrVWkhVGPU`ryyz��������Ӷ����dg�������������������~3<Y��������xxn=413GdT1)"/.+**3H���Z/-04;}������ˊ��������þ~uspl��vklmkp���|���������k62467JnxcB536:V��̝����{zzeWUZj���ϗ}{{���{ggggmM79<Rs�iJ=:;=��c08<ZxnS8676UnK===Isj>==>AW�ȕ?::Bd{~[8772.0_n�q2=MGD?7F�����wJ;8:BFA</9l9*-00888X��cJLN?;?=1B��IBB:8DGD?A?HZfo^^psnprz�~�����������������������������������������������������������������������������������~������������������������������������������ppooqnprrrtsrqtm]QPM;?PYY[XVUYbjr|������������������������������������������������������������������������������������������������������|ijqx������������������������������������������������������������������������������������������������������������������������������{teWVZbhhfc^Wy����������������������������jv��������}wvpomllnkjdW?@x�����������}zxtrponlmlpqppqgYG3_��������ȿ����������ȴ}��TVN<2-JbB&$"!'<JLJ:308Z]WSFj�����������ɴ�����������������{<V6.>����������������}"FL7>'&Hwu`]^bjpQ!'2!48)%.RZTC/355������������������������͍"Op�T&5YvwxpqyjN.  ^�I *+l����o(%,MtC^xttnijijstrb2j����������vx9Z��q[����������|;5:7*'%#%&,K������,&%($$$"%'$.89I���\)s��XH4#Kz�q%G/'&%)+3BW[K6('').17Bvݼamq�������������o`)  #&)/Leb]MPI%!
#&$#%(8QY[^``dbJ,#/J���Դu���ѣp��xtid^jrvVUjgUKKJZtxvw������������ͻ^h�������������������w2DX��������~yl=537OdY3)"91+**0L���X.-/2>������л��ɦ�����»{vsmi���}nnpor��~��������i5/357?]oeH524:V��ɜ������}^RV^l���̐~|~���xlkegpK79=Vw}l>:9<=��W16=RuwN=75:VlI=<;Ku{kB>=<?X�ǎ59;Ci{c678750Ln~�f:VUVQEBw������i>CHSZSG.%<.-.1498W���tP@?:?D;(Fp@=6.>A:72.4TU=6/47;@=NdSo�wktytpuvt|}{~~��~����������������������������������������������������������������������������������������������������������}oopponqsqrtqommfVONH:<KVWWVTSXbls|������������������������������������������������������������������������������������������������������yekpz������������������������������������������������������������������������������������������������������������������������������vrf^\`fiffed[z����������������������������rx��������~wtpolkkmojdV>Gy�����������}|zxsqqomnoruwvto_I3_��������Ⱦ����������ʳ{��RQN7-1Kg<%&""$.;CFA=6=W\UQDm�����������ĩ�����������������v=U8/;�������ź������΁#C?5;#'Jt[('/3VgO#(/"59)&E_idE.256�������������������������͍&Ss~B(7[vh^kwn[[4!_�G!*=������3&+NrC_xruskhfgqspc7n����������ss;[��n]����������{7/<6(%&$$('N������'&&'%#""#&',3<K���U/v��WF-""c��^%F.&$$'*4H_`F0''&+.18>|ܹ\mm����������ʲ�p\&"%(2�����r!
"&# $'*=QYX\d_ecE#$2O���Ψ����Сu��xqTH=[rpZXkia\TJ\rvv}��������ؽ��ĹYm�������������������q*Ec���v�����}g=524c�X1',T2+++1K���M+,/1=������з��ӥ�����øtsqoq|��|llmpu�������������|d30239Q��}L536:X��ƞ����x}u^MY^p���ʍ{|���{igbipI8:@[~�_99;=<��L37;WzwP=99>[mI=<<Jn�oI><<@Y�ʉ68<Dhsf@88768V���pJVS\cYZ�������vWWMNTOF&&:4/04:AK�����oC7CFF:"/oY0/,,540228o�rC1/-(-5558v�CBPPHOPRTZ_ehhonlmt~�sqvuw{|{����������������������������������������������������}�������������������������������������|onnommopppnlkigYKKKE88COQURRX[ais{�����������������������������������������������������������������������������������������������������zkkp|������������������������������������������������������������������������������������������������������������������������������c^YXYZce_^dbXz���������������������ÿ�����nx��������|wtnmjjkmnjfT@C}������������~|zutrpopqquvxvm]H3[��������Ⱦ����������ư~��QSN=>NhtN:8&! &,4=AB>@RYTPAj������������������������������s@Y8.8o��������������^##D=0;  'NvO$MqO" )3!68&"&LR;(,145�������������������������ˇ(Won>'2KhYTdhVW]-"\�E!)?������)&,Ps?]uvvsnolottsa6n����������uo7[��q_����������v41;3(%&%%),N������#$&$$%$!#&(,5=N���O/l�x?#&B+"$Jy�T"#@-&%$%(3J_^E,&&(,-08<�۶^mp��������Ҿ�}opW# "%+3i~v|��E"&#!#%,9MURTSIJL.
"%3R���ո����ϝu��xrgMAmyoXWlmohYPbtxv���������Բ��϶Rh�������������������o5Ng�Īm�����}d?416ad4,#1S/+,,0H��Q..01>������ҿ��ʪ�����³rrrrrv��zqnnnr������������|b32325L}ud:368;\��Ɯ�����sUPX[u���ȋ|{{���shffnrH::?O�f;:::E��K58>gztO657=ZhD;;=Nw�qL?<=B\�ā79<BcyiC:767:`��ʚcOIc�xl��������\GMMILF&+E802>GE|������]ENLN5SO-+**+,027o���b;/2./889Jk\34689656;B@:?BJHIP_nYPYZ^afkonnmnrxrt}��~z{}}������������������������������������������~��������������������������������zqplonmnonmhiigbSKJH@.2CKNTTSW]djt{�����������������������������������������������������������������������������������������������������xjkr}������������������������������������������������������������������������������������������������������������������������������TNODGTdbVUb`^~����������������������������p|��������}xvolkjlpqmdUCDz������������}zwvrppopouvvwn^H1a��������Ǿ����������Ű|��QSOLr��rtZ* $+05;@@:=OYVQCn������������¿����������������lBT6-/*//*.1./15]M?VQc*$#OM7@&OuO#*MtK"!+7$7;(%8oT<2./4:��������������������������~'RnuD'4TldeoujWH'd�I")+q����e$%+OsB`xuslddbgpsr\4o����������ui:a��s^����������u-0=6($%#%',G���}�Y$&%$%$" '1DBCNM���M)XevE*&@- !X�}=#A.'&%%(1F][A+$%&)-19?�״Ymn����������]iooT#  #%),Gg96ga3#!!#&"!$*;SYY\REI:" 
%3X���ˍ����ϖq�~xteFSmtuTUnomk[Wctw{�������������ͲG_x������������������l1Ph�Ĩ������~b8306A;1,#,9+**+2V~�M+-/3E�����������������¯mrtqo���xjlolr��������������`21217Ue`T9566;^��Ǘ����xxp[TXXw���Ĉ|{{}��wifkprC::<Z��c::;=D��D28>d�pB556<[g@;;>Rx�kE@>>B`��}4:<B\ueC:68:E����{N?���t�������oFV�yMXD)5[:28TPH�������rICDD*QR-+++--02`����~Q5A40AC]b!=@325;:667IhP835;98;:MADKLJKOOSSPUZ`[PUt�ea]bipzy{���������~���������������������}~~�}~���~~������������������������������yqqnnnnlmmlgiif]PJIF;(,<GNRTUU\ckt{����������������������������������������������������������������������������������������������������wjjq}������������������������������������������������������������������������������������������������������������������������������mc_\]cih_adb^}���������������������¿�����qz��������~vsomljnnnmdUAFy������������}zvtsnopssvwwvq]G0b��������������������Ű{��NSNANXJIN\:#!)8@COMG>=OWSOBp�����������Ŧ�����������������o@U5,/Bty\Y]]EN4BA09(+$!"
+`O39'KpX3"#4[xH  +3$9<($7NYT<-.3;�����������������ġ�������|&Qp{A&0CXVjw~^@%"i�E")7�����y1$%PtD`wush`Y`cnsr^7n����������uj7a��qV����������k'0<5($%#&%(/LQH>J. %%%%$$"".U��zX���K.f��>D018##>,'%%%(0EZZA,%%&*.26D�ܯWkdx���������fnrrQ !%)2th8��X?"!$(" #%)6JQSTPKPA/"&1Y���˨����ϒo�|wmO@I[uqR[iid`QM_vwz~��������έ��®?Pp������������������j0Vl�Ǯ�������`5323E^N1#%-***).Lx��G,,05A�����������������îprsogz~w{mlljv����|}�������}]21238Zmj`>537;d��Ê~~zx~�|`SUUy���{{v���xbhoqpH:89Zv�^D:99F��E28>e�rH866=\cB=>>U{[>>??B]��x9:<Hozh<:88:b���ٿ�aT��y�������7A��}GGD,Ns93^�pb�������t59KI(aO++*-115J������sXJ?>JNdC7;503:755I|�cG57=:6649>EHKIELNORONMMB""WiK?9<LU^\j{zxyxv|{{�������~�~��~������~�{~~~}}~���������������������������������wopoonomnookihaVLGD?3&(>JTYWVS[bksy~���������������������������������������������������������������������������������������������������wlkt}������������������������������������������������������������������������������������������������������������������������������j`]abcimjjhd`�����������������������������m|��������~vqljkopoongW?H{����������|zyupolpoqvwyxwq^I7c��������������������ȱz��OTMIbaM8'%  (17AQSM<<QVRNAv�������������Ƚ���������������g@T6*6�î���������y�w|G!""bJ 5:'JohO@:TrqC *3!E>'#%=G_>*/4;�������������Ǵ��rq�������r&Tsp:'-ColxxtjJ7$g�<#*I������-$)Tv=bzutmghkkstv[:k����������xl3d��n[����������l'2:1%$&$$%&')'()R_("#$#"##%/g}r}kL���I(Y^/#P+#<,(%%%'.?QW>+''%).17@�ݩ[os�����¾���~moqJ"#%)9^Fq�ePN#'"#%%+:UVYZZXWS<$ (3]���ѷ����̎q�|zp^QSgzvU\hbMECFdvxx���������ɭ��­APk������������������c)Vq�ǥ�������Y5539e�U/%&.+(*)-J���J+,/6H�������ƹ��������¬prsni|���nkjkw���~|{�������{]5234:Occ]5546;g�շaSRZ[|�uMLOP{��ܾ�|vm��h\fnsoD98:b�z\D9:<J��D26Bp�rG765=`bC=>?U��b?==?BY��o8;;Fm|_<88:=h���ɻ�sI]~��������kQl��UBCM]}�=>���a�������i0e�a, 
M~^-+,3DH8q�������eLEEHGC&=<300223E}��u]>:>8797:;BHJKLMUgePKMH;"My9.--1.0;[^SYZ]^_djjqu|��y~~y{|~}�������||}|}~}��������������������������������snonnnnnoomkieZOD<75-#+DSY[\XU[cmt{����������������������������������������������������������������������������������������������������wknu~������������������������������������������������������������������������������������������������������������������������������VPECFQailljc\�����������������������������j|��������}tjeeknopoleT@K|���������uwuookjkhhilquxxxwp]I/a��������������������Ȯy��RUNDSnujN6*"%,<JK;08SVQNAv������������ƺ��������������ӻc"DQ8/4����������������N !&_I&;8"Cextno{{b0!*7!B:%#>SSYD1/6=���������Ѯ�{e`_ekz�������o(Vrw<(7e�{unaUQ:%i�>$(8�����L"'(Wt?dvutrqsrqsttV9l����������ui3a��m^����������u-37/%$%!#'&),1-F�~($#$$##"#-KJ=PhQ���D M"  #=-'%$$'.@VW=+'(&),0:C�ۦYpk��������пqnpoJ#!$')8We��9vF"%!$$+7MPTUVYYS4#"(1^���ؼ����ˊr�}wqTBUiusSajeUQYdnuxz���������ι��ŪMTa������������������b(Sr�ɟu������V563<p�X2%&1(()*/Z��z@-,.5F������β���������¨lsrpp���}niidz���}{~�������}Y5115:LUn]=634=k�ӱeWWT`zzkYXTU���ۺ}ypn�~{nbekqnC;:<f�uV=:;?K��B56BhznJ:65=b\A;:A]��`A<;>@`��j9<;Gt|^=89;=\n��ǳ�r77x������������p0@S����RN�vq�������2K��^/T���S02ApyV��������V;=KD80#@800235<r����oTBB<=<=<;BGIJKX{��jQHG8mw,/2332/21)+/278:>GPY_{�idhmlpqwvyy����|{~~}~{|{�}�}~������������������������������rqomnnpoookie\H?5012,&5MXY\\OUZblsy��������������������������������������������������������������������������������������������������tgpw������������������������������������������������������������������������������������������������������������������������������ge\WWV^gjkjf^�����������������������������e{��������~rieejlmnmkeV=I����������xtrqmihhfcdhrtuyzwoaH-a��������������������ǭ{��FUM8-@u�sfC&  '6CD1')8QXTPBv�����������¨���������������Ҿ_DR7.5�������±�����ðC##$\E *G9,Oix~~{nP+)2
<:(.[ZZ`\M@==��������ӲhWcefhq|��������j)Wmw='4^ifhigfX>&#m�?#+7QB,(" '',Vp?gxvtsssupqtrV2n����������xk4e��h^����������u+181'$#"%((+Me_u�w!$#$%"#" ,Tkz�kL���F B"?2"=-&%$%(2Ib^H6,(()-4<D�ڠ[or��������ɜdpqoK$ !$')1Qsr@<Q$!$ $#+<QSUTRSQN8" (6d���ˀ����Ɋr�~vhAEVZjjS`kniccfovvv���������ǳ����GGU������������������b&Z|�̡{�����vQ971?v�W0$&.)'()+3J_R0,,.4G���������r�������Ǥstqpiwx�tmih`x��~}{~�������~Q01029`�}Y:336<p�е���|}������|���׷xsin}�}ocenupA9:>p�yI:99;R��;77Fu�gE865>a]@::AYz�`H>>@Ai��f7;;Itz_<::;;_���Į��@4t������������B5Eu����[7Hm��������cPy��GE��ɳx<7f��i��������74p�T90#(D924:E7W������eNICB?;:<BGKKM����}cMH8jS/0AJE?:1''*,.1..0//.@xR:BFMORYad^h~uppmuyvsvxwz{|{y|}~}}~������������������������������smommnpnnmjf^E01//11-+;PZ\_[QUZbjrw|}�������������������������������������������������������������������������~�������~����������������rcnw�����������������������������������������������������������������������������������������������������������������������������xpjkjmmjkkdV����������������������������h{��������qfcfiloolkdU=G����������uwuunjihgdehqvxzyxp`H.[��������������������ȭu��NUL9.Dtt|�P%!'7JF<1+0@SYUPFz�����������ǲ���������������Ի`IR6.2L]QJY_[vc������|0)"!YA )?5+HZdeUPQ8*1 E;('>^gMB?��������չ��sp������������k,Xos:(/J_Y[_^WA6#"k�>$)3Fa`]X=*&-OeCdyvtqkmiiosoV6l����������|i0l��kg����������t3171&""$%%'L�����v &%%&##"+6f��iT���?#;8U"%<-&&$'*3Nc_QB-()+05;?�۞bnj����������loprJ#!%&):\|phfkE #% !#*=QY[[`[^U5 
)6i���������ǈ�}wfHXJJgjP_klb^]br{v~�����������žš1@i������������������_;t��˟v����zpN:62Br�T."&+''))-+,00+)-07Q�������͇bb����ɾǜsprqn���yjec_����}|�������}R03333P^JP>677;v�ι���sv����������ٲsogdwxu__dlul?88<j~wJ;9<:V��56:Fpyd@767Ch\@<=Bb�}]F?@ABr��a4;=Nq}\B::9;n��й���L9p������������76M��x��e05i������������T6Aj�ƽ�XDt��|�������a$l��W;5,@d?59PkOe������rOHFE;78;BHLNo�����pOF3UN/Dae`_X7&%)*-1../1/+%7220155869?&>cRIFI^_^`gkly�{rquw{�|~�{}��������������������������rmlllkmmmkhbR5042012/0DU[`_ZVUZ`jry{~������������������������������������������������������������������������~}������~~����������������pgnx������������������������������������������������������������������������������������������������������������������������������{|wijosrmlkc[�����������������������������e{��������}qdcgjlnollcSAL~���������vwsqolkkfegiqwvyzyo]G-a��������������������ūv��ORK<Jv�xbA)".CJGFA=<DYXSOF}�����������Ȫ���������������ҵ[HX3,-+*)+(&'+%+($)%2)&'!#XA#52 *0+/So@ */G8(  $,>D>E����������ȹ��������������h+Wsk:(7_yokqjZC5&$k�>#*:�ƾ���.%-Sh>dwxtjadbcptpY2p����������l0d��fa�����������=390%#$%$#(Gt{��o &$'&$$""(-b��sQ���<
+&8$:,&%%')1GTVR;*)(*-4:F�ܚ^ru��������įnopoJ #(/M������L"&  $(:RYYUO</,"!)6o��ּ�����Ł|�}viNO@\rlS\jgZ\Zjrxw~�����������Ź6c�������������������^U���̗t���yqlM953Bn�S.#&-'*)*+,,+*)*,07T������вa������ǿęssrmv���pjgcb���|}�������}Q12469H]lj;469?x�Ϻ��n7CV|��������ڬljdfnxwb]`guk>:;Bi�{J8;:9\��288K{[8766@gX><>@i�yU?<?CDv��[5:<Kst]B:874Ev������qVn�����}�����z/=]��r��v55f������������94&#o���tRH_��������RQ���<<>Uk{Q2E~�`p������s9Nh_>66<CHLM}�����kLE2UN1Zrsrsc9*(+,0:>:992((000/47632*&H;.5GC87>?>Sp�bY[]jkkjnllu~{{}~{}�������������������qmlkjjkmliidK2370-3:94EW^b_YQSZ`hrx{}������������������������������������������������������������������������}~�������}{~����������������phow|������������������������������������������������������������������������������������������������������������������������������txmVblqsonld\�����������������������������g|��������}qdbejnpookcQ?M����������ywsqoljigefhquvvxzn_F,a��������������������Ǯy��PSMQy~N4%#  "+9;BGC?AERRROH~����������������������������ӵZJV/,-+,++))+**())+*)''$"%`A1/%#$$#0]q<+6#F;($").15;E�����������������Ƽ�������e*\uh7+;h�oo|kcY?$&m�8$)X�ʱ�ș/',Sj<myvuonifkrto[4s����������{f/l��eb�����������B490&#%%# &)&"%W�m#&$'&%%!#1h��gRM���=,:$" 3+$$&')+2541*%'))-3;E�ݓ\nq��������ӬknqnD!)-Ad^n}oc(#' )@[XY]L1'3q���ɳ����ƀ}�{ynZTYmykN\le[a_jrxy{�������������ğGu�������������������[Q���͎y���vmlI851>s�L*#(-('(**..*+*,./5Z������̡�������ƽÕrrpnw�piecb���{}|�����yM0146>nxx]8357?x�е��U2Th���������٨gfbh{�q^Y_byh>9=@d�|?;;;<a��587Mw{];547EiYA>>Ar�{L><=AD{��V8;<LwqW<988.
/`�ܿ���fn�����|�����n4`���~���IDc�����������r32)N�����S0B�������������;4;Xy{�h;L|xju������QBz�i@87=DIJPhx����YLC0%TJ-`osux`9,(*0@YYRQT7')0-02<DFB@,(Z95`uvnj`*.ZS.08<AABELG@mzdjf[cgjjnpl~��y}���������okjjklmmllh_G731+09A>:JV^`^XSXYbipx|������������������������������������������������������������������������}}������~}~����������������okqu�������������������������������������������������������������������������������������������������������������������������������sqf]gjlopoke`����������������������������j���������{ogccimnnplbR9M���������zvqrpkjjgdeirzwwxwn_I0e��������������������Ƭz��NSMGB?$*57+"!*6=DF@?@CRUTQG�����������¼���������������Գ[RZ2./---++***+,(')*(&'$#&Y<  :6 6KMICAIin= ,=#J;,/*),0028?G����������������ʼ��������c/\um7(=_hI\gRff?" "q�<$-R�����q*-XlElvtutqnhmrrtW5q����������~e1l��ei�����������@3<1%%&%$#&&'')N�]!$#$%%$!!-OjZdcV���:1=7=352"1&#!"&(+-4>7,&&').3:H�ًej{��������ҧfprlC"#&-<\jxmN`8!&!(9GDM[R. ):x���ֶ�����y��{vfTSVixkT`e\ZW[ckwvz�������������ĘM}�������������������SEz��̍}���~nkF761H�{M*!&+*)(*,?K<2,,./5`������̙�������Ż��rqoqu���qfcab���~�{y�������vF00238=HcU;557@~�յ��U5RiUx�������ۤdhb[rid\W`j{f>:;BirT8;::=e��49:Lx~b=547GdV@=>Bh�xT?=>@B�ŰS:;:\|vI::;:28|���ø��v������������hQ��������u]b�����������b7:C�û���T/C������������`2RI��x�tMAWlww�����oEb��K;9>CEIJO\ksqqlOJC0([I7httsxY3+),3Mgndl`3&&/-2;WaXW_:)R4R�����y$*UC)+.0020.-##cj749@EECD@*:�}LTcilmqt|���|okilllnnkmiaN;633>BE>;NZ]^^XTWWaiou{~������������������������������������������������������������������������||~�����{|~���������������hjpw�������������������������������������������������������������������������������������������������������������������������������e`[^``kqomjc_~���������������������ÿ�����l~��������{riechnnnljcS?I����������wutsolljgfeiqwwxxwpaI.d��������������������ĭ|��JNLIfV*X|nL'&6DELRIGCDQSSOC������������ɿ���������������׶WP\D;68640/-/,++-+++*'()%)T;%$>1+Y{tstsruk;,5
H@QhcbachbXHH��������������������������a1\qf3*:ZW?VM;hg7#"s�8$*=�����m&)1`oBluuul[agmssoP=t����������x^.l��fl�����������73;2''&%%$$$()'=z`"%%##%%"#0OY`q]S���7+5?et�c &>&#! #(.<XVD0(()*.5;L�׌oll��������͢orpo=#(/P������B!&$'.71AL4 &9y��Լ�~��Ӿw��{rbF9LeuiU`cVXSVWizy~�������������Lz�������������������N;}��ʎ�����ojE872N�pA-"$+)**+@tqeQ4,/19e������ǖ��t����ŻÑrqqqz��ydaccf�����{|�������tE/025>RflT8359E��Ҳ��]8>]a��������ڢjg`_puw^Uco|`;8:FZJ@B=:89j��179Jo�V3658IiW?=>Fk�}R>==?I�ĮO<:<]{rH;:7;C�����ú���������������sz���������fe�����������X>Qa���˹�W6C������������SV?`�mn�{J8=q�������}���_8<DMDEHMNXijhdWLJH-'\C6fqtwuX0*(,4RbccoW3$,2/3C[ecnk3/J9X�����n$(Q:,,5;<;;7.*y`)5^ge\`N'"LD&+1277?Fc���znjhijiknmkfbUEB@BFIH>=Q\\][TUWY`jow{|�����������������������������������������������������������������������}{{�����}|}~~��������������|kgnz�������������������������������������������������������������������������������������������������������������������������������[[NMP\psqnhaW�����������������������������k{��������~shcbillonkaSCF����������uvuuooolgcdjsvwwxwn^E+i��������������������Ȭz��NSP\uKI���`')9?<EKCFCAOTSRE������������Ȳ���������������ַR$f�qihca_XTRQLHCC@<98384.'%T4**#@3-Voqrutwvn8!+3?>Siy�����fED��������תp~��������������^0Yme6(6KUOTGIZG0""z�5'*_����.*2_qBmvvthclfluupM<v�����������^1t��ci�����������2580'($&%$&'-(2z�]#&%##$$!$2^flqko���4(3a��}74L+%" #'/D`bK*%&)+,29O�ևep~��������؞nqoo@ "'-L}qw��o$!% !#! ! 
'?}���ɯ���Ҽt��zpfGW`mwgT`d]\[[^mx{���������Ӽ���ʌY��������������������KN����}u���}miD<62Nmf@+"'+*)*-D��wc3+-09h������Ť�x�����ļonqq~�}vjeege����}z}�������o@.137Fo{lB4458G��ҭ��pZdt~��������ءifak�wUVen|_:;>YelnpC99<o��169Ppc;5678JoW<=>Fd�vH>=<>C�ȪD7:<[wwL:89=I�����»���������ɿ���������������|������������VWv_\��˸�{\?������������B79��rn�W5<o�����������>8Gam@EIJMNQSTSJLJD)-d@=eotwrP.)),4UeebgO2#*0/1E[abld-3K5d�����^%,G>.4IOPLJC1(wQ,V�����g#!F=.+.0-,//Y��ymgfcgkjklmjfe^SNLLNLG><MX[]ZUUWY]fnwz{~����������������������������������������������������������������������{z|�����}}~~��������������~mgqz������������������������������������������������������������������������������������������������������������������������������Ä�umkpwspmic[�����������������������������i{���������rgbcjnmnlkcPAH����������qvtrooomhefjquwxwyo^H,i��������������������Ĩz���JTKWkg�yDoh*(674?A6@<?OTTQA�����������˼����������������ѶP#y���������������yvusgc]\I#(L2-+ F.!6<?BNOWgi507 =9Gz���tpaGE��������դ\[Va`]]Ty�����мW/\um2(7Uk`^efbF."(v�/$-\�����u *1coBmuushgmbjtutQ8s�����������]3n��fp����������|1590&&$&&%'1Qc���%"'%&%%% #,D~���t���1-7PtYG34A*"! 
"%,6GE0$!#&(-2;K�҇flkuy�������shpqm;!&+?R@)/!$  "(;����ӷ���лm��xrdW`[o~fSckbY]^bnwz��������������ȅY��������������������E[��������~ph>961U|pD,!)-)'*.@ds�Z)+,16j������ʢ�������ļ��nomo}���jdef_���{{y}������}l=0238@WVS?4579H��ҭ���wu{���������ٛ`a]\ajhSYdl{^;;=V���c<:;>t��28;KN88965:LlR??>GjpO<?><?H�ɤB9:;Z{~C999:P����չ����������ƫ����������������������������g��GF��Ȼ��yA������������6=i��bz��e=<n����������z9CX�jCIJJKLMKLLMJGC(0d><dnpsmD+''+3S_bcaJ.$+024H^bchP,9S4o�����U",NJ-@WYWX_U3&cC0[�����\$H=,+,11237Td]ec`_]decdfihgha[XSNNLF>;QWWZXTTVW_gmsw{{~�������������������������������������������������������������������xxz}}~�~}~}}��������������yliqw������������������������������������������������������������������������������������������������������������������������������Ç�ytsnvvpnje_�����������������������������e����������sfdelpmjjjbR>J����������qvrppmmnihgjquuwwvn`J0l��������������������ũz�ޘKTIKq�k14j@!%.+)33/64<UWRQE��������������ɴ�������������ԯO ������������������������o-(O1+&
+A%!!1bo3!16>I��������~SG��������ա����|{�|������ѼY.`vo0(4HQVbuvoR,! .r�3$(@�����k)(.goEqzuslda`mtvrQBp�����������]0n��bb����������z889/&%#%%&)^����5#$&&&$$!&<}�{TYh���277B����O3;&"! !$'+/)%$""#+/3=M�Ӕtrtmkfba^Z^bputrj< #(+Oo[UC3 ##!$" "$ *C���Ը����Թn��vt_N:DgrjOciUKKM[ozz{�������������Ã]��������������������=T����t���ymf=95-TysC*!),())-5���J(,-19q������ǥ�������ý��qpnpx��weadfb���z{y}������|i?3448>`�{M537:H��ͪ���������������Փ_a]`rwlY[fo}\>>;Iv�vB8:8;u�}09CXfgkZ569LkU:=AI\SGLC?>CN�ɠ9:=<`qZ:88;9Q����Ѹ����������������������������������ǡ�������a��������h������������:p���`���tQBp����wm����l9Mg�_;GJKLKNNMNOMJB);b92Ldjk_5*')+5Q]bgjM/",026J`bedW*@V6r�����\#3]G,APUWWaM/'K9/`�����K &G7+,5>>@<6<BLX[WPSVYUX[`aimea`ZTRPJ?>RWYX[]TUYahnrssuyz{}�����������������������������������������������������������������}xxz|}�~}}{}~{��������������xilry�������������������������������������������������������������������������������������������������������������������������������bXSVT]nvsqmg^�����������������������������l}���������vffeknlkkicR=L����������pqqomljligfhrvuuvxo_G0k��������������������ťz�ۏQUJ8AK:299*"!"!"%'%',<QUSND�����������˹����������������ѪN%�����������������������q,)Y0
+H'3`g/ ,4BL|�������}PF��������Ҽ���ƫ���������ϻU3eud2(/BR`pupdA/" (v�/()^���¼�((._mCmvutnfhkpsurN@z�����������_4s��_b����������w555-%#$#"&0Tx��|`4%%'%&%$ %9d\YX``���4,1(7Ijb"=B'%#$#&*14-6-! %+.4?P��³��������������xp;!#',;Pq~x�e' $!"$')CGEE@;1&*A����ï���ӷf��vp]DHUjxbRbfUJJM\qyy~��������Ѻ���d��������������������;>����ny���vneC:5/Pvz?('+')+/G��l-*,,16t��������z������¾��tqrq|��ogafg`���zyx�����zf>1337@w�^4456:J��ˤ���������������ϒ_a\k�}m]bho{X:=>N��c7:88A}�j39Cq��yE468KpQ<>BblttoG=?@J�ʛ:<:;TJ7:;7:=S����Ի���������������������������������Ľ�������������¾��������¶�����o����u����oGq��|�������\5Oy�]@IJKKLLJLNNLJD)Ae:3Kcg]I/**))6V^bjeA+"-002IZ`eia'DM2w�����M7W>+ARVXVZF*+H31h����xC.G1*6NRMUM4:ISVPID?DGLPQP\xoefgd_[WSLWa]WR[VLOVZcbhdhlhfjnx�}~~~��������������������������������������������������������~zwvy{z{}~~~}{|{��������������vhks|�������������������������������������������������������������������������������������������������������������������������������oh_UQasvtsof_�����������������������¾����l���������tegdhmlkjgaR?O����������tsooklmkhggirvuvvzl]J2p��������������������ŧ���TVMLx���|pO# $# $,>OQQNG�����������������������������ҭH)������������������������k(-Y0
+X'2YQ' 4=B;dztx��x|rQJ��������Է��۲���޾�����йV2i|g0);^ppvkZQD3##(z�+&*Z����ǃ&.dnDntvrib[alrusK>�����������~^1q��]h����������v644,%&&$%%(+#/h�T&'&%%#$!'>}���kd���2013d~]GC*%$$%)5\^a`H)$(+/5@]��������������½����Q"$'*:q��ŵU!%  %0^�syyueG)*@���Ӵ���ӷh��vqjcsku{bTbhZIJQ^rx|����������Ŭ��Ći��������������������80y�ÿns}z{to_>:21O�v7( *+)**/Ds�g)*+-1>y�����������������utqp���uhfbfb���{{|������|f:1336HkZNC645:S��ʟ��xnw����������ΐc_\bszqfggoyQ9:;I��Y<:7:5��d18:]yx_346=QqP<=AU��]@=A?M�ɔ7<=LY`bdB::=U����ֻ�����������������������������������������������������������˞x��������~����{d��}�������J:k��^?INQJMPNLLKLJC'Ba=3:FQP5/,)*+4DS\^X9)!,/04GU[`bI"DN4m����u15MI0CRWVWVA)(@/5n|���rG-A00>Z[^fV2<MURUhsgee]SOBZ�gahnla][dkqofL?MBBEGKRMKBA@@JMSlmgdgjjptx{���������������������������������������������������}wuwyxvw{}}~}}z}��������������xgjq}������������������������������������������������������������������������������������������������������������������������������Ó�s[]jrwvupga�����������������������¼����g~��������~slihillkkg`S>L����������y{wtqnonmjjnrtvvvxm\E0s������������������������ڈJVR[��}�~tS$ %%!&/<NRQNF�����������������������������ϭD*������������������������i++P0
+L'$.)"7@&JJ|�������{TI��������ҝ������Щ������ѸP2fxc-,=[nlij_ZP5$!&yx1&+^�����z$&/ihFovvuj_W[grutJ@����������}`1w��]h����������s435/%%$%%$&),,9o�F%$###$%$4W~��d\��04/Ig\+C?($#$&+Bpr�vY-%(,/5>_��������������������P"&(.c���U1"#")G�}qv�~xhU4+D���ϳ����Գl��twvttv}w^OdgTKRPatxyx��������ϻ���˂m��������������������52��·jnwvyrnc>724Y�U3'#+*'**,O{�`)**,4?������¥�{����ʿ��}trppifhhfhgef��}��������~wc;1357Nep}D5466T��ǚx^Z{���nq�����ʆ_`Ug��yiggp�R7<:Oyn];969>��`479h�|G5479QlM>?AM{�qH@A@CP�ʐ69;Qn�~e>9872Bu��̾�����������þ�����������������������������������½���������Ɛ{��������t�����������ľ����^u���Z?M[d`^^\SMLKKB'RkdgRI>1.-)((-3BXWVE3+ .122<KXYS4OV47K^icP,<iK2BRUWZVG*)>,2i�����C0C21@RW\iG/>QUTr�����{ZRG`~`]jjbVWbnvwuc8,2024342334039=7!5GA@?DIHV]ct�jmrtusy�������������������������������������������{ttwyrfox|}~~~{~��������������vjkt|������������������������������������������������������������������������������������������������������������������������������Ò�yfemtxwtoh^�����������������������»����c}��������zwonjiknmli_R?O������������~~{uutvopruuwvvwm^G2x��������������������Ƣ}�ޏNTN\sBXk?OG( &(#(1?RUPNB�����������ƹ����������������˧F.������������������������j*-U/P*!04)NE]o������lAG��������Գ�����β�������ͳF7bxg2)5CIGGNOL@6# *�|-(+e�����h&',bhHouvrc]cckuurGC�����������}^.v��aw·��������x745.%$%&$'(,4/,:L($$$%%##&6k���J`��-3+1p�]&KJ'#!#%+<gysj])%(+05>c������������������һD!$(.L`&%#$ !#%CidgqK$:fo1!,L����ï���֮j��xxwvwtxya]ih_][[ltz{y��������������~q��������������������8M��óilvtwqmZ=801@N8.&!)**)+-W��h2,,/4B�������Ϯde����˿��|qrsrtslighga`�������������yg82266;`�h95457S��ōlmx����}mr�����̀^_Zq��nfhgr�P7<=Sex^8:8=E��T489^�jG4467ToI=??Q��gB??>@T�ˌ489<kyvJ8977-@k��Ŀ��xZ�������ƿ�����������������������������������Ļ��������̼����������y������������ͧ}�������_=WwyqqptSOMKJ@%Y�����l2/-()+.538;;/,'-044?V^[L-RO79L_aS7,
Aj61EUV[_ZF&+B27m}}��w56G-0DXXZ_:7BRT^��Ļ��{YUCa|dhoj]SZfokf^R2)20++--.,-,,--*',C?@DQXVTVPgtTMNSVYZh~����mnnty���������������������������������xssuuh\jwz}}|}|~��������������ukis|�������������������������������������������������������������������������������������������������������������������������������xnnov{xtoh^�����������������������������f��������~vurnkjlmllh^S=R�������������~}yvtrrsssvvxyxn_D8{��������������������ġ��݋QUPW`7aU7V<%!),'(-@OSSOD�����������ĵ����������������̣@*������������������������i'0[*$+W'(,($!002Q:Gs���qf_CK������������������������̳E6iym/',/.*-2-+8A# #}w,%)Cfiq}e$"(,di@sxvsoprkmrsrG?�����������|\,{��bz����������y468,$##$&',<jvsnd?!$&&'%% '2^f{mGb��*/*8�{eCSH$"!""&6clTM4"%&,04<^����������������ǽŷA '+H^VQMad+#$"#.Wmok9-_sW##,G���Ҹ����Ԫf��xwtmlqwsa`dgUKMNasz|x����м��Ѿ����tr�������������������2h��òdlttqmm[::...,0,%,++()-X��q:.-/4B�������Ǣ����������zuqrv��|uhhea]������������|e62337J�rW?455:\�����������������Ȁ`^Zo��ujggq{P:=>e�wV=8:8=��R39:gw\I5348WpH??@H�z]G>?>>U�Ȃ3:;Du�d:9:8:O���˾���I@������������|�����������������������������������������������������|���������������������[<^}xrxylOPNMJ? `�ƿ��b.1,()-Ad\QQ4**'.47<=@LN3(VJ7I8:>91,4;//AQUX^T5"5A,2L]lyxH%>J.0DTVZZA4APPf������tZT@c~qmiffXW^^VRNH.+2,+,+-/,,,+*-+$-D?@SmhdidJU[HIHFIMJZn���`Ub[\lov������������������������������}ropppd[jwyzzyzy}��������������tdlt|�������������������������������������������������������������������������������������������������������������������������������]\VW[gt|ztof[�����������������������������d��������~xvqkjiklmli_O9R������������|{|xvutsqrsvxxvn\C5z��������������������ã�ގKUK9.#/*'4+ !*,$%)APRRPG�����������ļ����������������Σ=0������������������������h%6c,& $T*(GPHC?9-# 48,IL��������vKR��������Ϊ�����þ�������β@6h}l-'*/620/.,<A!  ~z)%((&/0&"$&-mmIsvwurj`^jsvnI=�����������{[-z��br����������|/7<1&'&%'(1p�����U!$%&&%%! 
(3{��n]k���-1*7��6X@!"##$):dfM9+)()-/5Bg���������Ԟ���șZr��="(,Po~����!"&"&'TpnlfeliU+".O���Ӽ����Өi��xrcVUbrw][ebVQSTgty|y��������������tv�������������������~2���ȲdnvtomlX=802:IF5%")))*,-P}�v6--/5C�������ȝft��������usstv���ohgb[_����~��������|^51335KkaT94568c��������������������vX\[p��sfgep~P;@S��{[<8:=G��Q08<Yno@6359]lI>>?Svt_C>?@@V��~598Dt~Y:889:n���ٷ��X6F������������Ci������Ŋ_C�������������������ɿ��������������������������������������������SC\b]DG_kRNLJJ>k�����Q02.)+6y���6.-)!049bjZ^=.'N[n�|yT0/*5@4/5CMPN>+!7?/*1DNL?/%?F,/BXY__G7FSLl������iXSFl�vpingVYXVVEG@0.5.--/6F2--/,,+&-KEFYiik{hCLOJIOTUXOPMp��`n}{{zbktyuu~{z����������������������znmqrmdWlvyzyyxx{��������������t`ov~�������������������������������������������������������������������������������������������������������������������������������lfgd^et{xuqf^����������������������þ�����d��������~xrnjiiiknmjaS=U������������|{ywvtqqqtwvwzmZH3x��������������������¤��܈LPF1,+'%#($  !'(%%+?ORPKG�����������ƻ����������������͜9,������������������������f"5S)&%K&-Pb`di[E*!48.MKfkkmuty}i?T����������������mY������ϩ3:fve+%0DPNMQLFF>!%;�~'#$)8TK<("%(2ifEotwsdY_botskK=|����������~U-}��]q����������k/7;.'&%'')/F[����T &%%$&$ ")A_=?__m���*5)A�jS&\A!!#,<mvnbT=/)+09@l��������ϒ7Ai��~HF��5"',L�����7 %!#*B�vkt{rnt_) 0O���������֟g��xp^H7Xsy\Ufekinjkw{zy���������ɾ���nz�������������������|=���ɩcpurnllV;649Rva:%$,*(*,-`��f9.-.1A������й�������ʾ��qrrqpwyvlifbS_�����������~|]33358;8201446=b�м���vup�����������nXUWi��hiiek~H<Ac{wn@7:=@I��I79BseC755:^gJ?@CTo{\9??@B_��{5:7FsgV>68:?x��ƺ���<5J������z|����BMe����¯VB<������������g�y?����������������ª�������������������������������PESadgh�pMLLMK<" k�����8///*.D����v/--)$21b����90)Zy����T--(8G5.2AII=.' 
=C.)-9?@2)">B-1HUX^Z4/DMGn������]WREz�wkaiaox{�dKNH313../6TY<B<?4+*#>WCFZeijvXINPNLdwspuaHj�wo�����\p{����~hi���vstvvv|������������zwuxxurr|~|zwwxvz��������������peou��������������������������������������������������������������������������������������������������������������������������������ykkt{yvqhf����������������������þ�����a��������{vqnjgijkklj^N;P�������������~{xwvssuvuvvvxk[I8������������������������ۆIUJ=E?,/;6+"  &'#(-AOQPKL�����������ʼ����������������Ð64������������������������j!3X'"&@!4V_\\[O=&"86/J- &,684\�����������ڿ�qeof������ϣ3;`rc4&=_spnnoe]J(+9~v""$)8MN;'#%(4ijHrxwukedflsskID�����������}Q2y��f�Ƴ��������e29:/)$%%'(*'U��x�M!%%$%(' (BfvZa_l��&>-6RhZ$"_C$ $(.dwy~fG/'+18Bo��������ɒ[S_{��`e��3$*+K��xSD2!$#,EgbaY[`bdR*" !".U���������ӟm��vqZGUfus[angihgbjy{z���������Ϋu���gy�������������������y:���˥`puqnljR974<axf;$#,*)+-3\��\(+,.4I�������ȗ������ɻ��srror}��rie`Rp������������}{\3357;EKFE9544;d�Ѹ��|zot�����������gZUTz�|rnlfnxE==<6699:::>H��E7?a�wnG867=beC>@Aj�{aA=?A?b��t699J^t[:679>{������48Q������������J7M����ŉ-4G����������NsX-m�����}8p�������Ů�������������������������������MAMdnv��OMOLNK:&g����h9./-*.J����j.,+('4<�����3/)%a�����K..)CN/,.-/2/)'>?,),,+0+)!?G-/8JVVE01=?Hw�����maXQF��yiZZy����lUUP123..0BfWiwsz>.*#!CMBI]gfgjTGKLH]�����aFq�rt�����Ux������el��\NQYbferx�����}}���}~}�������{yy||}��������������pimv��������������������������������������������������������������������������������������������������������������������������������fe\\Zfuzywqib����������������������¿�����h�������}xtqniihilnli]P7T�������������~}|zwsrrqqtquwwl[D2}�������������������ǿ���چERKSpE2_s]C% !'(#**=PTRJN������������İ��������������Ķ�93������������������������e 1S"#+M!:]ZVPIF:&!1)2L-$#$#((6<3[���������Ϭ�jcjrlx������ϥ6;esuE)>Yfelnic[R)$.}r%%$(:LO<*$%(0kcErvvskfegnrsnFH�����������~V5x��e�¯��������`39:-('$$&()E�����G%'&%%((
'9r|[kUi��%I/7de!"eC"#&>tywbJ;)&*18Co��������й���y{v`�ϲ/#)2k�����u!###(7`ijghjniV) !#"0S���Х����җk�|vmWVakwukqkgccXSh{{~������ƽ�ͷ��ÿm{�������������������r:���ʡaqvrmllP9=15b|b4$#0*(),3_��F(,,.4G������Ѳp������ɹ��xtpoo���jgaZPx�����������~Y6358GYolb@5569c�ҳ��x�y����������ܽaWVTz�{rnnjqyJ?AA>;;=<=:@H��F8AmrqW3467=ejA=BX��x]B@@?Bf��o3<<O|yS:688A�����ήQ55[������������G.?�����S.5M������������BJK_�ÿ�ɋD2x�������ù���l��������bl���º��ǫ�������NEMVq}{mZUOLMI<)hi����I1,,-3I����O),+'%5?|���|50&'Z{����50/)GF**-..-+(&C;*),0@A0) JF*,7LLD6.3=ESr���yaxp[RJ��l][amx���}XVL542..26R|����?,,%?FBH_idkiXCHIGc����YNw��~����t^�������^\ymKNZjliffj����zy}����y}�������x}~}��������������}ojkv��������������������������������������������������������������������������������������������������������������������������������nh`\\m{|yvqgc����������������������¿�����_z����|xvolmigfdefgdaZO;S����������}yxvutuupmnoqqrrvjZH5��������������������ŝ���~NPLdj<[}jv`'!)(%%(>NQRLL�������������������������������72������������������������c2L"%1GBZIGHGF3!#0+3O.$/N5*3:JLB]��������ЗYagju���������Ѡ.7az�7$5HM[kkYSWD#(�u'%#)<HLA.$&'4mbGuvusabejotsnFG�����������}Q4���b�����������b388-''$#%)4v�����=$&%$$$%!!):n{{}ej��&N++e|S!%l? !)IiZN2*)$$+2:Dq������������Ă85O�Ƨ+#)4v�����X!#"%)8bkfedcbdQ,!! 
!4X���Ņ{���ґe�~wnXOfz{vnsggcI:[n{}�������������ʸh�������������������nF���ʜhuwrllpP=?5=g}]4#'5+(),5w��@*,-06P������Х�������ȸ��qsppr{�xqgaUT�����������|�{P3357Dc�vR7345<k�ײ���������������ܶ^TObsstnnnkuyVNNLICEC@CADS��D9=:48:5346>eeE@CXkjdG>BAA=n��i4;Hg}pX989:;������y685m����������R/5{����/23Y�����������}988}ʹ�ƼN05|�������ú��^_����z���e2T�������ĵ�������GAYv~vtvg^feZX@%n�˹��A0/+.4A����D--+'&4=x���V2.#)]q���n,1.(?G---/:92*'>9,*,Mxp@+PA*-.149.+2<KYc`Y_Z{�iXPR��fW_h�����rXXM764./13Xp����Q*+& 9D?D]elprYBGKOf����LS��������nc�������^QmePXy����wh����}w{�����}z|���w}��~��{��z~�����������~lenx�������������������������������������������������������������������������������������������������������������������������������Ê��{z~}zupe^����������������������¿�����\w���|xuolmjfcdb`]][XUL:O���{{{zvvtuqpplljjidggiccfdf`YF9}��������������������Þ��քORJ\iu�V:�_" !%%%%+=MOPLS�����������������������������ĕ47������������������������\7O$*,>>K87751.!#56+;,&LnPVMFOK3^��������̔ivs�����������̟&9f~n*'?_{qrtha^:!.�n'$!&9MUG+""%2m^Evwusl\W_lsrlEI������������S4~�`x����������c199,&&%%&):qcn��i"$$%$$$%!
)M����et��#P&;lyY0)m<"'-F]]O.!!#(/7Aq�������������f"26r��$"'+2;$"!$(R�sdffdkiP&" $2[���̳����Вr�~uj^WcvzqefgeUARhp{|��������������ɹb��������������������h[�s�˓\svqlklM==29g�T5$+6+)))4v�r?+*,04P������ͦ�Ŀ����Ǻ��qpllp��zraaTX�������������uP33468VP\R9546:j�ӫ��������iCl����ۭ^`iv}{zqmmo|����}yrnghbZRg��@::9:988888>kgCACB@=>@A@@B?m��`7;NorhD7699@����܂,47H�������������^91j���\-33p�����������}734u���͔32<������������fX\���m���=3U�����������Pk���FEhzehhj`vsqqoA3�����}80/+.,2����^,.+&(3:u���V3/$-XO���vC1.'"NT/*+9dlH*'E<*-6~��6) !L;+**))-+,4<MX^bdhiwmRTPT��W^k������SSYH664-.1,Zs����J+,#9E@HcfhmeH?HLLd����vY]��������z_������~`LhcZb�����td����~|�������vs���{������tywqy�����������~mhrz���������������������������������������������������������������������������������������������������������������������������������sw|~~{wshd�����������������������������]�����}uqnljhec`^]YXXYN<O|��~~{xvuttqqonlihhefdgdcc``a[E6}�����������������������~KRJHkzX)Co5 !!#$$&,<NNNGM�����������������������������˔38������������������������b8V#'",-8 ?^UKOLJ>%$??&;.(CUQ\XY_Q7`��������Ѹ������ĸ������̣(<i�m2.Cd}mlwslV4# ,�p###&5FB1(""&3p^EqvuscSairtsmAI�����������P5��~ap����������h196)%'&$'(-8054&%$$%%$&%")C]\^ha}��$T$E��^4*n<  $+>}�rgF#!#).7>\�����������ӼT&10HkK%)-*'! 
""!*Tg:5BDCFD1 #6c���ʗ����ʎr�|uo]LfuxqT_icSR_cq|~��������������Ǵe��������������������ns�i�ΘktuqnniM@>49mU0$+3*)*.4s�zF-,,.6T������β���~���ȹ vrojw�r{oaZPh������������sL54589T��^8435<r�ԥ��������\C^����ݰstw����tklk�����������������MFCDD@?@>=<AhdHDCDDBECCABEFx��\;=;?>>>6776?p����C.6C|�{1Q����������h2L�n[S841�ú����������65E9\���_*2A������������oS[���d��e35Y�����������<Fm�{KNe[__]_jxpsi><������930,,+X����\-/-&*57n���p6/#+Xn����@10&'`Q0-6o��I*)'D;+-D��~2($P;**-.+.,-5BSVY[ksrmbXWLK�y]i|�����|JXZN463...:������.-)"!>GACHT_bS?@GINh|���zT[��������n_�������VPgbTb�����qn����||������|ti����������qvsr�����������|jiry��������������������������������������������������������������������������������������������������������������������������������~k]mwz~zvrkh�����������������������������[�����ypniheec`]]ZZWVTK4M�����|xvvsrrpnkgihgfefdeb`a`[B5��������������������Ř���yQSH310(*0;8" "#"&,:POPJQ�����������˸����������������˖27������������������������^=U",&&.3DNC>?@5*!%<@-?-$3LH=TveQ?`������������������������̣+<g�u/-AVWMUTJHA5&!5�q&%$&+680($$'1r`HswurefoqvuxoAK~����������~J:��as����������i389+''%%$%(-FIC?:-&$$$%%#'B_aar]w��"I$"OyaL%/n3 %,OsbksN&"%+04;D�����������ַh;966;3!',A/$#"  ""# !.ObMMLA?;<=)%3b��ֻ�����ˊr�{sm^OfswrXpkebXH[r}}����������ҿ����e��������������������qwj�ˍeuvqnniO@?3?x�[3%01())-6w�|N,--.6X�������ū�{����ƹ��pqmgsvm�h^TXp���}|{y�������rN1268;a�p>4545>u�΢��������rUUy���۱}yy~��ohib�������������������{vsqlfaYX]qric_YSRQOLKFFFzõY>><=<<<::99<kwG|�E04H��RD�������t`��{35TGSh@6^��sq��������}JVAY����3/1G������������mT\��xg��C47c��yzpln���v?/Z�yCG\gjllond[OWa?K������72/,,2v����K.0-&*6I����b6/#2a����p12/'+]L+09���E+*,M;,-B��u5(+M2**/19<0+6FXY]jwssm`YVHH�|iqy�����_PY[K371//2_�����\..(  
<E?AGSWNF?AILMj���ySd���x~��yFl�������XVg^Qh�����jq����}w������zpV����������svtq|�����������{jjpx�������������������������������������������������������������������������������������������������������������������������������āwlmssx}}zskg�����������������������¿����Xy��ytrmifdcb^ZYXWVSPOI5N�����|ywssopllmngheeccfc`a^_]Z?7��������������������Ô���tQUG4*)+*;pY$$$"#+?RQRIS�������������ʽ��������������ʘ49������������������������ZEU  2'&'.<OWQJI=4&$?<-=,$'HPK]aZQ6d��������Τ��������������Ϟ'>n�s/,?al_ZVJEA7$ 4i)$#%1FF=.%%'3q[Dsxvrigkjkrul?J�����������{K:��~Y{����������f598-'&%&$'2b����~?%%%%%%" (Fvo��Zv��#? 4apZ'1n2$*MmwrwG "(-15:B�����������ː[RF<9:0#-Jp`RIA<40M=!""*X�nejeifoc,#8f���������ʅu�{to`Ncxxoj}kc[:Aft|���������λ��íg��������������������jomn�ʊdvvqmlmKB@5@w�X-#/2+*+.8t��W0,-/7\�������ϼ�~����Ź��npjir��b[Wcm���strn������rM2366;brbM8348?}�П���z|���hZy���ۮxzy|���qgd]����������������Ŷ���������������������~yqoi�įaMKJIFBC><?;<lwJ��E/1C�n1l�������uP��d=EQ3\b<J��Z$u��������y}_P���?*12h������������sOKt�p��p248p����������r>2J�xGJVmmkpndhhgsv5X�����|5510/4l�x��R0/.',6W����N0/!?f��~�t71.&'_T/1>���E)&)W=+.A��t1(*M6+,0Lmh5.9JY]]ahkng^YTGM��srtw~}vk]Y]]I583/17Sx���y:*,)"<B?CKKOC>?@INQh{���[Lh��~LVbbTNo������wPSl^Wm�����jr����{������xp]��}�������wwuu������������|niqz��������������������������������������������������������������������������������������������������������������������������������fXVc`fx��|ulh�����������������������������dx���}tqpkffc^ZZYWTONME9O��{zyvrpoojliihgddccbb^__][[ZU=7}�����������������������uSSG9FKB?U�R""#"%+ARTQIW������������ʳ���������������˚05������������������������\AM 0$(+7/Tg_fk\B#%6+/;*$=QLZ[]V4j��������ˀY^X\_gom������̝%?ls((3KfccmnZJ8"!9�h%##(3DJD2'%(8s[Gsxvpa]`bntuf;I�����������yJ=��\����������e698-&%&&%'7x����p,$$%%%&%'.1Y�{Az��"?M|�h-3m6 %(<dwf\, %+059A���������ȡ[?Z`N@98.
"/m���������G"! #(UiYX]___pd"#5g���˩����Ɂy�xunXNdwxnkzkaQJ`nu}~}������������þ�]��������������������m_Zt�ʇmxvqmmjHA@3Fx�N.#./***,8v��P3--18\������̶|a�����Ĺ��npllv��scVWjl���hnjg�yw|uvxqJ2669?r�bQ:546@��О��}Ym����pk{���٪t{xx���pf[T����������������ɿ��������������������������������}yunhea^UPrtK��?026>63q�������mJ�[TVI*JL0S��6[�������Vg��FE���P#.2F���g����������XL]�w��=15?|����������u>4CxqEJfkf`kuguvy~V/b�����uJJA<3-_����I/1/& -7V~{��Y1.#Ed~���x3/0&+gL+0C���B+&3W;+-A��h0(1Q3+0>��t2.;NWZ[\^ab`[ZSDT��rrv|�ypf^_``F472-.03FPKGF3,+)"@C<Pib_SPMCJLIJRag^CIj��xO[\UJUo}�����mUWk[Xl�����nx����|{������xq{��y������zxuv�����������xjfnz��������������������������������������������������������������������������������������������������������������������������������ed^Xar���|vli����������������������ÿ�����d�������xsnjgbac`^\[XJ;R~�����{urommjifeeecbcb^XYZZZ]V@3���������������������ĕ���uOUKW{�yvuuH"!"##(CVTRJY�����������ɵ����������������͗/;������������������������[@G  -#*,25KQJMC5*(2(-6'#,NUMMZE4l��������ʄy�{xt{um������˜$An�n-+4Th_mxxgS2 7�f"$#&2BHB.$$*9p\Ivyvofkqprvvj?M�����������}N;���`����������d399,(&#%%(,:g��}2$$%%%%'%%,S�}FN���"<(1O=;p,"%)9K5*""#&,/5;B������Ԭ�]LWh�}YA6;2 #0��������ƨ;#"!#*Lb_cc_^YrR$6l���Ĩ�����~y�{ul]Te{znpvi_ST_`r}~����������˶����b��������������������bB]�˂lwtponjGE@0G{D+$+,**+,8|�~H.+.16^������˱�������ù��rpkkz�uxdS[ll��|kjfk|ky|es�uI6568HeCMW9347D��՝��j8x����vny���بv{y���ykaRV����������������ȼ����������������������������̷�����������}tV��>313234f�������lT\^VOH8$68/@k@)�������Wv�};%P}G'-7���*T����������|LE}i}U*24U��{|�������yB4>jaCLbXfu�}flvxdG1j�e�ʹg���};+e����SD57-
/6K����Q4/#La|���~874(/cB,-F���8*&1\>+0M��j2'7N3+4J��m./?UYZ]^__^__YSE]��r{yph]Zab`^^C572//,39A@<5.-+(!(>CN~�����}KLKFN\e_QIQj��t?KOKHUpw|���ylZ^n^Wp�����`z����|{������wt���{�������zxvw�����������ydgpz�������������������������������������������������������������������������������������������������������������������������������Ē��������}vki�����������������������������b��������{xuqlhikijkh]M9V��������~zyvrpoookhggfcabaa^`VC6��������������������������tKSHBDEJMS^?# """#*BRTQGT�����������ǿ����������������ϕ-B������������������������XCJ"+#"-0+:423/-+!'6*/9(#*HMS9,726p��������ɥ�¹�����������̖#?o�c*,EerillgS?,":�b!#!"-:<8(##)6r[Frvsqklopqvui?N�����������{K;���Xv����������d6;8,('$%&()G���|X,&%$$#%&(G��ks[���9&FC6-9i. #%'$ !!$(+.6<J�������F&y���ĤzSP]K .aS7?w�wh{~5"##%(4]adfcY\V1#5s���ѩ�����{w�ytlVFgv{ohqhYIFHTs|}����������Ͳ��Ʃ_��������������������\5_���{pytponfEG=1Hy{G*$(,*+*.>r�p<**,18e�������˻������û��qmjh����dRaoj��rcmmly��|��pD5568FQMkU7468D��֛��l?�����ohu���ؤtywz��xgXPY����������������Ʋ����������������������������ξ�������������qS��7111225M�������bMJNKC9."/1.1516��������HSnX8Y�@&.5��Z;�������}a���;@P63>24B}�S=b|��u{~yZ><SKAIHY��uTfgdf`b1h{��ɘ�����;7x����lyxN15R����\L@/Mh����sN_VI"4T=-0H��w:-&@d4+1J��Z+&9N.,2N��f-3AU\[]\^acb`ZULj��t~snkffoc[YW@661/0,--.///0-,&&@Gg�������GLMGT\XUJIRm��qBCEGFXnuy�|uoV`o^Wiz���pWu���z|�����|xz���|������{xzzx}�����������vbioz�������������������������������������������������������������������������������������������������������������������������������Û��������|vlk�����������������������������`��������~ytqmjimmnmm\L=Z�����������{yyytpopqnmpqpof[A8��������������������������qLQD0('%&6M4"   %.DUURIT��������������Ƹ�������������Β)B������������������������WCH /'.0?ZYPLIMD(&2)4C*(E]`aSD=65r������������������������̑?o}c',E]dXPG?87. 
!8�b!!#.ED;)""(9q\Lswtoddecmutk@E�����������yH<���a�����������d8<7(((''$)5}�WU�y6&&%%%$$'6OGZ]C��� 1!k��|g+8q, !%+5/("" $&*.7<Q��׸����������Ū��� (9*+P��Z'?{3$!#,Mffdgg\[VC"#9t��׵~�����pz�wtkVVivwmivh]X]_fx~|����������β����h��������������������U8]���umvsooqi@D92IszA,$*+(()-?y�o*+++/7h���������Ʒ����½Ímkju���n_Pfoe��xgnji���~r|}q@4578@]{kK657:H��Ӗ��n>����s`cy���ՙkuv���qbWR]����������������ƴ����������������������������о�������������uX��30102158j�����m`\QJGA:/"),,+,01s�������/3CB/b�>*12][+J��������h��e<36 /N46V�v1[�����}ilzuI49<?K`��g\a`fz��h-o����������57Ke�����T09`����q��n"PdXx¯g����&7Y?.1E��v7.(8O-+2S{wG)%=J1-3M��e.4CU^^]\[dige^VMx��rssxus{tdYRM838.-...//7203.-%'DEf������uFKLUs�wqpZr��fDECGK[muvvvttm]hp`V]ekkf]Tm����zy~���}xwu���x������yx{yy�����������vbjpz���������������������������������������������������������������������������������������������������������������������������������������|vkh����������������������������a��������|wvtqnmnlmmk\K;^�������������~{xvusqoprruvwwi_C9��������������������������lJLB28<62:C, !$" #+DWTRIX�����������ɽ����������������͊*A������������������������V@>1&.1 DSMILO[O&(7(3A)':CN[]YK@3s��������ǰ��������������ˎ@m~]$+D`d\ZYVG</!!5�c!"%;SU?'"$(<qYKsxundcebluvm:K�����������{F>��~Z�����������b8=7*''&%&+@�n:�y/%'%%%%#"#$&/>E���!1I`ixS#8r-!
!%)E\T?/&""'+069U������к����������р &*+/Ib];/q�6& ",Kbinoqqhh>#9y��ӱ����׿p��vtkYVg|ylirkgkpsu|�����������ɪ��Ǜe��������������������J8e���sovtoopk:C91?xqB.&-+)**-@��k**,-17m��������������˿���jki}�phdVTkqj��ynlju���~dx�p>3468HymWL8568J��Β��h>���{bYh����Вgnqxvrn^XM[����������������Ƚ����������������������������ͺ�������������vX�}61103104Bz���S[hYKIHFC3%,(++.6S������C.9EI3Xo44;434-L��������<\c@0..=`31Y�C:y������d[�C.49@Rovkkmjn��|tR,p{��������v00K�Ǟ�����B39<a����� T]Z�Ò����~>\;.2W��Q/,(8M0*/BbpN,$?J3.2M��U,5DW^__^`lpolaTN{��sw}�}v�rniOM617/-0579AP;:=3,$*BCa������JGNJv������cv��iXe_XT`ptutstqjafi[UW\ad_YXo����{vy}}zyxvi��tz����zvv{tx�����������pcjoz�������������������������������������������������������������������������������������������������������������������������������Ü��������}vll�����������������������������`��������{yvsnmnoomnh]L=^�������������|yyvurprttvxzwhYE>��������������������������nILGPvxoo_T;!!%$#$,GXTQJ`�����������ư����������������ʉ+F�����������������������RAA.$00=TYRY\[D!'4&8E)$ RSLFZ]O5u��������Χqp|��vgp������͎Dm}`'-GehjvzpYE/!
#9�a%"")@RK3'%&(:pULtxtnfhj_ktsl9M�����������x>B���^|����������h9;8,(''&'+:`8'(OH&%%%#$$""$%+5>H���); DWJ.4!9d' " "%/Y}zhX8$#',26<^�������������������u").7' 3m��&!!+CcllicSDE7&=|��Ы����־k��swo]Rfvzkdinkilkqz�����������Ǵ��ƚo��������������������I>g���hlwvroqk;?9/KwmA+&2/)+,-A}�j-*,-39s��������������ʿ��~iljsqmf`I\opj��qglm���zkz�l>1679UmZhV8478K��͓��fI���rSWq����Њ_jjqy}o]UP^����������������ǿ����������������������������̻�������������{`�{341111456X��bM[`J@JJIH5,3',,-4;v����Z-3?KH3Qe0A<.,,-=�������O'+;7-.+XX1/@A1Ar������cO�_6129BGHiyurhlm]QZZ*c}�ǯ�����A,7��������7"46H�������b_u��������uDS7-3I`nO.+% >K.+0V��S-%IR/.5\��V/7HW_ba`am�ucUO~��r{���y~�xuiO358206GTTTM6EP6+$*CGh�����sILRU�������[z��u�����nruuussrgTbkWUSVZYUV[q����|z|~~}�xls��xw|}~{wvu{sx������������qais~�������������������������������������������������������������������������������������������������������������������������������Û��������|vkh�����������������������������]���������}xuomkjonng\G<a������������}{wwwuqqqrtwyxthYB:��������������������Ⱦ����mHPJShm��joL#!"#&-DQSQG`�����������Ƿ����������������Ɉ&J������������������������P@@ 2#06#BYXXaZO?$'9'5=(%FqqI5D_F5x��������ؾ�xfjeal�������ʇAfd-,>P\enfcdU6!
#9�[$%&*ARF2&#$':rTOuvsrgbbcpuvl7L�����������p>C���_y����������j3<;,(&%%')3TbC+*)&$$&#$$""!$+4>L���)?!f~X8(=b'  %,Kw{zv<"$'-38:_�������������������p$.EU:43(8RRA$!!-YmeU>85FN<(>�ҿ�����պj��ttjQRiv{hZemfNHA^x������������ȶ��ǖp��������������������CFh�Ľdhuurqqi6?83Pup<*':4()+/J|vN,*+-3:w������������������|nkionjd[Riqpn��pffl����hkk�t@3878d}ss?7569K��˓��WD���nVZu����χ]eaq�yeRPR_����������������ļ��������������¾������������˾�������������zi�z.2023025KaopT[bXHJQOKH072*,.014F���]/26CDA2Ce8.-*()-.b�����{)*5>957&GI0.1.2?m������KEWD41/8AHt�ph_SUer�c(q�Ю�����q+03g�������y$ 6<i�������wlco�������\EV3.0Du�c/,$R\/-2g��L-$OO,02R��M*8LY``__bv��zfXO��~o~����~�{r[57920:_aGLMO\M.)$1LDX|�����ePSV������U���������ntuuvttthVklTTL]ifeldj���~yx������zh~��tusuwutttwtx������������ndhs�������������������������������������������������������������������������������������������������������������������������������Ĝ��������{xol����������������������ÿ�����a���������yvomnrpomh]L;`�����������}{ywuusonqrtuvxti\C<��������������������Ǻ����mMMA-/Y�cZuC!$$!%-AMPNF^�����������Ǹ����������������ʇ&J������������������������ODB 0!<:*JQBOTHMH'(<+8@)"8QS:&/<83~������������������������ʆAh�\+-?[gkiZinP4=�a!"$+?QJ5&#%(>rQQxvutjjjlpsvk9M����������q<C���^|����������j2>8,(&&'&(<w�tV6&'%"##$$###%,4?R���-? _|kL6"G_*%,R~{{f*"$(,18<f�������������������m%7t���~wkchx*!! !.Qtg`ZWUV[G'?��ѻ�{���׹j��wsnVVmwxgVhlUEPZs���������Ŀ�����ʖr��������������������>Eh�Ƹfmvvtrrb8?83N�j6*/:/()+-5Td7/+*,3<���������������˽��{ifimjgdZSkpnl��kdcphhkio��n:367Bq^ZC2677:Q��̒zqKP���sll�����̓]e_s{jdTST`S8=GKRZbcjsvr�ν��������������¾�������������˿�������������{n�t,334624Gw�{yijfTJLKSQG'7-+,.1355`�`0229CDH1Lu6'*(((,0<�����5,2@E>=4'AD.-/.04\�����l5RRD3//8D^�[L\dbc{��nJ$$~��M}�����V45�ƪ�����= #:7T�������Ro`�ĥ�����!
GN6.2a��a--&/o^..2j��D*#H>,.7a�k;.6HZ^`a`cy��}kXN���t�����|��p\<7724A\G8JSfc8-*% 8OES������VMSU~�����aK��������}gvvwwttsiTmjWS`w����f`���~v~������yl���osqrroortwqv������������jaks����������������������������������������������������������������������������������������������������������������������������������������}yoj�����������������������������a���������smlllopnmh\N;_����������xvuussrnllnpsuuvwrhZC8��������������������Ƹ����hNMA6i��p�l*%%!%.@NNMHd�����������ƻ����������������Ȇ&Q������������������������KGE)<6(A?.>5/DB%&>/A@*" MQ@/2565���������Ǯ��������������ȃApY$-Gm{mcks^B0 A�^#$*DTS='#&(;oUTuvtoegagnsrd?M�����������l;H���^�����������g8@6+)&'&')4_��{bB)$#$"#%###%*4?Q���*:$a��y^*E]% $2aw^B,# "%+09>c�������������������i&<����ÿ��Ý"! ".apcc`]]a\6&=���ϯ����׷e��xsfPRsxygZkncity~�����������������ɑp��������������������?Kg�ɷdowvsrpa:A94PjD-)'/,(),1Xol\G---2?���������������ʼ��h^ccjfbdUWpokm��jffnvnppop|~h7445FU6346767:R��̍npp������������]``vxzgNRQa�M3678874328<:�ɚ�����������������������������̼�������������ww�m)27AA56K����xfIJIO^SB+..,-/3ELDL=4759:GKG1\|/(*)**-/.O���<.3<?A?85.:;0////1Hu���}:5QW<./29D_kKCL_o{{k^VM"*���<~�����h1L��q�����a*%<8u�������#`��}����n2EQ2/3h��Z.,+HpH)-4o��J,  C>)-4Ekg=15IZ]acbf{��{hSM��}t��|}��ulk_655338IPGE_\ZK8*%#=TOz������[OR\�����r[��������]iz��zxwsiWngXUo�����`c���{w�������x{���tmllljlorxs{�����������|jcmt�������������������������������������������������������������������������������������������������������������������������������Ý��������}xpj����������������������������~c���������|mgiknoqomh_M;\���������{srnnnkhjgghksvvxwriY@8��������������������ǹ���iQOHb~N<PM/ !%#!#/DPPOFb�����������������������������Ȇ$Z������������������������KJL4.%A[F<GIJC)%F;98&#?muH.1788��������ٽmemnv}{x������ǄAk�^$)<TZ?@UK;@<#>�X$%*DUP=(!#(BqTRuwtnfa`gnvrd:I�����������n;L���X�����������g5@5)&%'&%(+H����o2##%$%&#
!%%%*5?O���'3Rx}`<B`&$/QhXVN1#"$+17;a�������������������f';��y������s!!1[YY]YVWWU9'?���ع����ֳb��ztbKTmwxg[jomrxtm{������������ɸ����w��������������������;Kb�̰^o{uqtt]<J:2?P<0+'/,)(-=z��~I,-.1>���������������Ȼ��TONPTQRPHZh`dj��^akvz|ywuru{q;225??5666656;[��ʌ{���������������zXWb��|[PQLi�Q688;><=:99=@��[6>CHIT\aehgf~qlu{���������}��ɭ�������������{|�j%5Mvw>3H������dKHIVXLG020-.3Niolf>MY`S:g]P1k�2(**++,-.5cp6899@>EA8<556/../.04Qv�xA16DE1+/4<EZcZPVnjP]babU!5���|�g��ŧT+S�XA������+(=>��Z�����O)f}�NW�����]?I--3m��X++%*M?).9u��A*"NB+-1Z�|>/9LV[bcbf|�r_TQ��zt����{��xblm64732F^T=6YqaPA0#!;MNt������VPUOa������\��������wv�������|VmeVRn�����]h���yu}������x����sgdgegmrswu{�����������yjdlt�������������������������������������������������������������������������������������������������������������������������������ě��������}xnl����������������������������c���������|ojekpqomoi^J8]���������|tqpnllkjgffjtwvwxshX?9��������������������ƹz���`ONHH;.*,'#  ""!",DQOOJf������������®���������������ƀ"[������������������������JHD;.)JUIVYIRD'+K35:) .VW2+0578��������ٺmurrrqohv�������y?h�b &.84-14+0B9 B�Z!""'@VS>%!"%<sLTswuqddkmrvqd7O�����������q6K���]�����������e6A6'%(('$'*H����p$%$%%%%" ###(4@T���,2 b�t.Ga$ %,c���h1!"%+29<j����ֶ�������������b'4M9 #[G!
 ""(C[bbVY^ce5(E���ѡ����ձc��wqaPSqxwbUnxup\G\�������������¢����x��������������������5Fd�ͨctyvsrpX=K74Qd[7*)1,))-Uv��g,)+.5B�������Ǳ������ȹ��426455586>FII^��TYknorrrusw�g6/014212421457_��ǉ����������������rSViwp^RTQFg�L688>OJM?86=G��S05251641436WgE=>>ACDJJIWSY^�к}}����������t{sx�h,>���U=S|t����`DOZZKO* 90,0Blzm�{v~��S]�bA$}w.(+,-/0-.-//>cYUC@F>=E012..1./14@^cM;/6>6/,/4?F[g`_glZVginqS>�����v����[,I�sk�z�ór%*;DsgA�����X'jt~fmu`���R@F1-7q��U++""K?,->u��=)'YE+.7x��;-6GOU\_^boslg\PQ��y|��}vu~�~_q]44742@TK4>L`cQ?1#=HFo�����d>PUP�����qT���o����{y��������OgbWWv�����Zn���ys�������z����ndabbgmruusz�����������{kdhu�������������������������������������������������������������������������������������������������������������������������������Ɯ��������~xmk����������������������ÿ����~^���������~rlhkqspoph[J8a���������zrspnlkjiifgmruvwvrgX@8��������������������Ź���dPNFNqqqoZP7!"!"*EQOPEd�����������ɴ�����������������z"[������������������������HXNE7#DX\O6FW=#,<!58)"4kQ9./58:��������׽����������������yAi}c$&,43/23.5C2A�W"")AUR:)##%>tOWsxxuuttsvtsh8R�����������r9L��~d�����������`7B5(%'&'&(*Z���s2#%$#$%%! !#$(4@V���-3"[qZ-Q_&  #/gxUH=$"%(.4::m����ң����Id������['3O[E<4/$/A" "#'FfZA-Ub^U4'H���ɣ����ի`��vrgXXpvxcUpxmN7Gp�������������­��ƒ~��������������������7Gg�Ϥ`uyvtrqVCK43[}`9',2+(++H|��W5+),5E������Шn������Ⱥ��,/00/+--00/41X��8CJKQQSUY_eg?,++-/..-00/235e��ǆ����v|t|~�������eHRdh`TQPHDn�I89;TlmrK98<I��L4778967567:]iI@??B==@@?@=<e�̥=89>?DJPUV^UVzpy�f,>���ZZejz�����[PX[WJK* 8.,1arIX������W��O.
uc()+016@40119m�{sC?@ECD'/1-23215<C9KqW/7<3-.06?G\ihkhipxxrxyQC����������T0S���d����t$0>@uyr}|�Ĥ2-jy��a����NLJ2/=w��I.+"*S>,-@��z=(+_@).9�z8.4BLRW^^eloug[L]��vv��xpn��~`fQ15735IWC8ElfYUL4"!;IR����yqhPTU`������w]��������~��������|UkdU[|�����Zm���wv�������x����lcdddgnrvtr|�����������}idkw��������������������������������������������������������������������������������������������������������������������������������ĝ��������ynl����������������������¾����~Z���������{rjilpqqrpj\L=[���������wrspomkjifgglstuwwrgX@>��������������������ƹ|���eJOJp�q��ik@  &%!#,FQOQFg�����������������������������ǀ#_�����������������������E+oID2#I[H3K[N9"+848)$E_UL7/476�������������԰�����������s@i~d()1FLDFD<DI5D�P"#(=MG7)#$'@sNSrvwxvxutwuth6O�����������u<N��y\����������a4=5)&%$&&(A��|E #%$%$%%%!$'*3>V���,31_a9GW(!#+[]..2%#$*-39>p��������Ұ�����u�ԿR%C}�����}v�{ !!#1ceSGash^\B(I���Ͱ����֧f��wrfR[mwwaYrnYK\q}�������������ĺ��Æ{�������������������x3Gb�ҢbtywtsrWAE23YnX9&+>)'))2m��_7)+.4C������ϫ�������ǹ��,<=2303244323[��-75250144476.()+--,+*,--014b��ă���slqh}t����ڽbDV^_[WOGFLq�D8:@aw�qN<:9J��F578:J?C856;^kG?@@ADDBDA>>@h�Ԟ698967845462X}j{�_'U���y���������dz{i_[M,%3,-0u~9h���������B,"xh++0H`VZJCC?P���|CCNL@;!0518C@88J@:v�a08;2/./3<EH\s~|�qztrzhNL���͹���ǐ>0T��������%1;A���n����F1jy��������^WK41;y��F,,#,M7*,=��w6(.[;*-<��x5.4BMSW`bpw��|eNT��wy�~tml�}XZS65636KRECalcccU3#AKEb������TQVUu�����`c��������m��������]XncVa�����_t���wv�������yz��~jcdgefnrutt}�����������zeepw��������������������������������������������������������������������������������������������������������������������������������ğ���������zol����������������������¾����~^���������zphglnpqpok\K9_���������vrtoonlkiffgmuuvwwthX?=��������������������ƺw���gJMGh[DuU@f; *&!%/BOOOHi�������������ž���������������}#`������������������������C+`AI5+UVIUUGU?#.;9:(!7LTR6288:��������ٵ��̳���ծ�������mAm�d#,AikbiohbU6
#F�R""$*?QK2%#$)FqKVvvwxwwuwwxsc3R�����������v?O��z[����������^9>6*'&%&&*OxO $$%%#$%'&"%$&+3?T���+0(r�oE)JY(!%.Xrro`4#%+/39;y��������׾���������M&J��������Ɖ" 1PT]ifJ3YtI)F���������Ԥf��vvhQXnywb\tvzuy}�������������̿�������������������������o-Gg�ѝauywsqpV@F47Ype2)<K(&),1g�a5+./3J������ϴ������Ƿ��=bVJLEEKJFGC;a��1VHCD=>AAC<7.(+850200.24101d���t��rKEGHYNfy���ںjSZU[a\OIKQs�B8:@Z�ziC9;;M��F68>XnsmC36<fkE=>BEW`_TC=>Bi�̈́699:;><8765:\�^��X']����������������x}{k.".-/8�������������<+/~f(*?}��xvrp_|�z�|ZNL<@99<5egWNM\NO{�mMB?50/03<Fc�~wqe_[b[_FX��̸�v���@3>������ǧY-8C��������D/id��������9%UG20<���H..'/Q=*.D��w5'3c<,/=��u319HQUZahx���xfNT��y~�wec��qcf[44416GLLQRJaYV?."DJL�������SRWd�����~[f��������n��������\]scW_z����{Vw��wu�������ra��leddeioquvv}�����������zdgov��������������������������������������������������������������������������������������������������������������������������������á��������yom����������������������½����}W���������zojjlmoppni\L;_���������wrsonlkjiheejtstwxseW==��������������������Ƶu���dGLJaI@dE@Y3(%#%0CLNMHj������������Į����������������xc�����������������������}E-\> $K0)N\`P4FM2.764)!4]H;+057:��������׶������͡��������i>o�_'1LusblvmfP.%D�R"$,CTK/$#%%DrPZ{y{{vuqwxvqa8U�����������w;O��{_�����������];>6+('&&&(.,"$#$$$#%%%&&#$'#$*4>S���), a{{SSY$$1n�xwo0"&+.3;;{�����ӹ����������ԹE %F�qkt�����k!
!#(@SUN1(CbcE *I���Ү����Ӡd�srbMUtxwaewz~~~��������������Ŷ�ƾ�~�������������������l/Hd�ӘcvyvsroQBC68b�c2+T=)&)*2h��_2+,/6M����������z����ƴ�Mxdlegkg`g]QBa��YxhidbhdcdXF-,@ZQLLLJQGLG;3g�ӴIe^H?;8<>?]q���غi\VWbd`UNKM|�@:9Ac�v\678<K��C68By��f<45>gcB:=BZz��]?>@Am��n4989INLG9419^�R��a*`�������o�����������{&%5,-6���thTOe�����=+/y^**T������}v��y��t]RD;,G?`�}ttrd���ykYO8.24=Klwk^SO_WSUPS8`���uir����F0^�ȷ�����P /34r�����Ɔ3q��ƥh���z*'NB02C~��A-,#1^A+1D��s4(:l:,4K��m*0;NTW^`boxgqnXE\��{����b^vyvrmV/54.3AG>>K^YD9/+"CLY�������SUX]������le��������}�������t`vaXZlz���{Vz��~wv������|m_��~mabcgkpsvuu~�����������ybgow��������������������������������������������������������������������������������������������������������������������������������ơ��������|wpo����������������������������yU���Ĳ����zpjilnlonnj\L;\���������uusqnlihgffgksvuwvqf[?7��������������������Ƿw���bHLFG6=K,3:#!$#$&.AKMPGk������������ç����������������za�����������������������~C.a<(*I+&H[\OIRD..752'*X`F=2158>����������Ż���ҷ���������h=m�\&1NjW=GJED@+H�N "#+BRF/%#%%GpQ]w{��yzywwsX3X�����������}=P��zW�����������_<A6)'&''&'&$%$#%&&$$#%'&$ '%#%-7@[���-*#q�y3]]# !&8gogxh*!%(,3=?�����ԋR����������ӷB%5A7@Fuk&"oe"#$)CXTTNMS]nK!,N���Â����ћi�~uu_O]uvw^fv|o`^p�����������������Ŷ���������������������j/Hj�ԙjy}xtstSAB42h�X-'22)((+6f��T)*+16O�������Ӵ������Ƴ�zSeafhrideibWAg�|[�yuv~yruviK--\ngjnoqmpj^F3i�ҟ)527432466O]���ڹeTQY^`ZOHGL�=99>r�sB88:<N��=28>l�kU=56@daD<;?X~��W@?A?u��f387JnnmT:546fzR��b*]������|Uz����������o$/7+-2��z\=>d�]K���;)4�_(*]������uu�||���xkU.)
D>~�������h�����}pbH<2>Jgic`e}aQPNLJ9X�����}����C5z��oX���i16Q�ī~���t.>���[^��z}C(SE.3E��@/* ?c7,1F��q4(J]8/0J��f/2?QY[_`bu{tnbKCa��|���~kYp}|qkG082-29B@Gat_A++)!"BHW�]ela�WHWUV������ui���������t�������igybWSn����xZ���wz������yjO��ymfeegkpsvvv�����������ydgnx��������������������������������������������������������������������������������������������������������������������������������ǟ�������}zvmm����������������������¾����xX���Ʋ����{pkhkmlnnnh[J:_���������yvsqnmiihhfglsvvwvqh\;<��������������������ƴr���`PKDPqoha\V5  #&2CNOPFn�����������ʴ�����������������v"j������������������������C<oA%,I)"<Zb_tkR0,646'(O^ga9368?����������������µ��������f@p{V(0HaH341,3<+K�J ##)=JD+! #&FpJ\w�������|w^7X�����������y>R��wU�����������d?D7(((&'&$$%$%&$%%$%%&(%" $$&-:@Z���-%,t�k)`Q %/_qs|N#!$),3<?�����Ѕ���}�������ӹ>&-b����xuv�i!$ $-O_X]^\[cpI/Q���������Иk�}urTL[luv^hvpLKQe����������ü���ʲ�����������������������d-Nl�Ӎdx{vqtuQCC58q�X0%-0))*,2S��A(),03U��������y������Ų�y8@CBKJIMUPKH3q�xE^[[ca\cb\[?*.W_hbppegmfgF:r�ט'96330431/,:���گA:ABEGEC?AJ}~77:Al�qI:7;<S��149F|siL965AbbD><=Nu�xA>>@@w��_875TypqK9653fpP}�]*V��vt|RR8Lpzyc������O).+.-A{��pt��K1m�l6(9�]$+`�y����}|�w|�����s,(SO��������l�������|qdLFJfd`s��PQQMKI9\���ʷ�����B7t����u���g3<b��tc{��x)C}�����j���8-YE-3C��~<.*<T3.2N��f2&EQ4//O��b14CU[\`^[{��rOIGi��{~~�ti`g||bZC592-4IO@=WXW<.,)%@F=[`WVV\EKXTu������ok��������x��������jkt^VZr����k`���|wx������vch��wleedglpsvvu������������wigo{��������������������������������������������������������������������������������������������������������������������������������Ɵ�������|xll����������������������������zV���Ⱥ����{rlikmonmmh[K=a���������xwuspmljggdhnvwwwuriZ;A��������������������ƴy���aQNKy�t�{uo>!
#'0COQMIu������������������������������s!g������������������������@Qr<&'J/&CTOFE>2#-:<9% 4W^B.159;����������������չ��������h>l}S(0I]SIJC498' J�I!$)8>5&"""(GtL[wx��������{^4Z�����������yAQ��v[�����������]5A3)('&'&&&()'&&'$$%$%%#!!##'/:D^���/( 2H1`P"%*Bgnb:+%&*/5;D�����Ο�����������ն9%R��Ͷ����:!$ #'6HB?A@ITjA 1W���Ƞ����Ӕi�~uqULQfst]huvnw||��������������ơ���}��������������������`0Om�Њl{{wtwvKGD4>tV2%,.)((+6b��>'++/4W������Т�������Ķ�q+.0-.0054793.x�r%24A@7@CFA=1((7=@EEFKOZNR/3y�Ս+USLLKKHIEAD���٥7562057777Dzx<8:@p�yI97:>U��176D}o]>545Ahc@>?Dg��Y=?>AFy��W687QVdvJ7763_jS��S/\�}GocTuNgyYbZ]y����O%2+.0a����|�v<35^G/&D�W&.`rw�����y�xz�����j.(0oO��������\����������jHJJU|�zTJRQONK;Unp�ҙ�����;9u��������_4=c����}���)Dv���������42YB)5N��y6,&;O446N��R1&BS1.2T��\05ETY\^^`r��bTVOu��r|~qebf|veUZF8:1.7LU<6@ND3-+(%@JJr����~mRY[w������\q���tihsmk��������e_q_W_y����nh���}wu������{u���vlgffglrvwut������������uefq|��������������������������������������������������������������������������������������������������������������������������������š�������~}xop����������������������������s^���;����}qjhlopqpmg\J=]���������xvtrpmlhhfggnruvuuqfV;A��������������������ȶv���[OOFp]:p\@Q8 !"!"&0EOQOBq�����������Ʊ�����������������pk������������������������BIY1(&T3!0/3441+%.;>4$09(*047B��������Է������¸������пb?p�R(0A_ihd^WL9%"M�E ""&.85+#"$(KsK^yyzq������qa/`�����������xAT��y^�����������W6?5('&&&''),-(*)(&$$%#%%!
#%"&/CF`���~6)6[G;,]P$.SomhY2%&*.5=>�����ڼ�����������Ѯ7#V�aS2+t��C"!$-Te[SYUR\t?/X���Ŧ����Џn�}urWP[lyt]jy~}��z����������ƶ��̰�ø{��������������������X.Ps�·lz{ux{uFJB/=juT0#),(*+*5j�5(--/5_������̞������¶�k+399<789;8313��k(5EaX\YM<5.-'&)-6<9<?=7121:{�Ѓ<kfgcigfh[MB���ך?KB@=><=>=E�w:99?j�t?7576\��279Gfs_5434Cp_A>?Cm�{P@>@@Iz��V68:Hv�c578:=aqU��S:j�>O����w�����cXY���Y&&9,/A��o~�lv\TL;23.%L�S'+JPglr{�eidpuz���J-)EnCs�����|\R��������mGI\��l`aXNONNH6\��в�����~B9G^�������U!6:[��������)SwTj�������/6_>-3Q��l5,"?OE^nm�lD;/GV0.1V��X.2FWZ]\\]y�w^cQNv��l��fQbsvqg\]G990.4LG398<:1-+)(HJg������uOVTamY]SigNt��nm|vupt��������Xeq^Xa����rr���}sx������|w���tfgdegksvwux�����������sciqy��������������������������������������������������������������������������������������������������������������������������������ġ�������}|wop����������������������������yb���μ����xpjdjmqqongYNEb���������~stsrnljhfefmsvvuvqeY<B��������������������ǳr���]NPJnPBpM?P2 "!"$/EQTOHv������������������������������ml�����������������������~>;^5&'!].*T`TQRPK71A=5% &(+/46@��������׬ahhiglmm������ͽ_Cn~T(,4Qedn|zhE* L�E##*DVG-#!$&ItG[xxz�����wtpa0Z�����������yAU��tY�����������X8?5)'&%'((-IX7GI3&#%%$%%!"%$'0<Ab���x3%8��|U\H %6o�}xa-$&*/4<A�����ʝ�����������ҭ2%;N- !
#(% "4mdOQedW\a<3`���������όv�~upUM]q{u]mywhqlg������������׿��¶z��������������������X2Vr��~jy{z{wlBB>2BfxS0&+*'(),@��o1*,.06c������̪���r���µ�e,?TYVOTXOIA66��c*8]wksm_O6/-'&)1ENVe[T>0-18��|9\T]g`[dcdJC���ؘTcZYVVURSKK�s77:Fq�l<7876`��.78Go�[3335Es\>==Bf~yTA@@>C�­l68=S�xL555;KrxN��V>H8?�����������}nw���d+.B/-B��h���}�ze:51&R�S&+.632C>6/5>:J=[�h+)'Hb9;CBQu�V=bxqp���{��PANlwfaiiWMQPMH:n���������`;/G�Ȣ�����Q"675Z������v awd�Û����z/9^A26R��C1.$ D_o��aORMF7OV3.0[��O,4KY[\^Y\s~sgbSMv��p��mavunnlgaF87/.08=C8<GC3.+',IHm������lPYRIp�}wom[y����������������djr][g�����pq���{w{������ypy��{oihhhjqvvux������������qcksz��������������������������������������������������������������������������������������������������������������������������������š�������zwon����������������������������s^���Ѻ����zqljloqpnlgZM;]��ú������ywurqpnkigflsttttqeU:E��������������������Ʋ{��^NLAG15C/7A'   "%-GVQNGs������������������������������nm�����������������������>3Z8)%!S+1VVKLMX^;19?4&%(.48A��������ԩn�z|yptq������ϼZFpU(-@fqp{�{dC- J�E $-HSB-"#%)PtG`xxw�����tvt`1a�����������y@T��uY�����������X6>4*'&')(&C��{��\%'&$&%&##$'.9Bb���u3#+WUR`1`M !&-`~wza&$'+17<A�����ϵ������k����ѩ0%Hq[KKFFCA_V$/O3/@U70?S1+d���������Іr�yujQCYuzs]hxfANOV�����������������Ȳy��������������������X/Uw��|ly||zrpCA<0@`[5-&,*('(.E��^,**-07d������ϲ��}�������_/J\dZafeYTC57��])5TZ[_]ZB0/-''*6LSedieG0,0>���u(56<A>?CGD8?���ՐZ`_\ff__YSS�p998CsqA6679b��067Juy^7336Hp[?>=?f��L>??<E�öi;9=Yvm@5559Aw}Q��S;73:s����������������R#?L00A����������{PD=.Z�U%*/2/053...**,)3si()*'D[7/11Ax].e�d\Q?v�ZevmZHKGLSXa[QOQNLK4jfVXjiybI[29=������yw|205H������Zc�������s{^<]J5BINN995)"Kf���eVTQK:bT2.6h��J/3JXZY\XX}�v|cULu��mz}urnP_{lc\?68+.4BWS>\\H6-+&
0LIg������QOYXs������b|��������v��������cfn]Xb�����gx���yvx������r_b��uiefhhjpsuuv������������qfkp~��������������������������������������������������������������������������������������������������������������������������������ơ��������|xln����������������������������ta���ռ����{vsqomooomeZK<c�������������zzxtrsmoqrtuvuqdS;H��������������������ȱ����_PL=)(('*5A4 $0JTPMEv����������ɽ������������������j#q�����������������������|<:b5+'#C--N][VX\P./4?3# &)/46C��������Է��������������μUFlR'2NsumrcQE:*$K�D""#.GTF,$%&+Ol@ayy{�����|wt^/e�����������z>S��uX�����������Z9=3*('(((,a�����n"'('&'%! #"&-8Ce���y7%0d[P[:iQ!  "%9oplnH$#%+08@M���������ѭ��g����ԥ/'[�oilpw}��a )&! +ELJSUVSR^=/f���ӱ����̄o�yuj[Tgv|ubk{phttz��������������ı���y��������������������O.W~��uk||wrmHG=/163,*%*+'')0D��d2,*+08j������Ӻ�z����̿��Y+AMRJOTKJM:0=��U/?DDEJME622-')-6>@OPXP5,-/;���n%;GA?@99831I���،BIGJMFFSMGH�h876Br�q<868=j��36;Np�]5458JqW>>>Bj�wC?=;?L�óZ66:TtqI6539I��S��D<96?igw�~��|������~��JKC93K��k��������zn_Ah�]0,/.1@N\YMMMIKB8@3(*,)@V1.005A>j�b^^VQujJ`kkZFIJMPMPPNONNLK1dh]P\^\\RNC>;Objql_:NC$!5=j�����nc@grOehk[^?X5GpSR_JBMPZQ4!Vct��vf^YL=#_O..:i��9/7O\\Z[U\~��wgTNy��fv}x^SQepm_S76;.0<chF<YlZB/*$2OBV������^U\Y�������`��������t��������`nt]Xn�����bn���xv}������rct��ridfghlrtvuz������������qfkr}��������������������������������������������������������������������������������������������������������������������������������ġ��������}{mm����������������������������r]���ռ����zxsonmpponh[J9c�������������}|wttsqontusttodT?E��������������������Ű~���\ML=.53-+IZ8 !",IUSNEx����������Ǽ������������������g"s�����������������������}86_-+( O)+P`^VXO@,.2G3#%)03:C�����������������ͣ�����ϼQFqrP"1I_[LJKCA7* %Q�E "0DSG+#$$*I^?bwwx�����xttY/d�����������{AV��vV�¦��������Q9A3('&&&(/[�y���j#'&%%'& !""&-8?d���r4#)}��u qK $5QO[Y<($&+07?G���������˭�������ҟ*
'HVWu������>#/. " )IZ^gnteim1 3g��Ҹ~�����|m�zwn\N_sxr]kxtz����������������ԥu���u��������������������N5X}��ri�{stn=J?--./-*&0.(().B��m/**-27p������ćx�����˾��T'07,?;C;:6049��J7\hfa[XVRKB-''3CHCCKDE<;;1@���g1Xebe^WXLB6Q����+<9::95:<;K�`769Jy�^>;9::r��*67Ow|L5658JtX<;=Al�qK<>==L�ƤU98@`{uH7546L�zO��H?>=@UIMod�~e�����S�a5#FF;20��=����������eKs�\1,/,M������{��{q\=002*=T/.012.h�Q=J^a_^NB1@`ZAIJKPOPONNNOLE.#q�nmxnqhbSK>/:YRWPOJE=(!::>AKTSX3DenZj`caca_MKoNb`PVZn|k8N`a}�}id_V?%WK059avO04:NZZXZYp���raRD}�]o�yXP_oso^M86823C_\<IXgaJ2+#0KBh������]UZZ�������R���}����z��������Uon]Wl�����cj���xvy������q]x��rg[dghlqvyvz������������o`it��������������������������������������������������������������������������������������������������������������������������������á�������~|ykm����������������������������m[���ѻ����|tronnopnni\O>d�������������~zwuurqrpqtvutqfV?I��������������������ǵ}���YPNDSrsjboo6"#!#$-IUROK}����������ʿ������������������hx�����������������������w3;Z-,,L&$CabcohM0,1=/#&+079D������������������������жSGt�Z%1Ngecjsq[B+S�E  )CO@+#"#)GW=g{yz�����|pr\0d�����������zCV��tW�����������O<@5*''&()/a�����l#$%&%&&"!!"%,8Bg���u0!3q�y$!oL%7jzwpS*$(+16>B������Ƹ����������Ԟ('<r����Ĵ�^"( 
7jqtmc^VXU(!2m����������u�zun\Rcuwrbtvbosz�������������ϳ��ʱs��������������������D<\���qs��uswl=S=-../0*'60&'(+C�h1)),2:u��������������˼��M(.3ETWPOB//3A��IIvssijuoliH0(/Pcabkgmb\]P6B���_5WV^__idcD6Q����}@EEMIIHCC=S�`769NvyaN=8:8{�~,57KtnO6665MrR<;>Hu�sG>@@AS�ɦC9:>Xvj7644;S�sM��=CF?9FLMRIO9,?TVW[@2M>*$<H/02��D����������i@s�O.-0H�������������j?74*AW3-023G�\8BEKf`_=256fgHFKLJKPQQQPOMF.&jvU_cei`YNH;=r~jeca^YQ/'<78@B@@@;E,g�||}~{c"H^OWSFEXv�l/%Taem�x]a][C*ZVTZW]WNCCHUXZ][We��}m_Y>�rX��lQK`u�w`N968/3B_N79Qb^N;*#"8OR������zPV[Vo�����oO��������yw��z����{Xol\Zj�����_s���}v�������r]}��pfbgghlqtwt{������������jcju���������������������������������������������������������������������������������������������������������������������������������ğ�������~|vig����������������������������p[���˿����{vtojkmmnmg[L?b��û���������~zwsrrpprrtvwvrfW>I��������������������ǲ���\OPG`uwmlwi6"#!!&0LURMI�����������ɿ����������������ؿe y�����������������������x3O`+.%$E%,Sf]YVK>'-37.$ &,27:G���������������ͱ�������ϲN Hx�L&1AQTct|~fB*"T�A *CTI+""*AP<ewyy������tp\,h�����������y?Q��rU�����������Q9A3()'&')/f�����d $%%%&%"$"!#(9Ef���p)!G}k$mH#6alql<"#(,15<B������qv�����im���Ԝ$'F����hH&#,OY[PEGGJF$"1n���Ŧ�����tz�ztnXScvyn_w|dDRPg�����������������Ьp��������������������@6Z���ox�~pvxiCU=.//-/)'5.&&(+K{�a0*)+2={������Ī������ɼ��F(,1U`[h^C./4C��I?S]YRU]`bT8-(.N\[abmopghH0K���Y,?@>?<CHD52S����~IN\^[]QRUD[�_587R{qC97::��t077YztL5558OoS:;=Af�wjX>?>T�ĜG88<Z_E3764@Wxtf��-@GHcxeH:554./845E@<@;+)SC*1B�gY����������R%%}�C,+1|�����O4k�����pK1,%Yh1,.1-qoB>A@=Yb_MG;b�gFHMNLMMNPPQQME)"gmY__cfdLHG:EwmWSY^\aS+&7:DSSRUNWmZ$n{qi|uov��^#P[SMC?GNfte+(Yabh�lTUUT=
._e|�����xnlaZY[YRg��}skSB��c\��aMTg��vfS96802AT?/15LgK9+$"@QN~������\W[Vn�����^���������~���{sv�g^qp[Yk�����_t���{s}������o`���tdcggglrttnx�����������~jbmu���������������������������������������������������������������������������������������������������������������������������������à��������|ukl����������������������������q]����Ƨ���{wrnklmnnof[J:a��ŷ���������}zxvtsrqqpruvutfX;F��������������������ų����]OM=(($%*AU6 !!"'2HTSNL�����������ȿ����������������ڻc �����������������������t4]a,"1'"@%0X]TTRGB1./8-$!%*39;I���������������о�������ҲK!Jo�M&,=Tgw|yn];&"X�>!"-AL=(" #)BSAfvvw�����qus[-i�����������}AT��tX�����������S:D4))(%'*.c�����B#'%%(&%#$##&-:Cg���m. 1u�U#iG")EihY6"$)-47>E�����Ʈ��¿�������ϖ!(PqposomlfT)" !&($"7s��һ������n}�ywjWO`s{o_u}xx}z��������������Ү����o��������������������F>[���r|�|xyweEX:.-//.($/+)'(-I}~B,**,1:������̰������ɽ��B..-KJJLI3+.3C��@,FRF>;G>DB0*');?DIEOQMQP7/K���S<LGA:?=9<,:[����u:GTRNQIKH6X�\298T|}k;9887��g/69b~qO655:QpM<;;Jx��vJ:=<Q�ɚ>556:?Q<764AOxym�{5KXl�mXE75:;5/,=in_C>:.$LA26@[PMPTX[Zlje�F0%'}�A-.I������=Ic�����pX+,$c\2//2X�G=>A><Mgqq_e��gCHMNORNOOPRQOE+*oqafggkwWNE9HlVRYONTZP%%7>KVVZaa|�a#m|w{�u[q{�d!^pQC?FOOgj`+)X]]j|_QICJ92dx���������d[[\TOw���pcEJ��ad�}j^`o��reW;88118DO?/07A=8-##CNN�������LX`d������r[��������~��������epo]Zw�����_z���zw�������rf���nccefgmtvsrz������������gelu���������������������������������������������������������������������������������������������������������������������������������ß�������~zujm��������������������¿������oX���հ���yvqmklmnnme\F;c��ï����������{wttsspprssttndT9J��������������������Ǳ|��TLJ;-**()?W-  "#"#&0HPPMJ�����������������������������ֽb#~�����������������������v0[\,.'#=%9XYVYLHX: 12=0$!'*15<N����������������϶������϶MLt�T)0SmwwufJF:&"U�:$#'570#!#(@OAeywwt�����wr[0f�����������wCU��sR�����������W=F3+'&%&'+7XQ:@> $%$%'%%#!#$&+9Hi���j05g~a)kD
"7t�viK%#)-17@C�����ϲ�����������В )l���������F !4u����������m��usia`hvyobw�������������������γ��ʥq��������������������=<[�μr|~yxvveJY7---,/&$,*'&'(,*4'(++,2>��������ȷ�����ɺ��:-/++-.-.---4D��;;_j`^\`c\I3)%(*1FB=:?E;;20M�йW[f^\Y]``UIJb����c5C=BCECA?8a�X297T|�_9:79>��b/58YstS224:VnE<=?Ll�yN@><=T���@687Lz_=7546K|o[�~LUU]ZXtR?HY^THBq�h[@=>5$CC?>>;61.)-3<ON-121+!z�:0/Y������<W^�����u`,+"VG1/09ql:BBB?Ii���u���iDLNMMRQKRQRPNF(.qmfzrjxZPC6Ll`khVnhTN)':<FPTclf��Q%i}z��wguy�i!kvE8>U[V]fX'/\daltcVLB?+1_s��������~]RYTLUj��qTA@N��\e��ncfnvtnh^;6400>eb;3655:6,#FPPz������S_ab������xY��������|��ù�����Qqn]V������\|���zy�������mg���kfdfhhntutt|������������iglw���������������������������������������������������������������������������������������������������������������������������������Ġ�������}zuii���������������������¿�����nW�������~wtqnljihhgfbVKBe�����������}|vwsqrtnprsttwncT8M��������������������ı~��VPMD>;858>;40.+)/../,6MNMMH�����������ù����������������ֿ`&������������������������t2N_-1+$7%/V\]T:DK,248,$ (-38;M��������Ы������Ʒ������϶HJu�\,4QfnmjkodE$$[�: !"';C:(  #)DLFjzzy������yvT2q�����������v?U��sZ�����������V<D6)'(%&&+8UG$'$$$#%%$$# "$!'.:Ik���i7 1��S(qD  #BthfvT&!&+08@J����ںxrw}~�}����͈/v�������ȟ6 ! :y���ʴ���־h��yurmjo{yqev��z~|y�����������������ȡr��������������������@@c�оv||zxvxbMc8-.0/-%%.*'''))()())+.3A���������������˹��=-/,.,-,+/./2K��>>eptz�yspP5+&+5Tdfhfhnc>00J�ҸYVTUUZ]cjXOGd����gLXUUQMOGG@e�U59>j��K8979E��Z/7;c�M3459ZrF<==A`rL:==;=Y�ǎ379>eu\=5554Etx}��=:DXd������tkho�~ti@B@3-MQE81.0.-/6:ZZB,/4<?
!�{914`������C\Z����|x_,,"FF321D�_QQML`��ky�����\BPRSTc[NTQRNLC#,wq|�orxxWRI;Ongr``xNPL')=<HTlyzv��E*q~z��~vv��\!qo?:G[`\aaL"5eccjpk]TK9#7cs��������|kaYPT_s��dCBDQ��\m}tkc_cnnlh^961-3RlX6IG:;B3+#!KRJa�����cO`\`������z`���x����l���������Wun^Yx����Zx���xr�{|��|mj���nhhgelrstvs{�����������jhmx���������������������������������������������������������������������������������������������������������������������������������à�������}zthe����������������������������lT�����{xuqnlggd`\[^[TG:`������zyuxrqpplpkkkhgiiighf`_U<K��������������������Ʊp��PLJLSOIKIGGGIEDDEECBINNMLMG�����������ļ����������������ۼZ%������������������������x1B\0-'(>)+Q\YIFE>*530)#"(-17:L��������Гbcjicdnj������άA#Jo�S%1IV]ZbcdM8$!Xz5 "2QTA($ #*GQAgwy|��{���trT/o�����������yAU��p`�����������Q>D2)&$%&%.b�va<*$$$$$&%%""#!!%.:Ek���j47l{O!-wB!%Douu|J&%(*07>H�����ͥ������g����τ)PJ',/:;Xf!"#=}��̕u���׼f��ztqheiqwqiz|[K[Wv�������������������w��������������������8<d�Һyyzzwuvb[a7--//,$'1+)')*(''((),.1B��������ľ�����ʷ��K>>:776735052W��33HPY[ZW]WC3+')3Xehholnc;0.S�ֶ@0;7;7@F>9?9j����cLX]\\^XTRCi�Q5;U��vT<977B��Q/7>dxv<356<ajC9==<KeT=<=>@`��|0774TnaD5333M����}48BRu����xV[kvw���bHI=1,PJ8//.-,..3?\T2.0>ZG$�t88>i�����kL^a����y{_--#HL367|wdgim���MHottu|pCGcedp{XXTOROLB*4�z�}prtpdbO6PmVMPTRGPL#+<<G]s��y��81u|���|���O'kb<JRZbmr^I#;f_^fsujiT0 ;`{��������vpaVSX_z�}U@BDS��`f`WU^cjlgjh\;52.3JTOLVH:L@0)""FK>c����t\V^]c������y`�������x}��������gYzo]\����_����wt{�����nd��rjfghnqsvwt}�����������zihny���������������������������������������������������������������������������������������������������������������������������������Þ������~{ujn��������������Ŀ������������kM�����{yvrnkhec`_\][SH:Z~���~{{zxvoqrpnmlljgffffeaba_U7L�������ƽ�����������ǯi��QLMLMNOMKJKLMLMIMMJLRTOKKKG�����������������������������ٵZ&������������������������z,DZ,8'.P),PiegrfP-631,#
")/49;K��������Δ����tqq������дD$Ln|N'2LelhojbO:("-_�:!&=MSA)#"")DQ;iyz~�xw~xtvsT1r�����������zDW��vZ�����������H8A2*)%%%&/I���lO+&&$%&(($"!#"'1:Bm���e 8C��V/sD#4bxpc."%(,17>I����ظ�������p�����~ )M]9(!!&""%;���ˣ����ֵd��ytkTTcqwjh|y\OTVz���������������û��v�������������������}7Aa�մju}{vw}^S^5/.0/,&)1*'()*'&&))+,,3B���������������ʹ��h]ZQMGLMLFFGA^��98879152>?7/.)).6:8BGHE@0/2W�ѰRPSK?ADE=A>It����W?HFLJKFED=i�G6:_���X5768D��R/:Q��k@556CgmB9:?P�~\HC>?Ah���457EnuiD513:h�����B>AFMu���U@EEO\��]6QG:*(N@92.-,,--/5?:0/?lmQ&��B<@h�����\Y_a����t~[1-!HS8?Ybq|����bUYefXHHD?`sttx\ZWQRNLC$=���vkoppzlJ:QnTPOQMDOL"0:<Rr~�����6:y����zy��}N'aXL\UXirYD';c_ak�{yN.=h��xx`���xqYPPXap�zXGHFM��daW[_hkmlkmn]>71-2J^OGONLa=-(""HOY�����gT^]`������`d��������{�����õ�v[{m[f�����}`����wy�������kP��sihihiosvtu}�����������zgdnx���������������������������������������������������������������������������������������������������������������������������������ğ������~wkj����������������������������iM�����{wsnkihga^_ZXXSG4Y�����|zyvqsqomkjifggghfdb`b_R>J�������²�����������Ůj��RKNNQOONMKMLMMPNQQLKQUUOJIN�����������ú����������������ԳU(������������������������n.EV'!3#0B&-Zc^][L?(11 8+"!'-478Q��������Ϸ��¼����������δ?/Vp~U'9]trjpoj`F+&3\~:!&9MSC%! "(FL?o}���xvtswtuQ)o�����������|DU��s\�����������O=?4*)'&&&),j����P''&&%&&!
"$%'.:Cj���b 1 P�=3r=!%/=,("&,07>N�������ƿ����������|!,r�QMA0%##&;���ͦ����׷\��wtndioyyme{xl^af�������������ʯ����z�������������������|7Cd�ֳlu}ysv}YN[/-/0/,&+1*()*(((%')**-3E�������¤������ȶ��xnojgglmidf^_l��IDLVODBF@?=>6/2431200133434^�ӮlkiXOY[^gge`|����K1DC?<=A?>9m�F8=Sr�P3877D��K1At��oC676?ekD;:>XtmO??Bi�ċ367Bkqe73344c����x<CBFDi��PCKKDDqY.<JF;)(C:60,--,++03037Advx\9��857k����]T]Z_����r|Y21"jV?>emTy���|lijbdhaP<1?[jknvo\[WRRPOC!J���jdhkri?<Rg_]WWRLSJ/:@g}�����1;}���nw|uxM(\QINLJlzcX9 Adbck{uqgB1Ck~tvtZv��qskRLQVXlwrg^VGL��cYT_|�wutonhV483,2KiD:NXpL),*"$LPMw�����cQ_XM`����xQh�����������������ufnXc�����|h���zuw������~iU��yldcc_dpsson�����������vehoy���������������������������������������������������������������������������������������������������������������������������������ĝ������{uic����������������������������gM�����{wqliigca^[ZWWTOD7[�������|xxurpmnlkhffggfcca`^_S:K��������������������Įm��VONNQPRY`fhabfc```WPTSSPJLO�����������º����������������ٶS(������������������������m'IV$"8#-?&5[]WXQGC2!66"7*!!',39;V�����������������ƫ�����̮HH�lu�U)6Xn[COUieF&#"X{5!%/FO>(# !)DI>p}��������vpS/n�����������{HU��sg�����������P>B5('&&&&*3o����H'%%&$$%!!%&)/:Gw���b 2JxaL94s?"! 
#'+29>Q������e}�����������x /sndwlMD3 %>���Ϋ����ش]��ztiacmxzje��dWeg�����������������ý�{�������������������w2Ek�٨\t~vsx{Y^b2-///,$-5()**')*()))*.5H�������ϙ������Ǹ��rlkeefnnijkfis��a^v{^W[]\XYcI9>A@:::<AA??;9d�ةRRSHCTZchli]w��ݻLHX[PMJKEF?p�E6;Af�zT:757O��B2Ajz�q>459AejB8;>X���]?==Bq�ņ177Gv{S75469l�s|�v@DBFJl��|UJWQD>e_CCFE>))<772-,.-,,1/-9?\x{�a"T�p,-8q���|]WUHa����n�Y95%%aI9?j_g��gowowsmilk[C9>Ncjmxr\[VPRQN@!T���k^`^vzdG@Yhec`^Z[dG1>Hu}}�����)=}|��ov��t|A$[WBDG@`hXD/ Eiiea__bO:/Nckq�rRettqo_MJTVKOgsjbRCF��gR[w��}|sldP6;3,0QY;8X]ZSC1) "OPR������mU^[e����{im�����{�����������jd{o_f�����rj���wv������}e\��}lefbbgputqs������������tchoz���������������������������������������������������������������������������������������������������������������������������������ĝ�������~ztfg����������������������������eJ����wvqnlidaa]ZVVSPON@6^������yxwqomljjiffddcbba`^][P8G��������������������Ŭj��KLPOPRYv�������~��mUVURPNOM�����������û����������������ٱN*������������������������l#DP& 6"/C$1N\ck_XX4!<6 2+! &+4:6U���������������ս�������ήh��kq�O&1Th\VYnnX:( $^y3 &+BN='! 
")EIAp|���������sO-m�����������{GU��s]�����������R>A1(''''&-2����f$$%$%%%$""#$*2<H����a /:afur85q=# ),+.)%$(,3;?T����؛K���Zo�������v!)E8A�zu}�wH$>���Ц����دg��ysaOK]ovkk���������������������÷���{�������������������q/Eg�֠au~wtwzVa_1..-.,%,1))'&'')*()*+/7L������й�������ź��mkkfeikmifggiw��^i�w]_agccjmOAEFG@ABEKLIIDEj�ԡ*1.989:@ACEJx��ٴHUd`TSRRVMBt�>477cyxI6668P��=58Sm�n=247Djh>::<V�|`?><;Aw��q/88DtrQ<5469p�w��x?IGIGv��{R^q^KElb@@;BB8+>4641---.--/.8Nv|��U!M�j,,;y���|\YNHj������W;6#$VB:HplllQ_{|tkirnpocI9Dhwy�|lY\VOQPMA#i��{aLWUpvbF7@K]b]a[^R0"2:Go}������"B�y}v����C-`P?AACfaU=+"Nuic\RRTE:-Qhgy�yMZszxiXCISMBCTmgSE@F��_R`t�~���l_ZL;?3,2EE65?jl]I8)&OQm��]g��uU]^h������Xl����eU{~}��������tg�q]b�����Z`���|w|������xfe��zkchfeiqrsqu������������tginz���������������������������������������������������������������������������������������������������������������������������������ę������}{uff����������������������������fQ�������xsnlfcc_\ZZQPOC8\������~{uqommjiffa`__^][\[ZZWN4I������Ⱦ������������ƪi��YRRPPR[voORZy�~]s�sNSTSQOQM�����������������������������ԯR,������������������������l"FS%"73A"6PWORNWJ($@;!4+"!'-699W��������Ұ�����ǽ�������Ϋj��qsU*1Ibkovua=4& %_w1 "!0KRB$ !)DJHmwyvr������vT4n�����������zJV��sX�����������SAC2((('''.^��t:'$$%$%&$ #"%)0<J����[!, #9q6" "3Ra_[@&%*.29?W����ԭ�������q�����p#*Ok}����Ҭ_! %C���̣����ةb��xtoMAPmwnk��������������������Ñp���x�������������������o2Fl�қexvtwwWb]-/0.-,$,.)'&&&(()()*,/7M�������ĸ������Ʒ��oljfeinljghhct��Ws�`X]_ddfkgNAEED@BEKOKJNKEo�ΟFEEINKMJJC>G���ۭ?;BFBFHJLB;{�<484X{vN855:V��@56Cz�i:657Cpm;9;=[{{Y==;<Bz��_177RzwM8414=r�|��t?PPO_�w|zc�{dUMupA9>JOO.9,142--,--0//Apx���K<�g'.<r���vc\GGn������T53!$ZC6BrftjXe~�cRkt||q^?7BZo|lcUZUPQOO?#p��wha_amx_@5-8`gbc]]D' 09Ku{�����{#N�{��������E1aJ=BBWidR9+#YkaZRPMH>0
"^ih��uXm��xi\JORIH]�{qUI@N��ST^p||��lg]]M;A3,/9=857YxhB7-"3YQm�nIDo�\Qb[Z������do����{��t}��������og}l^e|����me���}tz������yfo��wmehgehrtuss������������oehr}���������������������������������������������������������������������������������������������������������������������������������×�������|zscd����������������������������dS���˞��~xtojhieb_`\\UG;a�������~yurnnllecc^^_`]Z]]\ZWQ7J������ʽ��ƽ��������ëe��OONOPMYpdFHFr��Ya�pNTUTPOQP�����������ɻ����������������ԴM/������������������������j"ET'#03= +0! /50% $@5"7,"!'-49;Y�����������Ÿż���������ϫv��nn}P(3Fbe`_O:24" "]w1""#/IS;#  (GKEpyy|�������oN/r�����������}JV��r^�����������O<A2(&''''3x�B$''$#%$%'' ##&)0>K����]%7k2!#Mw�|H"%+/38?U�����ͦ������������l#6�����ʱ�W#!'H���˧����դg��vtaHM`yymkxy������������������ɢ��ōx�������������������n.Jr�Ў]x{uvxyQ]P,././-%,/*('''(''(()+.6R��������p������ŷ��kkjggjjfhghib~��bqqf__ehegpoMBEFBAFILKFILIBr�ϡb_ix�����������֧E;=88:8589?��978=h�|J644;Y��:45Ere?767Gqg::==Qw�Z=>==?}��R698Nj]<7746;~�z��i@QTXz�frpl�oib\�y:<=EhI*5//45/.-.-008bp{��{HB�e).8u���kjcLQ�������e@;#*`=1M|v�qc_yuQ^q�zkP76B[r{vpkZYTRPQO8p�wshgefnzU=43g�|z�tR)2;Jpv���u�u"Y�~���~����@/OOIKMbreD6,'^�daYMMPOB0&_iix�l]t�vhQJRNI]���}jQ:R�~FS\r~ls}yg^dJ<;1+/7H73LpcJA<+"3VKd�~vmujFSg^~������op����������������ci~m^h�����_a���}uy������wdd��xkbgigkqvwww������������qefr|���������������������������������������������������������������������������������������������������������������������������������×������~zyra`����������������������������aN�����yslkhcba^^]ad[D9d�������}ytpoomjggdbbb`]]\]`_]S;N������ɿ���ó�������Ūh��JMMLORZmdLIEe��i^�oRSSSNLLR�����������������������������ҪL.������������������������g$HR(#03@!"!!7.%5*"!'/58=^����������չ���x��������̨n��qtxU+5La`ZXSJ?4$ #_u3!!%2IQ<*  ")EKGr{x|�����~stL4t�����������~GV��uc�����������M:@0(('(((/7:,*.-(%%%$$&%$%&*4@H|���[%:h-
#5MfqrG$&).4:=U�����ɤ������������k&>�Š~M-!'H���ɡ����՝j��wr_T\pyvmlcA\��qp����������������Ž�x�������������������l-Pw�ъcwyutxzP[L.../1.#./)''((((())*+-2V������Χ�Ƹ����Ź��nnihgiigi]eha���josnfgjikmogFBEECEKMKGGNLHCt�͞bh{������������ڥ`\SNMGA:?GR��759=a�v@677<]��766Gmwi=346Gte99;A_��\><<=D�®U6979AGGD;55=~�v��dJRSY{t\b`pznlaU�xI=5=_5*7,/641/,.123Lmv}���HK�\$+9y�yfybON�������c:6.a?1a���o[h�xjpqw|ymP27Eqz{{yf\]TRSWZD$ccEY[Y\_da0<74Zz���v?*!46Fkz���t�lY}��������;0d_W^bkiJ94+)f�eaVJKTUE4&clnrvdUq~�ycKMKKOW`i�}nY?a�rHV\w~ox�tge]C8;/,1=?05N^OA?5( 7ZKb�����y[Xea��jKV��iy��x����y������ı�Wo}h`k}{zwTc���~w|������v]`��wilnmilqwxvr~�����������nbjq}���������������������������������������������������������������������������������������������������������������������������������Ò�����zyxrbd����������������������������aN��ļ��}vpmkhec`]]\adYE;i�������~{vstrolijhfcba`_^_bd`S>M�������ª�����������Ũl��OMKLPQXkbDIP��o`�nRQQSONLO�����������ʸ����������������̪I/������������������������hEM""47A!!8-%6,!!'-586]��������Χ�yzrwvtq������˨d��lq{_+1=W`]dnkT;$!_|0 !3GSC,#"$*LLDtx{�����vwsK3y�����������|OY��s_�����������O==0)'%&)(*Dvj_re0$$$#$%$#$%*2>J����S"+5Z+ %@^QT)"%*.37>^����ի�������z�����c$4nH!
(L���ȣ����՛a��xoaPYsyxni_Rw��cd�������������˵����y�������������������d/Sx�͇fxxrrwsNWE*-../+",.(')(((()*(),-6\������̠��Ŕ���³��mmjhikkjfgehf���^o�uglojkli]ECFFDFLMJGIKNJHz�͖Xhw������������٪yoiebaRFY\_��768Dj�i@7789a��685C`wk9467Ixc;:=@[|zN;=<>I���V599KjrmY944?}�l��_NPQTrz^QSmnlna[�{E3/47 *3*,4510--/4<`x�����BN�[&);qoidkx[SK��İ�εZ20<c94f|��qt��snqmpxvqU25Cfru{yeXZSSYknU%c;4GFDEJE;/:35j�����F(!6;Nt��zi�h#`w���������/5uhfomhTF>8.'fs]WPKL[[H6/njloqZEl���cTMJNOR[^hvk\Ef�kFU_uupx}qf[N90B/-1@RO3<QL93-&!8YEFx�����XW`Y{�~sg�pO}�������v���������cr~g_l����vXe���|v������sZ_��simokilqtuvw������������k^hq~���������������������������������������������������������������������������������������������������������������������������������Ë����~}zywp^a����������������������������bT���ď�}xqmjheb^[\]adZD>j�������~{xuuqnkkjfied`abbacj`S9L�������º�����������Ħj��PGLMNNTjbKNW���a[�lLRRQNLNO�����������ɾ����������������ˡD4������������������������iAR$!9P5#'8D"5*):)!#)0597e��������ϊqxty{���������ͪ]��hs�j*0Bdgfr~qc>#$^t3 "3Q^F+#$%,RRDtyz{�����{wsK0|�����������zOT��uf�����������N=>0*(%%**2v�����I"$%$&%$#$&)4>O����S,2/Q, $KjbMC)#&+/5:Ae������������ww�����[$/KI+/-+&$*9"#'M���ʟ����әa��vp\U_txxdhjj}��mk�������������ɬ�����������������������a0X|��}fxyrrupLXI,-../*#01)()'(()())+-/9^������ͩ���}�������knkgimkkiighk���[v�snoqhdhnfMFHGFHLNIGKLPKL��ΕWep������������٨vtpmkgYSa_^�~887Bh�g@6688g��178Frm856:Rw`:<?C\pW=<<:9I�Ų]58;\��lD545G~�n��aOMMMq�kaekfnybc�C1./)#+)+2330/.18Im~�~��x=V�Z%)<scTdphZRQ�����έX-)Ce8.bw�����wolneets}M07R}~�zfX\MT^~�Q)b?M`UUVYP?.;5>w�����u9&%<?X}���pk�Wd{��������z1>zhcql`SG@8)'chSLKOM]ZL16ghlrvJIv��dTIHPSbmkbeiV;e�kLXerrs}~k[OH74<0.>_pF0H\Q0,+# 
;XPi�����eCX`Yx�����tV�����~������������\s}cZo����x^h���zu~������pY^��qlinmlnruxwv������������leiu����������������������������������������������������������������������������������������������������������������������������������Áv��|~�{xvpa_����������������������������_X���Ό�}zrokgdb`^Y]bg_H?k��Ð���}zttomjijefcdbb`bbhpgT:O������ʿ������������ŧh��GINOQQWshGNV���Ld�pPSRPLMPT�����������˾����������������ƞ85������������������������bFR%+t�y*7> $3&(6( "(/694h��������͎p�������������ϧW��eu�i+4Ypjfdc\R9#&^q0 #.E@.#!"$+OL?y{z|�����|tmK9{�����������KU��sb�����������P;;0+*&&)+5|�����J &&%%$$!$&+5=N����L.1+Q*"%Kqwqa<&$(-5:=_��������ҵ�������ӿX%;urVTOOMPly #)O���ě����Е_��vm`R]syugjh[t��om��������������ʠ�����������������������]1Z|��xkxytsumMVG-.0..+(21)()&&))*)),,12c������Ҹ��w��������lnkhjmjigfggc���P�srmjc`euiIEHKKIJNKFLKJNL��Γ[ap�����vw{u���بwtsrph][]W[�t37:FlmS6769:m��136En�`2456Uv^:<<@FSWNA<:>L�ȯO78;VqfG.444A�h��_ONMRnsqnlafbW\��J4..( %/*+07620.0<Unvw}yn[1`�Y#*>xcJck]ZS\�����ϮL.(Ie3/Qos���~tield\osr9/5O\UgagYX]US^t�I,W@V]VZ\\W<1>3;x�����k;''A<Xx���vw�Qg~��������o'A�`KXTGCA=4$*Z`PNRVWZ[Q39hgj{z]k���k]QJMV^r�te\]Q:f�hS_jstt~{\NJG9971/Bc\76WTI:,+&$>TU����}raK[^S^������[���w����l���������Xx�c[n�����do���zsy������p[a��qgdijkmrvxuw�����������~kekw����������������������������������������������������������������������������������������������������������������������������������È{|ru�{wuq`a����������������������������\X���ؔ�}ysokgfb`^[Zaf_DBk��͑����|zvtroliheffddaba`muiW?P������ʾ������������Ǫm��JPOOOQYrqQk���TAr�nQQQRMMPU�����������˿����������������8:������������������������_IO&Z��S&8@!%7'(5)!$*.9;4h��������ʢ��������������ΥW��es�Z&6QXRE>AJ]C&%dv, !!$$$" "#&+OHAwyz�zx���xumH5|�����������}JS��p_�����������S@=.**('*-5�����G!%&'&'% #&+5@J����M'//V,"?rvywA#$'.3;>b������¹��ǲ�������S %E�g__^bo���!#
+S���Þ����яa��yn`MZszugquwt��~v�������������ϫ�Ź���������������������U8_|��tkwxqrvjFZF,.//0+(1-)''&())+)),,16m������л�{���������kniikkihfefjc��{T��griedcgm`EGHNLJLMMLMMKPL��ʎ^`n���mlwxo���أrvtqnh`YXY^�q589E]TS;6888n��/37Hu~M3547Ty^=<;B\zvZD=<=S�¤O98;HlrE4533E��j��]MJHKt��iimWSGNU�z?2-0.&,5*+2<?PB@4<ZocXOCHE-i�P'+F�dH[[WXNJ��þ�Ф=/(NZ00Kno{����umqdbqte615BOWddaSYYUWh�|50N=P\^`_^[67A2J��}{�}o9()@=Yz���u~�O!m���������h$L|S28>98673&1]aKMQWUVZR4>haf�zp{��jMWJFLXap�{q]`R6i�lZfqvrnthOHLM<:7./<MC//DSJ9-+%!>RRw�}����S[^Vp�����nU������~x���ƫ����bz}dZp����`s���xw�������pWg��rgdfgkmsvvv{������������ldlv��������������������������������������������������������������������������������������������������������������������������������������pr~�}xuo^e����������������������������aR���ג�~wqkifdba^[]efYF>l��ɏ����}yxurpmjiffgdcb``cjrgU?P������ʼ������������Ŧk��JONMMQZv����fcl��nPRQPNNPS������������ȹ��������������Ž�58������������������������cIP#C��n)4>%9-#3+$!&+09:5n������������������������ΤX��et�Q'6MVQJS]rlG(%el-"   !%%-SOKs|���wq��vunF:}�����������{KT��pd�����������U>>/*'%%)+8������I!&'''&& &#&+3=H����M(*:Y*'Q{ukN$##'.4;?k����ښY���������ӺI$7J$$+5h���Z$.Y��Ҳ�����эa�zvl\O[qyukqlNl��q����������������κ���������������������K5^{��pguvppumF[F.../1,$1+))((*)'**)*+07r������ɒp|����ɾ���jmjhihiggffhc��wg�xstliif``YECJKJJLKKJKJKQQ��ǈ^fw���okksrt���՟syvoke]VW]j�k8<Jk|znD668:q��/36OuvW5347Q[7:<D`�U?==?R�ɦE68:Xxc@5554F��c��\NHFD|��U^aHDNVe�X=2-/1%02*+2Ei�ej59FD40/-14"q�F'+I�`KKKQUGF�����ɜG0)EP0/Mmm|����pujisr_328EbzumdWZ[WVm�]:0C4SchjjgW22=-E|on���p5(*?=]���yy��H!p��������l#K}TFHFC64<<+5aU;=AFFHLK-?`[n�uv{��TOSGEKUcp��ykK<v�p\ilpqqp[LJMN:76-.7BE32EbI2,,$ 
;PJaz����^I]ba�����zX[������zxl����t}���_vxd^x�����ar���yw������nVq��ogcdfinsuvt{�����������~hdlu�������������������������������������������������������������������������������������������������������������������������������������~��~|wo_`��������������������þ������[P���ҏ�~vqlhfdba\Z\fj[C:h��ȋ���~|xwutpmmkhgfdbbaacksgT=R��������������������ĩn��IONNMQ^�����������vNTSQIMPQ�����������̽���������������ļ�59������������������������_RN#p��x&&9=(D-#2,)""%,09988m������������������������͞[��dx�I#8YoklvtxrH($ft*"   "#$,TOHu~�������vlE8{�����������wIU��ri�����������N<<0)(('(*:}�����3"%(&%%$",3&%*4>L����K$(8U'!*QdP;0)$#(/4;;q����ˆu��r}������ӺM!")+/Bm��ڷg#!*]��Я�����χe�{vpYN^s{wkrjWq��z�����������������ֿ��������������������L7bt��olvwrqtlCaF,./00,%4*((*)(((())*-2:v������Ŝ������ʾ���mmjijjigfceeb��yq�}zrrsqnjfXBBHJIIIKLJHFINT��ȂVg���ipjlosu���њswtnldXSYdo�j4:Im}�n9787:z��-68V}~S344:W{V8:;@_�tG;;<?T�ɟ568;QqeG;558J��Z��MIFDEe�rU_[JJX^o{^E2-0/+0+*0P���h;=;?./000- u�E)-IxbKBAII@O�����ȨR.(FB,1Skm~x��zns\clrY,19P�xpkgdeh\XrsmB,E;WeegffW43:2D�����t9'*?>f�������?&w~���|x|��N QxSYW]O88GH*/SJ+--.10:R@!?_eztbnn�vVLPICO\an���~gL:t�thhfx�h[[OKMK534-/?\K58MOJ4,*$">L@[����xYRafe������|d���RQV^__�����~��|Wv|`]~�����]q���xsy{uu~vjWr��tideggnttusz�����������~fdkv����������������������������������������������������������������������������������������������������������������������������������ä������}|wo_Z��������������������¾������]P���ғ�~sqkgfdbb^\]clZA;h��ɍ���}{xutromlkkjfccbbb^kteV>S�������³�����������Ȧl��OQNNOR\���������ŴuMSSQHKOU�����������ɶ����������������2;������������������������[XT;���o(GE*C-(79ULDEOTQRN:o������������������������Ν[��eu�I%3LXVX_fnpB&"iu,! !   
!#&+QJHu|���������nE8w�����������xJS��pd�����������K?<-)()('*5b��f}T!'&&&&'&(;9'&+3>M����F&(8T&%GcdcZ7%$*/59=n����Х��}�ǳ�����ѳC!)9o��ڬ\"4# ,_��ɧ������h�ytrXQdrxukuhMq��������������������ھ~��������������������G>`}��iguurrtfFgD*-010,&2*())))(*)))*-1>}������Ȫ�{����ʾ��okjjllkiifeec��sp�wnow�z|zjV@?DEFGIJIECFJOW���xRe��qjnoosut���ЖrvrolcYRYdq�g069El�dG?888��{,7:Qv{Q5448ZyS9;<BbzjH9;<>V�ȏ077<[ymK9438M��W��FCEBF]�_Zd^UOW^v�dD/-,'/6-,/H���GLRY`2@>//-!x�C)-IpfN@?B?DVft���̳P-'0:14Jlw}qw��idgCZfuL-38Xlh_`npyu[VrwA9RAS\adebW/08/K������h3#*=>fsvz~|�-+uvE<NOSV\Z+QdSX[_Q6<IB$4cG-,,-1:jL?\X_SMZWue?HSHAS]`r���s`K:|�}mgv��bb`PLKC046.0CSF61JhN/,*"@RV����}vhTdf_u�����e_���NEGHOd��������n\||ac�����xZt���zuusqrqrqhWu��rgfffiorsvry�����������|igmx����������������������������������������������������������������������������������������������������������������������������������ħ������~|xo[U��������������������¾������VG���Ԙ��xrligc`^\\Zcm[B6g��̒���~|yvtromkkjfddd```_krdW?W������ǻ��ǽ��������Ťj��GPOOOR\���������ðtMUTPMMNV�����������ʾ����������������/=������������������������R#^R(~��=HD )@-(:[{b_k}�pjcAm������������������������̜Y��ct�F(,4746>PfbH($ih+" " !!!##$&0NIKu||ww}�����pD?{�����������wHS��na����������M?:,*)(('(*,0)!+#%&%&('(')6-'&)4@P����F &)>U&!+Ruxzt9$%+16:@o�����ʽ���ҵ�����ѱ@!:��д{A1Jsd"0d�Ǹ������~i�}unb^aqwukn_b���z�����������������ؿx��������������������F@^{��djxsortbL`?,-010*(2+(+)()(()))+-1:~������ϸ������ɻ���nkkgmkkhgdada��no�lajrpjlk_V??CBDFGHDACFIOX���wZ`��hlpmpx}}���ϒousplcXPWbo�b,49HyrlF869��s*59UtuK323>[|L9;>AX�uM=<=>Z�588=etlD4325R��^��;=D?Fe�_cfb`^Ue��k4-..("8;034I���O��nAed400 }�9(,Dmo`FC<ASgjp���͢F1$1>11>ls{kqtfRbXCVjz7.2;[qscfqpvjYWnpm1JZ9MX_edaV./40N~�����d4!.@>P[eg`jsS'xr0EfjdabW( 
S`LW]ZA6DH;)?i>+./05F|�;D[JLIKJKRH<IXHJU^cx��mg\J;���ifu~aUijONLB388.2@XI54[gI-*+#!CYMinoryqKJhcMa����~X]���LJHKPl���������Xv~b]n���|[Zv���vrttuwuqpfV}�qklijoustut{�����������wdfnz����������������������������������������������������������������������������������������������������������������������������������ç������zxnYS��������������������¾������RH���ȡ��|tojgdb^]]\ceUB5`��̖����}xwtplnmkif`cccba_itfT9U������Ǻ��������������j��CLONMPY�����������oNTQQNPLW�������������Ĵ��������������Ǚ3>������������������������URM$A���pLF!+?*'<^e@?Y��wke=t������������������������˜Y��ix�E')++)+2AQJ<#!(fn)!!!!!##$%'0PKLw}|x������}pE>y�����������yMU��l]����������M?9-))&''&&'$#&&&&&%%'''%#'('&*6AS����>$<V;D0((<J#!(Nwssm0!%-15:?t�����������������Ԭ=">owo^Uadh�W!/f��˯������m�}sk`WThtujlho���������������������׸}��������������������E=^�Ÿcmysoqp[Ed>.../0)'/*''&()))()*+.1;�������Ҹ������ȼ��}klikljifedcfe��qxygemr_WWSZU??CCBDEE@>BBEJZ���vT[��fpsps��z���ϋrutqkcVKM\t�_586R���Z588:��i069H_W:426=]yI8;=Gw�oL><;<^���486>e|j:4436V�yX��.7D<Q�tj^gib_r��p?2/-'=FA==T�������hj�k60,(��;()5gusfWPWmuhc���Ǝ9,"9:0/8enpmmhZXhMDV�S.-1<XrtpvtlpnXXeib&VZ6GY`egdT.271S{x����^1!1B73Mlojh`M,{s<q�����b* SdK\_V?DPQF4>XB-//52Kxj&D[OECEBFHDCMRKMU]av��o_`JD��s`jpm[[k]PTOF5<:15FVD3@ifD/--!$OT;<<>A<@<Phfb�����zkg���HLROQp��������{X�~^X[afe]Z_|���uttu}�rofT���wrrwzwuttst|�����������vfep{����������������������������������������������������������������������������������������������������������������������������������§������~{woUR����������������������������Q?���©���ywlgacfda`gfQ<2]��˰�����}zwrrnnkhgghfcealreU:U������ɾ������������ţm��EOPONQ\�����������sLUPPNOO[������������μ���������������Ɩ1:������������������������XRQ "^�Ų[KD-C&+;^eGHE���he8u������������������������ɖS��gv~L&')+).3653/# 'cf(!
!#%'(%&$&0NJOvy||�����|uqD?z����������}xRT��pc����������~J>7-((%&('&%%'('$%&%%'('$ "(&',7AO����BC��t�E$'!:I$ )YofpV'#%*.58Cz�����������������Ҫ8"'% "!#3f/  /h��ع������ro�}to^JOnwugkhp���}�����������������ֵr��������������������C@_�ŵbnwqnprZDd<.-///(&/,()))(&())(,.2?�������ʇ������Ȼ��|ljjkmnkhfdghd��qvm]fjf^a[S^V?@EECCDC>?BACDT�Ϳ{Mg��vrst|�~����ʅuttpjaWLHVx�Z57:X�_6469?��`0776PT@313:_uK8:<Cc{nP@<;>a�Ł058Bd|^44236V�u_��0;;@w���rqqnot���Z=/.(*Q^LJJg�������y|�I20)-��;&(,Opruxtz�~uw��˿�A.!:?302Bmytk_X__FIvt,,-2<ao}�wkunTRaqc,Z_8___cc_L068/:LR^mrmC.#2@4M�����q1yr4y�����`&!UjX^`UGR[XL.B_?3=IKGhzd0H_I;=@ACE@BJRPQUYSq��i``HM��jbgin`Paba]UI9>:24>M81AZ]J;5.!)UV>@A><;>CPjfc�����uUf��|KovRRu��������i\�|]YWW\`YYd����tsv~���voeS��}xx~��|tvwsu}�����������wfgq{����������������������������������������������������������������������������������������������������������������������������������ç������|yukKN�������������������Ŀ�������M@x����tbZTOEIFDGPUNQRH7+V�Ŀ��������}||{supnpkotvskwq`S:T��������������������ơl��IQRQPR\�����������]NVSRNONZ�����������������������������ŗ0@��������������ß��������TRM2?3 RF+B&,<^hAD[���ib4{������������������������̓T��dt�N%(--*-11-0."(fh+"!$+>G0$%'0WINvz|������wun>A����������~yOT��o_����������}K<7.)(&$%'(''(''%&(%$&'&$#*+))1=IR����BP�|��='*=F$&8*4NG-%&)/5=Bx�����������������ѧ8!%$$# &1m��ղ������ox�|uk]Q_vyuln^g|��������������������״w��������������������CA_�Ǳ^nwqoqqVDl:.-/01)(1-*)*)))(**(+.3B����������ѫ���ȹ��vjiikmidccaed_��{xrgff^`cYV^W@BFFFFHE>AC@BEU�кnEz���try}{~����ɆsstpjcXNHQr�T37:Y��\<859@��[268U~`M:34=fzI9<<]��mH<<=>g��x.57Dkt\55547R�ol�w/54Jo��noyyz��x��wZD-.+4gZNTXu����������N30*/��5()*/Ght{y{������Ǚ@.
KI:02?xtyeXVhKEQ�U,..2=Zs���xkqjX]o}h.dg@\XZ^]]L+6:*3T`XZ]_E-$4F8Lv�����`2|m?w�����N%'frZ^]THXdXF"HaBi�}pw�|p>Qb5././/539HVQPRRQr��g[XJV��pheio`Xnzoc_O9>;0297-/<Z_QC6( 'VQ9BA>==>AUj`IHGSOQRLj���r�sUR{��������z^|w\YW]mtWXl����utu����rmeW���v{���{tvxtu�����������seio|����������������������������������������������������������������������������������������������������������������������������������ã����~zqpjY>E����������������������������B;p������ymaZ^WUSOODDOI5-Q����{ok_^YYRIKRHDNHOPTVSVS\_[P3M�������¶�����������Ơl��KSQNOQU\UTWWZZ^go`GPTUVROMY�����������������������������̙1D��������������ʧ��������UPLZL,9 +<aeGCe��xje3y������������������������ʑR��as�G#',-*+21+./$",if*! !(Ix�g,#&/PFRx{w~�����xvoAB�����������~yPU��m^�����������M=8-(&&&%').-,+-)'&%#%(&#%1..6?OXX����?Gyo��=))DF$#.JXk`.$')06=={�����������������ϡ6$%$$ 7r��ɛ����ڿlu�|wo]Uduysoncdy��ix����������������װw��������������������>Cd�ǩ\sytsssWQs8..020((1+)'***)&(*)*/5C������Ӽ��ɛ���ȷ��rkihlledccdfgj���~|rnngddaehcEFIJJIJF@DC@BF^�вTQ����xpywrcv���ǁrusqlcXOIJw�Q579U}}c>85:G��T06=eyuiX:4=kxF9<P|��k@;;=?j��k166Bptc97532Hznl�q+26Jetb7JfkhZ<8nqWaA,/+7gWRXb�����������a2114��2)*+../483445?V���ρ61!
ZM5026coqX^idNQx�9-/17AWxyxzleujTXi�g1qeC[^bdaaL*A@/[��wsplJ,"4>2?n�����Y/wk?{�����V%)ntP`_VNY[Y<'Tgw�������w9#]Z1,+,,-/5?O[SNJHMk�|e[XLY��pg`eolq��phgU<=8/-,++1<M`P;.& ,YIARSON@>CVdZHHHHFEIRn���z�iSR|��������tZ}v`Z\p~x^\l����ttw����rleP��}v}���|tuxvt������������saip|�����������������������������������������������������������������������������������������������������������������������������������vrnknqp^WPO>?c}�������������������������f40h��������|yxwuqlgjeXG4)J�������xwqldaaZWYPLNLJHFGT_[S5W��������������������Ɲf�ݞLRPNNNOOLMOLIJMOTQMPOQRMKIY�������������ʾ��������������͚+G��������������Ϧ��������S[MXL,5"(=cgET���^mg3|������������������������ɍN��at{C''+*(-60/1/$!+ke*!!#)=���I!'1MAOzzz~�����wvmAA�����������~wQV��jb�����������N;6,(&&&&)2ELF>DC2&%&&''%(23>HPca\���<N����4(%PN%*V}}nG$"&+19==������������������ѝ0#%)(%""  '(%&(8u��˫����ټeu�{wkTJfuztpo[Qw��������������������ծt��������������������CJc�ȥg��~}{xWf}844561')/*'')**)&(()+/4J�������é��v���Ʒ��vlihjkjihjjhgl��q||tsxtplpqsi?GIKKKHCEHHBEG_�ЭHo����odh`WJs����~susrmdZOGAw�N467^|�W8749H��Q05=i�|pB55AnsC;<Rz��m>:;<Do��m577?ix\66536X�sp�p.35Hqj:/;D<0-06c_po6..)9hYPSo������r{k��94<?>��1)+-<E4550247*.~�ʸh=7"F</1109npfjsZMb�R0/379Dcuoxxpi{gNPk�^2wZDcigiffC)JA3r�|trr`3*!494q������W7zhC�����~D#.ovO\^SIQQK4*[gp������wl8'`N.,*+,-<NRRZROKJPj�~j^TH_��mb`huow��nll[9:4.+***/AYD;0-&"-VQ\tnn^DADYgZDKOKNIIOr��vpvpRR������ĵ�]Z}t[[ct�wbZc���uuy����tlbV��{x{���|suuur������������rbhr~�����������������������������������������������������������������������������������������������������������������������������������yuuru}�����~}������||wplcd``da_]SWTVTOK>&5c}���{rpfeb_\Z[VTURJ@-$Et������~{usnkkmgegcfgfifec]YQ5]��������������������ĝf�ٙJQOMNNOLKLKJIIIKMMLNMNPMPKZ�����������̮����������������ȗ-J��������������Ϙ��������O"ZG_G/>#-@`og���bVoh7�������������������������ǉS��fv~G'')+*14--1+!
!,h`* !$(1�̼l'1PCUwy{~�����vuj?F�����������}xRS��fa�����������H<6,)'&&)0Rq�]Opt:(&%%&'(+49LLR`dg���9"S����.*&QI%-`rh_G,#&-29@C������������������љ/#!&+1,+-(" 5]QQWaLC=% ;y��������׸a|�{vlNFhzzrsv}����������������������կw��������������������?If�̦z������UisGNGFC8*)0*)(((**'(()-.5G�������̶������Ʒ��sliimnlkgiefdn��q~zphfjeegbgT=EHIJIHHIIGBHIc�ЩVfq~��KKQCEAy����~xwtrmg^RIG~�F35:Xw�O:759K��N17=d|nN446DqpC;<Jh~�e;:;=Cu��q677HptO7554:d�pt�k/:IHM6248>BD755Bcq=0014C^]Wn����ltIMCB]C5=PAC�~.'+;fmB98/8aiZT}���tOA!C;.1119ptyzgRO�l936?:8CkxxwppqfUSp~]4}NHlgffe`C-K:3e~�yqplI,"7?7j������J:�a>�����uB",ssU[[PDIMF2(Ucv���q���y''ZM.,*+.3PcRKTTPLHOi��iNI@b��h`foupx�yonlX:82.,*()-37</*+&#/WZp�slSB@D^jWJ[eleHFQw��jw�xMS����¹���m[}rX^dt�v`VZ|���vsz����sna[��zw~���yquttu�����������o`hv�����������������������������������������������������������������������������������������������������������������������������������������������������������}}}~�~}|}ywuorllwzyyrmki`baijb_YSQLIE2+Fq���|vsrqnkjgeee`^a[WXWZZZ[ZXM2]��������������������Şd�ՖKQLKMLMJIJHGGHGILKJKMNQLOJ]�����������������������������Í(F�������������۸���������M!\GZ>#.<&4Du��������z3�������������������������ʆM��`t�I'),-+,.--,'"!-hb& "%*9���w (0PGOwz{}�����{vkBF�����������}zVQ��c`����������~@=7,*('&)3Z��9O��1$%$&''((2=MIP]kl����: O��zu0*&MN%!1LYkZ-#&-25=E������������������͙1$#-JfXY``G?6 $dzbn���i[)9}��������׺Z~�|wlRRgwwsv������������������������שx�������������������:Hg�͢u�����wOgx}�zo\6,2)(''''('(*+,.4K�������͸������ƶ��qnliomjfca^ddm��t}oiee]\^\WH@DGHIKLLLIEEGIa�өqox��LAA?;@C����xvtspiaTJG��D36;_�mC9869L��G36>v�k?736HvoB;<>X��_>=<>Gx��j066KqrU9357<h�nu�e(Q|iN4876AVyi\SIB<1329AJi`_�������ce>D??=KX/C�v*)/Y�vOTPEl��ypTj��~ZJ##SA.0012TuyxeY{�B:9@C:9FbjonkiircW^szb7vIMhdfee^?-E97m�����zD+%<?9l������H8~cT����{qL3~pU^]PHPOD+&R[o���j���j%-_D//3676V\?GTQRMDKg|wTCD=c��bbjvultwifjjU672++('))+,/)*+'
5WUded^MA?H]gVYu��qNKTy��y��mGY���������cW�t]`cx�w^X]���ss|����qn^\��v����wuuusw����������i\kw~����������������������������������������������������������������������������������������������������������������������������������×������������������������������������������������z|}|}{~|trromkmr~��wunngfca^WVOTROEKOJNLJIUVE.\��������������������Řf�֒OSOPNLNNIHGFEEEIKKJLMOPIJK]�����������������������������ő)E������������ּ����������O"cJ!!O>%'2'1A"7B�Ž������~8�������������������������ʄO��\s�H&(,,*')--,*! ,fa'!"&/d���t!%1RAU~{x|�����xvkA@�����������|y]R��da����������z?=8,)(&&(2[��D]��5%'%'''&%.7FGS`rh���5#R�d\i+))YT"#2DDH:#%(.25<B������������������̗2%%<z�kv��{aC 'bfO\���f_&:���������׷[�|vl^Xfquqx������������������������צx�������������������~8Jg�ϖ]qxrptnMj�������C.2*''((()')++,/4M������ҹ�������Ƿ��pnljlidb^adhkw��oyujfeea_\[THBCEFHKLLJHFCHKk�Ӳ����UBB;<@DI���ݺyttsrkcWLJ��=256BJD4698;R��E55=lvfaO56LvnA:;>QlrN<>=>E|��]574U{yO7547<m�lt�e-}�wKUYAB[����}jW68>?EAOn`e��������cGTSiTlX!E�o.*1\��u�if~����]���{^K#'R714524@gvuw~�K:GDNE;9Gahlonjir\R^s�^<uCJaddfe_;*?67n�����u<,(AA8s������2@{[\����qp@8|^U`_NHPN@&)U^r��me���g 0bB-7LL>AUP>MSOOMHGQh[CA>6d��ZelsqheZY_dhQ581)*))*)*++*,+&"8YPcmmj^D@I]gYe���lDLW|��t�pQRtry����v\a�|ich~�t][d���twz����sm[`��{t����xututx������������helu����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||�������������}xuqof`eddab`]e]a[gebaw����������������������e�ƑmjfbXQSQFIHFFFELNLJOTUPLKJ\�����������������������������ȓ+K�����������������������}J*cJ!$!Q;$"CK3,*)& 2A"4@���������}9�������������������������˃L��^p}B$(+*'&(*..-$"+mg$!#).R���P%/S<Vxzz}����|uxl>A�����������yw[S��k`����������yD=8,*((()1^��y��x.((%''&$!(3HTWhp`���11`]m�-() ^O" 0]reNA4/(-36@C����������ľ������̐+#(>xjVe���\?
)agSV�Ԧh]& =���������ʱY�}xjYOcnvvz������������������������գw�������������������y6Lk�̓`qwolomLi�������D./*'')*)))***,/5K�������Ȼ������Ķ��rlfgjheeegjojx��hxnkimia]ZYSF@CEEHHLKJJIEFCp�ұ���k>A<?BGKR���ڳywrqrrmeZPO��92541255577;V��=54<prrf?57Jxl=9:<ANO;<<;>B~��U667QsvV7435=o�mr�e*�~h�xZ_i������f`mmdU5MhRU��������pz���u�M L�p-,2j����h������ǮqZI&+Y-6KG945Dl{��T<LVOLB?:BZp��|ommaIXx�SAp=Mfehfd_9+709r�����w>(*=<9q������0@wOL{��yup:$4fYW^_LIUUF-/\e��W`���\"4bI1DdWGWZXQRRQRNIECHGC?;4d�|WblpolgSRZadP780,+++.,++++,+%!:ZXjrv{_BAL^gbb���oHJR��q��mGFVnzxuvrsnbk�}gck�s\Xc���~tv{���smX`��|w����vqtwvw������������khmv����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������������������������������������v�����ywh_a[Z^XTRTY^YZafc`YSZZ\�����������������������������ʓ)J�����������������������I2aG"!!$.4)"$ZB)%ED6:82'#3G%0F���������w.�������������������������ȀM��br{@%&*,(%(,141%!!)g_&%&.Dw�ݐ*#&+:(Ty{{xwvsuxxvm9F�����������{{[Q��jb����������wF@9.**)(*3`�����v2))&&%&%"(/Ugovol���3J�g�o$*& ZI"!+[yuqmN3)-06@E����������Ƣ������͌'&!)DpeX^�ϐ[> (ffTi�ޤfb(!<��}q�������[��{uiWQ_rysx������������������������Ңs�������������������{6Ok�ˍ`runmolMp�������<(.)'))))(())),/7R����������ʹ���ô��oljikklkjkicYs��s|stztmaZ[YUD<CEDGKKKJKGEEAo�Π���QCA<?DJOP���۲xusrppoi\RT��<45545545657\��7438@47:225Kvi;::8:9;;<<9=G���R55:Tw|V9656<u�x��e(������ynU��~��������V1QlNK���������������E&R�v*.0n�����o���������YRJ%,S/QvcB6<=?z�zf`c]NLHB<Gi{�{rjlpePj�~QEm<Qeb`]aZ2'62>u�����o;'&7<?u�����{.BrIN�����u85e\X]\KKZ\L-3_e���\d���\"9fG/G^NXqqmXRRSQPKGEED@>93g�yPbnttyvOPZ\`I4:/++021--..+*+& 
=VEOq{}S?CK_nug���lFMX���gt�uEDOqyxwwvuo_o�x`^g{�s[Zc���}uu|���{pjWd��zx����vtvvuy�����������kfmu�����������������������������������������������������������������������������������������������������������������������������������đ����������������������������������������������������������������������������������������������������������������������������������}}�~|yy|yqtz|wsmnqij�����������������������������ǉ)L������������������������K!=bC0IK;*)AY]N5+eI.(!"3,010*"%8L%0D~��������p0���������ֽ��������������ŀP��apu@'(+,('+-/42%""*h_('2d��ӏ(#$&$&Xzzxyvwxwwvvk:C�����������wy[R��fb����������}D@;.)(''*3c��;S�v5('%'('&#.T�����|����2'42$)% `K" %Vzx{aD2),07@D���������ٽ{������ω*%!)HvcYn�ڍT> *fjQv�ށ\^#!=��eYe���r��[��{seSWdwyt|������������������������ҟz�������������������w3Oo�ʉ[qtnlmgIz�������5(/+()(%*)***+,18[���������������¶��nkjkmpspha_\Ww����{~�vlc^[YTF?DFFIKKLKJFFFDu�Л���HA??AGPSW���ڭ|yvtrtqj^UR��845436545777a��:5333102227Owg=9:::99:<;:<D���K556Oqq@5464D|���=������}qV�����������K1XgJE��f������������D'[�v,-3n�����o|�������xFLD/M6v�jEPND]����yrZVZVA;JjjeammqwaQguUNc2ILJIJI?-):36p�����p<(%A<;|�����z'@oD]��~}zm5Ep`W]YIK\XF+2ch���MJ��RAb?:PXI\fjhWNPRQQPJHGCA>;8j�uJ_swx��[[fWX@7;0-27;:8686.,+&!<WLl�~jBAAG`~{d���hDM_���u��bFF\vzywwvvp[p�wV^j��oXVb���yus{���zpiWg��sy����uvvwx}�����������}jfmx�����������������������������������������������������������������������������������������������������������������������������������Ï������������������������������������������������������������~������������������������������������������������������������������������������������������������������������������������t5e�����������������������|G(Om[Cl�w`:  %>Xj��{\+(ZS7.$ " !  $8A 1D���������g4���������ֺ����ž���������}Q��frsB&+.0*)..-0.%"$2j\"$3n�sI!
#"$%"'Tzzyyywwvwwun9B�����������zwZQ��g`����������}GA7-(&(()0c�l1^�j7(&%'(&&&0P[_cihh���3+86$!*'" gL"!3g{cJ=2((,07?H���������֩u������̂'$"*Gwf\{��oV; *enq�ǦUd\"#?��h_c�Ҥ�¥\��zsic_jwyty������������������������Ӝq�������������������q/Sw�ˁaqtmkleN�������'..+***)()*)*+,04]�������������������nmhinolf_Z]dc���v����ngb\ZYRC@EFILKLKIHJIJE|�͚��u=>>ADKOPU���֫tzwtrsqjaWX��976423657777`��.5223222127Mwa999;:999:::>K���E766L[F54357E����ݫi����w�bY���������v?'SSEA��r������������<h�k.-3WntshmVy����~v{Q:C5<U.w�|u�d^o�����xw{woB=Lgoqv~rsu_Pg��EL_8KUPJHJD0.>4B������w;))VCD������XBqPfdRWZqN+DmbXaYIKURA%8dg��|JV���PHdMVV[]elneFNRQRSPOQOLE@=:p�rLaz����~�zWM:7:0/5>EJFDB=1/.&$J[Zrqc[MAA>V�kl���cIM_���\nzbEDUrzyxvwwl^u�u]am��mVWk���vtr{���{rjVk��uz����suv{{{�����������zhfov�����������������������������������������������������������������������������������������������������������������������������������È��������������������~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yA6j�������C(17Rstv���h61kfF:-&$""## !#"#%$" &55 -:biipz����G7��������������������������zU��^qxG'-35++0.,/.(""/bW #$#  !#$#&Vzzy{xwvvvvui?G�����������zx_S��db����������yC?8-(('&(2YuSKgwX/'$&&&'')BLIMMYXm���0 8M\e7+.)" kJ  /TY=58'$(,26=G���������ԥ��������}%$#(Ixks�˓LV8 /u�����y�w#?��a]v��xĠ\��ytgV[ivvv}������������������������Йq�������������������p3Xw��tartnlmcPzlWk|��M//))()**))),,,10_�������������������mliimia^bfjmh���p���thic`^YSFADGJOLLJGEFFII~�ϝ��i:>@DGKNRZ���بy{{wttqmcZ^��666544655677f��,4423211335Ls`<99:;;:::9:>K���@65557665458O����⻃_>IRhc_8HgpiffhycciS6;EJFL��������������|6J��z394/1.-,45?=<B@9BP@/;1CO,�����|qk���������qEAQoqosuopnYSka3P_>[mbgb^Q/1A1E������n3*6^?>~�����nJvI6BBFAWF(
Al^W]XKHML>%Adk��}Ol���FO^]ijmgnc^ODUWTUTT\e`ULC@?y�qHc�������zUG34>117AXcMFC;541('N\OcsvqT@?<JsQr���WILc���[v�eIETt{xwuvwm]w�m_ao��kWVj���uuv~���xqk[u��u{����uuv{z{�����������xhgpx�������������������������������������������������������������������������������������������������������������������������������������������������������}}~~~�~}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cm��~����rRVXZnumgtywhG%_�{aVNG@@?=<7415656420)+.:A.6) %(/0;=��������������������������tG�_Q|�J)*46.01//01(#&6kW'%$!"!!"  ""##'Yzzzyzxxxwwyd<E}����������wx[[��c`����������wA?8.**)(&,5468<70&($&&&&'0SYoern{����+*DL��I73'" ! &jI 2^uqqZ+%'-36;I��������ҽ���������y$#!.Q�����sr~N>�ø����Œ%D��a^���tyȜU��zscPYlxxv�������������������������Зx�������������������j3Xt��rfutmllaQ}P+6;<D0!//')((()())*-+/5c�������������������nihgihhchlmg^���r~�~uplf`ZVSFAEHKLLNKHCCACD��Ѡ��i=>AGGJMUb���ףyzyvusrnf]a�|06545554457:e��15333325525Nv^;;9::;:9999;K���857654355547Q��o�̹�^28`�|A2;DC=09i�T015KXPNLR����x��y��m�u�`2;��ˣZ>3/./0/035411.8CF@6]=!^\/������vm���������jDADFGRcjemiXYpnnFW[8al_ccaL*0A+@~w���f7(=V7K������|RwDPsqkh^1:jPR[VEBKK;&A]i��{Qn��x?
Na]afceKVXNJZ^UUX]kleZRKED��tOl�������vVA2=@03@P\M8CB>=<4)(XbYs~�xP@>9@VRv���RGOd���w��\HASszvwyvulTq�k]`l}hWWl���yuu|���uohS|��w|���ysuuyz}�����������ydgoy��������������������������������������������������������������������������������������������������������������������������������������������������~~�~���}�}}|}}{~~}}����������������������������|��������������������������������������������������������������������������������������������������������������������������������������������������yrutmqqsroijlipkoqa[|�wrhca\\YXXVQMNKKMQSLB>@KB" >I@5/./13:AGJ��������������������������kDrzK(*12/,/1285)#%9lY#%#"# "#!#"$$$&Wwzzxyyxxxwue9D{�����������xZ[��dl����������y@>8-*))(&'(('&%$&$&'&'''(0Hggkjv|����,7Qw��hG#,'"   %eJ#;u�lb='$(-28>J�������Կ����������w""#1e�ŷ�����f!I�ķ����ԏ&J��r��ȔU�Ț]��ztmkjvyyq�������������������������͕x�������������������e3\v��maqqmmlcS}M.12/1*"1.''))((')*(+,/3g��������������ɼ���khiimpvqlg_`[���szzxzukb_XUOD@EEGKLOOLGEACE��ϡ��o;>BDBJOVd���ءu{yvuusphad�y286444544687o/6544335427Oy\;=9:::;9:::=M���757745455577J�fg�Ƹ�q;7z��acs�qZv��S33A�pf[DOGYmT1>HNSaGn[a@4j���n:733:83.19?:35@FKHYa;E�m3������uj���������eH?@Q\jjb^jhZUh�|2V[?ad]aeeJ+2@/O���~t{R0%7H<K������hVtHw��}yvY,=`PQ]WGLTS@*B[h�pOy���=#Q[UbefXRbZQN_^YZZaglh]UPKP��yPp�������uV=5?=07GTG66AA@A=5++ha^sx}wL=?7=PTw���PIQd���i��ZHCYy}~uxvkVp�kZ_lc[Xp���yvu{���vohQ���wz���|vww{x{�����������zghoz�����������������������������������������������������������������������������������������������������������������������������������Ã���������������������~|~~}|{~{||}~��������������������������{x|{z{{{zyz~�����������������������������������������������������������������������������������������������������������������������������������������������������������������~zvtpmjh\\XSZWYYUUTRN@2JihZOMBDCFIPW]������������������������ؼ\%9fvC-45-)136<80-);pY!%&##"#"!#!$#"'\|yywxxwvxwte9>y����������{z^`��^n�Ģ�������vC?7/**((&%&%&&&%(&%%&')+*3Un`neyw����(?\�кsI ,%"!
"# .qH !7eF %"%*-05?M�������������������u%%!%1n�Ĺ�����^!  @��������z)K���������ɒZ��zwvstvwzu�������������������������΍x�������������������\5[z��jfrqmlj^T{N-01/1)%1+%%()&(+)')+-08n��������������ɼ���jkjjq��ha_bb��~rux||sibc\VQEACDGJNRROKKFDM��ˠ��q@?@EHNRWd���Ӟyzwwustpfae�r/66545643366t��*3354222348OxW;<;;;;;::::<K���968855544567Dr���˶��C=w������������;44c�yvlFO^��YFOOjaL]��Q37T���΄J90Be]A38COG36HVULL<@g�Aclx{q}L_�������vRF@Hr�xleflnWSn�e7X^B^cbfj`H(4;3:LMC0<P6-#0B7B������m]wHauxpx}[*ErPS_TBKVS@(G_]}�qVu��}3$R[YigaN]hjVKeaZ[\coxi[WSNS��vZt�������{U@2><0=OJ2.899<>=2'-l`Zhv�lF>=5:STz��wXJPh���v��USJ_����zuiUt�l\^k}t^VXq���ywuz���wphR���vy}���wtwyw}�����������vdgoz����������������������������������������������������������������������������������������������������������������������������������������������������������������}||~}}}~~}�~��������������������~{||zyz{|z{{}}~�}}}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������~z|zvl`di\QNJDDFHPW[_gu���������������������º�7&;DL~��VAEE@=>>CKIEFDZuZ$''%##$#$##%'%)\wzyyxxvwxvuf:Gw����������xxe_��Zr�Ư�������yBA8.,))'%$%&&'(&('&&$$()+4__khnqww���& :Z�ӽfE.'""!#""-tH $-$ !$*.28AN�������������������o##!&.j��������R"!:��������s+O���������ӇY��}yvuwvxyr�������������������������ˎ~�������������������X5\~��girpklk`W|I.///1*%1/(()'''(+))+-3<t��������������ʾ���hljho���f\aff��|kpvzyoa^a[UOCAEFFIM\o[JIIHK��̞��v@BBGLORVf���Қwzywuutpicn�n.55675535577}��+3544332468OxU<::8:;;::::?N���478656334695Z����ǹ��F>Kn����������R59:��}�|XYp��m}����v���B0.)U��ԓf;9m��E1HfhTBT��]F2@ii��J75IncA2JXXQNV�rFJ@FK]|mdffioiWYsrkIccB_caed]I#09+.Qd\XXUL-#0B9\������e"h~Gu�����b+ 
SxXZ_VIT\TD'NdbnwQ<z��s.$TZ[lj[Q]]XIRjaYZYbpug[XRMX��vft�������oNF.<:2EE5,-3107>;2&7oYCl�~OBA>4=S]��xoYJJp���n��p|Wt�����~zqXz�k^[k~}eYZs���uww����tohT���v~���ywvwwv}�����������u`go{���������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������~|�������}{}}{y{{|}~}}}}|{}~~��}|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������rdSIBACDJNY^bcba]aidfooogjghliflkkpnkcTO]kccyufYXYYSRQYda^bX_l�\+1.,*)(%'')**),]xzyxzxvwxxsd9@m����������y{a]��Yx�˿�������t>A8/-*)(&&&'(&$%'&'(&&((+6Taoboow����&/C���H6 ,'%#"%%# 0wA"%*/39@R�������������������f#$#%0`������ŴL!@��������h(W���������ЂY��}ywuvxyyt�������������������������ʇ�������������������Z5a|��ehtqkkkYYC-0101)&2.*)(&(((+-*,/1<y��������������ɽ���jkkmou��ubglh��ycmosqheZddYOCAECDIS{�xKHFFM��ʝ|�|IBGQYUTVh���іwywxwtrqkev�h156756646667��~-454333135:TzK:<;:::;::9;>R�ć57755634469;m����ɻ��rY9]���������Y.4Ec��}��^`l�����������h5314o��Ҟp47~��fl����t���OE7r�[��^3=j�nOMgohQGs�g37G\_\fja_kfb]S\uyvCk]IchgebbI#4A+Pzz�v[/$5D8R������R!muC{�����O+!bx\_^SHX\V= 
QZC;@2J���n+&U]]ghYGMS\RYj\WUV_pzhYVQI\��ydnz�����wdWJ1:;842.),0..1;81'>n`[~|dUEBB7BT[wvwLKLr���u���V��������_��g]bz�v`YWr���wuv���}uohX���y|��vuuwwv~�����������ochr~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~~~~�~~}|~~~}��~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������ufVID=;IOW_glu���������������������������}ypfgkfgfb_`_]ZYU]cz�eRLDFE>AC?:::<5:o}{wuttswtru`1<v����������q|fe��Z~����������l>F<1.,+*(%'''%&'(')('(((*9Sqamfws����&"6PFC8"*(('%&%#! 0jB "&+.39AQ�������������������e($ &/_��������I!2v�������,+[���������̅c��|}{yvy{yy�������������������������ɉ��������������������P4d��civqmlkWX}>-1101'#2.)'))'()(+)+.3:z��������������ɻ��kigmn��yekk`��veklook|phd\P??A@BGM���MHFBN��ɛs�zTBQjthdal���Бvtvwvtsqni|�`/7775676668:��u3554333344;X~L;<;;;;9::<>>W�ń35454533358Ar�yu�ݼ���pFZ���������M2Fl���}��ofZk���������w98:3\���̩yFL}�����������{PAI��Y��h:;c|w~����wy��R18P{de�ycehSVVNYgutEpSHZZ]^[W<#>E+Bvz����U,'7C2[������Z%oqGt|qzztY.%gwX[ZSJVbU5$SW3+05O���l,-]_Oik]OZ__LWeZVUZjxxh[VRNg��whnpvy��nf^cT8;92.,+*-/-,/43.%>l\a{]BMO9CRU��wMQTu���x���~L}��������[~�d\cu�m[Z[t���yuv���rpgT���yz���zwwvvx�����������nbku~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wjXI;>Q\ekqx�������������������������������������������������yvwvsqonlljikjkrwomkkqrpinl^LW~���������lb|if~{X����������Z?^Z>5100-)'++++*('(()+**-<_fiimv{����$!&!
%*)'(''%&& 3l> "&*/27@P�����������������ҽb!$ %.O�������{ "*Y����������~_�����|z~zs�������������������������Ɔ�������������������J8_���`l}wnsoV\�G-1/02&%3-))+*+)**)'+-3@���������������Ⱥ��{hiiki���qkf_R��zhjlmt���jgbR=?AA@GA�ĤIHEBO��ȕd~ybFi��zqel���Αvxwvutrokc{�]07:<99<9989>��i/5544435565XL>=;;;98:<<??^��{36445655587<v�����ú���u����������YLx�������pNY���������M0>KMv���ͮ�kWFs���������|PMKh��M��p@ADf���������g74>d�\\hkgfaRWSLQi�gAiL+LLPQPI5$OC+O������V-&8I:c���r�|*%ovM{�����V'/x~ZZ]QFS\J3)RR1+-3UxtsY+hc\ksW<RX`MT^WTW]hura[WSLe��sknpw{{widhjX>@8*)*+*+-,*.-.+%?oRW�xl^RhY8EW^���lKST|�������Ey��������^��g]bhtqcY^{���wtz����trcU���z}���yvttwx������������icmv�����������������������������������������������������������������������������������������������������������������������������������Á������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yl`Z`jqtz|���������������������������������������������������������������������������}�~~���|svwqlijhv�to}zm����������oMw�{^MILDC>8588563///03224@hgqhsox{��� !,)&((&('& 6h4!
!%'*/38=T�������������������X$ $*,'#  *b����������|_���������}y�������������������������Ň��������������������G:e�ĩj������Vr�E-//21&'3-))(')')')(,-2=���������������Ȼ��zjiihq�Фeg]^e��xfjprl���tmjT9>@BBKW�ըGFDBR��ŏbm�wb|����nt���̏twwvutsqoc}�^4:BUUE=FI:5>��h08455556557b}O=;<<=:<==;<;a��x96465646669C����������������������������������_^���������H4Zd[j���̵��]+a���������GITe��wa��v`L7a��������k=7:P��ZQfx~}mXXSP]w|_CmLE^SRRPL:'O=5w������K+%HQ9;gW7@ad+)swD��vy�~Z 3�zLaeL?INF/&UQ0+,1G]XbC 2le_fdVIliaCKZTVV]elga[VPGf��smnvxvwj^affX==3+*)**,++(**))%>eYhxnhbcwX9J_g���ret\{��}����}H���������_��]Zcu�z^Zc|���yvz���~qlbY��~x���{usvzz������������mejv�������������������������������������������������������������������������������������������������������������������������������������xw|}yy{|yz{}�������������������������������������������������~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~wsqsxzy~~�������������������������������������������������������������������������������������������������������}{zomkjr��wmjgga^UQRNNNHFC@ACA@BJ_rwuwxz����ے8-%""##$)'+4)(*+*+*)$4\4"   !%##')-/14:?U�����������������ҼZ&"!%()(% ! 
 .g����������wa���������vw���������������������������������������������E=i�Ŧ|������Uy�=///0.%'0,)()*)(*(*+-/4A���������������Ⱥ��zjkhr���xdZV]j��pllste�̪~tuZ5=FFEGZ�ҚCHDAV��Ɣvh����z��{}���ȇywuvvxvqle��\4Ad��jt�w?:D��`4:8><;9876:cyJ7;>@AAABA>=:`��s67875778757F���������������������������������������������]Z�xQW���˹��qGa���������>Oh���|w���v[:c��������g>=Mw��UZ~��|_SWTNQt~gIzOX`VWWZV9(G;.\�����}M+)RH3Z�{x���A-vr[{{p�wP!7�qMTM?AIGC/,VP1,+,,)>aP(7o_\ebYSbga@GSORWVYhj^SNIEk��okkpmc\LM[cgX@:3,))*,,)+)+,)(#;e[dicf_cqS:Vgf������[|��~����bL���������]��a\dy�n\]i~��vwz���~qo\Z��vr����yuvwyz������������p]jv��������������������������������������������������������������������������������������������������������������������������������������������������}}~~zy{vuwyqtopwwxwyz{y{|~{~~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}}}~}}~����������������������������������������������������������������������������������������������������������������{zwtutqjkkdfdec_]`_]]]amxy}{}}}�����t^JA@ABEIOB?C876345855-''""4aG*(&%)'&.>;888HSRFAIo�����������������ԻW+$ $)*'% "" 0l����������ph��������wx�����������������������������������������������D>h�£������Ru�9/2/0.&*4)))))(()*,+.15F���������������Ƿ��yjkk{��lb\[_cm��nmospz�խ|ywZ;?CFCN|��h>GEBT��~u�����yy������Ǆvxvuw�qkd��V4L�����8<J��S4=Ur`AQcR65lzE9>E_aMIXWB>;a��t8:;>::=?>98E�����Ժ������������������������������������������g����ɾ���{����������]o����������pYr��������lSY~���Zmvm_Y[U[TNf�~SFpES[SUSUQ8#G93n������G'%XL:{������8-|jNfnm|Q!9{e16536<:6(.\U/+,-.5i�^
?j_cjeYUty^@GLFLQRclbSJFCEs��khkssbOCBOah[:70+*++.+)*+,*)("@c]gfNTK`]B>^bm������Z���||�shFP���������^��bZez�p`_g���vuz����sl]^��{x}���yttvyy������������mckv�����������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}}}x{vy|}zuyyovsxrusz������rsyxz�~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������������������������������������������������������������������������������������������������������{|y{wvnibbckhgeghcdg`_\VSRPSQRQNLGAA=4_~ZAAHFC?Lmztogt���lgz������������������ӴV/('*-,*(#   %#!3o����������ek�{ztrryzzx��������������������������~��������������������>Bg�Š�����pSt�5./02.&(/)(++,*+++-+,06G���������������ŷ��kedffcdg`YY`gl��kjkmlr�ϞuyxZ8>EHM���sDEGEB]�ҿ������˭]]qq�����}uvss���sma��T5H������{68M��S9P������e68mvG@Fi��~��{A?@e���87?afPBVU;;I�����Խ��������������������������������������������������������������������������������������s{�����_bdnqxiUXPMWQSDNqKWbYYXXK2'J?6r�����{8+2cK2r������83ulQ������F!8s]/.-.,,++#+c[,++,.9cj6BkaegiPRhgZDKMEFHIQZQJFC@Cy��[[rzvfSGBK[e[85/+*-2DA5*++**)
FeJFYZTIFD98RVhy����x]���upfQICS��������tl��d[ex�w^Zc��|wx{���zrl__��yy}���ystvyy�����������}gekt������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{y{ywyx{tvzv~||r{y|{�}�~}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~{{{xvtrrpppokjimowxlffieeclsqqtomz��nv�������������������ѲgNDDG>;:71-))*)'(')/'%2r��������ڿdq�~yuvtvyzzy�����������������������������������������������=<f�ǘ`wxtsqiQy~50111-%'/+),06544763227H���������������Ƕ��^WXWYZZYTUU\dj��dgjlx�˴�y}}Z9?EIPtwXCIHHF?^�ͽ������ͨbIX^�����~wqqp���skb��O5Q����ĸs:8R��M4^����ǯ^9BltB?L�ź��=@@k���<9h��~��z9<M�����ٽ������������������������������������������������ý�������������������������������������������ROs�xYRZOLNLM@OiAWc`_\YP4%B36|�|��j;+3hK?������}*3tdO�����}F:|X-.*,**($9k\,+,,-4<8MfZefdLJ\fcANX@CGFKYZME@>?r��QU\mwiVICFP]Q9:4,+2LWF5))++)( 
RdM[|n`O?>86MXf�����Y^���ci|lHDL��������]h��cZd|�l]Yc}��xvx|���{qj]f��vv~���vstvxy�����������|fflw������������������������������������������������������������������������������������������������������������������������������������Ð�����������������������������������������������������������������������������������������~~�}|�}yx|w}zzzxy~z~}�||�~}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}}}vrnlkllqlirzy}����������������hda][WURMGFDA?=>>?DNN@72,)%%))$&&%%++#"#(-6y����������[p�zwtpvzzy{�����������������������������������������������:Ch�ƔYqsmjjhIox7///1.$&2,+4DGKNOKJH=35H���������������Ƹ��<9;;=@GDBCGIMb��abds���~royyT;>BEEC?BKIGFF@h�ӹ������єmWHI���ںyrpmgx��jb`��I4T����ůg67R��J4b������V7@ml==K����ĻuAA@p�ˑ==����ºx;<H�����ں������������������������������������������������ſ�������������������������������������������N`|yxobRXPLKJK>PjL]deedfX/)>3B}�����Z3*;hF<y�����u%6|eP�����s>!D}Y*-**++(#>n\.+-.-./$
K_YdddQTa`Q<YU><DI\cYNDA<:k��DKXhpg[OGDHVL:>.).7JWM4)*,+*(%[cYt|n`U?=69PX`��dQMZ���{��eEES��������ej�x\]f|�gY[b~��|wx}���xnlYm��xx����ursvwz�����������zhelv������������������������������������������������������������������������������������������������������������������������������������Î����������������������������������������������������������������������������������������������������������������������������~|~}�~|x�~��|}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|{|yvtlmlfjlomlfihjhhjhfdd^`_^[]^bjpk`ZRMEFGDDE;==@MJ<<ABF[���������ܼ\���|yvsux{{�������������������������ٹ���������������������8Ej�ǌ\rsniheLtm4./02.%+1,-:J\]lpfliJ57I���������������Ƹ��542577D@728;;]��HPNMT^]bfbriB77;;=>@BCDB@ABj�Է��ms���pdPEL���ٶwtskj�Īh`]��?6D�����v@9:W��E0f������O6?vmB?P����ƺuEAFv�с6A������o?=I�����ռ�������������������������������������������������������������������������������������������Kh{{�v\QVOMOLKCcl:XcfhhfQ*)>5;y��~xum<(Am9B������|&:^Q�zt��p>#S�T*+-*)*'"#?mY,+,---+ 
!R]TSVO6/4SONcP@>BLU\]OE>85i�u?GQfmf[TJDEP@9?.,.8GTH0)++++'$ZcblmjeQ><5A^Zi�y^MFM^���t��WGF_��������sd�uZ]j|�j]Zd���yxy���yrnZ{��vx����wruwx|�����������xdfny��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~yz{wxyz{vvusvllkkggdeaioa]abbkv}����������]��}|vuxzz�������������������������Ӳ���������������������4Fl��]mnkjidJjl800120'/0,-7BJ[n|��~N56K���������������ù��5403:JeF20165b��8;:<@?6BBDJA1+.13986::<9886i�е�}Pb|^<8FGBL���زrvrfl�Ďbef��?761X{sQM795\��@2i������>9Btl>=H������Z@>D}��q7E������c:=G��~��˼���^T����������Nk�������������������������������ƾ������������������������������������������zOuzttobTXNGLKI@cb9^hkjecO(+=0I������s5*Km;J������dD}^e�qy�~o4#^�@,-.,,,%$%@xQ*,./.,)
$T[C23/-0E^YKZKA?CGRccOD>82k�n@FK^_XYbUGDJ:6:-+-1O^G3+-,,+%%[g^qyqfG<=9SlXWqyvTJPf�Ģn�^AIt��������df�oY^o��j\V`��yvt����xqkUq��us����wtwxy�����������wefnw���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{zww{~�xkojotnv{�qw�zxyyuuvvuzr}�������������Ļ������������������������������~-Cd��pcjgffhaZ��G1453.(,1,,.-4Cmxy~b415U�������þ���������{42/2=eC30148g��3:8;L\=UD833.*-22JTFPAN:138m�ѳ�jD><8;=>@BK���ݰnppo���l`dg��?8>Ypnonc@85^��<8?^���iN59Kwi?>Bq����lA?<F���b9E������P9;P�����»��qYK����������@Am�����Ů�|d����������}���������Ǿ�����������������������������������������wSt{t}qeSXOJLKH;YZC^ijkeaK(*B-C~�����e2*Nq=G������jD}USpnxw�q2#\wA+-,++,&$)HpE)*,-,*'
'[d<+/-/3LUJ>TE>@EKepWJF@95o�jAJTf[YcgZH@@43;.+/7aXA02;;2+%(jjR|�m`G@AMooUg��}ZJRh�ġ}��[Lk���������gg�nZ\j�{dZ[e���xvw����zriXs��u{����wuvxy}�����������udgox�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��������~�~~|���~�������}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}|tsifgefdgjglpq^ea\bhlrw�������������������������v)6QosPQTQONYXs��Z;7541).2+++-,/07581.19R�������ǿ���������o-./16]x>10/47i��3;7:tset=112--04Hrhlf^m0137t�ӐQA<;:9899;>Q���ݩ`ggw��e_]^a��95?g}~�wX=76]��;:4C]hVPK78Ftc9?<9[wfSSD?=K���S;APh��nT98;T����н��^]\^����������[FI`������njKk��������|4Y���i����Ű�kb����������������{����|TU��������������y[louuk\VTIJLKJ<XM@Ybccb_I&*B3G������i3-Rm<S������VN�[_�����n3"^zD..,-+,(&-DY>').-.*&
,^_9-/./1:A.=VI@BDM^aYMHB;4s�eAJPXYVfhZH>>25=0*.<QTH8AMI5-&3zmX�ke_MACZwnXv���WGRi���o��STx���������Vg�jY\kziZVf��xxz����{pjQu��sz����usvwv}�����������ubfoy������������������������������������������������������������������������������������������������������������������������������������Î������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{�~�|z~|}}y|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{wvyuswrme��Ĵ������������������o !2=?84568;<K���yiXNG<486-,,,*//0.-/.58U��������Ƽ�����·�h%/.04`r9...26tŉ-36>�sju624/*).2M~f{gmm104<y��s/76:GA::<:7L���ژEYRTWQLQNPW��97?inlgVF867c��1:Gp{|}rT75Hxf:=?_sy{|fE>>C���L:;=Th`SP?9=W���ȿ���]XU�����������|a:>�����l_K:s��������~:Dk�~w���ż�wWG����������Mz����f���tO<?}��������d`u���vPQRWX]OQOJKKLK>[R@]^^bhhG$3A0K������U/%Nj;;^aL8P_*V�[g����e5#k�>...-,,)(-FU?,-+--*'
2ZZ7/.-,00--CXJ?ACM]icUMF:0u�fDO\\REU`[F?<19=.*-BYVG<X\I4.)=|i[bXikMDI\xlTu���QKQm���x��CV����������bo�l[\p��f[Xh���vwy����wqiS���t|����wvtvv�����������s_hp{������������������������������������������������������������������������������������������������������������������������������������Ð������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̵������������������m''/57532325<Y�����wrePGSK879947675326HRk���������������¶�a-1-05bR4//.58z��'05Ahls634/))-,Mz^xbui3036|��t38>We^TJP85K���֐7<:=IGIH=>V��978=1886766:lğ2<IswzsdH8:O{c<>Bi|��w`C>>G�¨G:<_uwuw_>8=]�������r]dr������������k<7}����BAAM���������}[NKe�����ĝe^J9���������|<d����W���N;5B���������gY\l��t>HIKKKJMMLLKKG;_PJ]\_eldC&6C+K}�|���U/-Yi61/,,,.-#V�PLpsrt~q6 
l};..**+*''-HZ=+*,/.)%3eb5./.,2/,+K_C<AHTjslWQK@<��dMNQ]]O_ZD@=92:<,+/9DNI=BRD20(8xe[ku�bCBF^}n\���~MIOq���v�uYm����������gs�aZ^q��aZVj��uux����tpgN���tw���vsuxz�����������qagq}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~||}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϵ������������������s@AEEF@B@ACIS^cahlpuwifjhOLRPVTQRSHIi�v������������������d421348413246:��|&33C|Yr_2./.*(-.TtUxXw[2024|��m37Dr}pmrA23T���׎;=@Sg^WB;<`�46544424468<pě-7CWJPLH77:Q{a<=B_c_[TH@?;C�Ü<7<^qnlkM;9;^�����͠`^Tv������������[;5j����3CPs����������{T3Nx�����}@Q<[�������Ķ�HUX���M��k585X���������p\IGt�rAEGHJJJLLJJJMH>WMAOORWZK6"9E+T������W-+bg810../-*
UYj||z��a,"kz8..,+*)(),Qd9*,..-($<g^5..-.10--N_A=BHUk}c[XO@B��[Uah}dDPQGA<6/95)*.:GfI<NU8.-&Bq^T��~]FCJ^zg^���nNEMu���z�rU^����������gs�dY_s�z_TTk���wwz����xohS���sx���vtuvw~�����������pehr~������������������������������������������������������������������������������������������������������������������������������������Ò�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˵������������������ydckfeeefecfhgda]^c[^mt��pkopkquswojs��z{�ƺ�����������ñ��[::<:4699::759��r469G`@^A132/+,05JMF[Ab>003>���_+5Byop|k246X���׌=;?XliY9;@e�}554324443689zɖ18963456666U~\:;<>:;<<;<>>G���<9<MLNGH979;Z�����tR^Zey������Ɩ�lD=7Q�~�_MVe��u���������K6Ad���ضR2<K�����������}[Aq��O��J57H����������l;=h�lADGIKIINNIKKJGAUE/.01.55+"?>-[�|���rB,'b\831/--,(]�E58;AEY5(
hy>/.++)'%%+Vm?,--.,)#>f]0-../2/-*S[FEGFVuv`_WPC@��aaivuTKc^L@;6-41)(->_m<8BD?1*'F}dd�~scFFMewd\��sPJQv���y�sLW����������UpybX[t�wbXVm��wwv����wndT���tx���tstuu~�����������oehr~������������������������������������������������������������������������������������������������������������������������������������Ó���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{�}�~}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������̶�������������������vwy|~{y{{|}~|~�~���}||{zz{}}}~~�~��tzyy|����������c[OLOMNLNMJKNN��j<<>C=5<;=@>:4;?952015;6664>���Z.2HzWo�O/27Y���Ԇ59<JmqM<;Aj�x254344443655�Ȑ26676457687V�\:;;::<=<;<<9Q���=:::;96987::d�¾�֋gje_ZO}�������p5?A>9=ebe_XRe�o�������ʮwF9?X|��܎646q��fy����u���\GZ��n��045_�wfu���xi��l;?[�fADGIJJJJLJJKKHAT@,5=9:A5)$<>,8BAFOPF0+(VX610.//.*  
_y@.3766</)"lw<..,**(""-er:,-.0,)!BiX.12/.31.1W^AJMRgqe]WNAK��ljqufPPo^A<<5,51)*1H[L86QZD3-,Q�^lurtbBCLjxe^���cIITx�����{T\���������kDvy]X\p�~dY[m��}vwz����vndU���uy���tsuvx�����������odls}������������������������������������������������������������������������������������������������������������������������������������Ð��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������~��~~|}�{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˵�������������������x}����������������������������������������~zz~{{}����~zxwutqrqsssrrs��a^[YYVQUXVVRRT^XKKNHKJFDDE=I�Ÿ[45Iq4`a+24:[����099:d_=89;m�x04444556666;�ƌ2757534568:V�Y8;<<;<=;;>=<U���?;98899788:Bm�u����wug^]Z~�������j3=;FHBM@N_VJV`C�������āF=9Iu���P367q�\<f�|odK���ZKM���Z./6[rOSc{�ypi��V:=Ou]CBFIKJJJJLLJKG?^=1Xd[ip?)
7;*+/53000+)$VY42/3645,]k:/D[Z`Z4(!sr8/-,,+(" )gn5+-284/$ChV314/.32.)V]FKR[m|lc[TL?R��qg`aeTSVIG>;3.:5++0=PO;?cP:1/,_�PclszS?AKo|gc�}`HIV}��}��gN`������}~wbMtt]W_s�]WYl��}xx{����tncP���vu���zsvuwz����������nalu������������������������������������������������������������������������������������������������������������������������������������Ï�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~�������|}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ͳ�������������������|���������������������������������������������������������������������������~}{zzywuww|}vywqrtnnjjffj���\XYXbJ]\LOQPk����o;>;<GB<;=>t�s4677678987:=�˅4976477798<Y�S:=@=<=<<<><?S���=<987787778?J_a��}qcGQbf��������L69Aj[QH1YlSFPKG������ʱV+8>R���׀8998DFA^}�wf\fvPOLH{rquD100=:F\|��|q`cfN>=BhO=DFHKLJJKMLJLGAU5M{wu�mA*!>>,/<QNMZ@,)(aW37<IJLX3"^m79fnqvT6)"pg/.../-)"
+gu6*2BOPQ.HkU422-/30//Y]JQX\ba`\TOHB_��h_cp^MBWaJ>:43@8**/C^F<<KMB1.0o�O^tu_A@DLpzaa���oEJW��vniRKb����}xuudRtu\T^s�m[X\s��wx{���ysm^L���vw���ysvvxy�����������lalv���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���|~~��}�~|��������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȱ���������������������������������������������������������������������������������������������������������������������������{{zz{x{ywzwr|����XOOLIKJHGNT��oFGEDA=@@==@F��}9><::;=<;9:`}V;?A>===>=?==W���?=:878889984!f��\SVTYbW\gkotrmP226YnZXI3jeHHLIL��������G27;Bd���kC;522?Sdinhns^><CMK3TE<fC01023NdmtppukT[E=;@OC?EGHKJJKJLKILG>D2PutjrjF+I@,7Zd_nk@0(-fT29Rb^n\5(^j2=chjgU8%(jX..464;+
+rz5.Bbdh[*KgM40//34/-/_`RVW[_a`XOHC@f��^VYOLOZdTE<945@6,,0=?97<XU91+4nEEKLE@ADSn|jh���lCKX���dMHKLh���uuvwurdUxuYX\dg]YY`w��wxyyzwtso]P���qt}zuqstt{�����������}kblw������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������}������������������������������������������������������������������������������������������������������������������������������������������������ʲ������������������������������������������������������������������������������������������������������������������������������������������~snlgffefiks|��yknjhhda`__bn��pSPPPNNQNKJP|�jNHFFCB@@>AB>[���E@?;:;:<=><4&!m��KRW[YN?IIQUYTDB>8:WPQ`J<nZBNJGGv������[32687&?��aH<304BRY[YOa`OE>?QB/5#6jD43333Sdeg^in]WUC?@CCA@DGKJKKILKJJJB<?:\ppq}i@'$V>+<_gbj`A/(6fP3=Yddi[8.jq3EjfehV5&.m_04GRRW4/}x3/HfjmP%
IgN1332483/4fcWXWY`gcWGC?>i��MIJFILNJFE?942C5)*03464BN>80+9y�A@BA@?AEUu�jc~�OGL[���^NMNQj��~xywvuqfWyv]WWX[\ZZd{��|vvvtvxvrm^R���srtousqsuv|�����������}jfmx���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����������������������������������������������������������������������������������������������������������������ʱ��������������������������������������������������������������������������������������������������������������������������������������������}{�}|~����������������������{xvz{{zvsoqz��yjhda^[[]XWRWn��}VJGFGHHHEFFE='.s��[EPON:39BOWYUWUIBHA5QaNGqaOULE<8:>J?86/2125/!M��ZD;/3@KOIC?=CEE79AF83-5gG3222E_^QPRPPVWVHCEIG@@DFHIJJJKLKKJE@C9JTp{tI0$#P9+?X`]b\@/*2dM4@V^^dV64w|3EV^mjF/&4ws8DdclZ03yn31D_giJ"
LjL.8ADFM902fgWXUT^sbNF><@k�zEDFMMFDNNC=824F5)*.3:8/2==60,7}z>BDDFDDHW}�cSsrUCFKZ���]SXZ^g��|yxvuuubZyuWVVU[`ZXf���zwvuwz}vsm[Y���stwuysstwz}�����������{hcmw�������������������������������������������������������������������������������������������������������������������������������������Ē�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}|~~�����{|~�~}��������������������������������������������������������������������Ű�������������������}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������~yy~���{qonjjhihfeilZFU���]KTKE<<<:DSZY[]USJ:F^dW^�ta[EC=::DJD>741/16-%^��RA>4BJHF>987:<97D=873- 8rQ7535N`QHFGILKLSICGIA>AEFHIKKLMLLJKFAB..d�|N<.$(L;-=S\diQ5/(5fJ4>NZcjI,6�{/8Elz]81%=�~;EY[]M.
7vl02A_pj>OfK0H]ahf<0:kiZZRR`f_PD>:;i�rCELSTNM]YF?;29F2+,1;DC66EJ<20@�xA@MTVXOJV��^HJIIHIP_���cdirll��ywvvuqp^ZzpXXWZa`Z]m���|wvvz�vrmYe���uuwyytstxz|�����������xceox�������������������������������������������������������������������������������������������������������������������������������������Œ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}~}{��sikllg`\[gotsutstsw|{zxy|}}}~����������������������������������������������î�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{|z��zbb`[VXXSRQU`cc^\XNRX]ef}�og]KRPMHKOQGD?<99=CCy��b[NAC@:83331129IF34660#M�S:746DB2443/08EMFADA<>CFHIJJJLLLKIHCF@.T��q\B-#,S7+0Ait_@2+%8`C26<]n]6)B�u*1X��^@3#=�{7DPZ\E* 6g*/;pz^?
 NjH7Whktc72ErkWRNJXecH@<75d�p@GTkcR``PA==44>2+-4F`I8L\L42.K�s=IafebQJ]��TGLRQQNLb���hntugv��yuqnplk_b|mZWX^b^Y^q���yvwv|�}wrkZY���ttyywtutxyy�����������ychoy�������������������������������������������������������������������������������������������������������������������������������������Ē���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rcfebZI2+N_eilkjecqxwvuptusosupqtsonsuqtvtvvxxwyz}~����������������������������������������������~~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{usstqrpqsuttroopkjjhfejps|�wsopuwljkkjiea[UTYZg���wogR@AB><?<:?BHTB9;>>8*f�c>;87<96643224@JA=<>ADCEGIJJJJKMLKK@DE;m|k_T4+&2S3/4Q~y`L3,(=`?48JvvZ='F�r*D|�zaA1#<�~AAYi]=$=�d.4QqmZ9
OgH9Qdhi[60IxhXLIBZq\E?=72e�p?I_mVGIGC?;;23C1,08W_;5?C8+/+J�o=N_hf`PH`�{LJX``fXMd���hklnfv��zrljfjjae|oWWY\b_Y\x���zwxy�~vrlUX��xrty{wuwvwyy�����������wchqz�������������������������������������������������������������������������������������������������������������������������������������Ô����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������sbdedWF/-Lcdiihgcetwyxuuwutrqrnooiilpnmppnkmqnknmnprrsuxyxwvvx{|}zy~����������������������������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{{yz|z�����������������|zztw����|xteZ^ZSXYWYYX^_NNOWTPG��iRKE@?ACD@;=><GNC@ADMICDDIJLJJKKMKG<JO334776.,$4U40AkqkbA0-'>cC4?dqiS4%J�j0@[YSE2."B��XVssZ?%B�^,0?JI>) 
JfL<P]mjF6/XyeRFFEcXI?=<80b�l@ISOOJC@><=:29D,,.6G>;5/,-,//S�g>Q\abZMFa�wRSfnqpWNd���bgqo_w��soejhkice{oTWZ^d^\a~���yxyy�urlY`��wrty|xrtuvyy�����������tfhr|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uedgaVE13Lefikihehuz{xxxwtrtsqopqpmnmnmolkmonmoomnoompuvwwxwwz}�����������о�������������������}x�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|||~{zvwwxxuyzxuuuz~{wuw��sjha_bffd`\]_S[^YUU^`MMMOQOOTLMNQMJAhW<./34123)<L53;GECA40/&DkB4:DHD8.$K�_433-/22-%V����pbN6!
!H}]*,250.% S{W?OqzbI4<e{eQJEEMFD><;52j�i?FO_hOD@>>=:/9@-+,4CTS5.,,,.5c�YCOYcfVGLi�uNTiookTMh���`jpbS{��xqkkeji_`zrSXZ_eb]f}���}ywy���xumTa��{uv{~xstuwz{�����������rehr�������������������������������������������������������������������������������������������������������������������������������������Ø����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t_ehaUA01Lahjjiidgu}{wxxsqrtrrqpopopmmqommmomlonjnnmpotwuwxww|��������������ν�������������������{w{}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~wwzxwrsqfhjijlehbbfe^`gygWIEJDHOSDRYPA@9305;<9-WyH7643/0/%J�Y.240123.%b���mPD6-!!"K�d-.24/+#
`�aB[vsaH6>cycMEA???<:;:33w�d=FSulIFEA><907?/*+5UrC1-,.---axW?IYj]MFOl�zPWfmpdPMm��|Wsl_^���ttnjhliWdpQZ\`d^[cy���ywwy��~wslI_��{tv|}vssuww|�����������rdkp}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ufggdWB0/Scikjjhfhvzwwxxtsutpqqnmnonmmpknnnnmmmllmmnqpsvuwxuy}��������������ν��������������������~}��~~}}~}}~���}}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|~���xrnqou|xpy{nfe`]]^][W]`RNIHGEA5(-f�]F>:97454(z��mKN=/,&&
\�j-0331+"!j�g@TWZN87;cz\H@C?><=<<:41q�eAI\hTMIFC>=94=D/,->aW;0.-.-/-XgV;Jbn_RJPp�yNX_jkXJNn��|grf]]���wtlhlpgVa�pSU[`a[Z`y���zwyz��{wrhOf��}uv{{wtstwy}�����������oeku�������������������������������������������������������������������������������������������������������������������������������������Ô����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������sjgghWA//Uhiikihejv{wzzvsttsqpnmmppmmoolnnklnplkmnnoqqrvwwyz}��������������̼��������������������������������������������}}}~~{y�{z}}�~���������}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z|��yxrpjiig_Ze��keaVSQOPMU��qWikP@884+v�b77<852'.��f<>9>=6AEq�`CA@@?>?=;:50n�^BIXUNKGIE@?:4AB-.2?H?3,-,..-)Rq^?Rie\ICRx�rSThv`JHSt��v[\TPY���uqmnmmfZe�qXXZ_]YZ^u���yvwz��zwsgNl��|vvzzutsqtw|�����������mhmt��������������������������������������������������������������������������������������������������������������������������������������Ô����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������wjgigXB,3Tgklljifkx{y{ywsutssrpoopomoqnknmmpoollnmnorstxyxy{|��������������˻������������������������������������������������������������������������|u}��������{yry|~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}zxxvux|��{y�yfc`^YZ��\USSQME?8>Y��YJJNMOT^k��nIKFAAFFC@>9;u�]CCBFKKLKFCB=4BC/-../.-,,,-/,,Y~f>ILLFBDSw�gPZpraQKU|��pFEDH[���rrsqpqi]p�nVWZa`[\]s���zwz|��{triQs���tvy|xtrsuv|�����������ohkr��������������������������������������������������������������������������������������������������������������������������������������Ó����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wfhjhU=.8Vhkmmjghmw{|zxuvvsqtrpqronnqommmmmoooooqnoorstwvxy|}~��������������ʻ������������������������������������������������������������������������������������}}||{xyxx{}~�~������~~}{|~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}���zzuutqpnjmx�udjgjmstt~��ka^VVWZTPOOLD~�VHGDFKKPIJFG?5?A1.-..0../-02//d�wPEBA@?ESw�`J[odVMIQ}��gKGGMf���zyxttshVikWXZa`\[]t���{y{{��{srgKv���xx}zuqrtuv}�����������o
dmv��������������������������������������������������������������������������������������������������������������������������������������ē����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uijkfS=-9Wlmnmlhhjw{|xvvwvsqrqnrrnmnpmnommommnpoopppquuvwzyy{~��������������˹��������������������������������������������������������������������������������������~�}z~}}�{��~�|~�~}}|zz{xy{|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{~~}~�����ywwuxvurpqtkf�yda`abdc_[[XYSLWPC;87997345587B��t[MGDDEHU|�vVKKGJHFT���gEGHPj���yxuvvteOn�t^\]_^[[_w���{{|}�~xurfR|���xuzxqrtvwy�����������menw��������������������������������������������������������������������������������������������������������������������������������������Ó����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tkiiiQ8/8Ujlmnkiikw{zwwvtprqpoprpoommlommlnmnooonnopovuwzzzy|���������������
Ƕ���������������������������������������������������������������������������������������}~�~}�����~~~~����|{|}|~~�����������}~~�������}���~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~��~}}|}y|xvuuwqisk^\Z[TRNQLJLMSl��nbXRMPW\h��vbUOKOLUh���fKIFQg���yyxwtp]N���n\Z]^^Yb|���{|{|~ywvtjY���vvutwtuwvvz�����������~jgox��������������������������������������������������������������������������������������������������������������������������������������Ŗ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������umkniU6/;Wglnplihkw{xuvtsqsrqpqpmorllnnnoomlmomlooopsuuyzyxw|���������������ʷ�������������������������������������������������������������������������������������~��~��}��������������}}~~|~����������~~��}}~�~|}~||~~~}~���|}�}||~}}}�|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~wxwvvtooqspliljkonsst|��zqigdihnz��xaYSYbs���}�|vtgk���ulgffe_i�����{}zyxyxpd���xwwvxwtuwz|�����������{jhnz��������������������������������������������������������������������������������������������������������������������������������������Ė����������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������smklfT2,<]imnpmjhmvywvvtrpprrrrmnqqmkmnlnnljnpnmnqnprsxzyy{||���������������ɷ��������������������������������������������������������������������������������������������~�~���������������|��~~����������~�|}|~~}{~~}~|z|}~}|{~������~~~~~~~�~~}~~~|{~~~��~~�~||������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����~|{|}���}z|||z~}{}~|~urqqsu������|zt|��~vtqpropz����||{{{yz}zo���}y}���xv{}z�����������|hhnz������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wommjQ/,:_jmopnjglwwwwutqqprsqqqppopmmnmnonmpomoopnpqswyyxz{{���������������ȸ���������������������������������������������������������������������������������������������������������������������~�����������������}~~|~~�}~{}}}}}~~������
�����}~��������������������~}}~~��~}|~}~���{���~|{x}���~|}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|||wuvxvxzy{{}�{vvyvsttww{wy��zqr���zww~~~�����������{ggp{��������������������������������������������������������������������������������������������������������������������������������������ē����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xonoiQ/,<anppqojhlruwvrqqqorronoolmmlkmomnnopolooopqquwyyyy{{���������������ʶ������������������������������������������������������������������������������������������������������������������������������������~~~~|�}}}~~�����������~������������������������������������������~��~��~�~~~~{|z}���}{zzz|{y{{|||{zxvwvvvwz|y~��~~|{z}}~~����������������������������������������������������������������������������~���������������������������������������������������������������������������������������������������������������������������������������������}|{yz{yyzy{ypomlhifjlnpopiae]OQYhqsjiowz������������yjhq{��������������������������������������������������������������������������������������������������������������������������������������ē��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wqqnkM.*@bnpqrlkgjrvwtrsqqpppoornlmnmlnpmlnnnnoonorsrvyxyz{{~���������������ɶ��������������������������������������������������������������������������������������������������������������������~���������������������~���~}��~~~���������������������������������������������������������������������~��������~���~~�~|~}{|{|||}}|~}|{zzwwyz{}~����������������������������������������������������������������������~~}|||}~~�������������������������������������������������������������������������������������������������������������������������������������������~~~~~|||{wusroqqqoqnrokdZF8<AEF@@CMb}������������whhr}��������������������������������������������������������������������������������������������������������������������������������������ő����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ympmiL./Ccmnqrolintxvtturqpnnppmklmmlnnmmlmnkonnmnorrwyxz~|y���������������ɳ��������������������������������������������������������������������������������������������������������������~���������������������������~~������~������������������������������������������������������������������������������������������������������~����~}~�~��}|~}�~�z|{{}|}}}|{~}|}����~~}}||}~��
����������������~}~}|}|{{}}}{}~����~����������������������������������������������������������������������������������������������������������������������������������������}|}}{{xxvwwwxwsuutomdVLGEJMONMLSa|������������uehr~��������������������������������������������������������������������������������������������������������������������������������������Ŗ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xnpnhJ/1Ecnnrtqnjnuvqssprqmmnoolkkmkkonlmnlmnopoonprruvwz{yy}���������������Ʋ������������������������������������������������������������������������������������������������������������������������������������������������~~������������������������������������������������������������������������������������������������������������������������������������}{�~}}~}~}|zz|{|~{z|yz{{zzzzzzywwyvw|x{z{~|z||z{{|{{z{||{{zyxyxxz{{z|}~�}|~~�������������������������������������������������������������������������������������������������������������������������������������������~}|{yyz{yxxwtusnk^UNLMRPPONNPi�������������rhlo}��������������������������������������������������������������������������������������������������������������������������������������ė������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������trrqjJ10Edkmrsnllotsnsspponnomlmmmkklnilmpmlmolnqorstuxz||{}{���������������ǲ��������������������������������������������������������������������������������������������������������������������������������������������������������~���������������������������������������������������������������������������������������������������������}������������������}�~~�������~}}~�~}~�~|{{z{{{|}z||yyyyzzx|vyxwxzyyxwywyyxwwwstsrrsvuxwwwywvvyyx{||||~||}z}�����������������������������������������������������������������������������������������������������������������������������������}{{{{|{yzxwtoje\RMLMONLMNOYo�������������sgmt���������������������������������������������������������������������������������������������������������������������������������������×����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ytqpiI31Gbnosrphjqursrqqqqonmllpomlmnljmkkklnnmnoprssxz|zz{}���������������Ǳ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}���}}~��}|}z}~~}}~~|{}||~|}{y{{z{{y{zz{y||yxwwxwxyvtuuxssvvttsvvuurtyxyvssqvxz{xzzyz{{|}}~}�~������������������������������������������������������������������������������������������������
��������������~|{||{yxwvvtnhaXNMQPOOONM]kv�������������rglu��������������������������������������������������������������������������������������������������������������������������������������Ô����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wtrohH3.Iblqtqolelsttqmprononjknnklmlimmjlmnmnoomqsssxz{y|{|}���������������ű���������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~~�~���{~���}{}~}~||}{~|}�}|{{|z}}zzz|}}}|z{{w{yyzzzywxyyzxvutywsvuwxtutstrnsssvwwuuvuvvuvvvvvzzyx}~�����������{{zzyy{{|}~}}�}|}}}�����������������������������������������������������������������������~|~~}|{wwywuoic]VPOSPPOMN]it|�������������qeov���������������������������������������������������������������������������������������������������������������������������������������ĕ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zqpphI/0L`mqtsrnkp
sttqoqpnoqnklmlllljhkiklmolpqppqrrqxzz{}{|����������������²����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~����}~����~�}~~}~~{|}|~~|}~||{z}|zz{|}{|w{{ywyyyzyzyyzy{yyyutxwuvwxxywtuvtttsuvwwuvvuxxwxwyz{||z~|}~~�����������zonnnommpponqstvupnmqpnqqrtsuuvvyyz{|}~������������������������������������������������������~~~}|zzwwxwslfc\SPUWTTOU]ht|�������������pfoy����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yz�������������wnpoiJ.4Lblpsrokhnrropspnlpokkmlklkkkjkhlllonoporqoprvyx{{z}����������������î������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�~��~�|�|{}�|}~}}{|~~}~|~}|~~{{yz||{{y}{xzzy{{zyxyx{{yyyy|ywwvvwtuusuuuwvtvxywwwwxxyxzz{}~}|}��������������qiecbddeg`]dikmp^TXUPFPY_bb_Z[afekqsvx{{z{{||~~~}~~�����������������������������������������~}|{{yyywwtqic`ZRQTRQSSbgmu~�������������njoy����������������������������������������������������������������������������������������������������������
�����������������������������Ɠ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������y{~�������������vqurjC.5Mfmrrsplisvupqqpopsomnmkkliklmkjkmmonopprsopsvxz}|y����������������Į����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~|}��~|~�~}}|}}||~}}{{}{}||{zz{zz{{zzxzxyyxyzywyxzxxvuvttuwwyywyywvwxvy}|{|}~~�����������������rihfgfdgd\]bglqoXMPOE:GX][RRLFKUafmqswy||}}��������������������|~���������������������~}}zyxwwvuqmgb\UPPOQQ^gjksx|�������������oknz���������������������������������������������������������������������������������������������������������������������������������������Ė������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{|�������������vqrrhA.5Penqsspkiptussrppqpmmlklnkjkmljllklnonopoqqutv{||{|�����������������Ĭ�����������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~~�~}}~~}}~}~}{|}|}~~}z|{z{{||zyzxzxy{zwxy|{wzwtxvxvuvvxwwywuxxvy||}}~�������������������qjhhhgeed`_ejlpo\PNMGAJW[ZVQJHMVbiopswz}�����������������������������������������������������������~|{|zwrlh`XROQNTahjknswx}������������|mgp{���������������������������������������������������������������������������������������������������������������������������������������ĕ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{�������������wsnlhB,3Sfnrstqmipsturqrronnplkmnjknnjjlmjkmmpoplorsuwz|{|~~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~~~}~���~~~~}}}~|||}~~~~��~{|||zy|{{|yy{y{zxzwxzyyzxwuvywvwxzyxyxwyxxx{{}}~�������������������pkihhghih`[flmqm]SPRFANY[[XQKIMTaiosvy~}����������������������������������������������������������������~yrkb[VUS[agjjkptv{�������������}iir|���������������������������������������������������������������������������������������������������������������������������������������Õ���������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wropd=,4Ulopsupkjpvxwsqrropomljmlklonkljkklnlooppprqtzzz{{~|}��������������;������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~~~~���~�~�|��|}�||~~�~}|z||{{{{z}zzyy|xyzwzzyzxvwvxxwxuyyzzyxxzyxz{}}~�������������������smjkjihkjb]gnosmWSQRF@R\]\ZQIINVahptxz|�������������������������������������������������������������������wqjg`]]elnojqsw{��������������|gks}���������������������������������������������������������������������������������������������������������������������������������������Ô����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������znooh@-6Soqqvtokhtyyxvutqnppnnnmlkklmmmjjmmnnooprrqqvzwx{|}~���������������ξ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������~����~}~���~����|~��~�}~~~~}||}|{}{{|{z{z{{{{xyxx{zxzzwxxxyvz{z{zzxxzy{|{~��������������������unmmkikkib`gprwqTPVSFES\^ZXPKKNYcjqwy}��������������������������������������������������������������������yvrpjioyyxvxyz~���������������zknu~���������������������������������������������������������������������������������������������������������������������������������������Ė����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yqnnfB)5Ukprusoigtzxwwutpprnmomlpmklnomllnnmpqnqspsrxyx||{|���������������ͽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�}��|~��~�~��~}~z}�~}||~{{~{|}|zyz{z|{{{z{zyzzyxwxywwyy|{|zy{zz||}����������������������tlnmkijkjd`gmqunZPRSDCU\^_YQKKR[bmtwv}��������������������������������������������������������������������}yusrtxyz|~|~�����������������xknu���������������������������������������������������������������������������������������������������������������������������������������Ò�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ysplcC,9Yiquuqokluzxxxuspprnlnmmrojlommmmlmopptrqnrvvyy}}}~���������������˼�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~�~�����~|}~}}~|||{yzz{|{~zz|{zww{xyyxwwz{|z{zyzz{}}~���������������������ronnjkmnmfcinqwmYURSHFU^a]YPKKS\dnuvz���������������������������������������������������������������������{wvwz|}~~��������������������xmnu����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������yrqsfA-;Xmputpmkjuzzzxtsprrpnnnmllloommnnkopppqqoqsvuwz}~~���������������˻���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������~��|~���~~�~}�}~}~}{y{|{}z{|{{xzyz{zvyywvwy{{{yy{zy{{}~~���������������������opnmkmnlifdinrvjWVSQAF
U^``ZNKNU\epxy{���������������������������������������������������������������������}zz{|�����������������������ullu����������������������������������������������������������������������������������������������������������������������������������������Ė������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z|�������}vsqsgD/:Woquusnlovy{zxttuqoppnnnonmonlonnnpmmpoppqtvux||}}}���������������ϼ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�������~��~�������~}}|}~}|}{{{||||{|{y{{yzyzyvxxx{{{yx{|zz|}����������������������}opnmopnnlfbjptxm\WVQGIX^`b[QLNU^ipyz|����������������������������������������������������������������������}}}}������������������������vjny����������������������������������������������������������������������������������������������������������������������������������������Ô�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������v|�������vtpqcA0=[oqtuqkinyz|yttuspptnloopnoplmpollpmmqqorsssvz{{}|}���������������ͺ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������������~~����~~~}~}}|�~~|}~|}}||{z{x{{||{zzy{xw{xyyz|xz|z{||~~���������������������zmpnnnnlnlfenswxkZXWQDK\abcYPJNT`ktxz}�����������������������������������������������������������������������~}}�������������������������thqy����������������������������������������������������������������������������������������������������������������������������������������Ð��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}wtqnbE0>\nqvuqlhmw|{wvyvrqtrllqponpononmkorloqrpqsrqy|{{}y~���������������ι����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~����~}�~��~~~}|}~~~|||{|}{{{{|{z{{|}{{x{zw{yz|y{|yzyy}~}}|���������������������qoonnoppqnhfnswymYYWVHO_baaYMIOWblswy�����������������������������������������������������������������������~~�������������������������pjpy������������������������������������������������
����������������������������������������������������������������������������������������đ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|wtsnfJ2?_rqttqlgkv|yx|{tqvupopooonoooolkmnnnoopqrrssy{z|}~}����������������̸�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����~��~��}��}~�}�~��~}}|~|z||}}||||}|{|zz||}zyyxz{zzzzz{{|zzz|}}}~�������������������oempomooqrnggorxzk[YWSEO^aaa[OINUbmux{�����������������������������������������������������������������������~��������������������������slpy����������������������������������������������������������������������������������������������������������������������������������������Ô���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yvtpngJ6A_qqtuolils{}|{wsuwqmrpllnnoqqokkpnmppoqssrouyz{|}~����������������̵�������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����~����������~~�~~~�~~~}|~~~~}}}}||}|{z|{{yyxx{z{{{{y{z{{|{}}}~~������������������nQhpppppponmgipsx{l^[XSGQaba`YOKMWdpvz}������������������������������������������������������������������������}��������������������������pks|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ywuqocF1A`orutnld^gsxyxuvxuqopmmonknqommqomnpopssqrquwy{{{|}���������������̴���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������~����~����}��~|��~|}�~|{|}}||~}|}}}|}{|{yxyy{{z|zz{z{zz{{|~z{}~�����������������jRYhrrqpooppoikptz{k^[YQDTabbcYMKRZcnw{�����������������������������������������������������������������������������������������||���������rhs|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{xuqoeG4@foqusmj`LShowwwwuqqrpmopnmpppnnmlmonmqrqpssuvyzz{~����������������˳���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������~}������}��~~��}}~~}|{}|{||}}}}|~}y{|{yxz{zzwy{{{{x{z{||z||}|�����������������iVU]fqsrqopqrpjipu{xi^\ZRCT_ccbWLLRZepwz������������������������������������������������������������������������������������������~���������olr|����������������������������������������������������������������������������������������������������������������������������������������Ŕ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wvupoeI2@epqssmi]C@Rbqtxyrpsqmmppmorpmnpmlonlopooqtsuvzyz~~~����������������ɳ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������}�����~�}���~�������~}~�~~z{~|{||}|~~}zz{z{{x{yyzxz{wyxz{z{{xz|{}|�~�������������fQYTWhqrppoqrspkjsw|vg_][TDWaee`XNJR\fpy{������������������������������������������������������������������������������������������~��������|lkt}����������������������������������������������������������������������������������������������������������������������������������������ē���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uuurofJ5Ddpttsnk]C=DPbovtrqpnmooooppnnmmllpmlpolorsqrxz{}z|����������������ǲ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}��~�~��~���}{~{z|||||}}||||}|{{z{{yzxxxzzyzzxz{yzzz||}}~���������gYWTLSgrrsqqorsqppuwyug_^[RFZdfgbYNKS^gs{~��������������������������������������������������������������������������������������������������}nnt}����������������������������������������������������������������������������������������������������������������������������������������Ó���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yvtrncE2Heortqpl`E=?DOflnrsnmmpmopponpmmmmommqpoqqsrtyz|}|}~����������������Ʋ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������}~~}~}}~}}~{{{|z{|{zxwxyx{yy{yzy{{z{{{{{|~~~~��������fVVPIHVkrrsoqqsqqppwx{we__\RJYdgheVKMT]ht{}��������������������������������������������������������������������������������������������������{mnt~����������������������������������������������������������������������������������������������������������������������������������������Ó���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wvsqofC5Genqrsok]C=<=AO\hqomnmlklpmlmqnnnmlmmnonoqsuvxz{{y|����������������Ű�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������������������������������}�����~|������������������~}|~~~~~~~}}}�~~}|z|||{yyyxwxz{zy
z{{wyzyz{z}|{|~�~�������fZVSJCFVjqrqqqorsrprwy}wb_^^QK[dhhdWKLU^jtz~����������������������������������������������������������������������������������������������������zmou����������������������������������������������������������������������������������������������������������������������������������������Ò���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vtsrplD3HenrssnjY?;<<?CL`imnmkkmnnkmmommnmlmkmponorutuxzxy|~���������������ñ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|��~}�~~~}~}{|||{z|}}}yz}|z|{zz{zxyww{|zyv{|yxvxz{z{{|{}�~~����xbVWOKDBDXisppolnqtspqvy}vb_]]OKYehfbWLOX`kw|�����������������������������������������������������������������������������������������������������ylow������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������wutqkeA4KjosusniZ?78<=>CM[bjnmnnollpmmmmmkklmlonnpqqrwxyy{|~����������������¯�������������������������������������������������������������������������������������������������������������������������������������������������~�����~���}����������������������������������������������������������������������������������������������������������������������������������������������������������~������������������~��~��|}}~~}}}{{||||{}|}|{|{zzzz{yxywxxxwxy|{xyz{yzzyy}||~}~�}paXXRFDB@DXorqnkflqsrrqw{�uaa`\PLZffccULNT_lv|�����������������������������������������������������������������������������������������������������ulnu�����������������������������������������������������������������������������������������������������������������������������������������Ð�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}tvsnkb=5JiosuqkiV@77;;=?CKXejknnnjmolllnkjjkkkllopqqrvwy{~|}����������������������������������������������������������������������������������������������������������������������������������������������������������������~���~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����~�~}~�~~~}~}}~~}}}}{{|{|}|x}|{|x{zzxwwxxzyz{zyyzzzwyyyz{{{}~}~~}yo^WSQB@B@AGYlrrpkflqspqswy~qb`_^RP]ghecSKOVbmx}�����������������������������������������������������������������������������������������������
������umpv�����������������������������������������������������������������������������������������������������������������������������������������ď��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����������������������}strnnc?8MfprrohjX<67:<=>ACMYcknnollmknlljjjjlkmmnnoosuvw{|z}���������������ο��������������������������������������������������������������������������������������������������������������������������������������������������|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~}~}~~}}y|||}z|}|z|zy{{zyyxwxyzzz{zyyvyzyy{xzzz{|}�~}}|ymaYVMC??>@CFZlssqh`hrtroou{~rc`_^NK]ghf`SIPWdny�����������������������������������������������������������������������������������������������������vnpv����������������������������������������������������������������������������������������������������������������������������������������Í�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zsrqnja;7OgosrpjgV;579;=<=@CHZdiklllmmljikklllllmllopssuuwxy~�������
��������̿������������������������������������������������������������������������������������������������������������������������������������������������~�}�~������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~�~��~~z~}�~}||~}y{y}{z}|yz{}{z|zwxvxyywvyzzyyxzzxyxxzyy{}�~{xr\ZWNE??>?ABD_mqqmdZjuurpqw|}qaaa[OMbffh`QIPXamz�����������������������������������������������������������������������������������������������������vkpx�����������������������������������������������������������������������������������������������������������������������������������������Ë�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���������������������}��~����������������xrrqmj_86NinpomidS;5689;<>BDFKXdgllilplhiihhklkjlmkmprpruuwz}���������������ν���������������������������������������������������������������������������������������������������������������������������������������������~�~}~�~���������������������������������������������������������������������������������������������������������������������������������������������������������������~�������~}��}|����~}~�~~{}~|z}{z{{}|{}yzyy}x{{zyxvxywvvzxwyyzwuwwwxxxy{~~|}{vi_WVPE>;>ABB?D]qppmdbptvvstx}~o`a`[ONbgih`QMPYco{������������������������������������������������������������������������������������������������������vjqz�����������������������������������������������������������������������������������������������������������������������������������������Í������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~|��}{~�~~�������������ystoml_<4QdmnplidN<677:;<?BDGFLUaijkllhghihikkjlkjjkoqprsswz{}��������������̺����������������������������������������������������������������������������������������������}�����������������������}���������������������~~~�~~���������������������������������������������������������������������������������������������������������������������������������������������������������������~~|~���~~~|}~���|~}|~~~}~~y|||{zzz{|||~|zyzzzzzyzzvvwwywxywxzyxwvwwwvwvw{||}zti`[VNE?<<BBD>;@Zmopqllssutstw}naa_ZMRafie_RKPZfrz������������������������������������������������������������������������������������������������������ulr{�����������������������������������������������������������������������������������������������������������������������������������������č������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~�~~~~}{z}x������������ynpnlj^:9QemoqmicN:778;<;=@CEFJMW_dikjghhhjjgghkjklmnpqpqsvvu}��������������ͻ�������������������������������������������������������������������������������������������������������������������������������������������~~~~~~��~}�
��~���������������~���������������������������������������������������������������������������������������������������������������������������������������~~~~�~|~}||}~�~|{}{|}{|~|~}}}{|{zzzxzxx|yywwzxyyzyuvvxxwwywwxwxwxwttvuwvvz|{{vi]ZVNC>==>A@?:7A^mpoqprrtvrsty~}m_aaXMVbihg^QLO[gs{������������������������������������������������������������������������������������������������������qjq{�����������������������������������������������������������������������������������������������������������������������������������������č�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{��������������~����������������~���~~}~}}|{{|{}|�����������wopoli\;8UgnpomjaL:9:9:;=>=BCFLNOT\ekjjjhehgffghhjllmnnmorssv~��������������λ����������������������������������������������������������������������������������������������������������������������������������������~}~}}~~~~}���}�����������~�������������������������������������������������������}��������������������������������������������������������������~���������~��~��}~}~~�}|}|~~~~~~��}{|~}}{z}|||{|{z{{{y{yxyzzyyvxxyywywwxwwvuwyvvvuvvvvuuuuvvwyz{xgZZYMC@<>?AA?:54@fqnqqpqrtwutwz~~mca`XKUghgd]QKR\gt|�����������������������������������������������������������������������������������������������������mkr}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~~{|�|{|||z}{z{|{{�����������wmppmj^36Vgmppjf_P:8;:9::>?ADFHJNRX_dfijgfhfeghhgjjknmlmooosv|��������������η�����������������������������������������������������������������������������������������������������������������~}~��������������������~}|~}}}~}~|}�~{�~~����������~~~������������~���������������������������������������������������������������������������������������������������������������������~��~}~�}~}|}|{}}~}|}}}}~~}|}{{yz{zyzyyyxxyxxwxxxyxvwvxwuvutvuuttvvvustttuttvwxtg[ZTND>?>@>=?:68CUpppqonqstvvvy{|lba_TGWfgfe[QMS]gw|�����������������������������������������������������������������������������������������������������nkt|�����������������������������������������������������������������������������������������������������������������������������������������ō������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������~��~�����������������}|~�~{{{{{y}xx|zy{������������umoplm[59UhlmmifaK7799;;;>ACFFJKNPSW\dgfdghfffghgjhinkknqoptu{��������������̵���������������������������������������������������������������������������������������������������������������{}��~~~�����~���~��~�~}|}|}||~~}{}�~~�������������~~~���������������������������������������������������������~�����������������������������������������������������}��������~~}���}��
}}|}~||}}{|~}|}|||{{|{zzzy||{yxxxvxwxxwwyxwxyyvtvwtuuvvwvvttsusrtrsuuvwxyj^ZXQJC>?<=;;:8:G[lnpnqppprtuttvz|ia`^SJXegfc\PMS^iv|�����������������������������������������������������������������������������{�����������������������}kit~�����������������������������������������������������������������������������������������������������������������������������������������ŋ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��}����~}~||~~~�}�����{}�}{~~~}|{z{zxvz|wxz~��������qlnnjiV77SgklkiibJ=988;<=?BCDEJMLMPRW]acdedaegfgggfilkjnomnrpw��������������ζ������������������������������������������������������������������������������������������~��~���������}�����}��~�~�~�����~���~}||}z{|{{~~}|{{}y|~~��~����}��~|����~����������������������������������~������������~�����������������������������������������������~����~�~}~}}~~~~}�~~{}}~}{|�|||}}z|}{||{zyyzyy|{wwxwuuuvvvuwwuwwwvuvvuusvvuvvwsqssruursuuvtqc\XVSNHB?<;;877<I_nmoonpnnoqstuuvy}zj``]RJZiffc[NOV]jt}����������������������������������������������������������������������������{x�����������������������~jkt}�����������������������������������������������������������������������������������������������������������������������������������������Ċ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~x~���}mtv|���
����������������������������������������������������������������~��~��������~��~��}~~��~��~~}~~�������~~{x{|z|~{yw{zxw{yvwz~���������qnmmigV78Yhjklih^MC>7::<?@BCCEHKLMPRSW\chdeffgdfighjkjjmllpqnz��������������˵������������������������������������������������������������������������������������������~}���~������~��~��~}}~~}{~���~�~~�~�~~~}||}}{|z{{}}{}}|{{{}~�~~����~�{�����~}���~��~����~~��������������������~}}}���~~~������������������������������������|��~��~��~~~}�����}����~|}}}}}||||}}|~{||z|{z|z{|yz{{xy{zxxzzxwwwuvwvvtsvvuttwttvtutsyrttttrqrosususrrtofZXTRQLH>=>;;87:Fcrolmolonopssrssuy}ymda\QKXgiid[OLTaks}���������������������������������������������������������������������������wr}����������������������|nmt}�����������������������������������������������������������������������������������������������������������������������������������������ċ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yXY]colM<CIRSX`osv|������������������������������������������������������������~~��}��}��}�~|}�{�}~�|}~}�{||~��}~�����~||zy{{x|{xvwywwvxusux~����}~~��mjlkieS58Wdfijhea\XK;::;?@AABGHLMLOQRSW^baceedegfghhhgkklmnnpz��������������̴������������������������������������������������������������������������������������������~~}}��~�~��������}��~}�~{z��}��}~�~�}{}|}~}z{||||}}~{y~z|�~|��}~����~��}|~~��~����~{������������}��������}~}~~~|��~}{}}~~||}}~}~}������~���������������������~��~�~}�}~���~~~}���~}��}}~~{}|}}}|||{{{|{{{z{{||{z{{{zxzz{zxzyxyxxxxwxwwvuvtuvwvtrvuusrrsotrooqsrqurssrspl`[YVONPKD=;<>=78Fgqrnjkmlnpnqstustuz}xka]`OJ[eihg\NNVaju}��������������������������������������
�������������������������������������~sk�����������������������|hmv�����������������������������������������������������������������������������������������������������������������������������������������É�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pRKKHJD<=?=<.(*0<J]ry|������������������������������������������������������������|����~��}|�}}��}|�|z~~{~{{}}{{~}{~~}}~}}��~{yxyxux|wuvwwvwxusruw}�����}|��gfjkhbO2=Wcfikhe`cdYI?;;>?ABDIIILLMONRUZ[^ceabfdegfgikljklmor{��������������ȶ����������������������������������������������������������������������������������������~}~|}�~��|��~������~~��~{�}~~�~|~~}~~}~|||z{|zy{|||~{z}}{|{~}~~�������~|~}~~}~���~����~��|~~~��~���|{|~�|�~}�~~}}|z}~}}z~}}~|~���}����~�������~�����~~~������}~����~~}}~}}||~~~~}}}|zz|~}}}z|{zzz{{z{|z{{wz{z{yyzxxxwywwywwvuwvvvtvvswvvvsstsvsrsprsppoprqprprrrvl`[VTQRNKE@=>?@<<Jivupklknopqqrsstssvy|wb`^^NP^giheYKOW`ku~���������������������������������������������������������������������������}uz�����������������������yenv�����������������������������������������������������������������������������������������������������������������������������������������ć�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kQIGFD@?><<=932/-.-;Qgv{{}�����������������������������������}~�������~~���~{~��}~��~�|{�~~}~~{}�}{|�}|~~{y{||{|�{z{}{{{}||}{z|}wwwyvu{xtsxwtrwwrrpqqz�~}~~}��ifjhecM48V
biiigc`bgd[H>;;=ACEGIJJJKOOPRTV[`bdcecdgffiljijilnpx��������������ǲ��������������������������������������������������������������������������������������}}|}}~�}|������z||}}||~~|}}|}||}~~~}|{|}}}{z||z|zvy|yy}}|~}{|~}|}}|~���}~{|~}}~~�~~�~}|}~~~�~~~~~~z}~|~�~{yz|~~|}~��||}}}~|z~|�~�~�~~�~�~��}~������~~�����}}~}��~~~}�~��~~�}~z~}{||{~|}~|||||{~}}|{yyyyzy{{y{zz|{xyzzzyzxwyxvuwxvuvstutvvsuuvwsusrtrttqsopsppooqpnpoqqpsj_\WRPNMJB??=??;;Nlwspommmknopqprtssruy�xeb_[LJ]gjfbYMOV`ju}�����������������������������������������������������������������������������������������������������xnov������������������������������������������������������������������������������������������������������������������������������������������Ĉ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|gNFECB@?<:=?AC@>9643014CVdox~}���������������������������������������{���~~�~|��|{�{|~~zx}}{~}zz~~y||z{�|zy||{|}~~}}|z{|||}�}{}~|xwwvvz{rruvxuvutsriYey~|}~}~|{��zgghfd`K6=Vehifec^]ggbXL@=;ACDFJNGMMNPQPQSV[^bdedegdeiiijkklpow��������������ǰ������������������������������������������������������������������������������������~��~~}}|~~~��}��~~}}~|z|~||}~~~~{|}}}~|{}{|}}{{|zz|{y{{y{{z|~{y|}}||{|}~�����||}}{|}~~�~}}~~��~~~}~�}�~}�}|wz{|~}~~}|~}}~{{|{|~{�{{~|�||}�}~�~�~~�}~�|~�}}~��~~}}��}~�~}}~}�}}|||}}}{||y{{||{{||}{||yxyyx{yzywyyxzyzxwyxvyxxxvwywwwtrsuxusuutsssrtuttqpqornoqoponopooopka\WSRRNJB>@?===?Nkzurppmmkjnnooqssrpqtw{sfb_ZLJ]eggbWMRWalu~�����������������������������������������������������������������������������������������������������xmox���������������������������������������������������������������������������������������������������
���������������������������������������Ć����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~eJDDBBA?===<BBCC?<<96666348CL]otu}�z~��~}���������~�������~���~~��|�����}��}|~�~{|�~~||{|{|y{}}|~|{x{{wvz|zyz||z}}{z||wwzzxz{{wy~|xyyvsuwurtvtutvsqn`XKf{zy{{y|zz��ydege`]K0:Xbdheec^_ggfbXM@<?@CGHHINMNOQRSSTV\^`aceccdfbfjkfhlly��������������Ƭ������������������������������������������������������������������������������������}|}|}|}�~}��~�}��}~}|}}|{|{{|}}�}|}}}~|{}}}}}}||zz{{{z{yyzz{}{xyyy{{{|~}|}~�}|}~y}~y{|}~}|{|{|}{{~}|~~{|||}}}}}}~|}{{}~|{{|}�}}~~{}~}}}}}|||}{|}|~~~~~~~�{~�}����|~~�}}|z~}~|}~|}~~~}|~}}||~}}~|{{||}}|{{~|{|{yyz||||||||z|{xyzv|xxzwwzxzzwywvxuvzvwwuvvwytutrtvvutttqrrorstsqsrqqnoqnmnommonoh^ZVRPOOLE>@?<;>@Ogyuutppmlkkmlnnnronnpswwoe`_\NJadef`TOSW`mu~�����������������������������������������������������������������������������������������������������tjmx������������������������������������������������������������������������������������������������������������������������������������������Ä���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mccjr|��������������}{z��������������������������������������������aICAA@>>=?@@A@A@=<<:8:;<99997636AMYdpuwy������������~~��{���~���{}��~~�~|z~y|~}z}}z}~}||~}|||||}}{zy{zzwyyzzzz{|||{yyzzxwxyxz{yx{~ytwyuqtuuuuutuuspl`OTNhzzxw{wyyx�t`dfc_^J1=Ydbggdc\bfeff_]KAA>AGJKKNMMNRQTTTTVYZ``hefeeeghgejjm}��������������Ĭ�������������������������������������������������������������������������������������~~|}}|}~|~~
}}~��~}}|{~{z}{y|}}|}}}}~}y|{y||z{xy{{yz{zy{yz{|wwyx{{{{}|~}~{|{{{}~x|~�}{{||}~~}}z|}}{~||||~|}{z~}|{z~}|~~~�z|~|zz{||||~{{|||}}{~~~}}||~~|~�~�}|}}}~~}{{{}}~}~~~}~�~}}{~~}|~||}~{y{}}z|{z~}{~{zzz{||{zz{zxzxwyz{{zyvvxyx|ywwuwvswvtvuvuuvvvvrsuvutssssrnqrrtpmmnrnnopnoopnoomj]\YTPPOOF>=?;;=@Kduvususrnklmonmnqrrqqqrxxod_^ZNM^edd^TPRWbnv|�����������������������������������������������������������������������������������������������������rjnx������������������������������������������������������������������������������������������������������������������������������������������Ņ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{\KNOML[cow��������������~xy}z��������������������������������������^JCAA?>?=?@A@@>>=<<<;<=><;;:;7766639DLZotx||}��������~��|~�����~x{}|}~}zy}|y||z|zy{~}zx}}zw|}{z{}xvy{xxzzzwyzxyz|yuxzwywyywy{wvwwyxvwwtuvssrttttrog^TWUVguyvrsxyxy��sebec`_I0?X`cffda^aceed`c_NC=>CIJKKKOQRTQSTROUX]bbcfdegghfgkkl{��������������ĩ������������������������������������������������������������������������������~��~||}|}{{|~{}{|~}}�~~~}{{|~{}}||zz}}}{{z{}�{{}yy|}z{yu{zyz{yxxxuwzyyzzz{zz||z}{|}{{{y{||{}~}|}|{{}|||}~}~~}|~z}}{}|z|}}}}|{}|}|{}|~}|||{{{z{{{{zz{{{z|||{}|}}{}}y||}~{}}}~||}|}~}~|{||}}y}�|}{{~{|}{~{|}||}{}~z}|{{|{}}||}{zzz||{zwxyxxwvwvx{ywxuwyxwxwwvuxxtwuvvuvtvvuvtqrsstpqooqmpoosrpnnmnjnppnnmnmnkfZWUPGHNLHB><;;?BH`psvwvsrookjkkkmllmroprqwwl`__[KMaeee_SKPYbmv{���������������������������������������������������������������������������������������������������~phmx������������������������������������������������������������������������������������������������������������������������������������������ń������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������xZLMLILOOMU\ajv��}���������}wsy|����������������������������������{\B@@@@>@?>>@@@A?>><<;;=<;:89977:;9995149<HUfnrx}~}��}�~}~��}���||~}|�~}|}z|~{{~~}}~~|{}~{z}~zzz{yxwzxtuzzusxyvwzyttywvwxxvwxwquxyxwywturrrrttsrtse[VVUSSbswqkqvxww��rbbca`\H0=Z]aedaa^]cgebae`ZKD>@FIJLMNOOQOPSPPSTV[^bdcdgfffhghk{��������������Ī������������������������������������������������������������������������������}~~�}|�}xy|{y~}z}}z}~~~|}~|}{{|{{|zw{}||{{|~}{||y{zy{{yx{yxy{zzxzyyyyzzxzzyy{xy}|z}|{{{z|{z|}{|~|{||~|||}||~}{}|{|}|||z{|yz{z{|zz|{y{zz{|{{yz{z||zy|{z|y{{zy{{{}|{~|{}|~~|{||{}{{{{}}{|||}}|}||~{||||{z{{|}{|zy|{z~z{}{{|z{{{{zyzyz{{y||zzzyyxwwvyxvwvuwxvvwxvttxxutvttvusvsstsstrqoqqpqqppmoqqqlnmkknopkklkkjc^\YPCBFIHHA;<=ADL[fotwvurrpnlkjkiklmorooqquth`]^ZHNehdg`UOS[dovz�����������������������������������������������������������������������������������������������������qhoz������������������������������������������������������������������������������������������������������������������������������������������Ć�������������������������������������������vnq}������������������������������}����������������������������������������������������}�����������������������������������������������������������������������yz{z~����������������������������������������������������������tQKIHGGKMKMNNNOS^_djpvz~�������|sqqqyzy|����������������������������wWD??><>??<;>??@==<=;;=<<979998:==:;899;<==<=?IQ`mruv{xy}|xx~~}z~}{{|}|z|�}{|~~yy}{w{~{{}~zwz}zw{|{vwzzyz|zvvyywuwwvvxzwtwxvwwwwvvwwuvuuuvwrqsqjnstnplncUWTTTRLcttmdluvuxq^aa`\\D0>Z]add``[]deaaba_aWMBACCGKMONPNNNSQRQPRW]`bceffcffgkn}��������������¥��������������������������������������������������������������������������~�|}��{{||||}}||{}}{}|y|~}||{xz{yz{wy{x{{yz{zy{|{z|xv{xwzywyzyxzwwxwxyxxxxz{|{{}}{�~||{zz||y~z{{{{~}}~|z|{y{}{|}zz~}}|zy{{zzyyz{{||zyyz{{y{{y|{xxxyzyz{zxzywyzy|{xzzyzyz{{|~{{{zx{{|}{{
{y}}zz{z|~|z|z||}|yyz{|{xyy{{|w{}yz{xyxy|zzxwyzxyzuwxx|zuvuwvuuvvuuuwrwvsstttrrtstuttrorprpqsonmoonlnmmnpqollkkkillklkihc[YVOA;AHJIF>;<AKUegfmsvuqqqrpkiiljlmmpnppnpusi`]\UEPdfde_PNSZcmv|����������������������������������������������������������������������������������������������������}nkrz������������������������������������������������������������������������������������������������������������������������������������������Ç������������������������������������������{aHQ]gw������������������������������~�������������������������������������������������yio~~������������znsxwtu{����������������������������������������������rghcporxy~�������������������������������������������������������mOHKHFGJIIGGJIGBFIIOVZ]elu�������wsmdnux}�������������������������rRA>=>?><<;>>@@?<==;;;<<;99;9:::;>@?=<<=??>?BA@=?DU`_ehefegoy|yz}|y|{{yz|~}||||{y{yw|{vw{}zvx{xw{{uvxyxwz{wsw{xtuzwuvzwuwxwvyzvtvuvuvtttzwtpvrpmqttrrk]SXUTTSOMfrtk]luvvxn_`c^ZTB4?X^abc`_X^bdabc`^`^WPIEDGHJLMMMPOSPQQQPRWY^cfdcegfhll{�����������������������������������������������������������������������������������������~}}~|}~zz~{|}|{z{{}~}z||{{|{}~}|}}zxzxz||z}|yy}|yzzy{zwxzx{{yzzywzxvvvvwzywvvxzzw||yz�|||{yzz{|wy}|{}|z{|zy{y{|{z||||z|}{{z{{{yxz{{}||{|{}~x{{yz{yyyz{|yzzzyxwxzy{}zy{{{{{}{z}}{yyx{zy~}zz||}zxzxyzz{{xy{z||z}zw{ywy{yzy{zyyyxz|wz|yywwywxzxxvwwwuuwvwwvuruvvvvuvuuttusrttttqstrrqpspoooopnollmloooqnlmikjhlilllgb\XVL@:=BLKIE@@DIXcfcfluvvsrrrphhiikkmnopqoppvxha]\VKR`ghd\RORZcmu{����������������������������������������������������������������������������������������������������~qks|������������������������������������������������������������������������������������������������������������������������������������������È������~�����������������������������������zeQKKT^mw����������������������������xxz�����������������������������������������������u\bdku�����������|TCJVXUS_i{w���������������������}fcp~��������������������tqsnqrwuy����������������������������������������������������jOEDCFFFEFEDFECFFFEC@CFGO
[emrx|������k9.8HWdmmv���������������������qRA<===>;<<=??>>>==;<:9;::99<===>??AB>>A@=?@A?>=@OioM:.+45=_uy|~}{{~{{y{}y{}{yz{yzz||yx{}{xwyyxzzwvxyyvwxwvvxxvtuvtvuvvtuutuwuqssrqqusstxtqpsqopoqpmhXOUTRSSRKIiqreWkruuu~}l^``[WT@3DV^_`cb^U^ca]aa_^`b^YQICDGIIJLMOLQQQQPOLPRY^befgggijk{��������������������������������������������������������������������������������~����������~~|~~|{}|{|}yy|zxzzy}{{y�{x{|z|}|{}{xwxxyzz{{uxzzyyyvzyvvzyvzxwxyxxywxzxxxwwywwz|zyzzy{|yzzz{zy{|yy{|{|yzz{zzxy}|zz{{}|z|zx{ywyywzzyx|yzzzzzvwzyz{yy{wwzzzyvx{xvvwy}yvxxz{xz}{z|zwzyyzxy{xzzzyzzzxvxxyzzx{zzzxxywy{xyyyzwx{wuxww{xxzwwywwxxxvwwuvuvvtuwxvvutxtuuttutustsppsrssrssqooqnmnnnpklmnljkolmnklkkljjlhljibTVSL>;;?FJKIHEGMYbgedfouutrprrpihijkilmnnoppotvf^][WHPaghc[RMRYeowx}���������������������������������������������������������������������������������������������������}oks}������������������������������������������������������������������������������������������������������������������������������������������ć������������������������������������������{cQJHHNYfv������������������������������{x}��������������������������������������������q`^^]`kr{~�������tQAENVZ\ZUT_y�~������������������kXV[\dqw�������������������������������������������{|�������������������������������rnkVFCCBCDDDEDDCCBBCD?;;956;>KU`cfmt|���mZOF=8;<I]lxz����������������mLB::<<=;:;<==>===;;:9;;989:;=??>?AA@??BA?BCDC@<;S��9*%'-/0Ywy}}{z}}yzz}zu{}zuxzxx|~wvxyyxy{yvxzyvyzxuvzwuuwwtuvvutuvusstuvvupqtssrqqpruusqrqnlmoomgXOVTRRQSQKNeooe`osrrt||i[^_YTR=1BUZ\^a_]Z_babc^^ba^__[QHEDGIJJKMLQOPQOOOOOSXaejifikki������������������������������������������������������������������������������������������~�~z~~|{{zwzzzz{|z|{yz{yz|{{{{y{{yyyvvwxy{zx{{wwzxxzywxwvxwuvzwvyxwwvtwuuwxwwxywzyy{{zz|{zyxy{xy{yy}zz{zzyy{z{xyxy{yzz|wy{wwzyyzxyyzxywxxxwzxyyyywwwxxxzwsvxtvvwzztvwwxxy~ywzyvvvxzww{wx{zyzxxyuwwuyyxyyyzxyzwwxwwyy|zwxyuvwuvywxzvuvx|yxwvvusvvuttuvtttsvwsstursusrtroprssrrrsrnopnommnonjlmkjlnkljjlhjjjjijhfaYYVKA>=?DILJJLJMVaddfdgnuwspouuqkiijkfhkmoooo
nusf^^ZPJTafcbZPLQ[dmvy|���������������������������������������������������������������������������������������������������}mks|������������������������������������������������������������������������������������������������������������������������������������������Ą������������������������������������������zaOJIDEKR]l~�����������������������������}sp}��������������~��������������������������qb\]][\`doy|}����gVSRWYZ[ZZZUW]ls}�{~������������}n_\VPRWUThu��������������������������¹���������������pmtz|������������������������xUBDGDDCCB@@AAB@A?AAA=435443786;DDFOW]dimttuhTA8:=HRY`lw}{��������������nH;:<<<===>=<?>=<;:9:8989999;?@@?@A>?A@CCBDFEE?8;X��<+#&017Yx{||||}{uxz{y{||ywzysvzwtuvwvwxzxuyxutvxuuxxtuvwsrvvutuutswusrvxrorsrpoqqrvvqqqqoomnnh]QOSSPQOSRLNNcmllnorrqpz{hWZYYVQ>2?VZ\]]\\XX[aa^]_`_[_^\XTKEBBHJJKJNLOROPMMNJMY`ghfihhm��������������˾���������������������������������������������������|����}����}|}��~~~~~}z~}ww}|zy{yyzxwxzy{|yxzvvz{y|}zz}xy{ytvxxy|{zyyy{{vzywwwvxyuuvxuyyvwvruwvuuuvwyxvyuxzyzzwv{zxywvyyyzztyzxzzyzyyzwvwxuw{zxyzwwyyyyxzzxyyuxxwxzuwwxvvuuvwxyvxvvuwuxyvvvvwyvxzswytvwvxxwz{wyyuwwxxxuyvvxxxyvvxxyvuvvtvwvyuvzutvuvuuwvuxtrvusvuwxuuwuusqutsstrtrqrrrpqsprqppqprrpqopononmnlmmnlklkijkiiiiigghggedhb`ZVUK?<>>GJLLLLONVadccdchpttqqrtsqigihjjikllnonmtobZYXPJVadcaXMJRYclrx|���������������������������������������������������������������������������������������������������{lks}������������������������������������������������������������������������������������������������������������������������������������������ą������������������������������������������aNJFB@CEJWbp~����������������������������qZiwz���������xvsuw{������������������������s`]]^YYX\bckqvyrbYY[[\]^[[ZZYZVVV\_`hoqsux{{|~|}~{voj^WQMIFFMZgs����������������������������������������YMV`hsy����������������������YKRMHHEDC@B@A>>>?A@=822489=CCDCA>>=;8HaQCIMKC637:?FKPTPKZy������������hF99:8::;=?>;??==<=<9988889;<>?>?>@>@AAACEHFFD;36U��4(!'.18]x}{x{|{yy{zwv{|zyy{xx|{vuyzwyzzwvxzwtuwxwwvuruwuorvupqutrsutrr
vupqrqmnpmotvqnpoomlmlh^TQPRSSRQOMNKNeomorpooqpxxgW[]XVO;0AVY[]]Z]UX`b_^`__Z]`^[[ZRLGCBFIHHLLNPRONPLLKOV]ehgegn��������������λ���������������������������������������������������������������|�~~�|{~~|{|{z{||yxyyyxzyzyyywvwywzzyz{yyzxwyzuvyvwxxyxvuxyxvyxywwvxzwx{vvzxxxwuywsuvtvyxvyyvxzyzzwy|wwxwy{wxyxw{yyyyxxwy|ywvxxyzyxxxyxvyywuwwvwwvywwyvtwttuuuwvvvuuzvvvuuwvutquywvxvswwvxvwvwvwwxxvzyuyywwwvxvvwtuxvuxwwxvvwuvwuuwutuuuvwvtuvrsvuuvuxtttuuuqpssspqrsrrossssqqqnpqqoqpooookmnmoljlkjjijikjgiifihfhefffib^ZWSG=9@GJMLMMLMNZddddccfipssqrssrnjhhigiihjjmnnnqlbYX[QHVbde`VKLRWckt|}���������������������������������������������������������������������������������������������������{not������������������������������������������������������������������������������������������������������������������������������������������Ĉ������������������������������������������}^NKHCABB@DJTcjnu|��z{���~���{{v��������fSY^drtstnnpmkggiklrx}�����������������������l[ZZ[ZYY[\[]cda[]^[[\[_aa`_[Y[XYVUPRX\agjostw{{z{|}zyslc`SKGHKNSRh��������������������������������������SGKJMQZafsz~~���������������|bjeTMKHGGDECB@>>>=<<;;=@CCIKJGC><@EG\�9&.,.02222111633-Au����������{dD889998:9<>=<<=>>?=?><=<;=A>><<B@>>@ABBDEGHGC6,0S��:'%*.2=]x|ywxxuvxyvtx|zxyxvtvzxtvyyvwzuttwwuvwuruyvrrwvqqvvspsuusvsqqstsqopnmnomotrnnpnlllkeSSTQPPSSQNQPJGOcnnnnnmnpryycW\[XTM<1ASX[ZZYYSY`^\\`\[\]]][[ZXWNECCBEHJJKOQNMOLILJJTafhgjm��������������;������������������������������������������������������|�~}~~~~~�{|~}|~{{{|y{}{yzy{xxwwxxuxxyz{yzzyxyzwyxuxyxxxwwxywxxyxwywxwwwxwwvvvxxvwwuvwtswuuvvuvyvwzxzzuvxxvyvwzywzxwywxxxxxvxzyvuvvwxxyyxxxuvxvrtvuwvuvuuyvtutsvutuvsuutuvrtusutstusvuvwutttuvwuywwuvvxwwwutwuuvtuvtvvquvvvvvxusuuuwvtvuutsutuvuuvtsuruvvutrsqqsrorrsqpqrpsqpqqqqospmomklnomllijnmkljjjijjihihfgjhfgfgedeeec]XTPG:6<DHMMLLLORXcdfddccfjmttsrttuoigijgedgilnmkmqk`ZZ\PGWbcd`VNOQZbltw|��������������������������������������������������������������������������������������������������xlkv���������������������������������������
���������������������������������������������������������������������������������������������������Ć������������������������������������������uZIJGCAA@@BACLPWjqskb\^[VTX][QSUZ]``eikqokHHLOSSTX[\Y`ecdddfhkotx{���������������������j]\YYZYWW\ZZZ[_^`aa_^]^^``bdb_[XXYYXPNNU[bgpuuy{~��~~{{xtmd\RMGE78Rq������������������������������������UGHGIJKKKRUcju���������������zr~xlbZPNKGHHFFBEB?>==>@CFGJJFA;66?DDh�~7+.0//02211324130Ap�����������xbA98999877:>=;<<=?AADDBFBEGIGED@B@?>?AABCDFFCA6(&Z��-'&+.3<_uzuyzutwxvtt{ywx{wtsyyuuxxvuxzvsuvrruvtrvyrruunosurquuqqtuoortqpronkmolnopnjnnnmmj^WQTPPQPPQOOSOHHMcknmmmmmppuuaQYXUQM80COW[XYYYTY\^\^_][\[Z[\[Z]^ULFABDEGHKMNPPOLJKJJNU^egki��������������˻�������������������������������������������~�����~������}|}}}}��~|}|{zzxz}zxzwvxxvuyyuwwvwzwy{}z{zxzzwxyywyvvxwuuwxzxvvxwwyuututvxtuwvvvturxttwvtuuvuvuvxxx{ywxvwyyuwwtvywwzvxytuxwvxwwwuuxvuxxtvwtvvtuvuuvsuxqtzttuprtttsrqvtrtqptrrsrrtssttvvtuttvvvvwvvuuvvvvvvvvsrrrvtuwutuuuwvuvusruvvwvtrvsrtttvutsrtsrutqtsttoqsqqpqrpppoonnqpoqpqrnnmmllmolkkjkllkjiggijhhehheeggggfecfddc\XSQH:78BILLKKJJM[dbefdbbbejorttssutniegfc]^ejkmljmni]Y[ZMEX`bb_UIJSYdltw{���������������������������������������������������������������������������������������������������zlnv������������������������������������������������������������������������������������������������������������������������������������������Ã��������������������������������������vWHIGCA@?AA=ADFRj}paSKED?;<;74884;99841022466225GS]`addbdeeehilqw~�������������������~j^[YXXYWWZZ[YXZ\_aefeg]ZZ[^_acd[ZXTUUTKMJIN[adm{�����|{}|}|xnbVO;345JXr���������������������������������UGFFGFGDEEFFGR[gqy|{���������tq{~|{qkc^SMJHFGLIFC@;<:>BEGGA73+-;AEn�{2+-0/./0122233221>q���~���}��}r]>8899889:;<=<<?=??BEFFIKQ\b[[QGC@?>?@AACBCDA>3)+]��+'%*.18esww|zuwywtsyywxywtswyvswwwvvwutuusqtvuutuuqttrlotsprurooqolpqmknpmijmmhknllmpllmh\OPOPMONMQMQTNKJAI`mklmmmnmotp_RVWSQI42BPVXVXWPQX_][]\\\^]Z\[[X\^[ULE@@ABDGJKONRMILJKIIVbeig��������������ɺ���������������������������������������
��~�����|}�����}{|}{}~|{||{|{zz{z{{{wxxvvwuuvwtvwxwxxwyzxu{ywxuvxwtwwvxzvsuuvxtrvxttwurusrttrsstuuqsutrtvuvussusuutuwyvxxuvwuwxvwwwuxwrwwuxuutuuvurtuuuvususuvtsustuurtspsrpqonqopsrqrsoqrpqqoqonrssrqqursustvsswtttsuurtsqstssqprutttprsttsssstsstuuvsrttrrsqrsrtqqtssusrrqqppoqqpnqonnpnnmoplnonnmllimmklmjkiikjljijgfhfiffgfecchfddcceab\WVRKB;:BJOMKIIHJU`cbdeefcdfiopqrrquvmfcge[TZcjkmlklkfXVYVMIV`c_]TKJTZbipv{~��������������������������������������������������������������������������������������������������vhmu������������������������������������������������������������������������������������������������������������������������������������������Ć���������������y��������������}����������vUHFFDA?>@CAADFRl�teXMD=;:87422324311-++++*,,,+*<Q^dhfhefehljilnqw}������������������{j\XXVWWXZ[Z[YYWZ]]bdhaRTSROQRPPPQPIGFKGJD9& "+g������mgf_]_gf]QH4,/3507C]r������������������������������RIIEFEDDDCDGFDCFHWcjpx~zxxvuninx{xzy{yunc]WOKIGGIHFD@@ADEB=1,&%.5<u�w5(,0-,.//2211122->u��|���|qZ=68898799::;:<=<=?CDDFIQ[glpz^RJJIEC@@CEDED@:/')`��,)'+-38fttxyvsuvtsvyqtxxtswyussxvuvvtruvqpqussuutrtuqnorqonqrpqqooprpkloomkllllljjmmmlmgYNNOOKMNOPMPQOKOH=Hbhjklkkkjouo[PUTPOE43AOUUVUXRRWZXYXXZ[[[\]\Z[^]^^WMFCB?@DGJNWcd`\^^\WYgkqw��������������̻���������������������������������������}~�~~���}}}~}�|}~{y~~zzz|zyx{yyyyxzyuvwvvwwuwvvvwuwxxvvzwz{zyzxswzvuwvvyxtvusuvtvwvtwuttvtuuqqvqptrqssnnsrsvtqrustuswxuwwuuuvsuttxwuswusvuvzuuuututrtttstsrsrutsvttssstsrvumjghfkinpqtppossqqqqqqqrqnoortpsrrurpuuqtsttsttrqwrpnprtrtspqrqsrqrsrsqsusqsrqrqrsnoprqpqrqqttrroponopoponolnonnjlonnoonljmjhjijklkjhikihfgfhighgefgdffegegcac__\XTOLD<9=KMKKJIHLVacecbegebbeinqpoqpusmgegeYPXchklkilldWVVXNFV^`_]SLLT[aiqt{~��������������������������������������������������������������������������������������������������ukpw�������������������������������������������������������������������������������������������������������������������������������������������ă���������������{������������������~����~sSHEEC@@>A??ADDNl~ve[OE
<::96313200/-,*(((()*)+,1=O_t�������}wu~��������������������whZTWUUTVX[ZZZ[YZ\]^_\WbaURJJHA;:7:9:86695.!p���Ǭ}TI?*"')!(0.//20;Mh����������������������������QGHEDCEDCDEEB@>96:?JTahcajib\]`adeiknpuvuld\YSKDB??@BDCEE@:0'!&4t�v7&*..-..00010131)At������||yU:67997778;:9:;<=??@BAGLV^iptrYYUXTLG@?BCBDC@9-#*j�{1)&,.0AhutvvussprsuvtuwvssxwqrssqtwtpqtupmstqpttpqttqmprmnqqnppplnonkjmmkkonkjllfkonmjdSNMNLLLKLKMQQKKMGC;LdgijigihikplXOTRONE52@MRTVTWQNUXVWWWYXYYZ[ZY\]\^]ZVNHC>?AEHNm�xwxwwwxw{}���������������˹�������������������������������������������������������~��~~}{|}yxvx{xyyyzyxyyzyvxvsuvrsvstvvtvwtstvuxxvuwssvttusrvurrvttuusutrrsotvqrtsmpqoruppsoqssqtuprstsutuvuswvssuuttqvwsssuuuttvussttwursrsssrsqprqprsoqrqqsqpmf`LDJMSV\aaecfjkllnkmnooj\Z^abdgkkknnrtqpqqssqrqorsqppqrrssporrppprqpoporrqsqopoosmmrpppoqpmqqopplnnoonooommlnkmlimmlnnnnkjkjiiijgijhggfedegdegfeddecbdcb`aaabaZYTTQKE?9<CKLJIHFIS`baac`ccdccehlqqpstspkebdaWOYdhkjijkjbVWXUIFX]^^YOMMSZbhquy|��������������������������������������������������������������������������������������������������uioy�������������������������������������������������������������������������������������������������������������������������������������������À���������������{zzz~���~�����������������rYHFDA?@?@>?AACKn��zg^ND=:;:53320..,*+*(&'(**)*+15=y������Ż�������������������������yiZVWTUTSUXYXXZY[]]^a`o��k[PHC@<?=<:9987672!f���Ĭ[NA/ "#'/-)*.09@V����������������������������JEFDBBEECBCBA?:94126<RfYb}vbXNIBHIKMOUWSYZZ]]TC;:96569=CFD<3+
!6w�{7(,0..0./0201265+Gy�|~�}|��{y~vP778997889:9998;=??@ABHMW`jswi[WU\RNG@>@B?@A@:+$,n�u*+,.41Betvwvtuxsuvttstuttuvusqrrrvwontsroqrrsrsrprspoprolopmmqomkjmljmkkiiljgikjflmlm_RJPNMKMMMJINQONQKDEP^cijhighhgfqmWMQOMNE20ANRTTTRPOVYWVW[\WWY[[ZY\\]]\[^YQIA>?BGTpwrtuwwvwuxy|}��������������̷�������������������|y{}~~���|vy�������~������~���|zz{wvsrruxvwvxyx{zyzxvwvwussuvtutttuvqtvuvyvuwtstsquvrrtsrsssttsrsqrutqsruwxrstussrnoplnqmmqpmqsrrutuusuupmloqqqpuuqqqsvtstsssrrtusqtstutqqqqqpqrpqsqprqpdQJ9&$$&(048;AFMT\_^^afhihP1,06<ALOUY^ggilmoqupqrorsmnpsqppqrppqpppoqqopqrsnprpnoooonpqonppqmjjkpnnokdknkonmmkklkkijmhikmlmhhjhjiiigkjgfefcbddbededddaab_`baba``]XUTPJG@::?FJKLGDHS\`acdb_bcc`acglqppttspjdcc`VP_fgjihhig`WUVSGGY]_^XOJKSYakpty{~�������������������������������������������������������������������������������������������������simz��������������������������������������������������������������������������������������������������������������������������������������������z���{}����������ysop||}���������������v_MHB=<>?=<=>@@Er��yi[ND=:<;54210..-+)''&)*))))*..9�����������¿���������������������zhXVVTTQQTWYXVYXY\^afh��hYLCCBAA@=;:978881$f���͸�XG=/!!%-.)),14<I����������������������������@BDCCCABBABBA>961//13MgYn�uaWOE?@??@AB@>BBEFC>:750//124558:7*1}�/(,1../.123236<A3A~}|}~}{{~mH67778778::98987<>A@ADJNVanssbZVVUNNJ@=@@@?>>9*%/w�`)8BGS?AhvvtrssspturrtvuqrurrrqsqrurnruspstqquuqpprqlnqpllmmmoqnlloljkmkjhhhhgiihgkicYKNLLKMKLLLJPMJMNKHHS_`bfhehghhfgnjVMPONOB21ALPQSRRNNUWUSXYWUVYZZVZZ[]\]_b_ZQIA?BCO`jmliklooknmkq��������������͵������������������hfeggedhdg\Z`^aijhijjlmillmmmnlonqonokmpplhmkjifdfhkhkmptstuvuvvspojmoprrtrstssvvvuwwtuurtvtuwututssusrutttsqstsvts~������|tf[X]^[]addeilprnnqrsvkU[\ZaddgihlolpsnqspqqqprrrsuuuusrsrrpppqqnpoommmR21/+%##'''$! #'-.36<BEJOQ8# ! 
!&'+3<FIQW\deijmmpturqstqprqoononnprrqopoqonoopnnoomlmlnmlldabacdbe^UW[[_abdedhjmeYY\]__eedfhkaacdeefged`g`bccdccd_`b_aaba^a`a`[VRTNHEA96;BDGFEBET_^__bdbbbcb`bcfkoprssrrgcadc_^cehhfdfgi_URSPFFV\]\VNKLQV`hnswy{}�����������������������������������������������������������������������������������������������~ohmz��������������������������������������������������������������������������������������������������������������������������������������������s��{wz���~yxyz}}vi_irx���������~��������rcPE?:9<;;;=>>Ip��yj]PE;896521210.-+(('&(&),++-03:�����������������������������������}g[XUUSPSUYXYXXYYZ]agh���]XJAEFEE@=<:8:><:4$i���Զ}VG=0"#! (/.+),/49G����������������������������GBCCA??AE@@AA>:60--/7TkVl{l]WPHB?>>?ABDDECBA?<;95-*+--255355,/���3&/7:85565697;LR3Ez|~~}�~|{|zd@57778768:999889;:?@BBGRYdqtm^XVTOJLE<:=???>=5("0w�X-Th�i>AitvsprqqrsqmrsusptsoqsspoqsppstqorrporppqrrojonlmplklqpjjnojjoohjjhfiljiihhcTLLMKLLLMLIKMOJKKADDXcbbceefdddhgimeQNSOLJ?22@KOQSSROLSTSTWVRUVVXYWYZ]\]^`_]^ZRIDCCL_hjjjklmmklmqy��������������̷������������������zdb^ab`aa^_[Z]\]`a_____^^^^]^\^\\^a^_`]_a_`\``^`[]a_bbdeghjloppqnje_Ydhjkmqqpsrosstuustvtrxtqttsssssttqttrqqsrsrqtq{����������nd][QPPNPSRSYaccfflnmTDLKBFJMUPS[^chfeqqnonmppprqqssttsqrqsstrroqqqpniYNJD?;8653-)'#!"! 
""!'&(#    "!"!!&)-2=FMW\cfhljjmnptroppsroorqpmnqpmnnnommmmmllmommi[ZZZ[YZS=<@BBGFJNOSWZbQ716;AFMMRSYVSRVZ\^\]aa`baabbd`bca``_a`a^_b__]YTSMIFD<78=DCA>>CSZ\]^`bcddaaaaacflnosqsongb``bacdcegefdee]TQRNEHV[][UNGJPW_fkruy{{~����������������������������������������������������������������������������������������������ojqy��������������������������������������������������������������������������������������������������������������������������������������������cjolnyxxwqmljkptvz}vib_ir}������������������tgVD@;899:;;<Ho��yk\OB>9853/132/-.+*)('('++,-/86;�������������������������������}���{cYZYVTRQTWVVVUVXZ]bhh���a[RKIGII@?<=>>@?<7&l���˵~VI;/# %& "-1.*)+/4:H���������������������������I@@@@??@AA?>?;73/-.04ZoUrxdYVSLD><==BEDEFCA??<863--+,-.24455,3���.(4HTURPOOMLJGZX1Qx{~|{~xw{{vcA7667996::976789:;>@CBGT\fqof]WRMNLKB<;<=?@==5& 9��V.a��K9Fmrrrrtprtrqosrrqqsonstspmprpvrqqprqltqnmpppnkmmjmpkjlmjhhkieijedihffiighif_RGKKJILMJJIFKIJNIFCMZh`^`aabcabbfegkdQKLMLJ?/0?KNMTTOLLSUUVTSSWTUXYYZZ[^^_`]\^\[UNGFL_fiklkklkkkmov����w��������ɲ������������������{ab`aa`a`_a``a``_`_aa``bb`^]\[ZZY_`^^a``bbabbdcbbdbabcccccdggfdfffdbacbehikjmonmmnnoppsssuurstrssrqsqpqtrqropqqqrqn}��������������ytnfc]TMONJLMUZWPX]ZZYSMKEBBDFIKPQSZ\acghghjhikmmmmnmorsrqrstsrpnmojjfc]WQRIC>74.+'#"!       ! !!! "  ! 
!&,4AJQW\chlikmpx{||zttqnomonoooikmlmkjnllnh_[\\[Z\ZQQNHFA@=;=CEFF2!%'*155235;:BGKWXYYZ]``_`bbda_^^^^]``[[WTOOJFD=45;ACA=;BS\Z^\]`_aa`_a_`^agmnppproogaa__`acccghccfcZROPIAEW[[ZTKDJPV^fkptx{{|~~~�������������������������������������������������������������������������������������������|ifpz��������������������������������������������������������������������������������������������������������������������������������������������bdhiloghklnmmoqttwxrhcbgpx{����~�������������xl_MD978888;Jn��~qcYJCEHB86797034/00+*,-+,-13<5@������������������������������}���wdXZXWURPSUUTSSVWY]akh���h_XQMJMNHFBACDEEC9 %l���̷�|qeVE@CFC80159@?<92139M���������������������������w;9;;?>???@=>?;61,++-4[qYqr^WYXNE=888=AACEA>@==951-+*,,//3564*1�Ë0+8]wxnlpkimlhl[+Qqx{yxy~zxyytaC<<988777;98888::<?AEEIT_iri`]RNHKJFA;<=<==;;4'$;��Q)S~e;3Jnrosroosrmprrppqsqqpqpnpoppptnoqsqppsonoomlmonllsniiljiiiigfihfhifdgjhgig[PLMIIIFLLJGLLJJLIEFOZ`aZ^`c``cdeedcgi`OIMLKG=/1>ILNPROMNSVTUQRUVTUWZUXY[^__`\^`^_^ULKP`fkjhhijjijnow����~z��������ɮ������������������}bbceccccaefddeddffghfefgggfdc`acedcdeddfffeeeeffgegggggfghiihhhhhhghhhhhhiijjihkkiklkorssrrusqsrqsrnprrpqploqoqsqs������������������������yslb^]XUV`ffihhfaYYVSNJF@@<AA>DHNRSSV[]^_bfggjmmklpqqqprrvwuwvruuqqnjfb^USMF@:72*)''$""!   "#$!""!!!"!!! !
"#)+8?GNZiz��������~|{xwwtopmlkmmlkkld^Z\]]_^`__][_Z]\XRJFGG>5343--'&% "&'$$&*+.336<BGMVXZ]adba\UUZ\]\]^UVVPLFD?64:>@>==DPYZY]]`_`b`a`b``_aflprmorqng]_a^]_abaddaaedWOORJ@HTWX\YSRTUZ`gjpsuxyz{yy|}|��������������������������������������������������������������������������������������|mjq|��������������������������������������������������������������������������������������������������������������������������������������������w{|{wtojkonomnsssrm\V\djknw~���|������������}m]QF?:4558Gn����}tqqqi_WUUVU[]TNOE=FGIKLPVYS6=�����������������������������������r^SSNMMNRSTSSQRRTX[`hb���lf\TQNPONKIGCDFFF=&%t���ͺ��������}ushihiki^P</4;J�����������������ÿ��������t8225699:<==>=:3.*('+5rWno[VTRNB95348<?AB@=><;850,,+*+-.2464)	5��|,)8\plheghkgfhdQ,G`hjqstyzyyxytf[SLE@<87787788899=?AEHKWblqg^XMHFHHEA;9:89::82'%4��J4jtT50LlrrsqmprplprqnpsspoqomnollorojmqqmornkmnmljonhhlmihmlihjifehifgjgddghihbVMIIHHJKGKIHIPHFKIAEPY_^\]a_a_`bbbeeaej_MINNKG<-1?JHLONOKLPORSRQTRSWVTTY[[[_^^Z_[\_`ZSON`ghhfffghgilmu��������������ǫ������������������w^dntqmqqqrrqrsqpposrqqqqpqqqqpqqpnmnqokhkmjihkkjknnnnmnnmllmlmllllkkklnmkjkklkinllmmonooqqrtpqsqrrpkprqsqlmomnpppt}��������������������������������{ywxrrogbcaa_^^^[WUQLJI@A=8B;ACJGPPU\`__bejjghkprvqtvvvywyyyzxywvutofb`WRJE>75.*'%&&##"####"#""%%''&&&%#"!! !,Kj����������������zuusrronmjda]^]Z\]^^_`abcefghfgfb^WYWSQLGEA@>:3+!
$*18>@EKRUQD;?CHLPOQRRSPLJE::<??:6=DPVYZZZ^^]abbb`]^_acflooopqpme]]`^\]^``a`]__\VQLNH@ES[`a^]_]chhkmstvyzxttsuvxyy|}|{}~~~~~}������}|�����������������������������������������������������������������|ljq{�������������������������������������������������������������������������������������������������������������������������������������������Â�������������{zvquwnnjjriiw}{{yz��}yyxwvvux|���zg[XSG;756Cp���������wqrnrx{~wqngcekux|��|c/?����������¾����������������������xoaVQEBAGNQUVTSSRSX[_cc���tncWUQLNQMMIEKLHJB(*o���͹����������{srttwpn[;04<N����������������ÿ���������t?631-10158::;71-(''*5v|WulZTQOL@742459<>AA??<<96/,+,++.02465)	<��i),;IC?@>@@HFCGQK*?PW[^dhntvywxwsmgd]TKFA?=;::997:<>CGJOYcmpf[QKGGHHD@:9877776-%$>��?,at\45Okrsqspspnotppprqppooqprnmnrrooqpnmpqkimpllmolgikjggmkhijheeggdfid^bggd^RKIHGIIJJIEDJJHHMH?HS[]^XY_\Zbbabccbbbdf\LILLIG:,0<FHJMOOJKORQRTRPORWVUVZ\X^b`]]`]]`b^_[[bggebeecfhkijw��������������Ȭ������������������oU���������������������������������~�}}|{zxxxusmz}wutuvttsqqqsqsrvvttstutrrrrtttuttuvtstvwvvvxvxyuwvtuwtqmnmonnpq{�����������������������������������������yqnhb_fhehihdca_]WTPLLJCC?DC@BHKRVW[\_bfkhjmopqrttwxx|�������z|ysoid\WPKE?95-+*&&&#%$&&*,**,+(''(((,5ES[hs������������������zxrpijkjgfcdcaa`bbbdddfeihihkolkhdffhda`VIFA81**$"(.3)$'-46BFGLKDAADGHFBCIUY\XYYZ^\_a`aaa__``bflnmmmnpk^\\^^^^^_aa_\\_ZQLLMG>EWVTW]dcdlmkrspqu{zwtttstuvvvvxxwwwxz{z|}}||ztfdiloquwz|~~�������������������������������~����������������������xmir|�������������������������������������������������������������������������������������������������������������������������������������������Ā��������ǿ�����������������qfklnqlmnnjiijqrv{}�k`^^WLC;8?l��������xrpnnox�{uti^bint|��|t\+I�������������s`e�������������~~��|soibZQKB>BKPRSVVTRTX^^`���wn\SXXRTTQNKHIMNOA(*m���Ϻ�wrnl]\ead]TT\^`\WG2/4<R��������֪VLV_a\b_addeu����p?<:850,-/22454/,&%%)8rzUskXTQNI@732336:>@@@<<:950+)*)+.13453(
D��^(5?@:46487579<OF)<FLPRW\_chmpsttpkkhgc`ZQJE@?=;9<<=DJMRZemncVHDCEHF@:7651/01.(#!?��7N�qV32Pjorpooqmnpqmptrllpqpnoqnnpqlptqkkqpmikmkimmlijjihhhihhjfddggedfe`_dfbYQJHHGHIGJJHELKGHJJBETWY[\XY^ZV_b`_`````be\HFLLIE:-1<CGHMOLHFLOONPONQVTSTWYZ\]ZX\^]^_^_^a__cea`Y_\Vbb[_gy��������������ʫ������������������ca��������������������������������������������������{z{{|yusustzwwu{|{{yyzyxwxxywzzy{{~{z{||z|}~�����{{���zjknllmmly������������������������������������������������vtnhgggghjoomlhga][WOHACBA=ABKMOTYZ\adfillossyz{~���������������}wria_bXJEA<8711330100+-0/4320//05M��^_epqw~}�������ztpmlw||yxxxspomkigegeddgfhhkllmklsutvvurpohf_[VOI?94+*$""$*/17?DJFFGJMUUWUTWZ[\_^_`__]\_bbgkmmmmmnh_YZ\\Z[\\^__ZY\WKJLNE<ERLGNWWVW_`chfdjqvuwxxuwxxz|z{}{toopqsrtwyxvv`MMLMTTZ^`cfirrux{}��������������������|xtt����������������������xnjs|��������������������������������������������������������������������������������������������������������������������������������������������w���������������������������nqtmgegfggijkklqsu|~}jUY^[[XI?Ad�����~~}xtsonpx~xuth`bhpx~��}ua.P�����������������������������{}}xngdbbc^ZTMDDFGNQTVSVZ]`b�����{ssowcWVNFADJMP@(/t���ԻsOJ`�YFpHTMSWMNHN8)+-3=T��������ڟ,('& "!&D����p??=<:765541//,,(#!"':o{UslYTRKF?620116:<??>:98741.***,.03563'	M��R0HPXQHJGHEGBBNR?,26<AHMSV[^bimprqpnmmiifc`[TNJEDA?@ENTW_hmj]PB?@ABB>830.+)(('%"&H��1Acp`44PnpnmonnmnpploqqinrnjlnlkkookprjhmnlhkmkhhokhhkihiihegigdehhcdgecbdfcSLHGFEDFFGIGHIIFJKGGNZ][[]\[^[[[^``a_^a`_bm]FDKJGC7,1>EFILMMHFJONPPNNRVTUXWWY[[W\]\Z`_^^`a_adeeWW[UYcWK\gw��������������ʦ~�����������������Ye���zuy}~{{||}�|��~~����������������������������yooooqmlimjjlqpr�wvuwuuusvutxz{ww{w||}y}ywy|}���}~����~hlnkklkhz�������������������������������������������������������wuu{��~���~{uqg^ZTRPM@>>@GGKPRUX\bfjmrtx{}}���������������������zzskiecbc_]YXTXUSPNKD9/2i��<57>=AAHDJKGDOSQTSVXh|����������~|yxvsponmmmnnmlmostvxz|~}����wtogb^[VRKD=4,'$% 
(+058ADEGFSSQSVXWWWWWV\]_djklomnof_YWYYY\]\]][XWWTJIIJB:COMJLRPOTVTWVUY\dfglmomtx{}~}�{mfefgjlnpqttvlb]UIKKLIDDNRSV^_ghkntxx{{{}������������~{{smhp}���������������������vklu~��������������������������������������������������������������������������������������������������������������������������������������������j��������������ſ�������������������������������{rzy|}|vdXM`}���}|zxuspnow|~wuqgcejqw{�}x\2S����������»��������������unkih_\\]^\\^_a_[UNGBDINSWZZZ^e������ÿĵoZO90.5BKRD&2����ұkQv�ٮ�ǰ��������:'*-3<T��������ٝ1"..+%!%)+N����nCBB@>?@@>;;82/,)&%#(6sxYsjZUQKG@720027;<<==;7563/+)*)*-11254(
+Q��K.KbsrlmmobgdccaM1.-/39>CKRVZdjpuxxyvvsonlkh`WURSPMJMTYZajmgWF:99:=<93.)'&$"#&%%(K��0I{~\..E\]bchijmqonlopklspkhmljknnnopmjkmnkimjhghlhifijhhheeghdedda^debadd\QFGGFEEFEEEDGJHCHIDAR\Y[][[\]\XX[_^[`^[`_^aeVEBGGEA6*1@GFHJMKDFMMONOOQSSRVWWVY[[\_\\]ba____`cedbWV\TYeYV_hx��������������Ţ~�����������������I^��lD8CGFNMONUWUVVVY[^abgjiploovrrvvxy{}{�������}[MHIIIHILJJMMNOUXRSTUU\[^a`_d^^^`adceediihhghjjklqw����rflmloljmw�����������������������������������������������������½����������������}lurrnf^SMID97?>=BKSY_chlorttxy|�������������������������������}y|�}wyo_;*7s��8:=@<?=@@A@=;<9=>9>G^ntwz}��������������~~{yzzxvtuusstvx||�������������{vnkf_ZTO>:3*&$#"'CbC<BCCCFGGHINOUZeddjgllg\VUWXW\ZX[ZWTTTQGDGG@:BLLHKNMNNOOSSQTX[Y\b`][Zaglnpvyrbeeghhjkiiklorsti^WXUND>>@FFKMSUWZ^beegjmpqtuxzz}~~|xvslb]jz���������������������vhkv��������������������������������������������������������������������������������������������������������������������������������������������h���������������������������������������������������������c,Ei~��~|{wtspootxzurpgdelsst{~|z]2R����������û�i����������s]YYYWZ[XWZ\[^`acbcaSGACHNQVUTWh���������qYD(+-.9FLH(4����ϳjN��پ��ſ�Ǿ��ƕ'*+.5=T��������ؘ3#/0(##$**O����mBMPHGGFEEFFF@842231/;n^Upi[UQJHB931/47:;>=997451+(()')+02577'
+Z��I)7UmpljkqrwojkdP7.-..036:DLR^kx����������|tlab`^]_\Z_a_dkleTB;768970)%%$"!$$%&Q��-Jo~U-,('0?MY\_jjginmlmtsmkknjkoplmqrljlnllkohhkjjikhggkgeddfddgecabddcdd^NGFGGEHFEEECEHFEIIEJNZ\XW[WXX\XYZ\^_^_^_^]]`bVBAEFC@2+1@EEFHLIFGJKMNNNQQRQTTUVYXW[]]\aa__aa`bdfe^VO]TYe[W]i|��������������Ğ~�����������������Gj��P
+!&*27;<CFIOU[Xfks�������h7 ""$#$#$#'))+++-,0357;:<758>8<B@ADEIKNRTSRV[\f���ymkkjjojjmu|��������������������������������������������������������������������Ź�fJfgnllpkcTLEBB@>CBKPT\bgijlmpqt{~����������������������������������}t`;.B|�w7>F=GB@@@@@>C=>=>>CBFLZbchlpvz���������������������vqrstuwz{{~������������������sfWA<=90:t{8...135441/17<@HNLNRZVTLNPPSUYUTYWSRRQNGDGD<9AKJFGIMMNNKLPROSWX[a\WTW[]adafiigqpoppnmkkklppon\KMZacekljggb]YRPRPQUVX]]a^`abejloqstppmf^Zgv|}||~����������������ugmu~��������������������������������������������������������������������������������������������������������������������������������������������az��������������������������������������������������������U'@f|���|ywtpnnnrvuqopfdfmutwy{{qV)]����������ü�������������lVWWZZ[\YZZ\^`__ecedWKEEHKPSRMQe��������ßk[JBFCGGLTH*;����ϩeQ�������¼�����e$+,.6:X��������Ӥ�~zue1*S\N.U����nIbqn\QSTRNNRJ?>ABHHFGN;QlfZXSLIB8211447;;9765550,(%%(),/1674%
+`��B'*AUWQUQ]nl]RSUF6-+,--/4:@EKWh{�������ǽ������d]]^`efejnkeXIA7473/*# ! $$%*]�w*Ccg6** %,6HPS\hdekokjlljgllkjmnmilmkhjjjehkihjnideifaegf^ddb`aedecaWGFHGEEDGEFGDEGFEGJGIUZYVYYXUY\ZUY\\\]]^_^\[[^aQ?>DDB@3+0?EEEEKFDGJOMMLLPQQQSSSVXWXZ\]]__[`aa_abba[RLTT\dWOZg|����������������|�����������������Ht��D"!)(14>HO\kz������sV!!#&))*.33:AJSf���vlkljkkhknuz}������|~}���������������������������������������������������������ɾ�Z@`]`QLOVRPOMKHJJLJKJIOW^`befilnrvy|���������������������������������s`92I��l:MgMmIGGEMMCIGH>GKLD6,HRNQQ\bekpsvy|����������������rmnoqruvvuy{||}���������������u^LHHD7		
+<�r-+*++,0/,"  "%&&(*/34338<@DEIGGJNLLLJGBBCA:7@HGDAFLIHGIMMLJKQVU\UTTUTX]bdbdflqprutrooqnmswwuloslrx|���������}uspmc^OSNRTOFISX]`bacb^Y]hrturttx���������������tdks~��������������������������������������������������������������������������������������������������������������������������������������������FX��������������������������������������������������������U*Fj~��~zxwtqnnorurnnmhcdlstvxwynV,h����������þ�������������gIS[_bceba_bffbbgffe_WPLOUXWUCE`����������wbXTVTVYUVJ*>����ʟb`�������ż�����D&,-06;_�������������̟8,~���8]����eK���oYdaPIPY^hhYNSXTHFK_mk]ZTMKD832258;>=;966764-)%''*-127::(	h��>)):\gb^]gtf\WVZJ6,+++.036;@IUfz����������������eVY[afggilmlf\UOFC<7.# !$',a�x(-Q^J1(! #'09GLXdbelnkkmplklnmiilljhkjhghifehigeegebdda]da]`bdcb`SHEGGDDCDFGGEEECAGKJJP]ZWVZXUX\\YWZ[Z[\[]_][ZZ]`N?>BAA>2*1>CEGEIFDHMPPMMNQOQSUUUVXZZZ[__^^_aa^]_adbZUSSS[cRI`j|�������������˼�������������������N}��B #,1&"#'(..219?Q]j������uQ "%((+27;CLXv���wojgjjggjmqyy|~���}xwx|{y{}}~���������������������������������������������������Ƚ�RP`fePUQPOOLMLILLMMKJEELPTXY\_dgjlrv|��������������������������������q`70J��eLssyzV\\VfbSRUYR]afQ60ORRVUXZ[`dglpsv{~������������unqqrrsrtuuvwxx{}~�������������q[LIGD7		
+<zc,+-*(*-/-(&'''((*(&&&&%'&&)(,-035999;=7688944>EB=?CFFDEIKKFGGMQOVQQRVUV]_bacfjlmnrrqqpqqqusuwwwz|���������������������~vrcE'+7?==BJOSUTY`cegcgim~��������������qblw���������������������������������������������������������������������������������������������������������������������������������������������8>i���������������������������������������������������¼��T)Cf}��~}zxvqnnpqtsqnkjeciqrrux{qX(g����������¼������������[1Pj������}yvtqroljkldbb_a`[90W���y����ߣm^SRQVXMCWN*<����Ė_q�������»�ŝ��?)+.15:a�������������ј43���s+^����V1���gJG7*/7BMSOC?@C<2AO]ol^Z\`NC85669;;B@==;::A=0++,*,-.3:;8%
+#s��:,*FozprmtuqnkplS4*)(*,/36;@JSd{�������������Ƹ�fW\]abegfhmoomomfbZTB8-$! "$-f�w$:z�_;)(@dH0(%"### &-<IO\bcgknnjijmjijkfgijffhhgefefefiecbecccda]abcd^OFCDEDCDDCEFDDE@AGIMSSTYZTYYTQXYTTVWWYZ[[[[[ZZ[\[L==B?=?1*2>BCEDFGCGKKMLNMOOPPTTOTWYZZY]\\`^]]\]]_bb[XQPQ[[IXdh|�������������ȸ�������������������G��A(L9797211327;;;:4/'*=Ynh\UOLKQQLPU^`fz������xR0@KOPNLKIFBB>?=9;?=:972&6@C@A@>48<9<=?@?>?@AGKIQU\v���vmhgifhighouwy{}~~{yy{~{z{zxyz��������������������������������������������������ǽ�LL_bfXSRNMMLLLLHJPQTO><EFEJLPTY]`bikpuy~�����������������������������}rZ4.P��\Erw�vamjmyn^YeaiqohP3.RRSUUVUW[^dinpsx{��������������~~{{vuutsttuwwxyz}���������}o[JHHA5
+				
+
+@vU+)))*,,0-& "$(*-,/124444533/000.*)((+*())(*',59657=EEGJKNLFEJNOKONPSVSUZ[]]agikjjnpoooqsqrostmtxz�����������������������uC-:LG;52678=AELPOPQSR_s��������������oemw���������������������������������������������������������������������������������������������������������������������������������������������09g���������������������������������������������������½��P&Dg���~|xupnmnpuutqnleekssu{~}xa(k����������»�������������`Y���������������������������Z4^���~���زnecTRUTIEM`Q)F���ο�Zw���������ëc�u+*),06<a�������������˚> X~Z-.f����N3���dI:*&*.21-*()*-,)8AOtmbbv�qZJD?BECG_LAF@:KWX3.110+*38<A9#$x��9+*Ec[VZ\_Z^_Z^[E3*&')+.04:@ITd}�������������Ƿ�uhdb`acffdhnnowyzz{uldXMA5/$ $.p�p,bzWM8"3��o;/39>6)+:79=DNUXbjmjgkljiiiigfjgbfiddfgdeggf_bfdadgb^_bcbYIDBBBDBCCBEFADC@@DJNUWXXXWZ]WTUZYXWXVVXZ\\\\Z[[VYWG:=>>=</*1>ABCCDFCGLLJKMLLOPMQTUUXXXX[\X^`]ZZ\]\_a^[[UPNWRGYdk|�������������Ȳ�������������������F���DfwKHMJAAEFCBBCDA9,#)S���pjf_hkaerh_T^������ugf�����������������~}ueW}�������z||y~������}u_T|���ujfgfgjifioqtvxwy{ytvx|}|}}}~���������������������������������������������������ǻ�IOZfgYQUNKLJJMMJQXi|]6<RVNNOSTUTPRXZ^ahqu||�������������}gccnx~�����|tX82R��ZBnzwqimm{qa`mioooeM.6^VSUUUTUVY_agkmsx�������������������{vupsrqtusqsuuwwxz}����wmVKHGB3					;mK%&&))+-.(!(-279<A@?DBABCA>;;91+'!
#).014;AEFIXa`]ZWWXYXWQQPPRVWW[]acefdfkommnrroqqsspvvw�����������������������oD-AUK>:9:8::7<?@>ADEEXm��������������kdov���������������������������������������������������������������������������������������������������������������������������������������������.4^���������������������������������������������������½��M#Ih}�~}}zurpmllqtvutqidfktxx~�~^ l�������������������������bf��������������������¿�����_4c������л�ogbTB55>HXbO*H���̴�q�iHbD(:OD,4Y7(B8,.5@h�������������ʣT !!&8w����N6���YC7+'+21-(),//.,&'*Kvmej������������SMke]h�h--6BUJGHIQP:")y��8()>ULDGAEC?DFINA1($'*+,049?HTd~�������������ɼ���������~z{~~}~��}����~sk`T>8,%,v�`1PDK^<1��d!&'' *325:JRJNblkhjnkjlljhihhedgfcfhebcfe__deb_aa^]cbbVFBECCBBCBAGEDFD?>DINRVVUTUUZXRUWYUUXXSUWY[[\\YZ[XWUH<;<<<9.)1<?@@BDEDOPNLMMNOOOPURVVYYZ\]]]_]\\\^Z^_ba``\[XSPQ\cj{�������������å�������������������B���Ext>@A=2489:::<=;0%!B���{j`[`da^a\OO@P������wo}���������������������yr������������������������VL|���tiggfghehiloqqrttvojhowyvy~�����������������������������������������������������ƻ�JKTgcTTTMMMJIPP\v���RC`}�����~|uqpnfbfhiowx~������������NARVK]ju~����|pX72O��VUYXjk`jmqmm_c\ajgbXG)B\STTUTRSUX[[aehmv�����������������Ǝmjd`dddgf_acbcgb`bddagwwqhUIHF@/
+:bD(#&+*+-/(!'.14:?BILJKNOKB8/)+-.3679;<?BHXjqrsoonmlhhaca]ZWZW`_^Y[_b`imjkkkihoqrrtvtu~~����������������������nC)DXL@;9:9;;9:;>=>AAFTh�������������mfnv���������������������������������������������������������������������������������������������������������������������������������������������+2\���������������������������������������������������ü��N)Jj���~}zuqmljlsyxuqkcejrvy��Y,z����������½��fr���������^f��������������������¿�����[+c�������ڽ�m]?##*,?P\R)G���ʶ��ٯ��oA|��}�|�{��s025<i�������������̴q$$)0E�����H?���ZE8**.30/(+6730,'#"Kuocj����������ɞc������l05Q�������k9 -��3),Jjsjnhhhgfef^D1($'()*-27?IVj����������������������������������������������pK&0{�b0Yu�s2$5��R#% "%))))-3Ghjhjmkijnkhhhgefggcdgfbbfhc`dda`aa`_be`SGCACCCADBABCDFEA>@GOSUUSRRTUVUTWUUTXXTSVXZ\[YZXYXXXWG;;;:;7,'0:??<?CACLLKKKMPNKKPRRSUXYY][Y]_\Z\^\\^_abbc``ab`]adg}����������������������������������G���FyrGB>806898999<:1$#H����wd`_`\YYVNQ=T������wo���������������������tm������������������������RL����sihffhhdhhimlnprsrkgcksmjosw{���������������������������������������������������ż�BOUhbTWRMKKLLl�����aRZ~�����ż�������������������������D`��|rvrv{����qV64X��U@>9KUWd_]YVYXRWSY]T<'G`TRRTTRRSXZ\]ainv������������������}SJDBDD=A@<?>??ABGDGC?ZlpoeRIIF@,5J1$#'-1013)!*,05AGLIC=78:9;>>>><:;;?Ziorvyy}}~}yywwvwvusqqonljnmjjgecgmoqrtyyw}����������������������n?/G_L?;:9;;::<:==>A@EUk�������������~jemx���������������������������������������������������������������������������������������������������������������������������������������������*4\��������������������������������������Ÿ�����������¼��H%Jk�����xrnklmtyxvpjccfnsw{��sW-�����������ý�������������[k���������������������������X+k�������ο�pZ@??>CDT]P'T���ʷ�����Ղ���һ��������/36Ak���������ٱ����z#(4d�����D?���RA@*$04206fyb;0.(!
Mzoco���ÿ�������f������\,:[�������_8 .��|2*-Ov|oqoottmomaD1'%%$(+.38?GUg��������������������������������������ú�������V"8��X1_��^9!4��J%&"!&,-+).3D_fegjfeijhcdeceeff`cdc`aeb`aba`_bb``c]NECAD@AABBBCCBDEB>?HRWXVTVVSTVVUUWTTVYWSUWXWXWUYVXWWURB89:885*%.9>;;?AABHLKMNMNMMQQRRVTVTX\XY]]ZZ\]\^`__abbba`_a``fj�����������������|����������������~C���;}wB@>6057866;<;81%"C����qYTUSKLMKHF5Q������xk����������������������nl������������������������HO����sjffggffhfgikoooookijppgfjlonp~�������������������������������������������������Ź�ANT`aUUOMLKOh���wXOVZ`��������������������������������S{������uv~���sR61\��M;8=Mg]^`dWU`jb^V\aP7*K_PRVUTROQXZY]cinv����������������ӺySKC?>=;9889;;<;=BCBACXmomaPHYe\4#($ (.)%#+5SVUG(!"#$""!!%(*.3;CGEB?>>?==???><:6/+3=DNT]`eousx~����������������}~vqhkmmpqvytz������������������������n8-I]G>:97899:;:><@@?DWn�������������~gdn{���������������������������������������������������������������������������������������������������������������������������������������������,1\��������������������������������������ʷ���������������J&Kp�����ytploovzzxtneackqv|}zmR)�����������ÿ���b���������So���������������������¿����X.i����������hYKMOQ]WU]H%Y���Ȳ����������ѿ�������}*25=p��������ޞ%"[���=%5i������DC���NVusTE74F�Ⱥ�Z0/$W|phr��jgj�������VSORJWtZ2:NePexjaqW89��{4+/RxlMNS\ijUS\_D/'$$#'+/26>GSk���������������������������������������¾������L<��I4s�yN-5��G%% %+.+*.2Fbhhihfijifdegdifeebcdb`aba``_]]abacbZJCBBBBAACCCGCCHHC=>HRXYXVUVTSUYSTXUSUXWSSTWXXZWUWYWUVTRB77:973)&.7<<=A@@DGMMNNOQMPSTRTWVUX[YY[\ZY[[^]]][`aba`ca`b`^cj���������������u����������������A��~=�u;>;4047766;==92'7���pPKTWRKLMLH@2S�����~wf����������������������sn������������������������LW����shcehfdebbhjkmnmoomjmrpnoqrsss~~y|}����������������������������������������ٻ���ŷ�BJS^`WWOLKLJZvq[`acka[�������������������������£����zv�����~��w|���{oR51a��L96>hlD_via_ityjXegQ7+IUTSUVVNIOTWYadipy����������������ѹwTKC@?><;;88;==<=@A>@CZkppcXb��k2
%2DO='"!!$%(()*(!!3dlm]B'"#"!!"$$&'(*,,.////576=AC>ABA??@>>>@@A==<4$  &'&+28BJW\]hmr{~��������������ymnqtsuzzz}������������������������i<*IWG>98699;<:<>;?A?DXp�������������|cfoz���������������������������������������������������������������������������������������������������������������������������������������������+.b��������������������������������������ű�����������¼��I(Mq������ztqmmptxyxupg`bkpu{|zkO+�����������ý��o����������\m���������������������������Y5p��������֚gY@6-.48P\L$\���ǹ�������������������z/15:s���������q$*k��f3p�������>J��K��ű�e4]����c6/" `�sfcfTKGD|�Q@L��UZ_[Rk|R3<=?E`mp|�\<B��n.+/TrZ312MfX<;R_A,%"#&(+-27=HWh���������������������������������������ÿ������??��?6mvS0)4��A$$$+-**.1Faikjggigfdeeffhgedcbbba_a`bb_^a`ca^TDCACC?ABBCCC@@EHD<<GPXZXUVWTTTXWRTVTTUUNMPVWVWXUUVWWWVTO@88::72'$-6===>?@BHLMJJNMJQSONQSTRVUSYYWVYXXZ[XZ^a_`^]_^]]_`ai����������������tq{���������������|H��zF�w?>;405::::;<=;2(!J��zOAPcaVPPQOGA1W�����~te����������������������ol�����������������������~DP����pgcffcdebchjjkmmnnfcegilnuxx{��������������������������������������������ˏ������<LQ[_UTSMLLKAMT\lrshX^�������������������������������x�����u}���yz���}jM00f��D:5Cqkfsuli\cwrjbldK.'GWRRTVU@5=BEKV[clu~���������������θqRJCABA=:<:9<=;<<=?>>CYnttgVg��R-!0DchE*"!#&'++-,(*;CQK8''+.-//1//1021553<AGNW^WPA3=@A@@@>>?@ACA=<5%   &%(-0;EMY_hjou{������umtrutssty}������������������������g;-J[H@;989:;:9<=;@A?FVn�������������vdfm{���������������������������������������������������������������������������������������������������������������������������������������������+/_�������������������������������������ӿ������������¼��I)Nq�������xrnmppvyywqjcdott{|{pL.�����������ÿ��s����������\o���������������������������X3s��������mh^F90,-@\_K%`���ϻ������˲�Ž���˺����,18<u���������i6NV���/0~�������0H��{N�����r/C><YoX5,  j�thb`YUL_��sR[��i������U:ASrq�����L9 E��k-+/VqU:39N_I;<UW?+#"#%(+-28=IYm���������������������������������������ľ�����}7 B��@<`vd5*8��8&'$ 
&+,((-4Iajhgffgbbfgdcefebccb`ac^^bb`^^a_b^MDBBBAA?@BBCCCBGGC?<KVWXYVTVVVYXWWWWWWUWTRXXVUUVXSSTTVTTVM=7:9751&#.7=<9>=>@JMKKMLJLPPOOPRTUWWY]YVVXYYYZ[[\]\][[[\[[]^^h����������������ljr}��������������uJ��rH�yE?;548<==>=::93&%O��vJYb\XSOQRKFF7Y�����th����������������������mq�����������������������{EQ����qeefbbdcbdfikkllmlc___`ckszz}����������������������������������������������ȅ}����w=QOX]XUNLIJRb�}�xfZYi�������������������������������������t����{z���ziJ-2i��B92@kmee`bqX[iif\aVF/(GRRUUSA$!%*17AIT_mx��������������ϳlJIEBA??<;=;====<;;<<CZqtsdOj�cF(#6Jdf<)!""'+++-&&0OOB/'*.18:?BCDDEDGEKRX\]hkcZR@07=?>>>=>@ABDDA=3! ##$'&***$(,/118;EJTZckjjPMV`bbadhtyz|����������������������h93JUJA;7979::99;<?@?F\p�������������xdhq}���������������������������������������������������������������������������������������������������������������������������������������������)0`���������������������������������������������������ü��E)St��������zqnkosz}|sjbcosvxzypJ1�����������º�z�v���������[p���������������������������U6}��������cql`RIIIOX]\J#e���̶���������¢�¿�����a$1:Ez���������n<��u���?%j�������<R��yG|����V/9PRUoQ1+ m�wkfc]Z\���rV���w������WAEd�}����UH8J��j,).EZB645;;86=NJ7'!#!"%(,16<HWl���������������������������������������ľ�����x2C��4<k�hH,9��9"(#"&/.((+3Hbiecdddccedcceeddca`_`a^_`^Z[_b`ZE@BA?A?@AA@BA@BDEC>BKVXWWTUWVTSXUQUTVTTTTRSVXTUUYVLHRUVSRSJ=89755/&%/9<78;=<AIMLNNJLQOMNPPSVWVXZ[UWXXY[[Z\ZWY\ZZXWWXVX[]b����������������bZaq��������������nI��pM�sB?<327=>==<;;<6(/S��y^iZOPONRNIGI5\�����}uo����������������������mr�����������������������zDU���~lffc]`bbacehikikllifa`bgmsxz~����������������������������������������������Ȫ��ffAPOY\YXOMIMY�����lSVYf������������������������������������������zz���zfG15n��<74:CFA@IGGDBHFHBIJ>,(ERTUST@'$$*4>IViu�����������ЮeLKEA?>?==@CA<A@;:;;>E]msreWpt[A%%6Iec9)"  "(,-,+&-N_G6*%.5>BIU]aha\jsiikgf\UXB/4<=<>?AABBCECA<0
"&'*+../3244442.-,1.).1DHNPQLJHGVu�������������������������g0.JSG?:8:678:8;<<?>>F\q�������������xagp|���������������������������������������������������������������������������������������������������������������������������������������������*0e�����������ſ�������������������������Ʒ�����������ý��@-Rz�������~xsnopt{��ujbbklqvwphG7�����������ö�������������Ws���������������������������U>����ø����q\IDDFFDPZK!c���ızPR?F0NWGO2MPM:UYT=$*17F{��������Ύ�Ѹfp�~-$^�������7W��vA���ɤM5`����V1*  m�{qkf_eba[XWp��napobbi]DBGGHkzprsOJ8T��a(((3;63313237<=90&    !&(-5<HZp���������������������������������������ÿ�����r3#J��4InzlE.9��5$("#'1,'*,4Kejedhfdcedbbcdcaaa`c`]`_`_\Z^bcWHBA@@@@@AABBB@CGFAADQWUTVYTTUSTVVSTWTUTUTRSWWUTTUVREHOTXSSRK;99754.&&0:=77:;;@HGKMKIMPHINOPQTRQUWTPUWXWWXXYYWVXWVURQTSUUY_���������������o>BSj��������������oG��mG�pA?:228;<::;=<<4%)N���qeYSNFDEEJQN:c�����~sk����������������������ou�����������������������z>\���|mfecab`ccbfehijkkllkfehknswz}������������������������������������������������Ɠn^siALMV[YYRMLPWhjhdVfWX\e������������������������������������������yy��wfF17q�{=85:><:8;<<<;<=<<?@<-,GRQUVUB/&%&*((+.33-)033:CKh�������ѪbSJC@?>>=>FexvtX@>=;=E]krobZszi?#%6Ig_;&#!!$')+-.%!1NRH1(&/4?Qaoqedhbbghgg\]a>'4;>>?BDCDDEDCA:/!$&(,-/44:<=??<=?CE@?BEFGJJNU\p�������¿��·�������������~`75NXM@97978889;<=@?=FZt�������������qchq~���������������������������������������������������������������������������������������������������������������������������������������������*-d��������������������������������������ű�����������þ��G.Sw�������~wsnonry}~}tkecfjnsvqlG6���������������d����������Yw���������������Ǻ����������R<����������oR.%'()4FXK f���ȯwkkYK3/0772(()3<90-2008J���������ۛOZR+;P'!-a�������0X��l5[�ĲV*3��¼�e2,!'o��}po�����������hx�~ysVOMFNt����WI7T��c(('08845574389;91%! 
!$(+08FXr���������������������������������������þ�����n+$L��,+3C>(C��1%'!"&.,*,-5Oed`dfb_bb_]_bca_]]^a^[\_][[^e`KA@?@A==?@A@CAABDD@<GRUTTZYXUVVTVYTQV\VVUURTVXVTWVUTREIOSTPPOG:78542-$%0;:668<:?GIMLKNMJKOOMPTTTVYXWUVVVYWWXZXTVSSRRPMNNNOS_���������������O)FRb~�������������mM��c>�n<><56;>=;===?<2%#A���of^YPHGGHLOG6d�����{ok����������������������nt�����������������������w=_���}jbb_aa]b`^cefiijiklifehfhnsy{����������������������������������������������ӱ�cWf�w<MOUYXYSMMOVhcXVdgTY_m������������������¿��������������������~ww��~tdB1:x�v:82:@<:<:<=;=><;:>>=+.IRRTUVB,&')-/27@BCLW`c\O;,Sm������ЧfSHB?=>=?Fm����`BA@??E`knl^Yx�i6%9Lh_6### %()-0.&"0K^K3*$*+5Mcpqg[\]bggimdbU<(39>ACDCCBCCDA?;1 #$()*+.1679;;?CKPUMILKLLLKQXe{�����������ŷ��������������`26O^K@;:88999;;:<?@;I^r�������������sbir~���������������������������������������������������������������������������������������������������������������������������������������������*0f����������Ⱦ��������������������������÷�����������½��D.Vt��������yrnmosy~}~vkfbcgpwvpfG7�����������ù�h�����������Vv��������Ϧ�����~������»���S?���������ΚpR>:98=FRYE!h���˱������z|ughmhmjc^?33:H����������~$)'$/n�������.X��cDm��~O>4c����N2,!+v���x���������˽�������d\P]�����P@H8Z��_.))7CIIJLIHMMHG@/&!  
"&),18CVq����������������������������������������������n("V�},3R}d@(L��-)% #(0-((-8Ocb`cdaac__aaaa`_\]^`[\^_\]_^[JDAAA@@?@@@@AA@ADFCAKWXSSVYWVXWUTWURRTVSSUTSUVVRUWWTTQHISRTPOPE<97422-%&1;;558<<BINOLLQNLPQRSTWTXWYWVYVTZUXVYZVUUUQPNIJLKIKP\}�����trlbem���FPl[Tz�������������gO��dB�q;>>:;=@?@@AAB?4& )S��vaca`YVYVUSND3a�����ypq����������������������rr�����������������������uAc���{ja__a```^_dfghhjjjmha[_``eiltz}���������������������������������������������y`i���r:PNUVVURNKKOjuqtxt[Y\s�������������������������������������|wpuzqz��~u^A3;�u<85;A@;<;BA>BJB@=BB?++IQRVTP:&"#%*.3<AAGZm�}tnr�{������̣\OEA???>AQ�����SABBCCHaklmZUnwb5*;Me]4&"!!&(),/+$#3YePK0  "$)+4J_qtfZX^bfhmkaSO?-5>ACFFDCDDEEFFB:!$''(+//248::=BJRPJJNLLMLMPXdz�����������ŷ��������������g6;UaK@;:::<;=:<;<>><K`w�������������p_ks���������������������������������������������������������������������������������������������������������������������������������������������')f����������ǽ���������������������������������������»��A-Xy��������xrnjmt}���{ofbeipuvrhE:�����������µ�������������Xz��������Ϫ������������¼���N?����������sfXQSV]_]_bA h���̴����������vqrnopjfC/19C����������T),(&# !=��������+b��]T���˯^0|����P-+ .|�������������Ƶ���z�q`VR��`?AQHF4_��S.)+Jrurkqmkmhf^M4%" "$%&*.3:F\s���������������������������������������¾�����f%%\�u-^�Z<%N��0(&#*2,()-7M^cda`afeb_b`^aa_]^_\\^`^^_]WIB@@@?A>@C?BC?@BCBDCNUXVQSXXQTWTUUURRRTOUUVTSUTQPTSQRVPJMQRSNKNF<85202,&'2::668::@JMJKKNJLPNKTUST```cgkliiade[OKRQQMMLFFIJIGL[|����mWHFC?U}�yKa{\L}�������������cX��`M�sBA>:<>>?A@?@A?4&.c��vc`b`\Z\YVQLE7g�����ynn����������������������hw�����������������������o>g���{h`^`_`_`\abcfehjjjlidbcdbbddfpy{y}����������������������������������������߹s���Ħh;KNVYSSQNHK_����qWZ[r������������������������ķ��������{��}vx|}z���uY@0?��o:76AVI@>BIFCR`XMKEMN+2QSTVWS1 $'+5=@BL_p������}������ΠVJDA?@??BL�����EBA@ADLajkhPHewd2):QgY0'" !$%),/-#$?accM.(.*%!!'+3DYmnc]\\\afe_UPN>05?CEEEBBEHJJLQNA"
#$'&&),.238:;>BJRRLLMKJLMMPWd{�����������ƹ��������������k3<V^I=:98C?BB==?B@>=Laz�������������pclp~���������������������������������������������������������������������������������������������������������������������������������������������'.g����������Ż�y�����������������������Ѿ������������¼��@.\z��������vpojkt�����ocbgmqsvvmFB�����������´�������������R{���������������ӽ����������RI���ҽ�xmmsnjRHQZ_`b`^Bm���ʫ�snkdY[`fbbZX]adb\L2-1;C��������ݶV 5IF85C��������{%i��\J���ܢK3���ū]0+1�����ļ���������z{~oSQepYTLcf''Ee_DH4
+"j��M))5k�zooqmnighiT3$ !#&',03;K^s���������������������������������������¾�����^"*\�l)WI(=9&[��+&% &*.((+.7I[cb]```][^^[]^\\\[[YZZ]__YPCCABA@>>@?@A@??ACFEFNTWWTSUXWVZYUTXXVUVVRWYVSPTURUWQRRVSOQSQQQNLD;63001+&'4=<33689@LNHLNMJOPOfeQVm���������������aMIJIHDBEDCCGTw���pK7BKH>^���\^`@Q��������������\[��VS�o?D>99;<<<<;=B?2'#:i��r_ibY[WTQMJJ>1h�����wmr����������������������fz�����������������������mFi���zg^`^]`]\^``dfhhiiihffgikjkjjjsuop{~�������������������������������������Ḕ�����cEMOWXUSPLIKZo��w�vWY_x������������������������ǹ�������������������}zrZ=0E��e>65N`PB=HY^TszaWX_{i)9VSRUWU, $%)4<>?J]o~������|������͚LKGB??>>>Ix���gECAAABLdlkcLFmwW1(9QeW0%"  #%)-0-$$5We^D+*-&%+.BYefc]b[W[`b_ZVP?38CEEEEFINTVXZ[VC!!$&%%&*-0498:=DIPPJKJOOOMKOVfy�����������ǹ��������������e/?XZL?=?JVSLLMNNH@=?Mb{������������~mclw����������������������������������������������������������������������������������������������������������������������������������������������'*e���������ȿ���������������������������³�����������ý��@7]{��������xpommrx}}zujcagnqrwvjC@�����������µ���s��������}R~������������ſ�������������LL���̹�yvustoYLSVV[a_cCp���ĤkK:5% )$!&2,%(',1:G��������ۻ�md��������������|(h��WHm���w=<\lr�q5.*6�������`_POIGTdug~��}�zNPDWt|z�tCH3$p��K*)Dwz`OOPHHMP`gV0$!!#&&(,05?M_v��������������������������������������½������Z(f�h&OXHD3"f��&%%  %,.*')-6N_a^[``^][^\\^^\\ZXVYZ]`^\MABBB@>=<>?@A@?ADCBEHMRTXURUUVVWXVWWXTWVQSU[VQVVRPSTSQRQUUUPSSPQOLD:6310.)$'4<=5469;AIJILLLLPSj�MGY��������������ͻ�HGHEDC?@AA@CRx��mSOW^^KPy���_B6Cj��������������\_��TV�oBD=96:==9;=?B?4(!9g��utp`XVPKDAGMB8l�º��xjq����������������������d|�����������������������k:g���xf^]Z]^Z]^_acdeggge^\^chjikjnpvwwy~}zx~������������������������������������㹮�Ž��]@MMWTWSMLKHM{�~p�iTZ`|������������������������Ŷ�����������������~zwwoY<-G��_:67V`P@<Xr\C^UGOa��H%=QRUVWL% $$%,377ASi}������t������ȕQKHE???=>?[����TC@@ABNcjh`OSipW-+;WgP1%"!"%)+.0,#&GsjT5#'&!'+1CW_^b_aZST\bd_\WB48BEFFGOUZ]ad_[Q> 
!$%&'*.35688;BKURKMKcw|jVQSes�����������ƶ��������������i3>]ZL>?HX\VXSSNJEA>@Ocy������������mdkv����������������������������������������������������������������������������������������������������������������������������������������������),d���������������������������������������������������û��A5Xw��������|rolnqustuogachoooppiDI�����������¿������������}T}�����������̵��������������VS���Ŷ���|stlYG;>=@DM[D"u�����rMB5( #&'%"(.,(')+3=L����������Ϋ���������������{)q��VT�zr��J;TZBCH.3+B�������j_RPKPY^`_n�����IDMA������sFH0#y��@*(FqvggeV?;KZfjM0% "%%(-38AL^v����������������������������������ľ����������U'h�c2fwgJ. r��+$'" (+-*(*08Sd`\_a]\]__]]]^[Z\YXY]`a[MBAACC?==>=?@@@BBEBCKQUVTQQRRTWUWTTTYXSVWQTWUUTVVQRSRQRRPRRROQRROMKB;50/0-($(2;=7337=AHIIIHLOPX�mBMn���������������՜PGEC?>::=@@EQq}m]Y`ae\Kp����jOTm���������������[f��XX�qJG@;;>>??>?CC?3($3g���re[ZSQMJJKNC4p�����ufs����������������������a}�����������������������f<n���xi`\]_^\\[]abeeeggc\YZ\cb^_eiprv{}����������������������������������������ޯ����qtV@MJVPUWOMLM^�hTmmeVY_������������������������Ʒ�����������������~zzxoW90J��Z:79IM@>@MB8JD33i��M8'>RRUUXI(!$$!%,04@Ph{��~���_������ʕSHFFC@??BY����oD@@@@@Neig`PVt|\1,<[fO3&" #%(+0/-!(BUM7+#&&!%)1BV[VZ]_UIRZadb^ZB5:GHGIQX\^__\YVOA$!"%$%')-3348;=CKTUQVn��}qfSTgx�����������Ź��������������n3BbZM><CIKHFKKGDBB@BRe}������������~ldkw���������������������������������������������������������������������������������������������������������������������������������������������*5k�������η������������������������������������������¼��@2Yu��������vroptxyyxpg`bfnopppj:B��������������}z���������{]}���������|���������������PP��³�������nM,%#')5OZA v���ūpaXD4&'-/& (/+'*-+1<J����������vS^`bbelkegrr����x"s��M.8,J�S1;f����N7+B�������_QLGFLS\]^Yf~��qnKGFfaZ[hn[?K/(��=+,>jzwur]=?_kmeB."  
!%(+.28?Ncw������������������������������ƺ��������������Q-o�U2s�r;(y�w+''  )*,*))/=R\XY^^ZZ^][Z[ZWT\[WV\_]VB?A?=??=??>>@?=ABDHIJRSVYUTVUTTUVWUXXWVWXVUWWVTWWTUXWTRVRPPQONOPNLNJ@93/..+'#(3;95355:@EIJJMNPTt�NET����������������ݣIEC@;:779=?AMgj^acdfcLe������ml|���������������Qg��TV�nFEA?@A@===@CD=0% .m��td_db_]XRNNND7s�����tiv����������������������h������������������������`:q���wg]]^^\\Z[]acdddffec_aad^Z[afkjlmu{z~��������������������������������������ܫ`cX]jrVBNN\TSUQLJGTkhdh�uWZ`�������������������������Ʊ�����������zzx��~xuvlV5/K��Q:47CRGA@>UfN>D�l?:8)<SSTUVC#%%!#&*0=Ifz�~xv��[������ǐQKHFCAA?AFh���sJA@ABBQfhg]L[}{`2.>YeM/%!""&),/.*!#<PO;+&&("&+0@SWHNW_]TRUWZ]_]N58EGIPWXY[\XZ[\VK)!#$$%(*-13677<@GPVTg��kxtePTg|�����������ƹ��������������b.DaZMFCCEDEBBEEFHGABQg�������������|gaju����������������������������������������������������������������������������������������������������������������������������������������������*1q��������ʺ�����������������������������������������½��@1Xw��������|wrpru|�~tjb`enrtqna8M����������ƿ���xy��������v]���������Ѭ�Ǽ��������Ŀ����IX���������̥vQAGKEMMZ_C ����˴����������}tqw{uskcI/39J��������۷N#& %'*8}���q%u��M&2\lS%,M���ΌB4+B�������tSIBBNS\jfPHf���}GF70*,-=icFH-'|��>,*2N_gb]R?EY`aUA-#  !#'),19AMcy����������������������������������������������I0r�R/ewl@+y�v#('"'-.,))/;R][\^]\^^ZZYZY\[ZZX[^`SE?>?>>>=>@??>><>CCCJONORUTMPWURSUVVXXVSUWSRSUTTSTTSTUQPTUNJOPNNONMLNH@72./-,)#'28622327@HKIKMNN\�q@Gf����������������ݡ@CA;:65648:=FY^dhfe`Pa�������wu����������������Wm��N]�dFB@BDC?<=>ADC=2%;}��mdad`_]VSOOKA8u�����tgw����������������������d������������������������_?u���whb`ba^`]\`bbeccgffcbeeheedinlhdhnpprw|������������������������������������ڤt�����TGNMZUUUNMJJSXWm��^UY`�������������������������ǲ��������~z}yuw}��{vyylU5.T��R=5:QaLDFYv]?Fu�cE@@5-CUSUWT?
!$%$&(+7Id{}zx�l[������ĊSOJGBABBCG�����J>@B@DScgh\G_zy^) /A[cM0%"!#'..0.("&LbJ>+##%!'.1?TTCC]yn`RJOW[^aO12=BGPV[baZ[lxp\( !$%&(*.13557<BKRWXhlFG_qgPVd|�����������ƹ��������������a2DbZQPONMPSTSRQSRMDCQj�������������zgelw����������������������������������������������������������������������������������������������������������������������������������������������(4q�������ɭ������������������������������������������ý�~;.Yx��������wtppru|��ui_^dpvtpf]?K����������ľ���x���������xV����������������������ý����GY���������՚tdbegkkeacE%����¿�������������ý����Z*58L��������ݵP&+-)" "(.2=~���j%w��89��ʫj.I|�yV.)3*H���~������������sj�þ�}:BB3//3:L^GHL-(���A+).Hdh``gadccc[G)$ !!$'*.39AOe{�������������������������������������½�������D4v�P3kzgD)!!��n'&'#&++-)'*.<PWZ]]]^]YW\ZWY\[YZ\^[N=@?@>=>>?>>??=<>BCBIPTOQTUTSTYRSVUUTVXVUXUTTWTSTUURRTTRTTRLJMPMOLNLLMG>4.+,++)"'1520.015AIKJKMNMn�TDM|�����������������6D@;6433465:AO]gga\GPt�������x{����������������Oq��F`�dEGBBDC?=<?ABB=2%!Dy��qond[ZSJPOOT?@v�����pgu����������������������`������������������������]As���zoikljiiffgighfihecVUX^abeilmmmnonlmisuvx|���������������������������������٫���ƪ�YELRYSVTPLKLHR���hZVZ]�������������������������Į�����������������z{|wjR5-T��K;6;XXIHGSLIB=_^OVSI3(ASTVWW>#'('')3Ji}�~{�Ue������āQRLIDBBCFb��|�KCBBAESbgh\JeytN"!0A^`H1%"$'@nR//)"(=@B7*$#%$%,1>VUKXn�oe[U\d[^cJ*-5=BHTcg\Uh�{rU%!"%(')*+.35778=AKSUV^[RNUkiMTc|�����������ŵ��������������c/G^^STOTQUZWSRSRPJDFUo�������������vgfmx����������������������������������������������������������������������������������������������������������������������������������������������(3s��������º�����������������������������������������û��>2Z|��������yursruxy}}via^`ptsnj_?O����������ľ�������������wY�������������м�������ľ����CW��������Ɵ{o_U]\]^Y_hA'�������������������������X*79O��������ۯ]/5652(!,50D����b}��7c��к�7Jy��mZ61(K���xl��������������k>.1CC//26AD?<GF)6���>,*>|�yyvwvwqoseD(" 
!#&(+028BOc{���������������������������������������������~?3z�E1VstV9!$��uaK41(!$.HC+(&+.>PX[XXZ[ZVTYXRTYXX\[YJ<<=@A?=;>??>>=;>@BDGNRSRUXVTSUVUUWWXYYXWY[XUYWUSVWRVVTSUXTOKIKNNMONNNLF=3,)-+*'!%/3.++.06DKKLMOQ\��BFW������������������z<C=842011235@P`dcXIG`z�������}����������������Mx��@f�gKHCACDBAACDDE@3&'=k��zoh]Y]YUUWWYFAw�����mhz����������������������^������������������������XEz���}tqruttvttstssrrmnga`_[YY^adjklmoptqt{zvuy}��������������������������������֢���ķ�RDNS[TSTPIKLX��|chdY[_�������������������������í����������������}|zvmS52\��I84<KNKLF@EBEI:A_aei>,JRUUTS8"(&#'+4Jj|~vvwiMw�������NQMHDBBBC]�tp�sGGFDDETchg[IfwkL !/EciD0&"&A��T22,$!+34-*&'*1=*&,2?Y^ccjlgb\dtkVZ[C(.68:<AKLBHVZ[d_>& #'(-*,.*+++/56788=CMUVT^de^WplLUc�����������Ķ��������������d1Hb^MGEGGHHFHHGEGE@HXo�������������wcfkw����������������������������������������������������������������������������������������������������������������������������������������������'2r�������ξ������������������������������������������û�=1Yz�������~~xrrsv|}}vkc^alsuvsa:T����������¾���~���������rU����������������������ſ���~DW�������zkovlZTb\O?7[g?)��������Ʊ���������������R+6;O��������ܿ����������������g)~��4b����e3P��Ÿ�B.(M��~thmb[WLRbn~��gkzlaZRGFC86Ba{yiPGG&7���5+1S�td]\]\]]V[S;)!
#&(.38DQf}����������������������������������������������65z�@<u�L++������yqrp��W'(*+/AR]\ZX[YXWZWUSWZZ\\TE<=;<<?>;>?>?<<=?BBCJNOOPOPSQORTQQUVVVYUSVYTTTTRRRWVTVURSVSQQHCIOLMMLMLKF>0)*,)(# +-($#(07BHHKLJNf�i=Gh������������������j7?<70.-.0//2=S`c\SWeo�������������������������Ky��@l�aJGA>BDDAACBBA>3% 5e��xj`bfec[[[VNA<v�¸��qg{����������������������e������������������������SB{���{xwwxyxzx||||}~}zz{wvqljhefiiefikjppwz{z|����������������������������������ՠ���ú�KDKTbOSTMLIKWqrfqshY\d�������������������������ì�����������to����|{wvhM50]��J94;MVNOG??9Iemrx��}90JSQTVS5##!'+4Ln|slqk[x��������tKPKFAABAAL����yIFFCDGUdji^L`xpI$!0J_gD-'#(U�y633/#")2271'*2RX*&-0A[ijg[Z[WUfrcSTT@./7<>?@?BFFJIMQF9( %),0475-*+-/35798<DKTVXcs{nc{iNUd~�����������Ŷ��������������Y1Kc\H>;;;<:<==<=?A@G[p�������������yefnz����������������������������������������������������������������������������������������������������������������������������������������������(1q�������̰������������������������������������������û��:.Z|���������ztrsx~���wlb_cluz}vaAV����������½�������������qZ����������í����������Ž���}J_�����������upvsR/,FinD*�������˪�{xx����tijs���J-6>P���������׸����������������Z)���7z;|qv:?x����:1)L��~y�{lYJ:@NOcrefvrpnreMF7F��ŭ�uUI%:���6-7ZxlUNTJLPQUXU=+#"##%-6;ASj}�������������������������������Ĺ�Ż���������y2 7��=@v�V+(6������������I)*,-0>U\WU[[UVYZXVX[[ZZOB?==;;:;<;>>@>:>@ACGORUTRQUTSQSSSSTWYUV[WVYYWXWVVVXWTTTURTUQSWHGJMMMLMLLJC;/(*)(( ''%07BGHJJGSw�Q?Gz������������������[7=950-,-...2=U_^Y`mv}��������������������������H���<n�aLI?;@BA>>=<>?<3$:o��~ooogbZUUQNL;Bz�����rh}���������������������}a������������������������SAy���|xxz|}}�������������������~yuxopnqorvwz���������������������������������֞v��ƱHFNS[KRSONJE^z����mX_g�������������������������Ū����������~Y}�����|xvfH20a��B83<]\MREGKQa������`12JTTWUR1!&,7Ooyrlqj~�������ѾtVPJGDBA@Cd��¨lGEFDCGWfkm\Oi|p?#$2IdeF/$ "6t|M32/!
&4?HS7&)6PM"',.@]psdUVRVWgsfVSSB35@HLLKOWZZ[[XSJ<*!'*.::1+*)-/357:<@FMUUUi�����pUYi{�����������ŵ��������������\4M_YG>989:9::9<>>?@H]t�������������vegp|����������������������������������������������������������������������������������������������������������������������������������������������&+s�������ι������������������������������Ǽ����������ĺ��71Z{���������{trux~~�wga`cmv{zs`AX����������¼���x���������qV���������μ�����������þ����O_���������Η|qN*-HkskB-���������������Ǿ��������>05<X��������أ6&.32/,,3]�������X1���4h��{�U-O����~63)N��������������u��wpkhFMH>�������hK$9���2)0Vyxvvvoopnc>&  $#!&/7<DSh����������������������������������������������u0>��9BuyT0&=��v`kobQ��B-*+-2AVXPSWUUUUUTRY\[YOA>@>><;<====>><;>AEHJORQPOSUURSVTQSVVSVYZYZ[XXYYVVYYVTVUVXUSTUXJHLMNMMOPMJB9.+++)*#%08CGJLKLW�A=L������������������H8=80-,*+,,-0;QZYgr|����������������������������H���3m�XIE<9=@?<;;=>?:1# ?s���wrg_\PJKLLM><y�����pj����������������������|]������������������������SH���}~�����������������������������������~ompuz{z�����������������������������إ�����sACQVXPRTMLLNv����qcS_f�������������������������æ����������f�����|rwsaG02h�~A95>^RLYY]g_htZMJGJD,3NSUXWU. *8Nu|umvt��������ϼtURMHFC@>@S����oJFGGFDWflk`Tf~h>$#3Hed?-# (W�s9/0, -GTxQ3(*9K?',.=Whop]UPYakq_SPOC6<GOTUT]dea_[YQI?1,--*')&%(,(-./--0//05667<AFPVSNZ����|USVi������������ų��������������W,MaWD:8799799;===ABI`t�������������o^bs}����������������������������������������������������������������������������������������������������������������������������������������������*6x�������̴�����������������������������ž�����������Ż�~71]|���������~xtz|��~uha`dnx{xob;]����������ý��`����������xU���������ę�����������¾���Ma�����������yU('/Uikec:2��������������ѻ���������0/6;Y��������ܤ9 &))%"";h�������V.���7YrNTP+*]����m32+O~��~����������ʧeUg�bfPRO[�p7DRsugF!@���2)-AgomoolhkhegX:(! 
"$%)/5;ETg���������������������������������������½�����o,@��2Jz�R.'B�������sc��n3+)+-2BWWTUURUXVUVX\]XJ>>=?>>;<>>9>>=;9=?CJNOPQOOPSTQPSRQSTTSPWXXVYXWXVTSWVTTUTRSUVRQVRKINNONNMMLKE8000/..#!$&09BHONPRb�v=?O������������������>9;83.*)))+,0>R]hrv����������������������������I���3p�[JD=;>A?<<=???<1%%:r��pha`[URONPM@>|�����qh����������������������{b������������������������NI�������������������������������������������rmkprsryz���������������������������͍qsjfhsBHOUYRPQMMMIakdbNg^Y]b������������������������������������xg������|xyt_D.5k�z=96?RQT`_escYJ6<<>@<.5QUTWWP*)6Tsxrpyu�������ʳlURLJGB=>Gg����ZHJJIIJ[gij^Xo�iJ$&6Ned<-$!#Dzs>20(.IhX;/%(6I?(/0>`n��j\ihiflaVTM@6=JSWYZ^^`\WSUONKGHCABCGFCB@DFEBFE@52689;=9AEPVWSk|`ZWTHQWg������������Ŵ��������������U/QbUB899898:::>==BCMdx�������������nbjs}����������������������������������������������������������������������������������������������������������������������������������������������+/x�������ʩ�����������������������������ſ�����������º�|76_}���������|}~y}��~vja`dnwxvre5b����������ƿ�������������{^����������������������þ���yIa�����������xZLR\_^Rea65���������������M��������5%/7;Y��������١5(,)#"9a�������P1���+S����q1M{��{5.1(O���~��ſ��ľ¾��X@\���dUTenK@F=MreF L��t+++0Nc_YX\[[\ZYN3)  "(*/5<FUk��������������������������������������ſ������i(M�~/K�yG,&K���ycUCZ|�U--((,.4GQSYWSSVWVV\Z[VI=@DBCEA=A=>>@>;9=ABIPPSRQRQTVUTSUSUYWQU[\[[[XZ\_\TUYTQXWVTUZXWW\ZSUUXVYVPPPQI>9AGHJKEGC:.%$,7==>AKMTXm�c@EU�����������������ڟ7<<:60+))**+0AUcpsu~�������·�������������������I���2v�UIFCAADC@?ACDC>2'&@q��r`chf`][WVTJ9=�����ri����������������������zd������������������������KL����������������������������������������Ʒ�usrtutvvz����������������������������jz[b�oGHNSXMPOJKKNhk[^bkXZ_g������������������������������������}�������}wq_B-2j�r;97BYLObV]`UO\I><=@;-3RSTYYL*(8Zrsnnto�n{�����ůeXQKIEA>?F]kFOXFFGGHJKZeij^Xx�uH &7Qja<,%#+Q��93/%
)ReD8,%(;T>$+.<]u�����yiindUOH<5?PZ[]]\WYVRTUUUUTX[\\]\_]`acba`b]VNTPOJIIHIEMUUR[RINNP_XYk������������ĳ��������������P2NbSB997799:9:;;<>>Lcx�������������pchr~����������������������������������������������������������������������������������������������������������������������������������������������*.y�������̼������������������������������������������º�}96a|����������}w|��wngdfqxyvpd:i�����������²������������z]���������͹�����������Ľ���~Og���������Ð~truoklilj84�������������q[YuvtvttiNG628<[��������٫{vupkcH $\�������O8��p6�����e.d����y;0&W��~��us���ou���_h�����KXYhmaIEHb�lI#R��n)*-EmywurrnlnjV6)! #')04=ESp�������������������������������������Ƹ�������c,N�|0QmiI,&R��b{����vgL60)),.5HQUVUOQWWVY]][IFLOPORRQSNEFFCA?>?ELUUUXWUTWZYVYYYZ^`^abedbdeehkkfecdfghjjhkijmmmpmmptnljiffjiijnrsvvxwqf_YY\ZWNE=<?AKW^PKRj�����������������הBGCA?82+(((+0E]ioqv�������÷�������������������F���/y�SHHA@ACBABDDED=1%!=t��|nlnjeaZY\XM;C�����le����������������������xf������������������������EN���������������¾��������������������������mmptwyz}����������������������������ȗ�����kEFKTWOQOKKL>^{{|}}e[_l��������������������������������������������}vvo^B.5r�l:94AVNVg@QST\p\SDAA<.7SPUXXK) (8[oqllmo}Bz�����ɬiWOIFBA>=AFIJNNHGHIIHH[ejg[Y��g9(;Pi^<,%$+b�h03-% 
1[Z;6+'/G`;#*/>\t�����n[ei_RJG?;H[dgga_^`\YXZ[YTX]cjlppqvuvzwxwvtsqoehd_]PI@EQUTZ^ccbovy}�������������ó��������������M5PaRD==8:99;:;???BDQg|�������������meiu�����������������������������������������������������������������������������������������������������������������������������������������������)3t�������˪�����������������������������ú��������������x75`z����������{z}}����{ohchuyxto`7n�����������·�y����������{b���������Ŝ����Ż�����ľ���|Mm��������黉yurpoppnf:6������������ſ�����������;.6<_��������������ƹ�C@��������J9��a1����j0"_����r4/%`��}vmbcz���P<U��s���yWKX^\gzusur��bN$U��c**4a�|pkgghgddjW9-+('$&(+12<IVq�������������������������������������ǵ�������^"!U�y%NtyL(%Z���������yj\8,*,.6ERVRTWWZ]\]^^]^cb^_caddcbZVURSSNR[]_\^ba__bbadfcchkmppnqpprprttuusvwwxvxwxzwzzzz{~~~�~}~~}{yyzwx{{|}}~}}}|z{xsiaYPG>87?FRh�����������������ԍKRONJE=6,('+0A[fouu|�������÷�������������������I���4��VJE?>@DBAA@@BB;0%"Q���zqmkie]VW[[I3E��´�kb����������������������wh�����������������������|?S�����}{����������|wxstqopnrsyz������������ccgiousw�����������������������������٭���nBHMWYPONKJMO����z`X`m��������������������������������������������{rys[=.=x�l>86DQI]oLajXaqX\SMH?/<PTWYWI0# (<Zopheko['������ЩfTPIEB?=<CPl���_GGHEDI]ejiX[wuW3)=TmZ:,$$)Eyk84,"!0@A@9.)2JZ8%+.;Zt����veXag]OKJD?HZiigeeec_ZWWRNLT\grvxyvsvz{vuvqnkg]ZSOOD:8@LTU_lt|������������������³��������������O4NYOB;9889888;==>AEOg~�������������jait�����������������������������������������������������������������������������������������������������������������������������������������������(2v�������ʹ������������������������������ʻ����������ú�{54]y����������xvz~����}qhclutpqn^3p�����������Ŵ������������}h����������������������Ľ���|Po����ϯ�ǹ���|utrqppne9:�������������������������:-7=^���������������֧>0|�������CC��]2�����b-\sodg8./&\��~rhr���ǡ�hy��l�xNNcri`\Y{�����ZXM$_��[**:k~hURWM?BNSeU7/4331/..-/:ERo�������������������������������������̿�������[  [�o%V�{M+%
o���M91'&9Tyk;,*,/8GQWWWWZ\_a`abbbcccehgeejimiljghlmijlighklmlnoorqqttvvvuuvvvxyxxy{{{|~~}}|~~�����������������������������������{xnf_SG>:<BMax�������������юMXWUSNKC71.+1A\fotoy�������Ÿ������������������|P���1��VIC=<AD@??>>>?:1)5e���omjgf`TTW[RF7J��³�|n`����������������������uj�����������������������yGZ���}R8.;8>::=3/).01,38?@>EGIMLPSY`hs{������iffcgnjio{~���������������������������Ν��~s:KPW[MNNKKLKZ��{|~YX]k�����������������������¼����������������~{~}p[9,@|�e:74FQEbo\wjSM?\aVOb`1@OUVXXK:,$");]nmhjn]/9������ҥ_UQLHA>>=Jp����VCHFDCJ]gigZ^wv],*;VkY:,$%-f�f33+!.CUTG0(-@P.',/<Yq��|wh^V[dbZRQH:AR]^]_\[WTQPOKIHRWhuyyvlfkjg]UTIDA:5530121.0=RUV[w�������x������������´��������������H0V^O@::98::9:;:=@AANg|�������������lait�����������������������������������������������������������������������������������������������������������������������������������������������'0z��������Ǽ�����������������������������������������ø�{57]w����������vv{����|tgdhpqorne3p�����������ȳ�y����������ze���������ҭ�����������½���|Ou���ñ������}pedjhbemi9;��������Ľ���������������6.7=a���������Ü�����N$Y�������AE��[G�����l.j�j( "-,$c��ur���ǳ����Ɏb������m`Yb�����v]^N#a��S'(:gzqkm_D:HbknK-4:<>??<:58;DSg{�����������������������������˺��������������T%^�g+ZwyF+'y��oftrgaco�N3,,.28LVUVVWVY\]acdeeggijijkjmkmoprrpqqqqrrpotrrssuuvuwxxy{zzzzz{||{||}���������������������������������������������������{nfWLA89CJ`w���������ǒilgedaa[OD>:7D^fjkiu������Ķ������������������wP���4��QGA=;AEAA??>>>:3-:e���rqlid]TWYYTK6I��ķ��pc����������������������to�����������������������zE[���~jky��������jortwyx|���������������������|oomqhltur{��������������������������̷���{7LNYYLNMMKIJt�uz�eOZao�����������������������»������������xxv}��}zvnY6-D~�_766JPHgpYZPBB<PV\t�](DPSYXVI;4-#!%5]nonpmGHi������Ρ]UNKE?=<=Hi����RFGECAK]gifY`ymC%,=YkZ7+$#+b�X23+!
8T`cE+%)7B-(./:Veepza]YUVij\WVF5<LSTUSPSRRSSSSYUPSq�|wkfY:11-210//,,.10/./?PUU^v����}vw~������������ó�������������~F2W^M@:87:979::;=AACMf|������������}f^lu�����������������������������������������������������������������������������������������������������������������������������������������������)5|�������ͣ����������������������������Ը������������ĺ�{59\w����������||}���|yrhdgqssspa1t�����������ǲ������������xh��������ٻ������������ý���xKw���ۺ���­�vR1.+71Cff4<����������{w}����tuuw���318Bc��������ӓ-"'+)"&V�������@I��[Gai���C0r���|U/-%i���~�����x���̲qt������k]Wk�SQ`vpa^Jb��M*+4ZwttueD@SkoZ;0>JNLNPQQNVSTWUn����������������������������ں~�������������P(e�X,cv]3+,9���g�������T1::9<==V]]]]YRTZY[_adehhikmmpoqstsuvvtwxyzzyyvvuvwy{{|}|}|}~��������������������������������������������������������������������|rf[SK<8<L^u�����������}|wtsnf[QYadd`ho|������µ������������������rT��~9��RIB><CFEFFD@?>;62Co���~xsmcZT[YXWN=P��Ŵ�}mi����������������������so�����������������������v?]��������������������������������ý�����������twzyyz{yw}}z������������������������Ż�z:HO]ULNMKIKQmYGPRTW[ar�������������������������������������upo����}vvmV6/G��Y968GWWpyAM\ACFTh�o7*EQRWYXK:76.$ '&'?hwyyyoc{v������͟YSKGB>;:<D_����\HGCCCL`hhdVSZS?+,>]nT2'#$,V�i70+ 4O[M0)%,AF+%,0>YafokLQROWhk_XUB9APWY[Z\a_\^\\ZZQL[t������p`@3012221,,+../.-.>PUXj}����wq|�������������²�������������C2U]MA<97898::;==AADTk������������|kelv�����������������������������������������������������������������������������������������������������������������������������������������������)5�������ˣ������������������������������������������ø�z25]v����������~|}��}zzysjejsxvrk[/v�����������ȵ��t���������ze����������������������½���zMv��ˮ����ͮ�qJ&.177Ie`3=���������˿�������������{.36Ad��������֏'#),+')i�������AR��[7MZFDN;6���ɸr5.$k�����ӱmAT����nXj�~�}�w^XSehSGCdn^_M
!e��D+(-Nkhb`[KFTXRC23HWdggghgmmpspqwp}�������������������������������������������M'c�R-/+,/2Cz��z������rYd�wqokie`acbbc__[[Z[^`beilonqswwxy|}|}}~~����~{zywzz{~���������������������������������������������������������������������������������yl_SD975CObs����}|z{yz}rke^emnomot~��������������������������qO��p=��RKC@AEFFEGEA@@=83Jv�æ��xkaXT[WVUN>M��²�~ni����������������������sl�����������������������sB`�������������ů�����������ľ��������������Ų���������������������������������ӻ��ȿ�r;FNYSKNKKHKHOGJWiu]Zav����������������ÿ�����������������zgk}�����{xxlR7-H��X:46LaVsuEjZJK_���SA80JTTYZYD5342(%*')Cn���vw�h������ΙQOLFB<9;>M����mCDGEBENbikgWQcq_7 .@blR3%!$;��S21*">]YE2(&.BG/%,-;O`a^KGIJJXnt[SPICM_cfigca_WSSXTUTP\v�������hF4101232.-/..0/./?RWRUt����j`��������������±�������������yA6X_I<:98878;;;==@@DUh������������xiejw�����������������������������������������������������������������������������������������������������������������������������������������������(5}�������Ͷ������������������������������������������·�v14_u���������|wz|ywxwumfhr{xytZ4}�����������ź�~����������xi���������������������������vMy������㾧��vd_\RU^ena4I��������������ʷ���½���h(14>j��������Ջ4'),*$?��������4V��T7i���rD3a����S3/%k�������tXKUlshQRXVSPGau_SLaqtlk�T_F%j��C++?lvqkkigfgc\I-"B[w����~����������������������������������������������������G*b~J+,,039J��븄����|�|s�������ihiiihkjkkjiec_bfdjmqtw|~���������������}zxvvz}������������������������������������������������������������������������������������xqbULC94Nb\H:9=<<ACCA?Lfv~�������������������������������mR��pG��NIDBBDEDFECA@@@;7O{�͵��k\ZZXWSPP>O�����{ok����������������������sk�����������������������s@b�������������ı������������ƿ�������������¯����������������������������������߹������mAIMTRKMKKKKGN`x��sX\br����������������������������������ky�������~vyugO6.J��V>47T_NzzM[OPl��^C<?62GSTXYYD/+,/)$&$)Jt���~u��q������ΕUOLG>:8:@Jt���{OHIEACPbmsmbk��]0!0B`fM4& !,csR41,% 
>WVH3&&1U_-%*.:O\ZMF>CFN\miSOOI>GV\_[b`^\PX`bbbZNZq�������oG3120232//222433/>RYUj����r]q�����������������������������z?7Z_K=888999:9<==??DUi�������������yfflx�����������������������������������������������������������������������������������������������������������������������������������������������)0{�������ʳ������������������������������������������¶�w02Sl|xvyvw}}��ooww~xotttf\cosumpQ0������������¬������������of���������Ŧ����½����������vKz������ϝ���~udT_e^e_4G���������������d��������7'07<k��������Ҥ���~viK/r��������/Z��UP�����Q:l��qhM4/"!k��������LMy����cQUVILuw\PBS�����`FV@$n��@**Co{xlosttsl^@0^c^YQ\`^ejnpv�����������������������������Ь�����ȵ������B-e�I-6AA><N���ŧ���|�����������moqtytxuwyyzwwzvsjkjqvyy��������������������xqtstu~�����������������������������������������������������������������zz}{��������������}tj]N@<6)((+))))'*..7d���������������������������������gY��i=�vIGDA@EFEEEDB?BA<6N��ɸ���k^a^XXWRM6R�����xln����������������������oq�����������������������o;b�������������í���������������������������î{���������������������������������ݯj�����U>GMSTNNMLKNPn���nZT\dy�������������������������������������������|ztgM4.U��O<36MVS�w<ZbY�uI6?@?50JRQW\SA(!
'(#!%,Sy���~��s������ˏNMHD>:89BP�����TEECDANdnvtbk~pO(#0EafL4("%.^vT20,' %G]WJ.%&7VV&$)09M_UGB;@MT\lhOJLD:@RY^gkglhdfjqqmeSXn�������vF30110412/248:975?QUS[[RMMLc�}k~�����������±�������������{=:X^L>877;:99;<=>?>ATk�������������wddnz�����������������������������������������������������������������������������������������������������������������������������������������������'3��������ɫ�����������������������������Ŵ�����������ø�w+"?Vd_[UIHHIH@ABEHDHD?DB;4:>@99<0,��������������������������md���������ĩ����������������r?x�����۲�ʹ�zs`ZVKRo\/N�����������{vj=<UiiomP#$-17Bl��������������˽�4a��������-[��VH���ҬH9n�lm~C13!"k������{NH������~`v|rk|uXL>7q���aCDL9(z��;+,=QTPXirsrl`I*,, #%/7@Kd������������������������ڴ�����Ǿ�������~=1o�J8S]YJ>V���ǭ��������������}i����������������������z|�����������������������ved���¿����������������������xm~zxwwsrtwxts~�������������������û��<7;>CFFLOSSWZXTJOTVTWWC*,.())())((((*.5b���������������������������������cZ��d?�uHHD@AFGFDDBAAAA=6J��ŵ���lmsl_Z_WM6X�����yho����������������������qv�����������������������k>f�������������ì���������������������������å^���������������������������������ܹ��~s�n/5DMUVOOMKKLR{ydW[_W[`y����������������������ÿ���������������������~tgK1/Z��I;19RST�rPiQI^=:>@BB54KTScyi=$#$  $*9^�������o������ȊNLEB?;99@Gr���lECCBAAQdntpcisqE)#3GabI1'"%2fZ212/' (I^WD0%&5TO!#*/7LYUFH@GRScrkTKHA:BWcltsqvojnt|zulRVm������vC22210332359?>=86@RRNTROKVYoqhu������������ï�������������x=?]]J=889::9:<:<>?ACWm�������������tcdoy�����������������������������������������������������������������������������������������������������������������������������������������������'4~�������͹������������������������������������������·�v,#;O[XMIC<64423...-,/.-*'&+),,*-03�����������ƿ��{����������me���������������������ƿ����uN|������������tI0+5G\m].P����������������lUGDD<13/-15Fr���������������Љ):�������)c��PL}�oy[45h�|��@/0"$n������VOi��ʸ����������VG@VuxnkwpGL3(��8-,3BQ]jrvojc_L/
+-,!%'.36I������������������������Ӿ��Ž��ý�½���v<2z�F3=PUH@[���ő��������������ro��������������������Ƕqx��������������������¿��qR�������������������������³t6QHBIOOCFTTIO]e��������������������Ȁ#-,,,,+.--*)+.+$&'((-0*&,('()((((())-/Y�xotvvrsrrwtp��������������������__��`D�uIIC@BFGEBA?ABBB<2K�������x��tfde\Q;W�¾��tin����������������������ov�����������������������g;g�������������ı���������������������������ěI�����������������������������������̽�a[M06DKTQOPLJLLCGMdmqbW]b�����������������������ÿ�����������zopu|��~xtpaH-1]��I<29PUZ�g9HHOR>@CBGJ:9LUf��j4%$%!$).@n��������k������ǄKJCB?<9:>T����dJABA@AScmslZfsc?'%4I`hG0'!!!/H200/&%@WV>,%'8_Q"#*-5MZPJCBFOVcqiSHDB=CYgwxruyvvw{|ztdKRj}������sD343023357:?BDDC9BOSTT[lsw}wr~�������������ı�������������w??_[J=99:8989<<<??>BZp�������������sdgq~�����������������������������������������������������������������������������������������������������������������������������������������������(7��������Ѿ����������������������������Ͻ������������ø�q(!8P[XNG@=75343.-,++*)'%&')'*+,.11�����������ý�������������mb��������ټ�����������ƿ����sK{��������ð�|L'*/9@Xk],P��������������ɻ��������f117@t��������ֶ�����y:*p������)l��GR��V,&+5]�ҕP1/"$m�����rXN��ͪjp��������nMEA]j]l��iDJ/+���5*-@i�zn]NOX_W8
+,&$(*07I����������������������������ż����������q54��:1>ZQFD_��结��������������jn��������������������ӵQ{������������������������pK�������������������������̾l-HA<?H@4;HD=@GW���������������������z$0.*,/0.*-,..**&&).1-+&#+)($'&&)()*(+3^�ywvurtwwuvnr��������������������_d��UB�rJKD@EGGEB>=ACB@<3O�������{~|pkkjcT@Z�����uis����������������������ox�����������������������g>m�����������������������������������Ŀ�����ÓCn�����������������t�ʿ������������Ƴ�]YmtS=GMVNNNJIJKKXw��zjYZb�����������������������ÿ������������}qy}~��xqomaF.0^��E84:UTY�eE`]Tekklglk;;OUz��b4)$!'$&-4Ds������~m������ŀJHA@=;98>O�����OBAABEVdkoeQZmeB$%6J`eA/$"&2]p>30,!(K_U=.&-HXF$,/8N\LA@@DOV^mdTFHMDEUeu}uv{|~z{wqh]MNdx������uE6322237;<ACEHLG<BPTW^w����rz|�������������°�������������q89a[G<;::8:;<<;=?@=DTq�������������r`fr������������������������������������������������������������������������������������������������������������������������������������������������)2��������н����������������������������˷������������ø�q(#8Q[VMD>;62/01/,*()'&"$$%'$'),/02��������������������������jb���������Ǻ����������ſ����sN|��������̷�tX[e^^]ekZ*M������������������������t117>u���������v $+*",r������(o��JV����k;8b�ڻx>1.!'j�����~V`���}D[��YMB;?^iPE84/I��|<CH)2��y4,.N}�oZF746?GE,
+)"%')-5L���������������������������̸�����������p.8�}5:XYGBDd���p���]e~j|������hn��������������������çR�������������������������fQ�������������������������˼\.F@:<>639@<5=IS���������������������rFlnmlggbZXUQNE>-'/ESNPB"+(&$'&&**)((+2c�yuutstvuusps��������������������Yk��UF�lKIDAFHEB@??ADA@:,M������}wxvsqomjW?\�����sfu����������������������my�����������������������d;t�������¹������������ƶ������������ž�����?]�����������������t�ͺ�����������٣l`e���_9ELUNNMJGIKS����le\]g�����������������������¾������������uz�vw��zxzp`B01a��B:5:RO[�n[cWa������f7>QV~��[33. "*(,17Lw�������wu�������yIFA>?=;;@X�����NCBBCEWdkpe\hyp>"(5LedA/$"(L�j72.($=[S:-&*?XD$,.5O[NDBAGNMQdcSGRTBBOcsqmuwy|{zxsodSO_pz�����oD622136:?DIHLKJG=?MSS]plheWOWh�������������¯�������������l5=\WD:8899<>AB=>??@EUv�������������q]er������������������������������������������������������������������������������������������������������������������������������������������������(7��������Ͻ������������������������������������������¹�q%"8RXUMG@;761/0.+*((%&"$%$&')'+..0��������������������������ih���������گ����������������jL~��������ߺ�tj``Z_W[hX*S�����hI><f���{rz��������o*/9?y���������x%(,+" #8�������{#r��Ia���ѴH5��ǎsC3.
+q���������ͬXBa��FYUNOlhKF42`��p2:BC'0��o++1LefPC?CHFCAA'*$!'+.7V��������������������������˻������������l,<��18D>EC?i��粠���u����������fx��������������������ƧI�������������������������dS�������������������������̸V8SE=<?36674.7;T�������������������������������������T'P����X$*'#%('%((''(+7g�ysuuwtusuunt��������������������]l��[HiJIEADECAAA@?DCB:2S������xuxxtsroj]9`�¿��qfx����������������������j{�����������������������e>u�������º������������ŵ����Ⱦ������Ļ�������@g�����������������p�Ƕ�����������ؚf��ǿ�]=GMXQOLHGJH\qfOYniV[c�����������������������þ�����������}r{yq~�~z{xq[>-3g�B;4<RQc�X<BN]jSo}\YK7ATY���Q;73&%-*,15Qv������ox�����ϽqJDA@?<;=AN����wFC@BEGXejnf[lyf8 &4Icb?-%"%Aoj91-&(TaL6+#'4UB"*,6L\_UGCFHFMacTCGJC?I\gmotw{|�}zwkRFYlu~����jC643358<CILNPRPJ@=LTSWQHX[TLX{�������������®�������������r6@YXE;79;:=DNE==>A>DXw�������������naht�����������������������������������������������������������������������������������������������������������������������������������������������*8��������̬����������������������������ʷ������������¸�s'#=QVUMF?:85300-**))'%##%&('*)+--:��������з���������x�����b^��������ۼ�����������������hM~��������ǫ~mM5%$+8XmW'Y�����vvwk����bf~��������m/2:B~���������t1,.22+(6m�������w&v��GJ�����4-PxbZG/1+/x��������Ƀ@S��yjwlr�sID6`��n=QVD?&.��h,,-A_wrmpoofi_R-
+)""&*/4\������������������������׾��������ÿ����h'A�z29MTQD@p��䧞��������������a}��Ĺ����������������ƤB����������ê�������������_X����������ʼ��ų���������˳a_�rioztje`\UW[h�������������������Ѿ����������������P&Upu��M$'&$$%&#&'%'(,9h�yuutuvvuttnx��������������������Zl��RH�hNJC@BCCBBB?ACBA:7]���������~ywvvqd<b�ÿ��pez����������������������g{�����������������������_B|���������������������Ķ����ȿ������Ľ����ƿ�8^�����������������s�ʷ�����������֞���ũ�XBGKYRNJHIKK\hsz��eR\e�����������������������ü��������������~���{ztlW>.4k�q?94AY[h�O9HPI5W�f9CK8BS_��{P<74)(+*+16Wt}������c}�����͸oIFDD?<=>C_����hFB?CEHYejmdSk|c5&8Kd`>,$#&H{{=/+%*PXO6*#'6R?#*.6NcjSHHEDGPbjZFHPF>I`lqvw|}������iNDSgr|����f>433328>FMTRSVTN?AMVPV\o�{ndm��������������­�������������k3DZVD:8;:;BZN>==?@>EXz������������~m_it�����������������������������������������������������������������������������������������������������������������������������������������������&7���������ʽ�������t�������������������Ƿ���������������o##=RWVMF@;7320,*+**''###"$'(+*+,/>�������ֻy1%$(& ##&A|����ib���������������������������lJ���������©�w]F;6?XmkP%Y����������¸������������g+59>���������̥����wr|��ý�����o-x��D_�~EF6+*'!
%.0)1}��~z����ʛHO���[gj`u��cHB9��zkz�YA?#:��X)+/R~�{rspplk`N/		0!"&).7Z������������������������ͧ��������¿����c$F�r09TdRCCs��ܣ���������������[����������������������<���������̴~�����������ѽU^����������Ȟ�����������Ͳ}�����Ĺ���������������������������Ѻ��������������߱<$16Us�B#*%&&'(&&&&((.:i�uvvstorrpsox��������������������Vt��GN�fLHB?BDB?@@?BCB?:5_��³������zyvre@f�Ž��pe����������������������c~�����������������������Z?~������������ȿ�������Ż������������ż������Bh�����������������s�ʴ�����������Ӧ����}zS?HNUQOLGJHCSv���k]X]b�����������������������ý���������������~��}tskT<.7n�r@83FeZh�UXhU=d�xPHSZ5CRX��{I:52'(-+.5BZpz������^������ͶjDFDDA<;=>X����~JDCCFJ]gmo`Wt�b3(9Pg]9,$!*[�g9/,%0V^Q3)$'8P7$(,4G\WJIGDEJPdlXHOZE<Ssxu{~~~��~tbMDQfnx}��zc>34566:ALQVXTTTK;@NUR\t��ynq������������������������������g2EXSC<:::;@G@:=>@?@FZz������������|ibkt�����������������������������������������������������������������������������������������������������������������������������������������������*5��������̰����������������������������ι���������������m# <TZTPIB=830/++*('&&#$""$')*),.,8�������Ӵw/4HJE,';8I|����ic���������ѕv���������������kP~�����������xwgeelmedQ$_�����������ø����������z>"28>������������ݹ�������������h(y��AZ��^<,*+&"##*12+5���~ko���{C7X���c@8_��s-B@.ALCDHSB?< ?��R*,4`�w^SWVPRPLC0,"&*.7_����������������������������������ÿ����_"L�n1G`kUCDv��ަ��������������|T���������������������ƛ;���������θ��nS��������ѷRf����������ū��N|���������ͮ��ͽ��ӭ���������������������������Ϸ���������������i$)>Qg�Y $,$%'(''&''((,5p�yxwz�����rry��������������������Mx��GS�^LI?<?ED@=?BDEDA:9a��ó������~xutob?h�ý��hf�����������������������m������������������������XE������������ȿ�������ź������������û����Ŀw6`�����������������p�ʴ�����������Ӑ^b\SkyQ>GRWPKHHHJLNboi_pkY]d�����������������������¾���������xysdqu{��vqujU8+7r�l<51I`Pew]dbGX�qEL\{_,BVb��tE620%(/06;E\nu|}��n������˳fGDDD>::;>b����zFDEFFH^hno`^wwZ4 +9ReY:+# %Go;0-&.VZI3'"&4> 
%(-3HZRJHCCGKRcgTFMSE>Vqmq~~{{zzzyrjZC?Oaiuv{yr]<-3579<JVUXSPNHC:?NRPWi^V^nu�xw���������������������������l4EYQC=:9::;=;8>@AA=D_|������������{jdju������������������������������������������������������������������������������������������������������������������������������������������������(3��������������������������������������ʵ���������������l##<R\UNFA<73/..,*&'%&#$%%%((()*.2B�������вy[����RJ��kW����gg��������ص�����������������iT�����Ĺ�����iN>?AABUeV  d����������ĸ����oj\QC9+-)18C�����������Z(!$(H���������g$}�z5j�Ƹ��;,&" ")/1*8���{hd^YK6<@Mo��kCw��a(0DB>c�}|]C=B��M1-8]~s`W\]YYXRJ/	'#&)/8`����������������������������������ÿ����XQ�g->^aQ@H}��Ԍ����qyyn������~f���û�������X��������ÒB�������������h���������аNn����������Ž�ys����������ˮ���������������������������������̷L&-124686:6=<:4%!-Kfpl8"%,%%'&'('('((,7u�yz�����̘rqx��������������������O���GX�]IA;9=DCC@ADDCCA6;g��ò�����~yvttobGm�ú��l`�����������������������g������������������������SO�������������Ⱦ�������´������������ò����ýu=c����������������~q�ǲ�����������Іb�xe�tL=ERVQLKFIKM^fac�cYZ_�������������ü����������������������|vzz}�||shT6.;z�m:43KUIir?=HO�y7Cc|~?3ITh��pE3/,!'038<B\nsqt����~������ϮXECBC?:::?W����dFFFHFJ\emobcry[2 *9VdX9)!!+c�p63/' 3V[H2% $)%$*,2HZOFHGFHGRbiSESS@:Ohchuzurouqnf_I66HYckqssoX;2358;F\UPMKGGDA:=NPMRRHHRij\\o���������������������������e1G_S@=::9:;<<<>?@?<B`{������������{iakw������������������������������������������������������������������������������������������������������������������������������������������������':�������������������������������������������������������n)(;S\VNG@:852/.,,()(&#&$%%&'(),.8L������������ǻ�h���rk�����fg��������־�����������ǿ����dK���������Գ�e6&-,1XgP#l�������|q\UTMIQGHAKbouV,38C����������E'''#<{�������d-��~9l���՛6)$"!$*00*@���~to]I=DEDK��jw��nFZTHES������_D<G��H.+2Sr{yuwuokdaO#*"'*/;e����������������������������ɽ����������U
V�b2FPMG<F���ٷ���������������R�����������tX�������ž�?���������˺������������έNx����������Ƶ�������������ȭ��ο�ů����������������������������Ͳ?',132).--.-1-*&%0K��V*&)&&&'*//+((),;s�wy����عsqo|��������������������M���Fa�[E>7:=@CDECABAB>59m�������x{wrsppn_>p�Ĺ��kf�����������������������f������������������������PH�������������Ⱥ�������������Ⱦ������Ĺ����üo/f����������������{y�ɲ�����������͙�����xO=CNTPLKIJJIKJ]��uWZ]g������������Ǽ��������������������������~~�~{tneR2+>�g;32ARThl@V^[cC9AZiO8;KVn��{J/,(#037:B]mrjs����v������ҭXE@AB?99:Cg����^IDDEDL]ell`dt�Z**=Zi[7(""+g�`431) 6M_L+$ "(-$#'+0CSRIIFDDHQbiWIPM83DTQUYcdabd]UPJ7-,<LV_ekfhL91458>GQKAAB@DEA7>NSNONOLTrlOYk���������������������������_7H[P?<;<?GHKJEGGA>=Ee~������������yf]my������������������������������������������������������������������������������������������������������������������������������������������������(9���������ɴ����������������������������ż��������������h#$>U[ULF?:642/-,+*''$%&$&$&&)*+.6K��������ňf^YSONMKJG]~����jl������������ȫ�������������cH���������ݮ�lOQSMP\blS !q������������������������F(3:>����������}%,2+!("L�������`(��{>R����c*'"!"%,13)D�����������~wz��fv�kguvPJHJ^e��dpWD?H��O,*)?crokkkjd`Z?
+"/!&)/<k����������������������������������������O"[�a1AERH>H���Օ�������s������nX�����������������������>�����������������������ΩB}�������������������������ǩ��ª������������������ʽ�������¿��ˤDQ[^^VROLKCEC9:4(6_nJ2,(-&%'*3>:1'()+<q�vw|��ħwksh��������������������I���Ak�VB<66<?@BB@>>@?:4:n������ohg`]_dceS<u�º��lh�����������������������f������������������������NP�������������ȹ���������������������������øl>p����������������y}�ɮ�����������͡��cit�K<ALUPLKJIJHHq��oc\Y[m������������ɽ��������������������������}~�}ztgP2*C��a=41@XTef_mZJF@797??>9KUo��kA+'")--5@]usjw�~sj[������ҩWC>AB>98:<_����fGDA@BJ]ejjX`xQ**?[hS5(#$)U�d521* !6Y_F-#"'3J6#(*4HWVQNDBFKRbhWKKD5/5<;@BEJLQTOHA;3+%1AMRQSUQB3257:@ILM>9>@CB=5=OTPPSbkz�YO]j���������������������������_4G[O@<>APeihli_KB?>Hb~������������zb^ny������������������������������������������������������������������������������������������������������������������������������������������������%8��������ֻ�����������������������������µ��������������i#&CX^XMF?:433/-,**%&&$$#'%&%'(),2L�������Ѵk(#'"H�����ei���������������������������bO��������İ�~rg\PORKYjP$$w���������������������³r/+39B����������Q&T|H=cJ5������Z-��w6u��~rs1&! 
"&+00)O���z������������Y@WQOMYNLH=O�����UD<M��K-*(;hvpnongcb]A!+"&*0:k��������������������������������������L#_�Y/F[YB?M���͏������~�������y^����������������������}<�����������������������ͥE��������������������������Ɵy�´��ú�Ŀ������������������������ɤ����������������+*Kikc9&)$#'-;D<0'()+=t�z}�������tm���������������������K���7u�S@:55<@@??>??>;71?m�����cJOQHGDENQB5t�½��lg�����������������������g������������������������HW�������������ɷ������˿�������������»����ŶeAy����������������v}�í�����������͖cWs~��D>BOXNLLFGILe��lb_VY_g���������������������������������������������tgL1,F��^;43DTTb^OL@@JQQNQV_F9KZv��]>+$&%&-7`wsjx�xeUO������ѠQD@@?<:;?Co����^DDA@BK[ehfZ\oqM' .@]iM4& $5z�U010) 4QXB,%#+Nd(#'*3EZciXGGRVW\fRJH?1*07:99<=?EFB?;;6.+6QWUOIHD=324;>ALQM>8;?CB;3=OTPUg��~eX\m���������������������������\/N[N?<>Onsw|ynWDA==Id������������u^bpz������������������������������������������������������������������������������������������������������������������������������������������������(7��������Һ����������������������������̸���������������j#(@W\VLD?94330.-,)&''%$%&&'&%&*-3L�������үg4Uxuqihgifx�����fi��������ռ�����������Ŀ����^L����ǳ�����|d4284XrI(t����������������������h<(+2:F��������޲C%`��a��d3v������V-��q2LbOq�[-'!
!%*./(S�Ɠ|����������Ťd������gOIK�����w?E8U��C--3d��utppkhdZC#	"+"&*2=r����������������������������������������I!b�X.CO@?@S���Գ��������������x[��������������������ȿv>�����������������������̠@��������������������������Ɯs�ȿ��ú���������������������������ʭ��������������ٛ)8n�oH('&##'.9?8+&(),Cv�|{t����ŋup���������������������M���C|�O?959@HGDBACDA<61<k���kJ>AFKIFECDA:6t�Ż��jf����������������������d������������������������KY�������������Ƿ�������������ƿ������������ǴgE����������������y}�Ů�����������ԙz�����C@ETXLKKHIHGLLLGGMJY`l�����������������������������������������~����tbJ1-I��\<42EQR_YID?Eh�zyyz{F7KWi�q]>-&%$$,?hvqk�xhTL������НMC@>;:;>CEa����eEC@>>KZdjeVYp~M&!/D[fL3'!"&Xb-10*!;aZH:'#.IQ,"&+2C^t{eLMT\ZXVDGD;/'0797689<?@????;31FegYIB@@=527;=BKTL;36?CB<7?NSKRi����kUTm�����������ȿ��������������P2NYP?;8AMSiz^VD??<=Mh�������������p]fq|������������������������������������������������������������������������������������������������������������������������������������������������)5��������������������������������������ɽ������������·�g#'DX\ULE>:742/.,,)*(&$&%#$&''(+--F�������˪nx���������������al��������ֿ�����������ÿ����_F�����������cEB]cZ_onE&y������������ǻ����hf_J-3,,3;I��������ܪ>/�Ϫ\��;1|������U4��s=044AA1,% $,/-%W���������������j������\NK_�dW�yI9G:X��D,,>r�p\WWVTLKI>!	
+4"&*3>y������������������������׳�������������}C*f�P/=NOI@X���ä������}t������uZ����������������������nH�����������������������Ж>��������������������������Ȗp�˿�������������������������������ͮ��������������؆$:RHEF0(&%&(.8>9,(().Du~{~����ęmur���������������������L���K��M@;7:BIIFJNLKFA805_w��fSGBHPHEDB@<75y�Ź��kg�����������������������`������������������������NU�������������Ƹ���������������������������ƱcL}����������������y��ī�����������֟��ǳ�vE>GUVKLJJGIIHTVXY`ioal����������������������������������������|����sbJ2+F��X;34DUO\UNHLR����~|a@:PZh~j;.($"$,Bhrkl��yjXS������ΘNB=<;<=@DMz����fFB><>H^ef`Xd�rJ%"0H\fG2(#$9}�W11-(!6_dVC'#1RS*!&*/@d|_QMMQOFGNLA:0)-476679998:@EB;0/?LKB?@CEB:479:@L`J937@DD?4?VTQY|���wURYm���������������������������U6KXMA<;>BQybKIA?@<=Mk������������~kaho|������������������������������������������������������������������������������������������������������������������������������������������������&7��������������������������������������ʽ���������������h"%@V[TKD?:650.-++*(&&%%&%$&&&&).-B�������Ǯ{q�����|{xmr�����Zh���������������������������_H������������o^jvrmphiG'z���������������������aIC4-3;N��������ެI6�ʥB;+:�������N3��e413760/+%! 
#,/,#S���ws`NI?3<BAJ��kohao\]VNHLn����dDG8!e��C).Dmwn[VZ[URQOD 	1>"&*0=y�����������������������ؿ��������������{?'k�N8QVOF=[���h������hq������pb����������������������gQ�����������������������̐@��������������������������Õy�ӿ��¿�������������������ż��Ŀ��ͣ�������������īF".G_bT/*'$$(/<B<.(((.Hy�yzq�����wom���������������������O���[��H=:68?DEIU]XRIC:11Mj��kZOC@EB@@@@>49y�ø��ij�����������������������e�����������������������JQ�������������Ǹ�������������ľ������������ŮW;x����������������y���������������Ñ���{uv@>FVQKLIGHKH[|�����~\p����������������������ÿ���������������vo}���rdJ0/I��L:25L]FVPObaMTB=<BKJ9?R^{��b4,)!"!&(@lojm~�{e_a������͕N?::;;=@BL����{KD@><;J]ad^P_shM&#4EbkF/'$&=y{E30-&#GpiP0"%3PS) '*-B]gfQMDBJIK[dSF>4+-37669;<::=CF@9.(/4/27FMOH?9779>NZQ76=EGE?2<TURU\TOSPNP[n���������������������������W0LWKA==BJumHJIFB?<9Nj������������}mego|������������������������������������������������������������������������������������������������������������������������������������������������$8�������������������������������������������������������f"(EZ[SLFA8741.--*+(&%$&'&'&%&(+.-E�������ĥd'$J}����`l���������ɼ����������¿����^K���������خ�dECFCCATa@,|������������������ŪhdT:,28N��������ߵ^4�ƥ@ '[�������J9��f8/2421/)$",0-$[���q_YTI>9;<FY��pqefdWmWNAH�����lTG7#h��=),:gw}vttsiik_>	
+47 $'/@����������������������������������������z:-r�E1ACA>=[��乣������vv������h]�������������������Ż�bQ�����������������������Ɍ<��������������������������Ř��˷������ǻ�����������������������͆015:=?>?@FKHNG>()6`usZ,*($"&1?D:+&'&*Ly�s{��ҽ���pq��������������������{W���h��MA;57=DGSaf_VME;-*Io��a[TB?CAAADCA3;{�Ƿ��jh����������������������~_������������������������E]�������������ƴ���������������������ý����ǯXT|����������������w��������������i^aP`xr9>FQPMMIHHIH�������j`u����������������������¾����������}~ysvno~���~q`J0/S��F;16U[RWQgtBEPB<;>AB3?RX���[+''#"''Ipplp�hWWd������ȑIA;;9:<?AD[���SE@=;<IZ^b]N`vwC#$4JcfC/' #0i}L72.$&DXC8-&$3\Y'$(,-EZSJJB=?FKYtjSHC9+/5989;>AAAABA=6,"%'(/?RUTOA958;CRaS97=?@DA6=STQSPJJDCMPVk�����������ƾ��������������R5OTI>;>Gi}F>FIHC=<>Sm������������kdfp~������������������������������������������������������������������������������������������������������������������������������������������������7���������½�������������������������������������������b"&H[YSJEB;542.+*),*'%%&&%'&%'))+.L���������Z#4JIJ.3D;N~����Wk��������Ա�����������¿����ZU���������Ŝ]1#%'-8S^B/�����rRGOJAMbqy����}b\^\E-+18K����������u,���6 T��������D<��_3.1241.'#  %,/- e���n_^QH<99:T���b\Q=<[jYSH^}P>DggXI7#i�}=,)-Ldfaacb[TRD,
+12 %)0G�����������������������������������¼���w41q�E3AHJ?;]�����������}�������e_�������������������ú�]_�����������������������Ȅ<�����ɮ����������������������ӳ���������������������������������~$/--),*+-.-*+%.-+Fru`5$+('#&0?D<+(&'/O{~z�ǋo���rp��������������������yU���s��`H@78>EN]be^VPG9'&Dm��e^SEEFECCFD?29�Ŷ��in����������������������xa������������������������Aa�������������Ƴ���������������������������ƧHJt����������������y���������������k��m��s<=CRTPLHGIJJiaV`pWT[^|����������������½����¾���������������}{���|o_D.0Z�~M:27QYKPMI>9M][J?@?>8ATa���L#"%!!)Lmopw��D>Fb������ǏD@<:<;=>AGh����_B@=9;I__c`Us�d8!#2JheC/%#)P�v:33+!&BNN@/#$6WL#*).BW\UMBABELdvfMFC:.07;=<>@BACBA?:6-!#&/ERZYP?889<DVdP;599=D?5>MSSSOV_fhdU[j�����������Ƽ��������������Q1XVI@?AVuR8<DJFA@>=Vn������������~kbir������������������������������������������������������������������������������������������������������������������������������������������������1y������������������������������������������������������e%&E\ZTKB<8412-*))'(%$&&'#$'&&),.,M���������j\���GW��e[�����[o���������������������Ŀ����[U������������fKNLHLU`dF.�����aw���������|wrh_`aW8*+1<R���������Λt���lf��ľ������CC��\8+143/-(! !'./, j��}gr|fT;:>FX���JSeX[kzcPE`fT>>]kUH9$x��<+'&08667;72-384*.#',7H������������������������ֲ��������������q/6y�?3OeX73W���Ŀ��������������]g����������������������RY��ȿ�������������������ʀ=�������������������������ξ������ý����������������������������uKyxtmkgdcb_YUSN8'?LHA1%#-'$$(3BB7*'&(.N��{{������|ti��������������������x\���v��nTF>>CKVbb`YSLE7&&Ah��e^RGGGA;>B@<19������io����������������������vd�����������������������{Fd�������¼����Ŵ���������������������������ťMW|����������������z��������������ඝë���r;@DRTNJHFJIIEXXpltsd{����������������������¿����������������|���ym_C-4^�{C637VWHKIEKYb{^UQHC?5CPd���C "% 'Kkrv~�z4$(b�����ǿ�?A=;<<<>DU�����WDB>:;L_\cc^u{gH$#5Me_@,&#!1h�F1.'!
(NYK8,!%;N0#'*2AT]THCBBDLajaMFB5,/8<>;:==@CCA>=;, *@Va^J:8<;<GT\C7156=?<3=OSRU[o|~z`R\m�����������ǽ��������������R4QYK@>@RY?;=?A@@B;>Xt�������������kbhr������������������������������������������������������������������������������������������������������������������������������������������������'a�����˼����������zysvwt����������������������������·�a!%C[]ULB=8532.+**((&$$%##$'''(,/,Q�������±������q���to����an�����������͍��������ľ����[Q��������ſ�h]cPLYQX`B2����������������Ǻ�����hE0,3<Q��������������¿��¹�������=D��^<.2420-("!!#'.0-!g��iu��������}��l������cA?cnsii��ME3&|��;)$#*22.141,+8E;,(!',1M������������������������ո��������������l,7~|7<M@1))2`����{x||~����{vo_0c�������������������¹�Da���˿�������������������y;�������������������������ν����|����������v�����������������������������Ŀ������_$@s�_B(#+%#$(1<A5)'(+-Q�yx����̏tvn��������������������s^��{q��kZOIEGMT]_WNJGE8&$:h��i^RFFD>;;=<80?������ip����������������������td�����������������������w<d�������¿����ó���������������������������ǥGR|����������������v��������������߲�����|o<@CTUHLIFGJPj������qd����������������������þ���������������������zm\@+1b�xA73A\IEIKRv`RRZeTPMQ<EV~��o>$%%%Plrv�w7c�����ǽGB>>?==?CV�����FBA<9?L]_eb[p�{D(7Qj`>+##*P�w70,'!..,/)"#.60$')0@VVQKKDABGXibMC>3,07<>;<<;>@BA?A<+"9S]YH;79<;DPTI<448;<84>RQORcush_KP[l��������������������������~I2SVH@<?GMF@?;=>@?;AVu������������~i`it������������������������������������������������������������������������������������������������������������������������������������������������!;e�����|nl`VFADIPIFBCCEN}���������ҿ�������������������c +G_\TLD?:63/,,+*)'&&##$%&&((),0.J������ɾ��i\\\WYZULLd�����Zp����������ʑ���������¾����ZZ���������Ġ�a>331We?2���������������ʾ������pT9.4<R��������ݷugpsqomgiv�������BD��c701232/' !#"'-0,k��b����������ƿ�v�����yN1>R�����cBC.*��}:+&%1FGBID;5>IQ: 7(!'.2O����������������������������������½����l(=~{4.0,-+#
M����U<6+(5G^A%(.360n�������������������µ�Bh���ֿ�������������������tD�������������������������̹�����������|�u�������������������������������������a%Ip]SJ&"(%$%(0<=4+('*/U�y{v����rsss��������������������qg��zt��^_[TKIOcl_SMJIH;)!8g��]ZRAGA?<::<:/A�����|hp����������������������wl�����������������������wDh�������������³���������������������������š?U�����������������u��������������ݮ�{���o;AGUSJJHDIIL�����~w[b�����������������������Ŀ���������������������zlZ=,3e�uB72;EFCCI^qKD8J_ZY{l;Gn���o>.)&/Xosw��z8!l�����ľ}FEA?>;;<?Dq���nH@?=;?M\ae\Vw�f0*:Qj`?)$#'Aum=2-& *7471&%4J@#')0?UXXRMEACJYokNB>6,19>?>>>>?ACDCA:'.GVYH:9;>>GW`RE>:8;<61:SRNDEIDJLNNXl���������������������������N6SRI@;>HMLIIEA?@=>EYx�������������kcit�������������������������������������������������������������������������������������������������������������������������������������������������!:^�����~zxtePEBBA=7>?BLW����������¶�������������������b)F_YSLC>:61/-.-,)(&$%$&&&&''**-*X������ʾ�S!!M����}aw���������������������������VX���������֥�iOER[MVli=6��������ñ��������{���ondA,4;S��������ۧ; %$#O�������7I��b613410/&!"#%)./)r��m����������Ʒ�UWQD<PeM2>Hl����tK<'-|�u4*(0OxslofFIfd\K-%>kt)#0&&,2O����������������������������Ⱦ����������c&A�x31CKF3$X�������}}~��4!'*021v����������������������<o���ĝ��x����������������pF�����ê������������������̹s{�������������~�����������������������������������߶E!AMO|T$$*%$%)09:5+'&(2[��vz~�Ⱥ��yrl��������������������h_��yp��kqnbPLUon`VQMLL:("8m��[TOAEC>=98:;/M�����ueu����������������������rh�����������������������s>k�������������±������ɵ������������Ž�������D_�����������������v�Ÿ�����������ݳz�����h5@FWTNLGCGKI`c]bdmbYb�����������������������ý���������������������zmX;*4h�r<608?>??IZSCCAHNX��L6Km���o=2*%2\mpu��w/ !m�����ȻyED@=;::<?El����KA?<:@MZae\WtmQ)*9Ti\;*#"*P�y>0.%'?[]aL)(A`A"(,2=VXVJEBADITgaMB>5,0;@BBBCCA@CEE@8$
'@Y\H:8:;>J\]UQC?:971,7QSMHFGJLNLOYi�����������ǿ��������������K9WVJ@==AHNRTORFA><A\z������������zhclu�������������������������������������������������������������������������������������������������������������������������������������������������.\�������������{rlkmko|w���������θ��������������������\,H\XRNC?<50/.+.,*)%$&'('''&()+-+X�������ª�rqpkc0*S_LS�����Vw���������������������������]a��������ӽ��wddsmdggf<@�������ȧ������oe[S]e`j~{F.6<Y��������إ;%*,)'"!,Y�������1T��V533331.($###'01+ n��p�������������Rfo[Z��F<@EWMT���C7!4��n1*+H|�somUIhskj[=V���)=|c5""&+7_������������������������Ы��������������`&C�z:GbeR2!S�����������i)!'(/1/|�������������������ý�1s��ȷ����v���������������mH������Ź�����������������̵s}���������������������������������ѻ���������������g"(Bp�z4"%*%$$&.42.(''+1[��|�Բ���~qr��������������������kb��ri����|nZN]nn_PMJLL8($>v��RQMCFGA=;<<:*H�����xeu����������������������pi�����������������������pJp���������������������Ȼ�������������������ƖG_�����������������q�Ķ�����������习�����d>BKVRMLDBFJPt����q[\b�����������������������¼���������������������yjV:,7k�h?613?C@<<LVR@KZ{��R78FSt��d>0(# 3ant}��n/ %u�����ʿrIB>;;<<=BY����nAA>;<AOX]aQOclZ,*:Uf]8*" *l�g35/' 7nz{e7$(?V=!'+/>TVNFCCEEHSldMA<4-1;ADGGKHA?CGHD;$,L`aD7479>CT[YSGB<:5-*;OSMEGHILNLO[k��������������������������~D?UVG?<=@>GXjtcI@><E^|������������{gdkw�������������������������������������������������������������������������������������������������������������������������������������������������.o������Ĺ����������~trcV���������ɶ��������Ǭ������ƿ��_+H\_SJB>;53/.-+'()%$&'$%%'%(**..^��������ķ�����J~��hh�����[w���������������������������[[�����������~eD4539:ZfD=�������������Ϳ����������9/6=U��������٤T::;-$*h�������1S��P60CI:*.())%#'/0* u��j��~SEJ��KFb�~n�oq��{P:Ad�����Q?8$:��g3+.VpVHKGamOA_\4]�ǋ#"T_B3$!(-7`������������������������Ҿ��������½����\%G�x3?SWH, 
g��dbRM|z;=od*"%*/05��¿�������������������1������˹�����������������_N������ȴ�����������������˱p����������������������������������ζH"(&,)'0A*.,-&$! %-:5!#%)&%%&)--)%%&*5c�}|���wp��xpp��������������������ki��nj�����u[ITfhUFGLPF5#%=|��YUJHJMG@?A=8-L��°�vbu����������������������ij�����������������������fBr�������������¶������ǿ����ȿ���������������;O|����������������p�Ƕ�����������ݪ���{hva:@JSJIEACHFT��}�uh[]a�������������������������������������������ufR3(7p�a>63?SLA<MjdPQn��b@@9;JXw��];+%!#9dmw���g( 'x�����йrIB>::;<<@Cu���tH?><=APY[[OPotU.,;Yi\8)! $Eug43-$(72@:,%(?L/!&).:RWGHJGGEFRg^H>:3,0<FJJPWREAEJJC8"0VjmD447;CHX]SGHD??9/*>QTNGIJLNMLO^m��������������������������H=XYG?>>ANfz�qG=@A=B`{������������xgcmx�������������������������������������������������������������������������������������������������������������������������������������������������(o�����˿������zwn`WSKGWY���������˸������̧�v��}{������],F[^TKD?9540.+*((''&%&%%&(((),.&Z�������¦^ANmt_g����������]s���������������������������W_��������í�\/ '-5?fr>C�������������ų���������r'-7AX���������������{>)Q��������,]��P3X��^74?P3%$12)*���q���lan��rUd��vvg��zgG5?����nA>B9!9��i2,3Tsm`]_js[F]m^:a�ċ#%8*60!(-7g�������������������������־�������������X Q�k+<X[@,!s��������z��t&"$)02<���ĺ���������������ý�,�����Ƽ����������������ҼWV�����¬��������Ŀ��������ʩo�������������~{�������������������ӱB).-,-<]cA.+***'$(+2DRA"&)'%$#,692('(*0e�~|�����ќnsu��������������������nl��ed�����~VKLTYTIILMA7# H��|URMLPSHDCC?8,L��ò�rgz����������������������lo�����������������������kKu�������������¶������ǿ����ȿ���������������Ba�����������������s�Ĵ�����������ٚQ]ZIo~a:AKVLKGCCHFTdafW[cZZc�������������������������������������}~~~}}~�yhS2+;w�bA64I^T@=KZ^Rm��J9@@8:L]z��Y7'!*@bmz���c!
0~�����ҴkGC>=;;<>?a����`DA?<;AT\[XNKlxT)->ZdS8)"$-b�j42,%#4ISC5$&3;+"&)07S\RMNKGEEQdeOB<4+/;FJMXgbTEDIKF8#.Vvb=558;CKNNJHMKPL=.+:KNRPQMLONKT]m�����������ƾ��������������E>^WH@?DTx�uOD<=AA>Ie}������������wc`jy�������������������������������������������������������������������������������������������������������������������������������������������������#O���п��n`XQXZ\YZTPQQRXT���������ϸ�����ū�������������^-H\\VJC?8560.-+)&'''&$%%$%(')--(Y�������ŢaB* #0;Ea����Zy���������������������������Xl��������տ��qZjbWckur?L�����|������ɵ���������o0#+5AX��������������ά`)X�������~*a�{HE�Ҹ�m/;gysligK, +��������z���v\���z���{MpD36WVRb^jkE59��g,*-Hr~truraBQmmK0h���/@U5!!(,5l������������������������ӣw�������������RU�^-DWN8*w����lcdal}�M""'*/1?���µ������������������.����Ȫ��{��������������ͲN[������������ʿ�����������Ƨu�������������{~�������������������Э=(+*,,>opY@*())(''5p��f0))%%$)9GD3)(',1c�~|{���§grrv��������������������en��`a����}dLHEM[WJGIE<1%"J��w^[PVVPIEFHE</N�����sct����������������������gn�����������������������dCz�������¾������������ƽ����Ƚ������ľ�������<]�����������������r�Ĵ�����������ח�}t��`;?KRHJFBEGG]qZLYfcXZe������������������������������������}|v}}}~|��vgR2+7s�b<46KSF:;JefEjd5:?A>9<O[���V1!1Jcp}���R#!,�����д`GC??=;;<>Hae^XBABA=:@RYXSEVtf9#,AXdS6(##/l{<20+$!0RlgfA"(<E)!().8RZVNJMDAEMf\GA?6*.<IMObrm\GBELI7# *G\<8858;CHGXUGSTNI@0,7OVXgzDGOEEP`}�����������ƿ�������������y>G\WH@@PogG63<;>A?>Hd������������wbely�������������������������������������������������������������������������������������������������������������������������������������������������%<d����tkc^X[YY[XVUTPSZ\���������������ž�������ѷ�����]!,D^]VLC=9530/++)('(&$$%##$%&(*/,\�����������d=Y_L:(&+W�����Z~����������ռ���������������Wf��������ù��|zzag{jlqDO�����`PPJF8@FB>>6569@<.&-5B\����������������Z.g������+b�|K3B[VW%.>����Ư]+3���|����pjca[v��lw���Xp\9.2847Fe�]=4?��\+*'6NXVQSM:7COK94q��~ +c�x*#(/;s������������������������ɰ�Ͼ�����������M
]�c2@A<9)'~����iY@#%.:#!#%(00>��Ƽ����������������þ�(�����������������������ʬHc������������̸�����������ʧw����������������������������������Ϩ;)(+,-<o��nB)**'%.f�ysZ0()%&'+8GD3*()-5e�}zyw�����yoy��������������������hs��]e���ygUMHDISNFB@@>7*)K~�{hpne]UPNPQNF2V�ļ��ii����������������������mo�����������������������aC����������������������ǽ����ƻ������ƿ�������Dh�����������������t�ĳ�����������Իɘp��]8@IVNJFCEGFIly|~{mYZg������������������������½�����������~��������xfL1-;s�`=23?C=:8ETAAH98;>>?9?QU���S+ (;Uhu~��wR-+8�����Ұ`IB@?>==>?:;;;@AA@A?=@SWXTEFEB4"-A^fR1("!$36.2/(#"BnnaS/ )8E*!'*.:R_bliN:@IQ_YHDA7,-@NTf~�|eI>CKH3!#(028:5:>FS^i\SURLJA2-8X`k��U`qilq�������������ǿ�������������xCI_YH?>FJC=AA@>??@?Ig�������������ucgn{�������������������������������������������������������������������������������������������������������������������������������������������������">c�����yncYZYY[[ZWSSPU[W����������ϻ�������������ɰ����X.F[ZTLD>75310-+)'''&&##"%%&''()&Z�������ƻ�b,:aT4%0f�����`���������Կ�x���������������Xj������������mlox~q]urAU�����_RJ:&%'%#''"  #.59^��������������ϗ( *[������z,g��EDo��}V0K����ϤV- 9���{z�sbVHCJedSBRfc6.>-''*0358>D;8.>��X+)"&/4,*11)+07=97x��u*Qf&#'-:x����������������������������þ����������K!_�]1AJRC&*��������XOJB4!##'0-G���������������������%����״|����������������˫Gg����Ȫ������ʰ������϶���Ƞv����������������������������������ћ0*&(*-D���{:&&&%'2jZA\6(*$&'+>MI6)(),9i�{y����ɵ�uhy��������������������cp��Vi��zm\TPLEEHD@??@A:')N�����sc_ZSRSUSS8V�����re}����������������������ms�����������������������_H�������������Ƚ�������ƽ����Ž������ĺ����ľz3e�����������������o�³������������ϓYogp�\2=GQNJIFEHLb�����`XXc��������������������������������������������|vfM3-Dz�Y=4278775687:::::=@>7AQ]���K%!,BZkux|�y[A=C������ЬZIDBDGKIHGFFEEEDCEEC?CUWWQFBA=1" .@`gN2'  #+231.($2@UUD+".IN&#&)-=Tk��yO8GOV^cPIB:01DXi����mL?DG?0%$!
%+,06959=GSai\XTRQJ@728T_u�u�������z�����������ǿ�������������x>FYUC<?CNbdcef_G@?>Li�������������v_do{�������������������������������������������������������������������������������������������������������������������������������������������������%?c�����xkc][ZZ]\ZWUUSVZW�����������о�������������Ȯ���[-G[\RME;9631/,+('&$%$#$#&('&'*)$[�������ÚI")(+0:K\p�����}_��������ѻ�����������������Xk���������ǣ��t���tx�tAX�����aSD7)$'$!%)& "%-5A`����������oS����-"+i������}1r��ER��Ͻp0:s����L((;��slga]RG<AFIDBDC>5-,+)($&/0558544*F��X(("$-,(,/-+,39>97{��u'GK4!(.B}���������������������������͹����������F&f�Y.FaY?)0����e�������U&$&(/,Q����������������������~$���������������������˧?r����â������Ǯ������ϯ���Śp�����������������������ſ��ü�����Ζ+*'&).Bn��F#&&$"'6SE.Hh+*,%#'-?NA0&%*,5j�}y�������smz��������������������^q��Uu��qcYTROHAA@>?BDE6$+R����r`]_YRTTVUT<V�����ui�����������������������er�����������������������XF����������������������Ź����Ž������ż������Co�����������������v���������������f�����X7<JRNJIFGIGNfmfdme[^e������������������������ļ�����������}�����zyyueM1*I~�W9315655778777:<==?>:DTa��zI#,@\npqz�zia[V������өUIFHOXVUYXUUVQLMQPNIBDTVUNB@A:0"!1BadI1%! #+/01.'"!BhkX<("-EQ'!%*+9Xv��vNEOSYigRIGB75Ok�����oK?CD7/(#(-"$**-5659AIUih_ZYZXQC909PZhy�������x������������ǽ�������������p9F^RD?DUnyxzvoXHCA@Mn�������������tchp{�������������������������������������������������������������������������������������������������������������������������������������������������#;_�����wke_ZY[[\[WTRRUZY���ԧ�{�����λ�����������̷���W,I[\SH?:7642..+(&''$$#%$&%%%&*+&\�������ǣjWfx������������{\}������������ù�������¾����Xq��������¤��dByrDGhtj:[�����^SC6'$&$#(+*BF),7?_��������ٕ"@���X"<�������o"r�~CG�ú�7-J����ya;+0w�zmfaZVL@;?BBA?>><4-'&&%&)..234111'
+P��R,)*5FA6<@>79AKMB:���f)Y^)!(.<���������������������������Ʋ����������|C(f�M7We^@')�����������r2#%()3,S����������������������t'����ȷ�����������������Ω?x������������Ĵ�����ö���ɿ�o����������������������������������̑&,('*/F�l('(&%$(/CYivK++%%&(/2,(%'*,9m�~yxx���Ƙrkx��������������������^~��T�s^XXUUTOB?>?@CIC1(-U���wl`WZXTWWWVTFY�����pa�����������������������gw�����������������������YL�������������Ⱦ�������ø����Ľ������Ľ����Žx>p����������������}y�°�����������э�Υ���V8;LSMKIGHKHT�����jY^j������������������������Ĺ����������}}���~~xywscH0+M}Q831555687678489<<?>:DQg��uA  .Famnoz��wupd������ѡUHHKPTW]a`]\[WXXWVVNEGTUUNC?;7.!"3GadE0& "*241-(!:LWP7&"0LS*!&*-7TcnkRAEMSUb^PJKJ:@Y~�����sM?=:2+'(-)"*-03329=IXdfd_`bb\QC9>MUXm�����_s�������������ǻ�������������rCLcRB@@N^X_k]UOHDBAQt�������������s_ir}�������������������������������������������������������������������������������������������������������������������������������������������������$<^�����ykfd[Z[Z[ZUPQQW[[����Ⱦ���y������Ѹ��������;���Y+IZ]RHC<7641.+))'&%$##%'%$'&(,+,a�������ʥdU\c^[RM>AAd����uZy������������˼�������½����Tt�����Ƽ�����oy{7<csn7Y�����VVA2(%&"%"(:e�r+-6Ae��������҂$)1]���-:�������r.z�w<N���.+1R����ˡJ)3���l_\WRJ;8<@@@?=981*'%#&&(02/23110&V��K/,6Xlib_c`__cjdHB�º[%FP-"'.C����������������������������������Ŀ����y?*iF0LbD2%5�����v\C66*&&!"',43_����������������������k(����Ӿ�������ʸ��������Ң>}����¹������Ż�������������u����������������������������������Љ#,)**-J��`*&(()))/j�nN2,+'''&+31)%''+;s�}u���¿�tsm}��������������������\��P��e\\ZYYYTI@?<=BE=2%+X���p`Y_]XX\[ZXK\�ƾ��mf�����������������������du�����������������������\K�������������ɽ�������ø����ú������Ļ����ŸoAp�����������������y��������������ʪ�yf}��T7=NSNJB<DIMo�|xrkcZ^f������������������������Ʒ����������������|ywoaA-*I}Q<22457677768589;>??9DPg��q=   /Gemml~����ym������ȖYIIMQTZekidbdhe\Y_\NFJSSTL@<<8."$5G^aD0&!!$;aJ2.( "<WmK5%"/MV*
!%+.:O\VND@CHNN\\TOOJ;@V{�����fH=86/*)+/##,134338=HZimgfhkjd_M=?OTW]���gdb��������������ȼ�������������k;M_TD>=?@QeeWZ_HAA>Tu�������������q`iq~�������������������������������������������������������������������������������������������������������������������������������������������������%=`�����wida\YXZYWPQTUX\Z�������ȷ�������ķ���ƽ���ʿ���S1K]\QJC<6550.*())(&#""$'%$%&(++'^�������ėS4/2'! $&^����pUz��������ʙ����ƿ�����½����Vx�����½��͢���O'<nvtm8d�����x���xa_eff^WIMWUZ��k)-4<c���������|,`�p���G3s������k.w�u8S���K,1B����ǐC'A���iZXUNA428<>=<:75.*&%$&%&.1012020%
+Z��J/,=j�ypqrlnlmmeE>�¶S*PW*#'*A������������������������ؿ��������¾����x9)pE<KC:3$8����K03"',6R<#&-31o�ƻ�����������������ʻb-���Լ��������oLwoqnsr��Ә8��������ý���ľ���������������������������������������Ľ�������ʃ$)''(0V��k<''')'&9ZLMY3-*$$&*6@:,%'(+;v�}|������uo~��������������������Y���Y|�jhlgbcb[QGA;=A@;5'6c����tjef`\[\[]eJa�����qc�����������������������fz�����������������������VL�������������ǽ�������������½������ü����ĻnGy����������������{v��������������еnUiy�N7?LPLJEBBFH]bcV_xcV^i������������������������ĵ�����������������~zvm^?+-LxE9314567896678789<??<ENg��h5"$$1BXfks������m������SJIOW\bsofcfotbYV]]PDJRPPI@=>:-$4F`bB/#!%=y�G1.( "Dj]N6$#1[X&&*.8OYREBCEJIIWj^NLC73AT^cmd^H:852/*(*($0442338>KZknlmnsrlaM:@OTWc�~czuh��������������Ǽ�������������i:PYLC>?FQu|cdoU@B=;Tw�������������o_fr��������������������������������������������������������������������������������������������������������������������������������������������������"<`�����shc_ZZY]ZVRRSUZ__����������ĳ��������������ļ���T0KXZSMC=7441/,)*(('%#$&'$#&'),+'h�������ŵ���k>' (;NeotzWH{��������̿��¶�������¼����\|��İ�����Š�tJ!'_vfqm5g��������ʿ�����������{��\)/7=h��������ʏyǽ}���:0a������j1��x<c���M00/<chksG)(<���^XURK=1/499;<952-(&#!#"$-1223240%
+[��D**?iy`QNXbeTJY\AE�ĲU0]Y!%-F����������������������������������¾����s63tx9@NQO>#C�������{|~��K$$,2-t�ï�������������������W:���д�÷���ҷ{���������ώ1�������������³����������Ⱥ�������������������������������ÿ����t")(%)-G{�q'%'%%$$5Q^�]),(#$'.AE:+&&'+9y�{|����ɺ�sq���������������������_���W~�{{~ztqmdZTG??=<=7+6g����zpjjd_\\[^dFe�����sd�����������������������bw�����������������������NL�������������ǹ�������������û������������ŷf>x����������������yv��������������׈p����xO<=JQJJC@DCKZs���}UV]m������������������������ĳ�����������������}yrn^<)0MysD:3244566766788::>@>=FSl�}_8"'""3BS]du�����|q������ƔRIIQ^f{o_^X_ndSXZ`b^HHSPOI@>=6,#2L^_@.%!'M�t93/(  Af\U3"$9`M& %'-8IWRIC@ADIOnqPJIA2).320437743133,&&!$059955:;HWjqontxpeWB7@OQUZwo��|t��������������ż�������������oAP]KA?DMnwSWcVDAD:?Yz�������������o]ds��������������������������������������������������������������������������������������������������������������������������������������������������$>`�����tg_]ZXXWXUQSSUZ_[��������������ù���������ü����U2L[[SJB?9651.+()(&&%%$&%$#%&),,/o�������òM6+(%%0LhfhpqpkTO{��������Ω�����������½����]}��������ڳ��wK9PtaZvm4f��������ƴ�������������O'.7:m��������٥��JX�J!+c������d+��r;h���+-*O����rU5&?��w]ZTOH;/,17;::741*$""!!"(/2444530#
+"a�zB++Afi<).?ZX<2L[>E���R*5!!%,I����������������������������������¾����n25w{8@W[O4 F������������@$%,3/{� ��������rz�~~�����Q<���ε������׵������ع��ϋ.�������������ƻ��z�������ƶ�������������������������������������k+*'%)0]��Q!((&$#'0X�~M(+'#$'1>B9,&&&+?y�}z��ý���qq}��������������������T���X�������~vnhbRFB>=?82?l����}rlof`]acea@b�����oa�����������������������]~�����������������������RP�������������ȸ���������������������������ĸfL|����������������|���������������Ɓ��ì�xG8:IMJJDAEHFGp��zb`Zao������������������������Ų���������������{sphQ7',N}qD81/355677768689:@A<<GU`v�m1!'#"%7BLT\s�����yp������̒IIIVf~�]WWXggXS``dllNHWRRJA>=7+&4L`Y>+%"&=vzN2.(!(N^\I+"$9ZL"%&*6FZQI@;<ADP^`PKI@1'*.10323344599.$$(2::9:=>FSenojjlbRLB8>ORQXv�������v�����������ź�������������m;P\G@>CVdF9AC>@AB<A^{�������������ndeu��������������������������������������������������������������������������������������������������������������������������������������������������#?]����~rf^]\XXYXWSSUVYZ_����������Ϸ���ż��������������U-HXYUKE@;740.+)**'%$$$&&%&%'(*/1p������̿�@''$#'')1Wqzzt\O}��������ʲ�����������¼����Z�������Ȩ����}x��|��g0f�������ȶ��������z���|vI)-67m���������3%%%+$"1j������[5��{s����E0)Z����ĝB#B��m]WTNF9/,19<:9541(#"#!#"'.4568874%	$i�<++@ki@10FYI5:R[;I�ĴF"&-K�������������������������̾�������½����m-6xv8<AB:0"O��nlpw��cU��5"&*5=��˶�����������������ɳGA����«�����Ѳ������ۥ�ς1�����Ⱦ����������������Ÿ�����½������������������������������^&((&*0T��N&''%$#)=k�~[+)&%&'4FD9,(&'+=t�{y�������sq���������������������V���Z��������{wslaTKB@B=9Er����wvxnggilk`>j�����xj�����������������������Z�����������������������X`��������ż���ɹ������������ʿ�������ý����ĳ\Cy����������������t��������������仟�ǵ��uB7AJNMIEDEIFVw���~g[_m������������������������Ʈ����������������ztpdM3'0T}kA80/25657777768;:>@=<IVk��d&$$&*7@FLWh{����zs������ˍHLN]��oTUTW[ZYeraa~xWMSPQHACA;,'8MbZ7*$"(U��C3.'!
#?\[C0#$5G4!$(-6M^QE<98;GQcgWLI?/%)0202013569@8-&"$,38:?CEKVcjf[YUPMLF9:LOOTn������~������������ƻ�������������g7N\L@=ALI@><??@AB@A_y������������k_lx��������������������������������������������������������������������������������������������������������������������������������������������������#>a����zpd_][YYXWVUTVX[[f�������������������������������T/LWYRLFA<842.,+*(&%%$&&$$%&&(+-.o������̿�dPKH:<726?@`x{zrZM}��������̾�����������½����]���������������������l/m�����omaWJGO_gdS74@A>Ngg=(-7Br���������r!%),*#!(9������X8��oh����8.'M���ſ�:#D��m[URNF;2/39:99720*%#!""#&/56:<<96'
+#s��:++:Y_SIHLI?BQ]W7Q�ŪE'H[N"!%+L������������������������׵��������������g,=}v5<ACC9 Q�������wu��g!"%-5>��ͷ�����������������ƧDG����˺�����ͷ�������Y��ʀ5����ͻ���������¢�̴�����ȹ��о�������������������������������ҽX#'%%+2W��N%%%%#")FoxkO( )&%%(4?D8+''(,>v�{|������{ul���������������������P���i���������|{yreXHCFB<W���������ytrstneGn�����{|�����������������������_������������������������`q��������¸���ȸ�������Ľ���ʿ�������������­XD|����������������t��������������⻤�����uB8=KPNMGBBHKx����qaXat������������������������­�����������������{xo\G,&1TzmA70234455887678::=@<=JTr��e$"&$%09=DR_s����|w������ȅHMMx��dVZXWUV`kf^c�xYMSQRLGOT?((:Nc\8)""*`�m/1-'!*QiI>,!&1" %*,3LYSD999<MSdkWLHB1*1993211144591*$#&,8>EJJJU\cYPKJHLLC9:NQMLJg������������������Ż�������������h;QUG=;=AFIEFGIFBA@Aa|������������{gblw��������������������������������������������������������������������������������������������������������������������������������������������������#=c����}pe_]ZYWYYUTTS]gnm��������������Ƹ�����������º��U1N[\RKF?<851.,))('%$%%&##''(*,-,x�������µ�����zzweP_twwpWJ}��������ɿͩ�������������Y����ú���³����������k+!z����{QJ:-!#$$$ $"$'2EdZ7'-4?t���������f%#(/+%!-f�������U=��jCv���50,Glr���V*%O��n]WSOH<2149;::631*''#$$',379=@@=8&
+&u�w4,+2Uwohkljigll]7Q�ȧ?G~{K!'*R����������������������������������������e)G�s27?TN.[����Q5 1ciV>%!"'06?��ջ�����������������ãCT���Ϭ�������ɝ��wsld����z8����˶�������������������ʷ��ӻ�������������������������������ҼQ%''()3`��G%(&&%#(=W^^M$!)%%&+8DM7)%&)*<u�yy~�osv~xup���������������������U��ol������������~taKBEDGc��������|tsuvpjVh���zz������������������������_������������������������t���������þ���Ƕ������̾����ʿ�������������ĭXW�����������������|��������������伃gt���{A:=LRNKFCCDHajgfSe`Yau�������������������������������������������znY;2+2S{k:51563556886777:=>?;?M\~��T $'$")17=JXm~�����������ĂLQb���objc\UTY^\Q`w{VLSRRNUcTF((:Qd[8(##%>yr52-' +AIVF*   !&),6IYUC=?BEMQdkYKJC11?DE922202456.)%$&,9?DIGJSZbKFBFHHE>4;NQU\t���{|�������������ĺ�������������f<RRF?=BLcppmfcUC@>Ad|������������{fdkx��������������������������������������������������������������������������������������������������������������������������������������������������%?a����}pe^[YY]^\YV\espdn���������������������������ú��Q"1L\\TJE@;641/*)((&%#%$%$&%%')+--v�������Ĺ��|owuicZ<5SkqvnNO|���������ĩ���������������~X���������������������h+ }����vbpi\D=DFPZ`dWYvuviY:*+3Au��������E3]}{v���������R;��lS���q//*c���cYk9P��m]XVPI;305==<;643,('%%'*.58;?DEB;'$x�n/+-Bvoggihc`di[;\�Ƥ60`eI! &0V����������������������������������������^%I�o,.Ma6'!"g����wpe[[^}{.!&(24C��ϳ��������������xy�5W���˩�������ĩ����������u;����̶�������̾�o��������ɲ�����������������������������������ҹE&(()*1a�|A.)&&##+Fi{qO& '&%&(5?@7)$%'*>syxwvy{wxwtr���������������������`aW`�������������~fSHEFD@b������{{}zqrvwtmag�~gi��������������������������^������������������������~�x�������º���ȸ������˿����ɾ������ƿ�����çNN~����������������x��������������������8;@PPMJEAADIhh\Y\j^Zay��������������������������������������������xmOD=-7Xe>60374677766686:;=?<AN[��zL#''%+.5>KYk|����������̺vFRl���~|�|kUSWULLcz{MMSTRLK^dS0+<UcS5*""+c�c72.& +K]^7( "!')+5FZPBCHEGLMiq]LJC79GRP@11111363.)$
"$-9?EHAFNTG;=AEEE>5/;KPXf���~s}��y�����������Ÿ�������������^9RRG@?Ge�}vtp_G?@?De}������������{b`jy��������������������������������������������������������������������������������������������������������������������������������������������������$Ac����~od`]WYZ\^_dtwb[w�������������������ƴ�������ƽ��P/K^[QHD@96410,))(''$$%&%&&'&'+-2|������Ⱥ�>!&(&!!'1NelnfOQ}��������ҷ��ƽ������������}]��������������������g*&������͹��rrz��������|ohF'(3Bw��������Ȩ������¾��������O;��`�����4/'M�a;_�t0!N�}g^YWQI;204;;;:8750*)'**+06:<?DGD9*+�i0+0OxlOEQa\C=ScZDc�ǜ4Gw`/ $/[��������������������������������������\!J�e0BXC3(i������������( &)24I��˥�������������������)\���̳������ʧ�����������l=����Ͼ�������£����������ɰ�����������������������������������ҴB$&'(*1p�}Q>(%%""*OqbO,!!)&%&)7ED5(&(''@y~ywywswxyytm���������������������W5Qy������������vWMGDDFEAd�����}v{}xpuxwvnchmffx��������������������������]������������������������qz�������ù���Ƿ������ʾ����Ȼ������ƾ�����ŤKIv����������������s����������������������~:=CPQNIDBCFGbhb^dyeZe~������������������������æ�����������������zpeJPC-8Yx`?513546798777769=?@=@Nb��tF#-37;?KV`k�����������ʽwLLf�������xX_cKKVi~pINTUTLOm|Y-,>WcN4*!#(Y�Z02.%$Ne[E' $" %*+3G`OGIIFEGUnr[MGB;<IZVD60102533/'&(.8>E?<AHI=>?ADED=4.8MVYc����rr�yy�����������ĸ�������������Z@UWB?>E][MGIKGBBC@Ih�������������xceny��������������������������������������������������������������������������������������������������������������������������������������������������$Aa����~oc__YVXcl��lO\{������������̲������ѽ�������ƿ��M1K[ZQLE>9751.,('(&'%%&&%%&''),0/�������ȸ�S1077.,+.56KhhhdZR����������Ϡ���������������{X��������������������}d'*�������Ծ�������������vea5#'4Ey�����z|xyqW]xtqnkjm�������J?��`^���x6/,>VC0Q^5$!W��i_ZVRI;004;<;98:71/0-,-/17:?BCC@:*-��f1,2Vtoe]fm\DJdqY9f�Ǚ*@^>"([��������������������������������������Z"M}Z/562/$
"p��qjno�da�p&#$(03N��˷���������������¾��#i�������ɼ���Ư�D������мc>�����ǳ������˻����������ɮ�����������������������������������ԯ=&&((*3f�{`,&&$!#(ENB<7##(#$&)7CB4)''()Iy{xy����|sc���������������������G1c������������pMGCA?BA=8Z�����~uz}vtvvvskbb^bc{�������������������������z]������������������������yj�������˿����ŵ������ȿ����ǽ������Ż�����ÛAR�����������������q�������������������ɾ�v2;BLOHHBBDGJdabj��UWd~��������������������������������������t{}}{ti\KT=.6`vX=51274677768679:=@B?@Z���sD#8DJNU]ks~������������ȲtKL\�������u_faSR\oyaDOUXUMeytQ)!-;WcJ6%"$+Y�d42-%4`aVA&$# $',2G]RJJGB?ANimZHFB9;K_VC:1014431.)$&(%#().:@C<:@KPD?>CHJE;3/9LTYg����hlmw������������ĸ�������������\?SQE?>CKH>>?<?ACB?Gk�������������scdmy��������������������������������������������������������������������������������������������������������������������������������������������������%?c����~qd_ZV_r��}SGk��������������é������ñ�����������N$4MY[TNH@:73/,+''(('&&%%%$&'&*,-0�������ȿ��~��}pg__O@Ohhii[R���������՗����������������|_���������������������c(-�������̽�������}upmhYB:7("(3F{��ۼvVTH;/%#!
 9�������J>��cH}��v/--N~���|i2 c��i^ZVRJ<227<?=::<9521.,-047<?CDC?;*2��c-.6VuxssnnYJWngG5m�ǔ)!Na1",`�������������������������Ħ�������������SN~V+-;B8'!q�������qk��T #$'/.H��͝���������������ý��!q����Ⱦ�������ѓ�������˹_?�����ų������ƪ����������ȫ�����������������������������������Ѩ1''(),;t�o3"%%$"",OnzrC!%'"#&*8D@2'&'(-K~zw������|ul���������������������>;f������������LGJE?=?@;8V������|{{uvxzvskc]Z]b~�������������������������xb������������������������vm�������������Ƶ������ɻ����Ƽ������ƾ�����ÝKa�����������������y�������������������ɿ�v1<CONJGAAFGGEQt��_Z\d������������������������ƿ������������}x||{xpiZMI4/9`mS=62455565676699;=AA@GX}��p>#=JOS[dlr|������������ʪlKIU������`\e\VWcosXBNW[YYjwjU+0>ZbM3&"%7|�H42,%.HOR>(#"!#'-3HWRLIC?=>JbgWGE?88KSHD>622453100.+.'&,,1=C@<:HUbVOLPTOF;2.;KSUd����gco�������������ķ�������������[?SQD=:BLND??>?@DB>Hl�������������qbfo|��������������������������������������������������������������������������������������������������������������������������������������������������&Ag����zr`Z`w���h>Ny������������������Ǿ����������������P2KYWSNHA:61..-*)((''&$#$#$'((+.0��������ª�����tifdI>RgkmoQS���������ɫ��ɺ������������yY��������������������e'-����������������ykfdlofUS@$+6G��ٽrVXO>6((,*$#A�������@C��eDo��p+.-s����ł1!`�h]WSOH>329>A@:;<;762.+-058<>BBEB<*4��d.-/E[]abcaUCP_[E8r�ȓ/0R4"+c����������������������������������������O!R}O/C^N9(#z����d</[~\(!"$(1/O��������������p��������!w���ȗ��������З�׼Ac�̴UM�������������ɻ����������ɦ~����������������������������������М()'')+1Ro8!&&&%#$.Kpya0&)$$$)283(%%%*/N}�z}���ɲwqsi��������������������~7=j�����ý�����\ZTJ><=;5/Z������|zwrtvvsql`ZZUc��������������������������wf������������������������mq�������������Ķ������ɼ����ź������ź�����ÙDg�����������������w�µ����������������ȿ�q3<AOMJHDBFHIS���db[Zd������������������������Ž��������������}yrn_OH6-9\nQ>5134777687569:<<?>;CSj��k:
+#;HMQX_glw������������ɦeJHNz��}��qWV[YZ\fopSEQX[[]eps]*/E[aJ3'!#&_~[41-%,K_Y?*$"
+ $',3DWRMJE?@@L\h_HC<3199;FD:556642352/2$ )/-2>F<?CSjha]hp_OC83.:JQTg����vl��������������ŷ�������������U?QQD;:CNF>><=?ADB?Nl�������������q`fq}��������������������������������������������������������������������������������������������������������������������������������������������������*Dm����qrw����zLEk����������������������ú��������������M3L[WPKF?;840-,**('%$%#"##"#%(,.0�������Ʒ�G601+**'*01RhnstUU������������ѻ�������Ŀ����y[���������ҽ���������`&*�������ļ����������������V&.7H���ڷ�yyj_C()2B;0(#D�������>J��fI|��y-13{���Ū_*e�wg\VQNG:229>>?<<<;76201/28:=?A@CC=):��Z-,(;_okiljkafibN=w�Ȉ$*hi4$*j����������������������������������������J$VyL7FE87'(����wWPHJOQdT%$&*2,S�Ѿ������������������������æ���������s�{gx���ͱMW�����ͮ����������ţ������ɨ��Ż�������������������������������Ӗ%*('&*+DaK80'&%%%.;hvfA &*'&$%(*('&'&(,Q��zzs�ɶ��wrk��������������������}6Du������������m`WJ@==946\������wtonopmlnk_XYVe��������������������������vh������������������������nt��������ÿ���ù������Ƚ����Ź������ƽ�������Dj�����������������t�������������������Ƚ�j4<CNLHHCBEHKp�vfnkS[g������������������������Ƽ����������������zvqeWM2.=hrG=5035657898768;==???FSi��a7#=JLRZ`hpy������������̤eKJRr�����qUQ[\_`gmcHERVYUMVkcF(!/C\cG1&"$:��G31-%?ejS;&#"
+"&+3BYTNKEDGHS[j]E?:0(-2=GC:9:965134201(..;?C>DKQZi|��gOOE;409POR_qn��aTp�������������ŷ�������������TDSQC;9;?=<<<=>@B@=Qn�������������q]fq~��������������������������������������������������������������������������������������������������������������������������������������������������3^����������U:W�������������������Ƽ�������������������M5PZXRLF><83/-+*+)'$#%%#$$$$&)-,2�������Ǳw7'.--( '-2ShnsmPY������������ѽ�������ÿ����w]���������Ư����������e$.�����z�����~�������������K(.5F��������ƨN&8q���i+Q�������:M��iFB��w).0Z���}[-%f�kbZWSND824;>>=<?@?:7444349;>AAA@<6$E��W--3U|ymgikhbdfdS<}��"JP8$4s����������������������������������������D&WtH5=CI=%.������������\!%(*0+Z�����������������ut���|���ҿ�������ն_Hp������ήEZ�����ǝ��������ɰ��������ɦ��Ǿ�������������������������������Ҏ(*(&&,E���oC''%#%2W��o2&('%''((%&&%&'-T�z{�ū���ytn��������������������z5Du���¿�������n`VKDA?821[�����znigiieddgd\VXSb��������������������������vh������������������������nt�������������ô������Ⱦ����Ƽ������ƾ�������Hn�����������������r�������������������ǵ�e9<CPLJJDAEHDXoP_�m[`h������������������������Ż����������������~xwq`OA+*;eoI<2/35545787878:;>?@@GSo��_1!:GQZdjow~������������̣cHJSk�����uTFR\daehXEBPUYTHE`W;#"2E_cJ1#!".exE1.*"4X\J5$%"
+	!%(*3D\VQLHGINR\iXC<8-',29?>;=<;752131.)!)/7><>ELSc��hUT[[G>7/7RNNUl��u\\��~�����������ŵ�������������WGROB99::<==>>=?@>>So�������������o_er~��������������������������������������������������������������������������������������������������������������������������������������������������3t������̽�]8?l�������������������������������������Ǿ��L1U]XPKD9895/-,++($$%%%%&&%&%)..8�������ȱ|DUshV=-<UM;UhmngT[���������������������Ŀ����vY�����ٿ�������������b#1�����������tutc_eegmyvsoY+%17J���������A&`����x,Z�������:Y��hKr���J12�ï��U)dzib[VWPF947>AA@CDEEA<88537<>@AAB?;4!K��U/,2`|cNIX_VBF\dM:���x'U^7 %6u�������������������������������Ƚ������|?+^sG7MUN7$4����}ywgow��4!%'+/-f��Ŭ���������z�y��¾�u������������˕���������ϮEe����̹�������������������Ƣ��Ǿ�������������������������������щ$+'&',7k���<'&%"&2P�qY5*)'&'+(>90%&')-V��z���z��{srl��������������������w2Bn������������p`TMKIF<25^����}qgbdddba_a`XXWNg��������������������������rh������������������������pv�������������õ������ȼ����Ƹ������ž�������Gi�����������������t��������������޺�Ŀ���a6?FOOKIB>BIOyj`��f\^h������������������������ö����������������~yun\G7(+<ehJ<4013436888679:<?@??KTq��Y*
+
+/ASaiotz�������������ɨYHMZiov{}�jH?FVcimkXJHQX]XQdcS8$3Ha_C/#!#<|�K3/)"!DjTK2! % "#()3EWUKDCGHNS\cTF<8.(-16;>=>>;54443.)$&-4=>>?HVttK;BZdRF>71<OSS]}��tqiwnu�����������ŵ�������������MBTPA99:9:<=<?A@A=?Rr�������������l^hu���������������������������������������������������������������������������������������������������������������������������������������������������&o�����Ơd<6\���������������������ϳ����������������ȿ��M6Q\ZQHE>:942.,,)'%$%#"&&$$''*,*5�������ȷ�p��mEIsrOI`jornZZ���������ǜ����������Ŀ����w[��������������������~]!3���������������������l_IO6)08H��������מA)^��гPB�������5`��]G����?/7����Ҿm'cvjc`\\TI=:;BFFGKLMLHA<978<=??AA@<6.J��V-,5cya][hhP@SinI<���u!?XQ- #2q������������������������Ϻ�������������}>&bsA2GOL9#;������\M~~wl8!$&-1/g�η���������uz�����ý�i����ƺ������ѻ���������̬@k����ɿ���������w���������Ğ��ż�������������������������������ф,('(-F���u.%%##(7bjSP8*(&'*/P��R"&&(-Twr������vqm��������������������w5Bq������������p`VQNMF=28d���}thaaa``_]Y_`YVSOk�������������������������qe������������������������ny�������ʾ������������Ǹ����ĸ������Ż����ľ�;a�����������������r�°�����������۫������b5>FRPMHDCDFS^a��ehYZh������������������������õ���������������|wtlUE8'+=gaI;41333358986789;>@@=LWr��T*
+$7Nbmrx~�������������˥XOU]YYdb_dQ?:=Qdpm`OEGTY]VEWg`F#&4M]a@-"!$>�_8.-)##;?IJ0!!%!!#'*2CZZD7;AEKNV^\G>:1),06;@??<753220*''! &&4?=8=FPck`lsyfDA<52;OSS`����oV]{������������ķ������������yNCVPB;9:;:<<=@BA@=?Pv�������������k^hv���������������������������������������������������������������������������������������������������������������������������������������������������#V���ͫ^?X��������������������������������Ķ�������ʿ��I"6S[YRJF>;83/-,))('''$"%$!%%()-,4�������ɾ�b\WQOEDIG>F`kqxtWZ���������������������þ����w[��������������������[7������������½�����������U?<;J��������ۦP*7u��I&A�������3g��]Cq��r,25X|����M* _rgb_\ZTJ>9<CJLQUVTSOFDA>?ACDEDBA=6/N��R.+8b{stkvmNH`l^>D���o0k�W"4w������������������������Ɛ�������������y8+lu74K[T.<�����R^��tq}G"&&-/)n�ʶ���������������Ŀ��_%���ι�������Ɠpjkpu^K}�Ѥ>r����ǽ������ʽ�����������ƙ��ĺ��������������������������������}%+('',GhZ\8$%%#"&7HPkt4('&),=���3#'&(,T�z{����xrr��������������������t7Jt������������la[VNLJF@?d���wke`deccba`a^SWVPk��������������������������rf������������������������m|�������ɻ������������Ĺ����û������÷����Ŀ�Io����������������t��������������צ������Z0<GWNLICADGPl��ecdZ_f������������������������õ���������������||vrjSK9),<aaG;4/344246776789=?@?@KVu�qO&,E[lw|��������������ʜSOMKGFCABE:9:<BJPNB?>ERW^SFc}mH#&4J]b<+#!'7820-)"2J^R1  # $'+2BTZF88=AFNZi_I?90)+04:>>;9974331*+..%$$(*8B<;>IYr���~oNA?<72:NTUZ��������������������ĵ������������zJEYNB=;;BB?;@ELDB>>Xx������������c_it���������������������������������������������������������������������������������������������������������������������������������������������������(Ds����us}�������������������������ý��ý�����������Ⱦ��D!5RYXQJC>:850,*++*('%%%$#"%%*(,,2�������ƴ�7%$$!
(.TiqyqO^�������������é������þ����yd��������������������~[ >�����������ü������������\G?J��������޳v/#-- )I�������.o��VK�ֽb02.0S���v)% \kd_ZXWQE958AKOTX[]YNDB?<?BIGFFDC@9,Q��R,*3NZWUUUM8;MPD0D�ľ^,S>#7~������������������������̾�ȩ����������s82ky7;XO=*;���}X^��nB��2$&$+.'r�ʼ�������������������R'���̵�������Ϳ���ĉA`��̜=w����¸�������������������Ŕ�ÿ���������������������������������m%+)'',DG*)(&%%#!&3Kx�I'&'%'*:h��K%'').X}z���_��vnt��������������������o6Gu������ſ���}d`[ZVSWVH@b���ljebfhgiijkeXMWYXl��������������������������id������������������������q{�������Ⱦ���ʾ�������²����¶������þ����ÿ�Cq����������������~p��������������Ҫ������Z5<HTOPKEBCGBNaLB\i[\c������������������������Ķ��������������zzsmfTJ9**>_[D;4013223577678;==>=?JR`zxV!"9Vfu~�������|������ƕPMFC?988<=:7798<<:;;<FQV[MInxf>"'7Jb\>*$"&@fB0.'"C_\J,$"$%(3DVXC86<@CHWe^D=7/(*.16:><=>;6340-173'%*.4>C=;?I_|{eXMKADC>949LXZ]��������������������Ķ������������y?CXMB<=BNNMCLb^FB>=Xy�������������bahu���������������������������������������������������������������������������������������������������������������������������������������������������3Yx�������������������������������̩����������������Ǿ��F!7SYVOH@?=742.-+,*)&%&&%$#&&'(,,8�������ɸ�tcIF>((7:6:WhqtaBU���������ț����������þ����xc��������������������|[D��~��`RB6A����Zemz��������dA;K���������ɜ\$$## !1t�������0o��UP��w6-11d��̴�A&$`mc[VUTMC736?HMQVXYSKB<879<ABA?@@>9,Z��O,'&+-.,0.-',5:;4H�ƿ_#9���������������������������ҝ���Ƕ������p30mo6<QTG5C��nms��kMk�O "&$)/-v�Ǵ��������������uo���S*���˼�����������ĄL����ʛ5{���ƿ��������������������ĕ�þ��������������������������˾�����d$*'&(+,@A-,&%&$#%)5C9)&,*%'+M���7#&'))Yz{�����
rnr��������������������l8Hy�����������v_\ZWTPUP@;[���yxe`ekorppni\SWYYm��������������������������oj������������������������s{�������Ǽ���ɿ�������Ž������������¾����üz<n����������������u����������������Ũ����W6>FPLLLEABFETknp}|]_h������������������������ó��������������{xvrj_MG2&+@_[G;4001334687769;>>?>AKTm�vP .Fds��������y������ȒRJD@=99;=LhYSYUH;;<;;HUZ`TXntjE$(8M_T;,$$@vn92.&!E]YI,#!$&)1CYWHA<@?AEUg]J>81)+1359CEJF>8771-13,!#,5;>EA>?M`ieN<6@KNKB:48PX]r����yn��������������Ķ������������n<EYKB=;?OPT\pxGBB=?Y{������������{`dix��������������������������������������������������������������������������������������������������������������������������������������������������� ;w������̽������������������������˿����������������ȿ��E!5RXVNJC?;662.+*++)%''%'&#$&()++1��������ĸ��n];,BTS73XgqudN[���������Ͼ����������Ŀ����xm���������������������ZI��t��d�����Ž������������o42:J����������ÝmB-%#2v�������~0m�}V6Gg[;-59|�����F%'_laZWUSMD967>DGKOQOH@;96559;;<<<=:5)
+a��C*'"#&&(**('*28:3L�ýZ'B���������ɿ���������������Ȇ}�����������m.6td7CSL@(M��;/DB,)44&"$%+/-~�Ĩ�������������}s����K.����ǽ��������ҝQl�����ǒ3����ƾ����������������������������������������������������������["(&&*.O|vmb0$%$"$',-,)%+'#&+<t�y>$&&**]�}{~�����crlq��������������������i9L���������û�fWTLFEGE=09Z}���wehsyvssvvrc[]]^p��������������������������nj������������������������w������������ɾ�������ý������������������ĹxAt����������������}r���������������̦����R4?GMKMMCAEHT����oZ`k������������������������­�������������}ytrleWC:%!+@e^D:3/22125688568:?@?>AMZy�oN&+>[o~�������u������ɓPIB>;8;>K{�����_;;;::J]kvrq~�r:"(9O^T:,$ %8tZ90,%#D[]D*!!#$(1BW\QIIB=?FWieI@;6-.8=BPfom]H7993-+&!%-49@FC@FU`ikcckqhXQD92=W]bt����~���������������ĳ������������j7FVI@<;=DUd{|T<BA;<Zx������������zcclx���������������������������������������������������������������������������������������������������������������������������������������������������#D�������������������������������������ƾ������������Ⱦ��D8Q[XOID>9651,++*+(%&%%&%%%&'),+1�������ͽ�7%""'$"'6Xjp{oS]���������������������Ŀ����wj���������������������XO��}�����������Ż���ĭ���y0",9M�����������ť�rix�Ľ������z-i�yYT����B7Cx�����E%+_h`\ZVTND:8;@DEFIJFA;556669>>>>>>92'
+a��A,&!"$&)*)'()29:1V�ĹT'F�������������������������ˇ��ť���������j+5m`6.)(& K��;*,)!"')(##%'.-,��ð������������w������F5��������������j8~������ʍ1����ƻ��������ǽ���������˽����¹�������������������������������L%(((+0h���d+%&%$&(+++)% )($&+S���7&%''/_�~zq������tmv��������������������g;Z��������ʿ�cNJF@=;<:7-6Zy���{py��|zz||ujbccco��������������������������mh������������������������|�������������ɾ���������������������Ź����ƸoDz����������������z��������������ܳ��ͳ�~R6<FOKLKECFJVunf`leZao������������������������«�������������zurlf^Q=+ +Bh[C92/11004688677:<@@?ENZ��oH$'9Qiy�������h�����ʿ�LIC=:87<R�����|D:878=Nh�����\4(9MZS;*!!)Cpj?2,%!'OTB2' !!$(0?TZSLGC>?C]jfFA?:25@JX}���udKB@=7(!$)09EEECJYak�����hYUJ?8D]fl~�������������������ĳ������������\4FTJA<<@Me�|WJFD?;?]x������������zaao{���������������������������������������������������������������������������������������������������������������������������������������������������%H�������������¿������������������������������������Ƚ��@6P[WOHA=875/.-()'&'%##$$&&&'(,*0�������ϸ�S@CIAG>0.5LimptnW`���������������������¿����vo���������������������SN�������������³������s`I* $-9Q���Է���������z��|�������s1n��OT�»u:5;���ƹ�H',_kb^\ZXSH96<BFIIHJFB98896:;A@?>@?92&
+!d��@+'"#*''())(+2<;-U�ɱF(A������������������������չ��������������g);jY4+-42$P��IA:3)'*143(%'(-,-�ľ������������r���Ľ��=7�����������Ѿp3b�������ʇ3���ͽ�skp|���������������ʻ����¿�����������������������������׿B%))()0R^hzb)'%##%(*,+*%)&%',F��p2%%')0a�}{�������qnr��������������������_*W�������Ǥz[D<:99:8683)5R}��������|z|~{xpfhiir��������������������������em���������������������������������Ƹ���ǻ�������µ���ƿ������ƿ�����õk<q����������������u|��������������̥��ͻ��N5?UVLJJDEFJLY]]WjeZao��������������������������������������}xsnjd]P<$#.GcVC81121015776666:<?@=GQd��kA 
+
+$4G]s������vg�����ǻ�GGA;:9:9=Uz���{N9544<Op����}lP)*9N\P7)""0k�]61)#%'+.$"#!$%(1?UWLJGA>=CTgbMCC>45CRi�����lWHIL>%#&+2<HGB?JWe���ykdZTPH?Icpu���������������������Ŵ������������Q/GRI@<=Gf�oRMKID?;D_{������������v]co}���������������������������������������������������������������������������������������������������������������������������������������������������#M���������������������������������������������������ǽ��@;UYVOGC=7661,+)'$%&###$$%&&'+.+4��������«�����sb]U`fiipujQ_���������������������Ŀ����}v���������������������PN�������������wd[M8.&"!!!%*8K���Ҥ^\WD7-!"&V������n0o��QL���Y-13a��ϻe%&3eslfa_^ZQEADIMTSPPKG@@:769?C@@?>?;3)
+&j�|@,*''689091'=KXX:Y�ŮL$-) *G���������Ǽ�����������������������������a$Ao^/2GM<&X��dYQJCCCNWC*$%(-+.�½����������pt��������/A����Ź�����о�����������|2���̽���z�������­�ƾ���˸��¿�ô����������������������������մ?(*)'+2U���C%&%"!%,-..-& *&$(+L��`0&%&'1d�~|}������qnu��������������������\'Ft����ϫwM@A<689996594,8T���������|yyyzwnjjlpy��������������������������fm���������������������������������Ǿ���ȸ������������ƿ�������������ĳj=t����������������v~��������������ƪ������P7<KPLIIDCEGZhkqizlYbp������������������������¤~�����������|urlhdbW<$#-NdP?81022225886767;>?@<DJl��rA
+	
+/?Ql������pg�����ʼ�IG@<::9::E�����X9544:Sl��|lmmR!*<QaQ4)" (Twg71*"';A2$##"%%)1?QRD?A@<=APbPGGH?34BOg~}mnxgOHJL;%%')4BLGA?LUYinmpfk_WXWOERlw|�����~}��������������ŵ�����������}C0JYI><C^y_B?GGDA>=Ce�������������vZbn{���������������������������������������������������������������������������������������������������������������������������������������������������%L���������������������������������������������������ǽ��;9PZTMHA=7551,*)'&'$#$#$"$''(+/.5�������Ͻ�VFE535/227<YfqwgVc���������������������þ����{{��������������������OO�����mhSC*!!&" ! "*6S���ҟ]bZL=5&$((#!-[������f+o�~IS���X-3Bktsg>((9v�zrlihaYNLOSWZWUROJF@>87<ADDBA@=:5(
+*w�u@-*(C}�~w�h]����FZ�ɨF2]v\$ &I���������ǿ�����������������¸����������]"CtX.<F=1$g��daRMOZTUR=&$%(-*5�����y������r���������)D������������������������u/����Ŀ������ü���ʯ��ĭ��̸�����Ƹ����������������������������ٯ9')((+0^��S$)'$#$&-023/' )&&'(Cp�u1%&'(5i�}zv~�����rpz��������������������].5S��ѽ�T@?BA<::8864451-5R��������|zyyz|xnkoww}��������������������������im���������������������������������ż���ǹ�������½���ƽ�������������ĴeK�����������������v{��������������ƥ������D7=MUMLIC@DGBW����[Xao��������������������������~����������zrqiddjZ6&#,J\M@90020023797569;>@A=FNj��e7
+	
+/>Lay�����if�����ʼ�CB=;87999<m���v@8754:Sl~�uornJ,=Q^N2("!/j�g61) #D`J7$ "!#'*1AQQB5697:>EJFAFJB42=EORRIIDFBFGE;,!'(+7GOMHIOW]jkg[_kfZYUOK^x|~������x{mp�����������ô�����������q63IVG=:@OJ:;?BDCCA=Gg�������������rdenz���������������������������������������������������������������������������������������������������������������������������������������������������&S���������������������������������������������������Ƽ��;:T\VPJB<862//-+**)%"%$##&&'(*--<�������͵ySLHH;& ""*;]jtxiSc������������������������������������������������}LQ�����VND:*!&&  "! $,9V���ΡqrcWH7&%.3/')_������g/x�wO4_��J55=NNPTVO8&<��~vrnnj`WW[^_^\ZXTNFA?=<?CDBAB@>83''|�xA.+:�ð���������G^�˫=3apW !-T������������������������Գ��������������\#EuS1;>7-$k�}YI=>GD9DL4&##(.)@�κ�x������������������(L����Ͱ������������������s1�������Ǿ��������Ƣ�ɻ���ʵ�����ű����������������������������٧7)''(+8s�wF)))&#&*02341'!*&&(-c��r('&)*5j�}{������jskx��������������������a6.B}��jKHB>@@;675464561,5X��������{{}||{wop|�����������������������������js�����������������������jy��������Ƽ���Ǻ�������û���Ƽ�������������Ų]L{����������������t���������������ŗ������@8BMUNKHB?CK`���dNTXbr�����������������������Ŀ�������������zsqgafmS2& &/K_MA:102//04676768:>AA>HUn��^0
+	 0<IZm~���zcu�����ɻHF?<98;9=M����cD9644=UgwxnivoH!!-=T_H1%!!/y�O10*#.VUG9%"#&,1@SQ?3378;=CDDDIKC516<@?@BA=<>CCB@3 &)*.7M^VJPWZalldXl�lUVVONcy�~~����v]Y^s�����������±�����������l32OUG><>GS[[ZZYODA<Jj�������������mbfp}���������������������������������������������������������������������������������������������������������������������������������������������������#P���������ɿ����������������������Ʈ����������������ƻ��88PWUOIA;9620.,+**''$$##$''&(*,.>��������ǻ���c7"#,CW_gnvuhZf��������������Ǽ�����¾����}�����þ��������������|GT����{WPC8*$&% " ##'.>Q���Ѽ�����:&7t���J2l������d-q�mJU���B56@\�����J#E��~wssrnc\]`ffdaa_WNF@<<;=ACACA@>80%*z�w7*)?���������~�5e�ʤ5F\-#(^���������Ǿ��������������������ʿ�������VIoM0BF<3$"m�~bQD=-,:HK2&$"&,#@�̵��������������������%U����ʕ����������������˾n8�����Ľ������������������ʳ��Ȼ�������������������������������֡0(&'',2o�z5%%&&$'-48751(#+&%''O��h*&%'*9p�}w{�����|vgz��������������������gD8.QxuSGB87:>9555352582+3[��������}}zwqjhn������������������������������dq�����������������������]e�������������Ʒ������˽����ĸ������ƾ�������TKy����������������r���������������Ǯ��u��zC4AQUKKGBBDFb~aOUZeYdr�����������������������ý������������~wohb^afF)#"'4OaNB9101-,.5966748:>?=:IRt��[/
+-8DXeu��{uj������ʽ|ID>;9999;Hy����L5423=RamfTfyqE.>Q]B.$ #,Xu]21+"/DKS:& !#$'*0>OQC:579:?ABBGJKB526::;<@?;<ABAC@/")*.6D^pYKQX\gomp��}_VSTLM`t|{}����qU[bt������������������������]-4MVG?@EXrrqpl^MEA>Kl�������������l_fr���������������������������������������������������������������������������������������������������������������������������������������������������&R���������»����������������������������������������Ǽ��7 ;S[UOHB<841.-,***('%%$$$$&&'*,,A��������ÓT=-,()>Thi_envti^l�������Ĺ����Ϳ������þ���������ƿ��������������vHT����yWOF9"$&$! %'  &*&)0=V��������ځ(1���խY9~������`6t�uKX���C22K����՚A"F�~xuqsqmc]]`eeda^]TIA;7577?BAA@>:2+"
+0��r:)*<���}�m����}9o�͛-CfG")_����������������������������������������V $MkI-CON='(t�z`SB0%*9ED1$$$'-'M�ʳ����������������¾��]���β�������ΰ��������ƿg<����Ŀ�������������������ʮ��Ķ�������������������������������ٟ**''(,6f�y<&((&%*29=940$$)%%'+l��h+&&',=w�|y������ttj��������������������lRN=IqoLE936787433532463-3a����~tx}~|zvlXXVd������������������������������`u���������������������S`�������������ķ������̽����ø������ſ�������\Y����������������q�����������������Ɯ���yA:CTQHJGBACDSegl}�s[ew�����������������������ľ������������xpd[Y[ad9%&"(5N`P@:41/*+05:96479;=???JVl�W-	!,6E[]ivyvzu������ϿtMF>;8688:T�����>2334>O_eZMbmY4 .@RY?/$!$7|zI./'-TgU4#! ! ""%*1?UVNE7248>@BDEJLA77=@<89>>=@DBA@8'$.:?MkyWJNVY\f���xkaYQNHF_t{sr}���qUZas�����������±�����������T-7OUF@=C^nmke_PDC@>Mq�������������k\ht���������������������������������������������������������������������������������������������������������������������������������������������������'O�����������������������������������̽��������������Ƽ��5:T[UNG@<862.--+*(&&%###$$%%&)++>�������Ҷw&"#+# 2_otvk^o���������������������½��������������������������tES��{��g���|vwy{taCBJNQSdoe1'2<\���������z++���ў:&b������^7x�|M?m��N81O����p;" ?rvrnjmkg`]Y\dec^[UOD<63238<=>?=;5.*!
+9��g3($%?`[Rk~~����c4|�Ε%2M@!)^������������������������Ҳ��������������Q"LiF1EVS<#&v��gRC7/5>RQ2&&#'*'J�Ʃ���������������ƽ��}U�����������ЧH_ypJ\���ɺ^@������Ź�����������������ɬ�����������������������������������Օ#)((&,:k�5%%&$$*8C>996&&*$$'/`pa2$&'(+Dz�}yy�����ntl~��������������������[jpb[geJ<6247763.0320342-4b���yX^ny�}yeMLD]������������������������������dw�����������pv��kZ_g���~Pb�������¼����ó������ʽ����Ź������ſ�������QM~����������������p���������������и������::BOPKKGB@CF\l~���S\fw�����������������������ļ�����������tWWTSV\_R,()#'3WdN>8320..26977799:<?@@KYx��N*	
+ *3GVQ[kroql������ѼsLH@;7568<Gx����65444<Q^d[H9<9'!0ATRB/$"#'`uM2-' +K`R6!!!  #&*2=T]YH3315;?CDFGC<37AD@<;>?@CECC<0#"2@IVr|^LLY^bw��edm\PIHDBZpsq~���yVTW`q�����������ĳ����������|S.9PUH?>==GHEHMOFBA<Mm�������������g]gq����������������������������������������������������������������������������������������������������������������������������������������������������*V����������½����������������������Ÿÿ�������������Ƽ��3#<SZVNHA<985/-,+*'%%$&"$$##&&(*(?�������Ӹ�QVaffdef_PUgpxumZq���������������������������������Ŀ��������������|E]�������������Ǭ����������2(3<]��������ɇ8$V���9*V������Y2{�sHr�ˮ�_3B]nW6-%)#I�wrpmh`VRRYaccc]XQG@9678;>@>>=;5-* :��h7*&A��������fHB33��ˏ%6`N" .c������������������������Ѻ�������������L&UoG4CPB5!)y��`ICABN`cH*$$#'+(W�Ū�������������������y!c���ͯ������͢��ɣ��Ȣ�ʳVF�������������Ŵ����������Ƥ�����������������������������������֌)'((-;v�x2&$#$#*5=99<7)'+%#%(.*('$%'*+J��}zx�ʶ���up���������������������Qz��`^aH:6246640+-0/-251(,c���]MSex���~iTG>c�����~s����������������������~]}����������yjmyl]Pn���{Ld�������þ����õ������������ĵ������ſ�����åTU�����������������t��������������弡������?:AMPLKG@>ADU���n^][fz�����������������������Ĺ���������s`WIJLMQRSB-..$'5`cJ?620/,,28767769;>@@AKY�tO%
+
+
+ '.;AJQYbc`a������ѴkNGB<7646>X����c;6334<R_e`QF>6+"1@TS=-$!#:z�J1.&Hc_L2! !!$&+1=Q_[B43138?@ADD@8/0=FGFDCCFJHGC9/$!)7DQ^y{YQTZaj�qJUa_QJGJGGWjnltufaXURU_t�����������İ����������{M/;UTD=?EVhpoplaKB>9Pm������������d`js����������������������������������������������������������������������������������������������������������������������������������������������������*Y���������»�����������������������ľ���������������ž��4";SVULE@=9842,,*)'&&%%#$%##%'(.)D�������Ӽ�u���������uhpvum`p����Ƚ���������������������w�����þ��������������yD`�����������ø�����������x,,5?X��������ԝM-4/$0`������Y9{�lBn��ȌB0P�����r7!^��~yztldXHLRY_dfa]UJB=98:=??<8862,)
+<��o1)&J��{z{}vjNKZVC��ˊ#>G).i�������������������������Ѭ������������F'Te?3?@@4"(}�g79HQOUSB7*$%$+.%`�Ī���������f��xo�����nj���˺���u��ˡ��֡��Μ�ʱOI�����ķ��{��ƾ����������������������������������������������Ӂ))*)-C��a#&&###)375785)+-##%&4]lO,&'+-J��||�j��wti���������������������P���ESbB;4/0010-)+--/25/%+_���QBN`x���rJEAj�����ri����������������������~b}����������omvyhNm�����xIl�������û����¶������ɸ����²������Ż�����ÞK_�����������������m��������������㺚���įz::>KOLKG@>@?Q��v��n^g������������������������ķ��������oPEGEFGHMOM>/0-$'7SXJ>4/10+,1567767:<?A?@J[q�|R"
+
+	!%&,5CGLSWRZ������Ҳ_GHB>:6466H����xF5433?T_ji`VJA6"$3DVU;.#&Gzr4.,%.JUR3" !!$%(/>OWN;73278:=ACB><0/9ELNJDDHIHF@91,+2:HVa|�bTW\_hlhdc^ZLIKMHDTljd\WTWTROU`p�����������������������qE,7VWD??RmzyrqfNCA<8Nv������������|daht����������������������������������������������������������������������������������������������������������������������������������������������������)W�������µ�������������������������Ļ�¼������������Ƽ�~6;SWTNHC>872/..++(()'#$%%&'(').(G�������ոs8($*18ACKJPVrvsj[n���������������������þ����z���������������������yBi����y�{{oz����������c$*4@\��������ݵ}.'(# #A�������T8��nHz��|C+*U����ƛ7!X�xsrsohWEDSY`b_YTPE:745679;:872/,)=��i/)&-Bemn{�r����jB��Ћ 2q�����Ͼ�����������������ױ�������������~B(\hB8ITH6!.��aE[idJ@9@@."%&++$a�ȸ��������������ʳ���fz����˹�����̟��ц�Ǽ��ͭIP������ǥ��������Ȳ��ʱ���������������������������������������z +)').I��`,%%##$+367562#''%%(1n��G#%*+.I��|}�����xotl���������������������L���=Q]A:3//1..+))+-.23, V��t@GRd|���eOLIm�����rm�����������}����������~_~���������n{yxt]]������vJj�������½������������Ǽ����ó������ľ�����ÝOf�����������������f��������������㸠�����s;;@NNIKH@?A?a�����`Vh������������������������õ�������zT<;;8<@CJMP<+0,"(8W^K=4/10,+16876679<@C?BLb��pI 		! 
0:>B@R\d������ӰaHD?><965:`����T>9524@RbprlcZQ@)&7GUU<,!!%8i�E1+$!B^aH1 % #')/<QUB:8<==99<CC>>:21:FMOIABFIJD@;6546=IXf��bTU[ahmlpv~tQJMLHCMdc`\SQSRONR`q����������������������~rK+<RTB;==CBFHEEBAA:7Mo������������}gdjv����������������������������������������������������������������������������������������������������������������������������������������������������&W�������÷�������������������������ø���������������ƹ�}3 9SYTMFA<9831-+*)'''&$$!#$&(',-*@�����������p75PN0$##Bx�vpk\o��������������������¾�����x�����ý��������������}Ep��y�zU\_iu��{���������m?")4@a���������ЩwD/&A��������P:��sGa���253Eu����x.#T���zvsnj[TW]ba_ZVSI@9635:==<=<:6543 C��]-((R�Ų��������J?��ǂ "1J4!.s������������������������ͪ�������������y@*]c:5CMI32��`PKC?16?KF*#$%*+ f�ƺ��������������Ҵ���a~���Ъh�����ʥ��~E��tq�ϤE\����ο�q�������ƺ���ư�����������������������������������������u)(')0I��b&''%#$,46653/&($&)*P��]&(***Ey~x{������xsg��������������������{Q���5W\@90*+.-+())+,-/1*#Z��P;PYbpy��vcZUEi����ng����������{hn���������y[������������~pVl������sEr���������������������ȼ������������Ľ����ÿ�Kg����������������j��������������ⷺ�Ⱦ��p;:COMJIF@=>Bq}kc[[TZd���������������������������������j>,/2237>GRJ9+-&'2YaG<500/--14656668<>@;DMPeicF
+
+	
+	(069:PWT������Ѩ^KD>=>=86=b�xJOC<8647CWgx{skgZF- (8G[R:* "+W�o61.'!2QXH0# !#%)0;KRD@DGE=89=DC>=724<EJMJCEJHFB@;8756?P_c��`PRZ^\Ux���]FFLLF>Ja_[XRNPQOMR`q�����������������������yK+?VRGA=9<;9==>=>>50Io������������x_bku����������������������������������������������������������������������������������������������������������������������������������������������������'^������������������������������������ŵ�������������ź�|4&>TZVMGC=86630,()'%%""!#"#%&%'+*B���������ǡN6dwS5"#)V��|tjYp���������������������������w�����ļ��������������?m�yx����������y���|k[L>8FE+,5Da����������ҷ�|ZJt���������L?��nIu���;14:i����h+"[��}uwsolaZ^`c`\ZUSKC><???CFEFCA@==:)H��\1*,m������mt���`J��ɀ$H�u5#:��������������������������ʩ������������t;.\`47EHF35��iUJ@646<ID,'%%+*$r�ƹ��������������ė���Z����������Ϲ���������ɢC`����ŧ�������������������¡�������þ���������������������������l&('')1Dy�],'&&#%.45652*&('(*C���@$&('*Hx�||y���Ǣxqc��������������������wU���6SY>5.*,//-)*++,-11(&`�}RGOX^gszrhhbY>k�����oo����������ebl���������yZ�������������xdl������sDu�������½���Ⱦ�������Ǿ������������ļ�����ÖHb����������������|l��������������߲������n7;BLQKJEA>=??7:ALQU_m���������������������������������a:+,++/3<GLF8**% (9[[D<410,,/36855459=??@EN[p��F			
+		
+ *03=FCI������ЦWMB=:<=97<^����m?:758H[m~}une\F* )7EZR;*! '>]6.0/' !J_^B)   "#%*0=QUMJKPH979?EDB?967;AHNUXXVIBD@9743<I[ah��^QUZ^ay��pO:JLONH?H_]YSPNPPPQRYn�����������±����������xH,BQOE=;8:;=<?A??=3/Lr}}��������waemw����������������������������������������������������������������������������������������������������������������������������������������������������'Z�������������������������������������¿������������Ÿ�~1$@TZWLHD>:865/.)(%$#"! #!"$%()+#E�������ʭ`)BT|��qth[n����˼���˽����������������l�����ſ��������������|;t������������|rz|ot������z+,6A`������������ɐo�����������D:��e@>{��<13c��˹�|.a��|xvurfbbdfdec[VLHDGFHGHHGIJGGEC>*L��V.))f������~����MG���v&]i='$;�������������������������Ҧ�������½����r80fa7*2GG0>��u_ZNFEMTQ@*#%&**#t�ð�����������iy��h���X��������Ǿ�������������ě8d����ž������ƻ����������ǿ�������������������������������������U&+(()/I��S-)&$"(054453*))&%+6g��I&&'))G�}{y����psqi��������������������q[���ARX<1+),02/*+++/-.1(-Y�{QFJLZkqoedcc]Bq����jn����������ks����������vX������������}���������pLw������������Ǿ�������ż���ſ�������Ľ����ÿ�>Y����������������|r��������������޳������h99ALNIHEA>=>AMPPafnyr���������������������������������]/-(&&&1:DID4(*$ );WVE:520,*.2577344:>@ABELr��{=
+
+
+
+	
+ *+/>G=S������͟PKD>89;99Da����tA;879J\owpf^E&(8I^R;* !#AQ70,& !;=93,  )# !##'-8M^YLNR>:9<CFEC?=96<GNTjry\FAA<7525FR[`u��XLP\^i�sMBDRRLJIA9I\_[UOOONOOP]s�����������������������uA)@SPD=979:<>@>?@<2,Jpwz{��������u[elw����������������������������������������������������������������������������������������������������������������������������������������������������*[���������������������������������¯����������������Ź��+#>SYRLFA;764/,,))'&##""###&$&((&D������ɿ�tYZe{}��������svhWn������������������ü�������l���������������������t8��|z���������xonp}�������m'*4C^�����������~. F~��������F3��dAT��~923]�����H&!%y�������nddejkjhb_YUQPMMJMNRTSOSKHD,S��T/*/b������M2apeGF���v"$EW`0%?�������������������������ˮ�������½����q4-ea:;KSG/G��Y6>HIGFHD3$#%'-+*��̶�����������������ĲU!����˰������ͭ���������ŝ7i�����ì�����Ļ���Ƶ�����ƿ�������������������������������������Q'*()*.C�}[;.$#$&.20020***%%*A��{6&('((L|�|}���|jvwon��������������������q]���0VT81**.11.+,*,,,.0) 3\�~LFB>Qjme^]^edKp����kq���������������������u_�����������������������mNy�������¼���ǿ�������ƻ������������Ľ�����Be����������������~w��������������ܨ������d48BNJHHB@>>;Pw������p��������������������������������~]6*&!#+6:CID2%("(:TUE;520.,.0245336:;@A<CPZ��l4			
+
+
+
+')8HLKZ������͝SLE>;:::?Gs����j::98<J]s}umg]C% +:JZQ9' )M|o50,'"$''0:<#!!%$).:OZVMM@>@>@DCDC?A;;IPUazwfFC?=:63.6EQYi~��TFKV\elcagz�SEDC;7G[[TROOQRMOT^s�����������ı����������lB-@OJA=979<?LK>>?>2/Jowww{}������wcen{����������������������������������������������������������������������������������������������������������������������������������������������������+X�������������ÿ�������������������ó���������������ĺ�v,#=PVRMGA<651-*)('&&%$"!#$%%&$$##0�������ǻ���½��������ktpd\o���������������������������h���������������������o:y�ehvtv����ug\]^^g}ohtg))2C`����������t"*% <{�������E7y�`I���}52/B``Zg<& #n���}~t`abfkmmllh][UPNNPX\\_\ZXPMG-W��N((6~�����U-LtzwMO�̿n/fI%@�����������������������������ù���������k-3eZ1;BC6%O��RM^`UEGD=/%#$(--,��̿�����������������ǷN'���ͻ�������ǘ���_X����ɔ5l����Ħ������������������û�����������������������������������ܻ@*))**/O��mC)&$#&.2220/))(%&*6k��@&&'')O~{v������woi��������������������q]���0NO91+'+/.-)*))++,,&1W|sPHC@L_mh[SWjiEs����{js����������������������tb���������������������lJ������������ƾ�������ƻ���ž�������ý����¾�;c����������������{o��������������ٺķ����f17ANJHHDA@><������hg���������������������������������c:,%#*89:JRF4%%! *>\SD;401.,*+/24348:>A@=FSw��c-				
+#)>LRN]������͚PJC=;:99;Bp����Q;878>Ibu~zsmhZ7!!-;P\O3&!!+k�T02.'%+4:ITXB# 
+!%')/:P_\XLIKKKIHFDCCGBAGLOTTMEFC??=<5.6HQXo���TEJUajqoo��f@@@>95EZYVQONNONOT_t�����������í����������n@+@OJB<879@OYC:>@=1*Kkuwwvxz�����tdbp}����������������������������������������������������������������������������������������������������������������������������������������������������+V����������������������������������ƺ���������������ż�p/ @RXUMIA<653/0.*(''#$$##""!$#$$ 5����������ü���������yowreWp���������������������������m�����ǿ��������������t7n�kpzmXXfohFHZTB=>IO[dkhV2$)4Ce���������6$)-( !Q�������@9��a?s��z:23f�����`0!'h�����~|qd``ejjgc`_UPLMMRU^_a]\ZTPMF$`��L+(9u�P%"8p����PO�ͻ_+KM'C���������ȿ�����������������ʱ����������f)5eX/1341%S��i\K@8;@E>7*$$&*+0��;�����������������ɺH(���ͷ���̲�����ڼ��ݺ��͍/t���Ƚ���Ƕ���Ī�����������������Ǳ�����û��������������������޹@-))*'1X��[&$%&$&-22222* ')&(/M���3$((%&N|��͵���spk��������������������na��z6OM;2*'+-.+))'),*+,%!JvjSQPJC[ohD?Rke@s����zky����������������������pd��������������~z�������iN��������»���Ǿ�������ĺ���ž�������ü����½�=`���|������������yt���������������˺�����d1<DOLIIEC><9\cU{�{glk���������������������������������lI4-,5><EX^J5*$!*@YP@83240+*+.021489>@??HNz��\'
+		
+
++AKNRf������̕GG?9979:87\����e<758<MewyqnhU0"0>Q[L1&!#(Ovb10,&&/@_r`]H$
+ #'(/<R^c_\`aZVQOMJINNEDHLLHGCBDCBBA=5.6IQ[o���PFO]jkdV}�f9CC?<95BVVTPLLNMMPV]t�����������������������n?,BQIA=86:<LE==?@;/1Onxwtqsu|�����r]cq|����������������������������������������������������������������������������������������������������������������������������������������������������,N���������������������������������ɶ����������������Ĺ�t/$=SWULF?964310,+)'&%#!""!"!""#! 8���������ǿ����������ywwqbPu��������˽�����������¼����o�����ƿ��������������n1#m�p{�st������{ont}������zS%,5Bj���������})$0c��[+5z������;8{`F�ǤL.2-n���̸{1#)p�������{pkhgfe]YVTMEGKKTZ\[^\WSMOOB$"h�}D+)0p�pejiku|~�l@Q�ȼT/\V.%K���������Ǿ�����������������ʵ���Ŀ�����a'9g[037=@)V��S;<71')<F6(#"%-+-��̶������������p����ͷA+���̹����������ʆw�մ��ȁ,w���ɿ���ǫ��ɶ���������¿������˻�����¿���������������������٬00*)()1Uzk/'('&%+4>>>@A9&"('%'+={�z0&('&+S��v���kq��pqo��������������������h`��r6VS91)'+..*&()**(-.%AoeNSTF?OaW>,Hc`Gv����~m{������������u}��������qb��������������|x�������fG�������������ǽ�������ú���Ž�������Ź����º�?i����������������wp���������������Ģ�����b3<EOJHGDB<;;Hs���gmrl���������������������������������v_A=>6CFOa`Q;,&!#,@XN@9210-+*+.11037:=??@JW���V#
+	
+!/?JONg������đKE?;7889:Bq����L:757=OgwxpkbH* -<SZK2%!"/o�S10*#"-IogYXB! 	!$&'.<Nbjhiifd`]YUQSSOGBEJIKGDDFFDDC>9/6DN[u���TO`ikd\��lR^RF?;73ATUROMLLMMPR]v�����������������������r>0EVK@;8988;<=<@A;/1Qq{wpponw~����magr����������������������������������������������������������������������������������������������������������������������������������������������������.W����������������������������������Ƶ���������������Ĺ�r)%>UWRKD@;75311,*+'%$$""" !#""!!!7���������������������~yxqcUr���������������������½����q���������������������o1#�v}�������~xuqt|��������b'+6@j���������e,6�����9-n������4@~}_;584,011S�����_'!2}��������~tokiggfaZWWPQUXddc`_]ZWSO<""j�z@)'2~�����q/Ctyp?Z�ȳO7aQ!'N���������Ǿ����������������������ÿ�����_!<iQ02<L@ X�yWUTVC,2CG0'$!%,*8��ʝ���������ľ���̦�ή@5����ī�������ŔVP�����ľ{*|����Ƶ������Ű�����������������ĸ����������������������������ب42//1221>5254441<LXY[WZP/$+(%'-O��v1''&'*S�|�����rpo��������������������i`��v7UP9/)'+/.*&*+,+)+,$@heQSVDAKZSH5:TVC}����{gy�����������yw���|�����qa��������������~~�������eL�������������ǻ�������ķ���ż�������������ùy<l���������������xt��������������ڦ��ĵ��U4<FMKHHE?>?@���aGQjlk�����������������������¿���������uhhH4VPVf_R@10(&.EVNB:200.+*-052058:?@=?J[���V
+		
+-@LQGb�����˽�IF?977779<`����O9556<Ohv|xoi_C* .;V]G0$ ",n{C41,# +IWYb_@! 
+!$%(.=M_noppmifd`\[[XRGAEKMOIGHHDBCDB<7>O_h����USYdpe~�tageMB?;64DWXSOKLKNKGGZr�����������ë����������uA6HQH?98979:;=>@A:05Uuyxsmfegy����g^gr�����������������������������������������������������������������������������������������������������������������������������������������������������+W���������������������������������ǥ����������������Ĺ�s''CWWQKGD=93120,+*))&%#$"!" !!+l��������������������|xvqcVw���������������������ý����o������»�������������n1%��t�������wiikkpt����y|��f+,6<k��������k*L���͈3.l������8I�|]</011/.1Nlu~�[3&1~��������{rporstskf]UNRVVYZ^[XTUREI@#$i�u<+%:���su[8T����H`�̮K!AaP'*Q���������ǿ����������������������ÿ�����Y@eL.6NI-^�tZTNO6.5?<*%%#%,+:��ä�����������u��ʚ�Ф05������������ăV����{_�ʿs*�������������°�����ú��ÿ������������������������������������ؠ1==AHRSQTY]UPPRBI_gikmk]-%+(&)-T��p*'%'($W�}y|�����hrol��������������������ig��m3OM;0)')+.)(+,++*--">eeXWTKKDQZJ72=E8~����uj}�����������v�wq|�����o_�����������������������bI�������������ƺ�������ù���Ļ�������þ����úv:k���|������������xu��������������ѩ���Ĺ�R7>FMHHIEA@BC~~dSR_`dm�����������������������ü�����������o==NGQZZSD:8&$/DUN@7113.**+042147;?A?AIW���Q
+ .EOTQk�����ǿ�LE<777558D|����M6556=Qfx�xmeX?& 0;U^B.#!$-R{X3.)!!*BfwjP7! !%&(-:Ncpuxsrolidcac^XK@EMNNJHIHEFGGC@>Kbt�����[R[jicyl`bggMA@<41?XWSOIIJOT]bcp�����������ì����������uKHMRI>8789:;:<?AA:29Yqwvpkb]_t����h^jt�����������������������������������������������������������������������������������������������������������������������������������������������������.\���������������������������������ȿ����������������Ĺ�o)&ATXPMHC;7410/,)'''$!!"!##%  (i��������������������{wuqbVy���������������������ý����t������ü�������������e+#w�p�����nYTKFFRPPWSiifz��e',7Bm���������y%<��ƠG0t������2G��[<.-22104s��zVha) 9����������}������znkffdY\\``b`a]ZVa\5%l�n6)%@�y2"(,M�����Bg�ͯE4lxf+(W���������ƿ����������������������¿�����VCiJ1=?,(a�v^I?<.,8DG1#"#(+'E�����������ȥwe���{�ў1=�����������μ���ӷ����Ƚp.�����î����������������������ȼ�������������������������������ٚ4FHLTRV[\[XSY[R8Ikruwxtd1&)(((.Z��s-'%')-Z�{v|�����son��������������������gj��m4SM81(#'+-+)*+**)./&=ose`_^L=DLC83793�����xh������������|�yy�������k`�����������������������`N�������������ź������������û�������������·tBx����������������vy��������������˯��Ĺ��S7;DJEGGFACD:j��uopeep�����������������������ý�����������d@5'4JRUXC82$$1FXK@7113/))+/212389?A@@HX���D
+
+/HPYet�����Ⱥ�HC:976657>u����I8666;Qk|�ulbP<$$1C[YA," "5�|F0-& !+UlaSJ;"!"  %'*.9Odu{{vtsmjhgecb\Q@JQVUQPOKIKJHE?<Mk������[NWXPVbpvv}V@>=4-@QTSPDK^|���������������ë�����������kWNQG<779::;===?A=5=_pwurjbO^p���hbku�����������������������������������������������������������������������������������������������������������������������������������������������������+`���������Ȼ��¿������������������������������������¸�m&(ARUQKFC=9410-+('&$###!"#$$# )k��������������������zxtpdSz���������������������½����w������ľ�������������i.&p{m�������vjmy~����������X%,6@q��������˔9<\H&?������5J��Y;*-12100DiR9kG($ J���������������������xrjjoorru}z~~z<(q�i9*$/S< ',1V����|6m�ʭL5N4*]���������Ƽ����������������������¿�����SFdE-&$&#`�qRJEBDFU]I)"#$&-&P��ư������̨o�����v��Ӝ@��������������Ӻ������ĺl*�����ì������û���������ľ���õ�������������������������������֏/TZ]foqspqlklib]]y�����q4&)''(1b��g&''('+a�~x��Ǿî�nns��������������������ei��o3ZQ70&#(..*(),*++..$;fwlej\<7?EC:9<==�����tg������������~{���������oh�����������������������WO�������������ĸ�������þ���ø�������������ĳm<u����������������pz��������������̩������O4=FKEFGGCBAG���o~o\ck�����������������������º���������}c<"!-<ISXB45''2IWJ?61230-*)-010159>A@@IV��y<
+
+0FV`Po�����ȸFD;754458Ag����N8765<Sn~}sj^M:$$0BWUA,%! /Q:)/+% !0U_^XS; $&)/:Mesyzvwtmkjigfhb\Zgnic[VSMNNJGD=5@Yn�����UFET]m����{aK<9500>RVRW_r���ï�������������©�����������vQOQD;86788;;;=??<7B^outrkbY]n|��}e^jv�����������������������������������������������������������������������������������������������������������������������������������������������������*[��������ҽ�����������������������������������������÷�m'%BVXRLE@>:52/-,)'&%$##!""##!"+v�������������������yzzxtbTv���������������������¾����v���������������������r3(jsezxv}��}rs������������2$.7Ft��������٭m+$&$+a������|2O�~T6)*041/0It�xtN,)% Q������������������������}��������w:-y�h6(#""#%),.N����v8o�˯](\���������������������������������ÿ�����O!HcC+#"#!(p�hBHPTPJUH0!"#$'-+T��Ȳ������ʹ��ʹ�����ϓG����Ȳ������Եlfrx����Ŷb3��������������ÿ����������������������������������������������щO��������������rg������u3&(&&'1W��n(&&&'*d�z�������oln��������������������]n��e2ZN:/&$(,,)')+++*+,"<hodacK;6<DC?HNJD�����ti�����������{zy���������mi��������������~z�������QY�������������õ������������¸�������������İfCx����������������k}��������������̢�����{K5=GMHGIEB@>Oiiu~�x]fo�����������������������¸����������|a6&9FKSVB@:&'3ISI>7132/-+),.-.259=@?AM`��r6
+
+,JWTCz�����ɵxLE;642258N����tA9645=Ul~�vl_Q:"#1BZV>+# $6T=3-& +8`fXI* "&)/9Ogu||vwtokjlkljhit���~ja`YPMKHE?8?FP|���uIIReejul_LUaB754/1;QXWa|�������������������ì�����������lNMQC:98578:;:>??<5F_ovtqmcWZky��}fbhv�����������������������������������������������������������������������������������������������������������������������������������������������������)^��������п��rdbS[����������������̻����������������ø�m)%CRXSLE@<9420.+'''&$#""!""" !,o������������������{}|zue]z���������������������������u�������½������������q0+gkYnsehstlfilo|��������n5 &-7Jw���������ʝe8$""R�������x+J}R7()031/2��̻��\)$U����������������������������������o0.x�a3&!!$#&,.W����yAy�˭R285*^���������ƽ����������������������¿�����J Oc>($!"#q�kUQQJ7@C@5%"!"'-+]��ʫ��������ʷ�������ɋN���˧|�������mv��º�����Z3��������������������������������������������������������������ЃW��������������hZrz���j*%*&%(7o��_"&'''-d}y�������rms��������������������_r��b9[N8,$$),+*)(+,*'**>b^OTUF;8=DIUX_RD�����wm�����������fn{����~����hi�������������}z~�������SV�������������ŵ������������·�������������İaEw����������������r���������������ɏ������I2=HNHFGDAA<@����cUer�����������������������������������{^<,4GNPQPEC7%&3KRG<50340,+,++,-17;>?=@Kh�_4
+	1CI@D������̺tLF>633346C��v\K78533<Yn�~ul`S<!&4D[U;+#$=eyK2+#"8aQF6"$$!"%&).8Nex�~{yvpkkmklljn}�����xjaYRMJF?9=AZ����sGFWaa\_fe`xf8454-/<SY^y���ƽ����������������������������lKLPG?899899<;=>A:8HbrwurmdX[cs��v^alx�����������������������������������������������������������������������������������������������������������������������������������������������������-^���������ǰ�����������������������н���������������ö�n*)CQVSLEA;9640/*(&%&%$$!!!#!! 3m��������������������~{vaY{���������������������������t�������ľ������������q,,mlZtw^fn_YJCJLO\[[ca_SNKPB'09H{����������ƣ~]GFn��������t0J�{S9++/0211}���ǫX&$"Y������»������������������������~|wb%1}�^0& !"#',2]����q>|�̨O"Ox�P""-^���������ƾ����������������������¾�����F#J_;'" !,u�uh^RJHT^L@)##$*.$c�����������͎s�������ɇT���ɬ������ͫ������ƙ�²W;������û�������������������������������������������������������zWw{�����������u>@^isyc$%+(%&,W��o(%(()3i�|v������ppiw��������������������Yv��`=YK6,%%)+-+)(*))'-/  MuXFIHA;88DOX]`AA�����tm������������~�xur|���di������������xo~��������VU�������������ų������������¹�������������Ĭ`Gy����������������t���������������Ƨ��˾��F7?IMEFGEB@=I�yc_W_[dv����������������������������������~z`KA?INPPLE?5'(7KTE<4122/,,-+(*-3:>A?:AKVh_W1
+		!2=7;J������дkQD<62355549D67456322?Ym�|sjaR8#&4FVR8(#!%K�l60* 88?E0 !"!"%'(,:Md|��~}ypnppmmljjs������|l_WTNG>7;@Z~���mKDTcb_gr}�xE6676/.>S[f���ʽ���������������˿������������fDPPE>98889:<<;>B:;Idsywtpg\Tbn�vZbmx�����������������������������������������������������������������������������������������������������������������������������������������������������,_���������������������������������ͷ����������������¸�j+*BRUQIDA>:762/-*&%$%$%"#"""" !-i����������������|{{��}xoRU{���������������������������t�������ÿ������������p)'`d]psy��wphjjjv����������Z$/9Iz�������������������������t+N�uS9,+/0110E{����=(&%c��������������������~����������~zvs]!4��Z1&  "$)/?{����[:��ʨ7$U{u<"-O���������Ŀ����������������������������}C'R_9% !"2w�t^OC=6<DA4&$#$+.%m��Ϯ������ҝr���������u\���ǯ������˥������Ņ�ǪP>�����ű��������������ƿ����������������������������������������n7;<C@HEDA@>;;72,Kh�����u*'*'((7���E"%'''/f�{wsytrqnsqiu��������������������Yt��[:^N7,$&*,+*)*''('*, )`sU=CCFB95HTV]Z>F�����tj������������{��vnmq����hm������������x����������TZ��������������������������������������������ZP�����������������s���������������Ƭ������A6>JLDEGIC==;^fmx�zWbv����������������������ÿ����������zl]C;EMMLIFD7),8KSB;5221-,,)('(-4:>>@<CLRUYX,
+	
+
+!'*$$8������ͱoOD;5322246?NHZZ;6303@Vjzxpg`Q8#&4HWR6'"##@kn?/**?SH2$
+!%&).8Lf~��~{srromnngb_l~������yf[TLD=?Jivtyt_MEWe_XbwykTOC895/.=Q`s���ɺ��������������������}�������lNSPD;:988:<:=A?B=<Ngrxywsi_]_i|tadny�����������������������������������������������������������������������������������������������������������������������������������������������������*b���������ʿ����������������������ͽ�������������������g&$BRTNIF?<9530-+)'$$%##"####" ,g�����������������}}~ztmaZ|���������������������������w���������������������l(/eibjq����~{{r{�����������S%/7Fx��������j868;9046R������m*HnjP9*)11114a�ð��L*(+u������������������������������|zunT#6��V0% "%*.Eq����_<��˔"DcL (U���������������������������������������|>%Za5(!"#4y�q]I91)*,+$""$$),%y����ž����đ�����ɮ���l_���ȶ������ʮ�������k�ɣKE����ƶ���������������������������������������������������������c*0..)+*+'(+*)((2U������`!'*)*-5_k[;&&''%/i~yxzxrv���njs��������������������\w��W6cQ6,$$(-+('((**)+-!3ksXHKSSK>>OWX]VGH�����vo������������}��{pry����jn�����������������������R\�������������ó������������¶���������������WZ����������������l���������������¢���´�@6;JMEGHGC?=J�����vWe{�����������������������������������xm]E@GLMNLLH8+(6NQA<5121-,+(%%(.48<>?<AMSVYO)				
+ "!A������ǬrSG=633337Bk���{@4213?Qcooic[L8*6IWQ7& #(Z�q:0) 8VYJ3 
+	
+ %&(.9Mg|���zsqrpnpniaUVeiw�����~m^RG=DYbcbhfRJGZfaWVllu|oH:96/.<Rbx���ʻ���������������ɿ������������qKNNC;888::;<=?AA=>Qmvzzytla^_hw~qffn{�����������������������������������������������������������������������������������������������������������������������������������������������������)c�����������¼����������������������˻�����������������f&(CQUQLE?<92.-,+*(&&&$"!$$#$#" .e�����������������|{{vrreY}���������������������������u���������������������n%4ps_lt����wplnp����������|3&-7G��������X%#%$  %I������l%ChgO8*+12208��ŷå@**0�������������������������������~yyumV#;}�W.&"%)*<i����`<��ʇ"H^8(W���������������������������������¾����|8'X[4'#!#<|�qbPGB:54,%$ #$),${���������Կ������ͦ���db����Ƶ���������|xutw��ĤCH��������´�����������������������������������������������������\*-366549+/<997+0FXV]^bb9(,().<ang@%%%&'2k�zvv�����upmx��������������������S{��U;jR8,$&),*&$&)+)'+-#2luVJPPNJ@FXU\_\HE�����vl���������������znny����go����������������������O`�������������¶���������������������������§N\�����������������k���������������Ȭ������>8?HMEEHED@;Ksm`i|uae~����������������������ÿ�����������yk]JCKOOPQSJ7)*8NQA;5000-**)$%(.58;A?<BMSVYL)
+		
+
+	!F������ƫmRF>83321:[����o93212=KX``]YRH5*7GXM5'""&Yzb//(!AXRF.
+#%&.8Idy��~}vtvwsqqqjaW^mlinv����uiTE;?MYZ\_]OHEVc_cv�~�q[E:64--=Tc}���ʺ���������������ʿ������������uQQNA:88778:=<=A@;>Uqy|{ytnb`\hy}jafo{�����������������������������������������������������������������������������������������������������������������������������������������������������)b������������ø�lr�����������������¶���������������µ�d%(DUUSMFA>82//,*))&#&$"!"$"""!!2k������������������{yvtsseT���������������������������r���������������������j&4plZmu���xPC@EJV\]WZPHG>;'%,7E���������]&"%"'N������j&GlfK6*+02/.5dta��N&++5�������������������������������}y|{s`$A��N)%$%'*9j����VD��ň&#Qa4 ,]���������������������������������������y1,QP6&" !<y�hOIXYIPSA4'"!%*)$|��ʹ������¤������x�Ͻ[h���˵��������ƶ����ȿ���@P���ž��������������������������������������������������������׾P/3GQMRi\/PnpkL*.DMPSMRM1!)-)),/L��T$&%&$2e}zw��ʱ�rtsgy��������������������N��HAkQ:-&(*,*(%')*)'++$1n{WCEDHF>CWY^bbEG�����sn���������������pcar����`u����������������������Lb�������������´�����������������������������OX�����������������o���������������ں��ɿ�zD9>IMFCKFCA<?;3An�f_h~����������������������ÿ�����������xhZHGPRQRWVP3(,;PRC;3.12..+(%%)/5:>A><DMSV\M%
+	
+
+	
+ I������ǫdMC<953/16_���~_73003=GQVSOMID0+9JWL3(" 'Jzi7-'$HXTE+
+!%')-5Kbu}|zvsrqssspriffw�xhc]\kxzqeTC:?P[TVZYIAEQ^U_f[dZUUF:74.-=Qa{���Ų���������������ɿ������������qSUPD:56989:;<?AA>@Zv}|zzvqhablwxmbfp{�����������������������������������������������������������������������������������������������������������������������������������������������������+`����������Ŷ�jUk�����������������ʹ����������������ö�d&)CSWRLHE>6310/.*)'%&$!"!##!!  6n������������������~yuruxgZ���������������������������s���������������������f$4i_Tqw���c,'*)!!$ !'$%-9E��������᷅xqjcF'"2d������g(ElcH6**/1/-.+ "84)**;�����������������������������������|a$F�~M.%!$')3f���a6K���#CQ3"1d���������������������������������������r--KK3%!"!E��hZT`WLUUA."$"%*()��̴�������ʹ����~��ӻSo�����|��������˵�~����@Y�������������������������������������������������������������ڻE25L]o��O3gwi\@*4HORUWWM1!(*''*A���?"'&hz{������|th}��������������������P���JGiR8-('),*(%&(+)',+#B}�Y@ACKG?JZ^`daCG�����ok���������������icev���[w�����������rt����������Lh�������������ô���������������������������¢M[�����������������q���������������İ��ȷ�u<6>HJFFHEA@?>Cp���i^h����������������������������������~rcUJKPQSWZYM0'-<QTC;3033/-*'&%(-5<>??=EMQV^G 
+	
+
+!V������ǦZJB<942024G����k<5012<FSWMB@B;+,9LUI1&!!-l�R20'$GWQB)	
+!%(),5J`wzzvqpnnpnmkkiiu��w_\XXZZ]a^NB7>W[NRSQE:AJU\ZTSTNWPA;93//;T_x��Ź����������������Ǽ������������pSSOC;987799==>@A;@]w||{|yskfgnw{lZis}�����������������������������������������������������������������������������������������������������������������������������������������������������+]���������ŞaV��������������������ƿ���������������µ�b#(DUVOKFC=6530/,((('&$$$#"""" !:n�������������������xtuwvhZ{��������������������ſ�����t������ſ�������������d"2`_\nv���y]L?FIR\]^cpsryuD&18I�������������ȶ�d-T�������e'LlcJ6)*110,*&!(.*&)*/<������������������������������������c G�}K-' #'''.10525T���q3]c3!.a���������������������������������������l-0SN2&!""I�{haRF941-%  #"%+&)������������ʽ�������ƴJr������������̳��roz�����:_����¾�������������������������������������������������������ڲ?/0Cv��Q$6QMO\>*5HQTVWWN/!*)&'(9i�p8&&&&&4k�zwy}����vlc~��������������������P���KKeQ5,&&*++'%'**((*+"G��\@AKME<J^daa\5N�����qn�������������zsosy}���]x����������wbc���������yDk�������������³��������������������������ÿ�O_����������������n��������������ᷣ�����s97?JICEGHB?@Dz���sl]e�����������������������ÿ�y��������zk_OJIMPQ[_YE,&->PPC:1/13/-*%%%'-5<>>?>EMUW]@	
+	
+#f������ÛRHC<983238c����k74311<GSXL@;<4&-9QZF0'! !MP7--'$>VXA' $&)-6H_wz}vrrrpnliigfep�]SXTRQQRWUMB8<UPHMNJD>BLaYRXVR]fcD:7410=Ua�Ĺ����������������ý�������������mKSPB98668:8<@BCA=Ba|~~}}|tmkmouvhajr�����������������������������������������������������������������������������������������������������������������������������������������������������+b��������������ľ�����������������κ����������������³�a#-DRSMMHB=8630.+(''&%%$$#"!"$"! 6q������������������urrpod^���������������������ſ�����q���������������������a#6lf_pu�����zjo~�����������E$09N��������������ҽ|'7j������]%Kq_J5)*100-'#"'+))+*,@������������������������������������cG�xJ+& !%%%%&-698T���i%:G(!-b���������������������������������������h*4^P1&! "MxpU8.& ! ""$*%,���ż����������ɿ����Į?w��ο�������Ȟ_EZ�����ü�4b�������������������������������������������������������������ܪ7-2Z~X<7,1UoqZ-(5FQTUVXN+ )'')*?l�v9%&%'&7k~zww�����wph���������������������Q���BDcM9-''+,)(()*)'(** B��]<?CB>:Ndda^U+U����~rn�����������xqv{}�����|^z����������d[i���������vHk��������������������������ž����������������DV~���������������|m��������������ᴓ�����p88>JKCELHCB@VsdQ[yn]f�����������������������¿�{�������|wk[KHHKKNZ_WC*%->OMA90/1410)%$&)-6<>>?AFMVWYA#'o������SKF@=:856:k����_56314>IRVJB;80# -:QWF1& !%.OJ0.'
'KYO;%!$&'-4J\p|�zz{vqppjkjf\el\RZYUUSMKOQOE73FBDIIGA>BUWISftz{tfB:982-:Sa����������|���Ŀ�������y�����������bGUOC:756899;@BA@<Cfz~~|{urpporte\is�����������������������������������������������������������������������������������������������������������������������������������������������������-`���������������������������������ͽ�������������������a!)EQTRNJB<830.-+*&&%##!!"##"##""$4u������������������{sqhlobW���������������������Ŀ�����w���������������������a 5m`Tqy�����ysw������������A(/7K��������������ϸZ"K������['Ol^H4*,0//,'  $**'(+)A������������������������������������dOwF+$ %$#$$-784U�̹f$KW'"/f���������������������������������������e'6WM-#!!!Hwc6''! !!$)&5��ȹ������������{{���ϧ8#w���ý��ǽ��Ƥ�����������1e���ý���������������������z~����}��������������������������ڪ223GfjppP-NmgQ5*5GPSWWTH& ((&'+>w��<'%&&":lyw������wqi���������������������Q���EDcM</'%*,*('&('(*-*M��_;<977:Xgc`[F0]�����pn����������������������{]~����������������������wKp������������ž������������Ž��������������E_����������������zn����������������������p78=KMDGIHBC=Xly���]_j�����������������������¼�}�����~|ytfVKEEILPY[S?)%.?LG?921340/*%%&),4:=@=AGOUWYH2($$ +y������ĝTPGBA<975:g����U24205>GOTGA<7/! .>OTB/%!"-VcB0.%#:NW=% 
+!$%'.5G]p���vtvtprnfZWMGW\^__ZSPQTRG2)38AGHFB<:=BEZivjne@988/-<Re����������q~������|vtpqt����������w\JTNB<8679::;?@@@>Fgx�~|yvvropndahu������������������������������������������������������������������������������������������������������������������������������������������������������*`�������������������������������������´������������´�b.FTVRLHB<710/,)*)(%"#  !#!"#"$&':n������������������zttvxscV���������������������Ŀ�����v������þ�������������_1[VOm{����zlhox����������W$%/9J��������������ʫtW0H������Z%Lf^G4*,111,&! #)(%**-G���ÿ�������������������������������\X�t@("  !#'%$%&-673Z�˸_%U;#0l���������������������������������������c"7\K*# ! Ox]+$%"!$(&5��ɼ������ι�id������͢1%}���ź��������ƾ���|}���2j���º���������������������mq��qFk��}uzzyqz�������������������֠,6En����F:q{w_8)6GMPRPRD" )(&*.R��o3'%'&!>q~xuwqtk��ng���������������������R���FIdO=0%%)+*'%&()(,/+X��a;<3127ame^R=2`�����nr����������������������|\����������������������qIv������������ƿ������������ļ��������������Fj����������������{q��������������Ὣ�����l35=JJDGHGDEAIp���a]ps�����������������������½�y�����|}zqfVIDDGKQWWM;'%/>KF>9213200+&''*-4:>??@GNTZYH6012010-*%""#5�������ɗNLHC?<875=e����Q33215?GPPB=;5+ />SXA,$ !&GC0.,$.T[H4$
+#&().7Haq����|yuttutpi^PAQ[[^abaac^XN@+"+6?GGD@9:9;>Lc�hdyY7:;80,>Sdy�����|��|p}�yu{ywtrwups|���������xaOSK@:88888:>=@B@>Kk}����~{ywtoni``iu������������������������������������������������������������������������������������������������������������������������������������������������������-a���������ʿ����������������������������°�������������[.EVWQNHA<71/.+))(&%#%#!""!""!$*&<p�����������������yvuntqdZ���������������������ÿ�����r���������������������Y2_ZXjx���~C1.;@>:77430**(!&,:J���������������ĸ�Ca������V%Gf`I2(,111+& %*)&)*.O������������������������������������WS|m?(# !%'$"!$-685b�ƺU'-$4x��������ɿ�����������������������������\!;dG*$ U�Z'$$ ##&(%9��˾������̵���������Ɯ($��������hw��������yZe���{5p��ÿ����������������������s�Ŧ~t���wgot}{��������������������י#0B]aVZ[7:\_NE1)5CHIGHH7 "*)'(-?mx].&&'$#Aw�yx}�����tha��������������������|U�=OeQ>.&'*+*'&((*))-+Z��c96///@otk^F;7`����mo����������������������y[~����������������������nFw������������Ż������������û��������������Ei����������������wo��������������۱������h68?OHAFHEDFD:Qvu~}��n�����������������������½�v�����}|zobRFDDEGLTRI7$%.?LD=920320/+'&(+.5:>?@AGOTXYB2-0139>?>?BCEB>92(""9�������ȐPKFC=:677@q����G22325@FONC<83+"0ATXA,# !:cI0*#-KK@6$"%'*/5K`u����}xpruvsqlaOBNSY_`dloq\MA8(!*8@DGDA;9::?RrZXiW;78891,9Qc|�����~��zr�����������������������bNQK>966888:<<@C@@Pp������~|ytmdY\blv������������������������������������������������������������������������������������������������������������������������������������������������������/_�������������ó�f����������������ʳ����������������²�W,DVXSNIA<841/-+)'%$%$$!"$#%$!#)(;p������������������yvsly|fY����������������������������p���������������������W7cXTet�ǨX%"!" $,:Q�����������θ����X)Q������U#Jg\F2(*-1-($ "&*)')*,J������������������������������������\U|o?(# $&# $,862h�ƵR%8���������ǿ�����������������������������[=XA)"!
Y�P$%#!%%')&F��ɮ��������ɾ�������× (����û�|�����van������s2t��������������������������s��������]8Wv����������������������Ӕ$5:VqppvD9QLIP.(3@DFDC@2"*(')/Nv�c.%%&$&Dz�u|������xi`��������������������v[���9M`Q?-$%(**'(('()(-*&g��b81,/4\z}v\<;4a����|or����������������������xb�����������������������nIy������������ż������������û�������������ľ�<b����������������us��������������٬��н��j4;BMJCJLDCEA[������of��������������������������y�����||{o^PGECDEJOKA.!$-AMF>7003331,(&'+07;?@@CIOVYZB201.17=BFLXalqqmea\O9+;R�������ŎQJDA=9579Bc���yJ42314>BIJA;71*"1ATX=-$#6v�F0*"7STM4#"%()-4H`u�~�{squvrpolcNELSX^cfjj`KD<3)#+;BDFDB;987<SL7;=5588::3.8Q[{��������yz�����������������������}^PPJ=977789::<BD??Rr~�����~{sgQXZclv������������������������������������������������������������������������������������������������������������������������������������������������������-]������������òa�������������������������ȵ�����������X.GVWSKHA=740.+**'&%###"$$!"$"%-.>z������������������yzvu�gY���������������������������|m���������������������W9cXObhxvZ, $-9S�����������G,62($ -\�����R"I^ZF3)+..+'"!!&*'&)*)D�������ÿ���������������������������^!]�o@%!$%""'/868m�ɳM  $4���������ǿ����������������������ÿ�����X:M>)# \zO)#"!%#%( I��;����������Ǳ�gl��Ƒ.����į�����ǿ�UY�����½�p/w��������������������������r�������oE>Ur���������������������Ҋ%7O����i1:TQ\K.'2?CDBA>.
#*'').P��k/$%&%'Gww{u�����haY��������������������uY���:MWM9.'$)+*&(()*)*/+#.p��f5-+2Ez��~Z<?6c����{nz����������������������w\�����������������������lL������������Ž������������û�������������ü�;b����������������wx��������������ض���ƭ�d4:BLHCHJGCD@W~ocejipm��������������������������y������p]RIB>AEHE?5&$-BLF?8004210,)''*17:@A@CGOR[YD765369=AFITbo|�����}qnk}�������ćJHD?:869;Fx���tE23203=AHG?;72)$3DWV=+#&:zm8.+" ;ZUH1!	#&((+8H_sz~�{vvvsqsplcQGLSVZ`a\VNEC<2($-?FIHDA=;9:;><=957668<=2.;R]y��������z�����������Ǻ�����������}]QQJ?:789=@ABAAC?>Qu~�������|ufTZ^alw������������������������������������������������������������������������������������������������������������������������������������������������������/^�����������ȯtd���������������������ͻ����������������V-FUWTKGA<541.,*)('%$"$$%%#"$$$./E�������������������|yqnzt^W���������������������������yh�����ÿ�������������W<bUL`]ND<, $,9Q���������F"$('  "!-h����O#I\XB2)*.-+("'*&%((!D������������������������������������Y%_�o>% $&"!'.666r�ɮR)JJ<!%?���������ǿ����������������������þ�����U;O<+$  "b|L%#"!$#')#S��˰�������ɯ�|v�����Ň1����ð������������������j/y������~�������������������i������zG@HRgx���������������������ρ69NH?gW0=UlcD+(2>CAAA>,$*'%).U��`*$$&''Gw|v}�����~_WX��������������������zV���,K[H3,&%)+*(((()(*/-(:z��t>118a���x^RL<h����{n{����������������������wZ�����������������������jF������������ź������������º�������������ºHj����������������xp��������������׹��ǿ��b89BMGEIIHEDCEdt{��~rn��������������������������|�������pbULC>@DC:1+$ &/>KG=6013221.)((+16:=>ACHOU[WD;964:?AEJMUao|�������������������IHC>;988:@u����U120.3=CKE=851)&5GZU;+"';i�F1*! 5PUP2 	 
&)),7D[pz�}xtutsvuomdQCLRVX^^XROLE>1(%1ALOMFA;:;;<=CT[A6669;;1/:N\q�������z�����������Ȼ������������_QQJA;89>HLHME?A>@Uv���������ynb]_\kz������������������������������������������������������������������������������������������������������������������������������������������������������-e����������Ɵfr��������tz��������������í����������ȿ��T+HWXTMGA;82/.-*)*)&$#$$$$#!$$'12G������������������zunntgZX���������������������Ŀ����xo�����Ÿ��������������O=fRM^ZMF9* !$.;R��������ܑ6,).+ ""?x���ME_SC3)*..-("&)%$%&7������������������������������������B#d�n@'"%$!!&/344s�˫NFob<%E���������ǿ����������������������¾�����NCU=("%ixG$$!!##&( Y��£������Ǡn`|�������3���ȷ���x������Ƕ�������^3{�¿�����������������������a���j��kSnljknx���������������������w%4<TI{�V8=UeT?#(6?BBBAA,'-'%'0\��r.%%&'(L{~yzx����uWQ[��������������������tW���+DXD4-''+-))('()')-,*F{��x9/2;Yrusk]RF3e����xly����������������������w^�����������������������hF�������������Ź���������������������������úy:i����������������yr��������������Ԫ������^69BOGDJJFCDAf����ljdm��������������������������~�������tb[OE?AD?6./)#)0ALD:6013230-(''+06;>>@CKQVYTB;974<DGJNQ]gpy����������������̻KG@;;:98<I�����O030.4?DHA8540&%6GXT:*"%R�s5.' =SSG/  	$)),7D_q{�}xtrsqsvsnkaSHQUTT[`_`\UK?4.)6ERRNHB<;;<@Id}x=567<<92-:L\u�������}z�����������Ǻ������������`QQG=99<I[YWUF>@?B\x��������{qh`_doz������������������������������������������������������������������������������������������������������������������������������������������������������,`���������Ȫ~���������q^}���������������˽���������ȿ��S,FWYTLE@<61--+))'''&$##&$""$#(51H���������������~~{trz||t[Y���������������������þ����ro����ƾ���������������R=_TNXUJD7) !'/=U��������ֆ3+*Adge`1*Fh����L$JbYB3*)//-(" ')#%''3Y�����������������������������~vlddeS#%e�h;' !$&$!!#-574t�ʡE6;2*H���������ǿ����������������������¾�����K!FT9$!! 
*cmB'%!!##''#c����������ǰ����������z3���ɾ�������������þ����]6��������������������������b��pW�{bv���wot���������������������h%3?Kp�dW59\lT=()8ACCBA?,&-(''.g�|>"$%&$"My}yxr����^PTb��������������������oW���1J[?1+&(+-*(%(*)((+)!2_��[113=K\dd^ULE:h����uj�����������������������ua�����������������������eG�������������ĸ������������µ�������������ºw;k����������������ww��������������ө�����}Y79@SGCHHDBD<Rm^Wineaq����������������������¿��|��������zj_ND@ADA705/$'2CNC87235230+''(,17=@@AFLOU[U?:<97?FHMSYblsz����������������ǴyPC=;:<;8;P����pE1///3?DI@4340%'7MYS9(!!Ac:,,''-;7) 	 !#'),6G\qwxwwwvqnopnmj`WOQTSPR[eikh^SJ>,3EQTQMD?<:<@Ox�|A255:;92-8N[s�������|z~����������Ȼ�����������}\QPF>99?QedbZF@A@B]y���������}wmd]foz������������������������������������������������������������������������������������������������������������������������������������������������������/a���������ɹ�����������n{�����������������ç�������Ǿ��J/HWWTMF?93.-,+*)'&'&#"%%#""$$(0+P��������������{wtujny~|rWZ���������������������ý����mk����¸���������������O>_ULWUIA5'%0?T��������ь7)\��ĺ�4T������F%M_TB3+*..,& #((#&*/>o�����������������������}~��zzqtomw�vG(eza8& "&'$"#'/54;{�ˠ=4P@$)I����������������������������������������E$FO3% " (^n>($" #'(!g����������������������m3������������«����������\6���������������������������j��nh~gc����|gr���������������������a)3B_�fOP,FjUB7)):CDDBA<+(,'&&+,+&#&#&%$#Et{wz�˰{[QPMa��������������������g\���.N\=1*&()++'&)*+**,+&*Rp{m]SR\jospmc_[Mp����ti�����������������������qa�����������������������bN�������������Ź������������µ�������������öuCn����������������rz��������������Ϙ������X38DOECGHFBD=Qy����geq��������������������������~��������zn^MC?ADA62>2%'4ENC:7222131+''(,/6;???EKRW_S=8:88AGJOU\fpw~����������������İnQF=<:;:88G|���q:0,-.5?HNB4560#(6NZR6& #&)-.(()%	
+"%)*,5H\mtwxxyvojjkmle`WOQVVST]jx��{ukM36DNVUPG?89=EQy��J8777:80+;PZr�������}y}����������Ⱥ�����������y]NSH>99AVkjcSCAA@D`z����������zm^\fm|������������������������������������������������������������������������������������������������������������������������������������������������������)g�������������ÿ�����������������������ƿ�ȷ�������Ǿ��J/GUWSLG@;40--+')('&$#%&$#!##$,3/P������������������ysx~�nXR���������������������������ol��������������������H;]TO[VLA4&! $0>P��������՝J/���ɺ�*]������D,H_SB2)+-.-(!#'(''*.8k��������ľ��������������������������Q,d{f8$! "%%#!#'/657�Ơ43MF% +U���������������������������������½�����@#DJ1$ !-gs>$#""#)($p�����ù���������Ƽ����b8�����������ò���~�{GS���S3���������������������������d��iW``s����vdp���������������������T,1<YN=kO/JD-;>),;DDGKD:'(+&$&()RPC(%'$#$Gv}w{�~_VPOOJa��������������������g^���-TU;2+&(*-,*)+,+*+.,*@i����zz�����}yumXq����qf�����������������������le�����������������������YO�������������ķ���������������������������öl9l����������������r~��������������Ѿ��˽��Y5<GPHEGIGDA>p��yob_q�����������������������������������yn[LC=?D?40;1$'4HO?:5121020,('(+06<?>@GNSY]Q<996:CGJNW_gqz�����������������ǪjOH@><;769E���n80,,.5?GMC8780#'7MZO4& ")./' %# &&(*.6G^jrxywwsmgfhjgbaRKT[\\]cq�����}Z75COVYRJC;>@K[��uM<778872-:MUl�������qmz����������ȹ�����������}]MQH@:;@SgbZPECB>Gcy����������yma`fl�������������������������������������������������������������������������������������������������������������������������������������������������������-h�������������ü���������������������Ǿ������������Ǿ��K-FXXWLE@:40.-+)(&%%##%$"$$#$&.2-M������������������������s`W���������������������¿����on����·��������������H@bRN^UKA5%!!&1<S��������۱yT���Ƴ�]�������D,JaTC3*,/--)"#'''%%)/R�����������������������������������n&+e{`2(!"$#!!"&054:��ƚ)7[E,`���������ž����������������������������x;'NO2#!! 
5kr8$# !#*)){��³������͹����������V;��ż������������ǣec�º�L>���������������������������^��UEm~y����wlv�������������������ػM*2EOI�P2;>:B4%.>FHP_aF*&+'&(2k�vR#%%$"!Jz{wtug[WSQMFa��������������������ha���+QM<0,((*/0,++,,*+--*Ft������������~|mIs����pg�����������������������le�����������������������YQ�������������ö���������������������������òi?w��������������q��������������������ø�Z6;EQHEGIFEB>Ybcfz�gct�����������������������������������yp[PF=>@?3-50%*5FI?93/2123/,('(+17<@?CFNSX]O<989>DFGNX^fp{�����������������ŧkVJA=;:76<Y���zJ50//05@ILB9<:/#(9Q_K4%#(--%&%"!',),'%$(.4A`kuwvwvpidbdfggaXQXbbdhgix�����[58HTXZULECBBJSr�tD<999632.7JSb�yy�e^Zs����������ȹ�����������yUKPF>9:@OVQPJCBB@Fc}����������{ncbft�������������������������������������������������������������������������������������������������������������������������������������������������������.c�����������Ƕ�������������������������������������Ǿ��L0IYXRKF>964//-+('&%$%$#$"#$%(-5-Bz����������������������tbb���������������������ÿ����mo���ļ���������������GDcPQ_VG>3% '0@]����������ľ��������������@*KZRA2,-/-,'#!$))&'%#)E�����������~~����xpmlfkrtqoha_WSZ[D .jt\2'! "$" ##'378A��Ǒ(<Y9/f���������������������������������������r7%QI.$!  
>sn6%""!$*(%��Ӽ������˿������gc���P=��������������ȼ�\������HA���������x����������������~]��gz�pj����rr{�������������������ϴH*7GP��WQ4>BB>-'/?ENt�t=(&+&'*1Jy�Q%$%%$$N{{wrlf]XRNKLe��������������������da��~!PO<1,))+.0,+-,-,+,+,Is������������|xnJx����sj�����������������������jc�����������������������WV������������õ���������������������������Ĭ_D~���������������s��������������������ƹ�T7:GQGFIJDACCa����rYft�����������������������������������zp\PF>=@@0+0,%*4BG?81.22240,(')+29>A@AGMPYYL:789?CDENX_gr{�������������������m[G>:;857<Z�d6842/.//4?HMC<?;/"*;N`I1"!',-$!(' %&(/:DQ\^]YVM?38@^ltuxsrma\acdimdZUXagjlifj{���yS+6GRX[XNFCDGLPe�~B;995442,7KNSYQLGMJFSXu����������ǹ����������vUPQH=69;?CBC?>ABAIh}����������qfbgt������������������������������������������������������������������������������������������������������������������������������������������������������2a������������cZXk����������������������������������ƽ��H0KWWQKF@<62/.+*'''%"!# ""#$#'.5.^�����������������������~tcc���������������������þ����np��ƾ�����������������FEbPQ_UG?1$ &0=\�����������V6&4N�������B(L\Q@3)+.,,(# %*($&'&*>y�������yrplkihpoocZSQJQUWYaehfdgkkV%2jvW0%"!$$##$,695H��ō# BQ0"2f���������������������������������������q5(NI.%";mf2$"#"#*&)��Կ�������Ƽ��Ǳzk��βH>�������������ģfe��ȿ���CG��������������������������s^�����k�����wr~�������������������ȵA-4Kg�TZS09KTU=(0>GX��S7&)*&%)7r�z<&&&%%"O{yuore\VMEDGf��������������������ab��y&PR92+&)--,**+,-,+.*&An������������}zeAx����td�����������������������nc�����������������������WV�������������ö���������������������������­_O}������~������o��������������������ĸ�P:;JNDFGIFFFBr�psr_Rhx�����������������������������������{n]QE>=A>/&($#*5CH>83.03221,)(*,2:=@AAEOPXZJ737;>@@EOY_gr{�����������������ǦjVE>::655:Ng9.22//.//5?IL@9?:.
*<T[F.! &+*" &%$'.7Nb{������{L7CZlryrlihb`bcchi_ZV\dikf]W\|���fA*2EQWZWPJFGHKVx�|F9763552-7HLKGEEFHJJOWu����������Ʒ�����������vVROF=6667:::<>AA@Jo����������tfbju������������������������������������������������������������������������������������������������������������������������������������������������������.g���������ɭ���������������������������������������ƽ��E-JVYTKFA;62//,+)))%""!  "#$$(174n���������¼�������������v\^���������������������������hu��ĸ����������������AGdLLZQF=1$%0?V����������-*#! ;s������|>'HVQ@0'+.,+%!$((%&)-9e����������{zwvtqmeaYZ]]psxz|yuwyz{]6m|W*#"  #$#"&,591G��Ɋ#4Q9$0g���������ÿ����������������������������q4+OJ.$!:ja1$""""('-���Ƴ���������Ьsu���ȬBC��������������Ad��������9D��������������������������sp���������������������������������ͭ7,/7@87UK/<ipS.(1>GRfnfA$+(#&)-NkvL(%'%""QwxunjbYQF@>Ab��������������������af��q,RU;3+'(+,+*++,.+)+)(Cr������������~h=7|����qk�����������������������lf����������������������QX�������������·���������������������������ì`J}������~~�����|h��������������������õ�N59IODDIJGHD>ELw��o^hz����������������������¾�}���������zm_RF??@;, !'5EF=92/33122+&%'-38;@@AGOVZV>646:>?@HQY^epz�����������������ǥaPB<97443453/120.-./17@JH86:7,*<R\B- !%++# %%$$'0B]w�������V5F\ltrjgfe`]`a_dfaZU_nj`UMHTp��hL;)/APU\WOHDFJGWq��G9:63550-9HMIGCCFFHJK]t����������Ƕ�����������v\QTF<96789<<@>@?>Gq������������sbamv�������������������������������������������������������������������������������������������������������������������������������������������������������+_���������Ǽ���������������������������������������Ż��J.HWWSMF>9630.*+*)'$"$$""##$%(19/k���������þ�������������vW\���������������������Ŀ����`p���������������v����~>D]LGTQD;.!!%0>Y���������G%**&"$C������z:*HWQ?0(-/,*% "''$&*+3f������������������������������{zxvrQ9psQ,#!  
"""!%-791N��6Ha(#3k���������������������������������������n-/TD+$">p^3$$  "'%4���ŽŲ�����ҽ�e�����ˢ8B��������~����`\{~rmq����7H��������������������������gp�������������������������������Ӥ72/5F?QnO1Q_?5,(2@FSy�j;#)*&%*5m�w?#$%# #V|{vne`XQF?;=e��������������������^f��i'OS<5,())++))+--++-)&Er������������jB:>�����um����������������������ed�����������������������P\��������������������������ľ��������������èSK��������������zh��������������������Ĵ�Q7>HNBCFFGCC<V����m`d|�������������������������~���������zo`SHA@?:)'6DE<82/21020*'&).4:<>>BFPSVQB667;???FPX]clu����������������ǡ]GC<963322430020-,/-09DNK;784* /@RXA+ "&+* !%%!$ '8Om�������R2E[knjca`[ZVW]`cc^VS]haLDEIR^_^XL8',;LT[WMEBEEH]|�{I:;8553-+:GHFFBEHKPOQ_v����������ƶ�����������uYQQC=9778899;?@@@Ip������������rbcku�������������������������������������������������������������������������������������������������������������������������������������������������������/f��������������������������������������������������Ƽ��F/KZVRKF=:51/,*))'%%!$$""#$%$&-52i������������������������yV\���������������������Ŀ����em��º���������������}c1F^OHTOC:. &3>\��������݇0'3ZL%OQ/p�����x70LXO;0(-/.,'!!&&$&&(,Z��������������������������z|vjhmlfC:woJ.$!#&" "&.9:/S��ȑ8(OR/#2v���������������������������������������h*.MA)"!
EqX0%# "$(&4��ϻ�������̧a`������Ĥ/M�������l_��������������3R��������������������������]q��f`idW|��sYjw�������������������՜-/9^uw�P.NTTS:&2?CQ{�f<" +)'%)/VnnD%%%$$W{vqje]YSG=7>g��������������������]n��g*WS?7.')*+*((+++**+(?l�����������~YB>9�����po���������������}������ej�������������}���������Pc��������������������������ľ��������������¨SU���������������|b��������������������ö�L9=IMDADFCDEGx}E`zx`j~�������������������������v���������|tfTIDDB:& )7BA<71/10020,)(+.4:=>>BHQWXXE868<?>>FNU\bmu}����������������ƜWHF;8621/0121221/--.1>RdW97:7*!0?SX>*" (*)""(##$$2Hh�������Q1J[imf`]YZ[VQX```ZSPWZPIDBHNOTXTI7(*:KSUSIBBBBD\���G8=>:94-+9HKIIFGO_eeirz����������Ŷ�����������rROOB<877779<>?C@=Pt������������qddnz�������������������������������������������������������������������������������������������������������������������������������������������������������/h���������Ƚ���������������������������������������ź��C2MZWRJF>9410-*)'$"#!!#!#$%#',55l������������������������y^]���������������������½����gq������������������rl[1FaRHXM@8,!%1AW���������x1C��tl�2a�����v3.FQL;,',1/)& !'%$%$&*Y������������������������||}{zvqkhf^=7hjH.# #%# !$.681[��Ƅ 'YW+"2{��������������������������������Ŀ�����c$0L?("PsQ,%$ !$)$2��л������ͱq`z��|��¤'M������~n���������������z5S��������������������������av��T?VVVy��bM[x�������������������ט"1;dfiobJ2Tij\6'7GHd�zG8!)'''):l�y?&%'$!VvzrleVPG=?@Dk��������������������^q��c+VS@7/*+,*(()++*)*,'!4^����������wVF?:�����on������������������t��~�gg������������rVm�������|Me������������ſ������������ž��������������ƣQW����~��������wi��������������������´�H:=IMCBFHCCELeRMs�lYk��������������������������{���������|pf[OIFE:)!)8EA;92.01021+''*.5:>?@AFW^d]B578:>>?HPW\gsz����������������ĚWKD<8411//1214220.--4Qny^77=9+"0BUX>,!
"**-* "&#$&"1Dg��}��~U7F\fmb]]^`^[X[]^]XOOUYVND@@EJQVSJ7&+:HSSPFA?<=@]��sK:;=:96-(6GKJIISk��}|y����������Ŷ�����������iPLKC;8777::;>?BA?Wt�����������}ocfo|�������������������������������������������������������������������������������������������������������������������������������������������������������+i���������ƥ���������������������������������������ļ��@2N^VPKD@9530-+(%%$""#" ""#%$&0?>q������������������������{`]���������������������½����\l���Ⱥ������m������xui/?^LFSH>8,&.>Z���������m0rŸj��V'e�����t4*FNM<.,.10*% #&%%%%&+Y���������������������������}{uojhf\=<koK+# #$#!#/7:5]�ſo(GR6$%<~���������������������������������������c$5I<(!MpN-&#""$''@��ҽ������̶���������ʜ!I�����d`����������������o.Y����������������÷��������t|��RG\^Qb}oJNa{�������������������Β#3177'9`E0Oe\Q0'7DEXsmI7))&&)8ll9'&%#"Zzxrk^SOLLKKLo��������������������[o��R1QQ>7/+,.-**)+,-,++&,Tz������~{|{tgcVH�����np������������������y����bh������������z^��������zLg������������ž������������Ŀ��������������ĝIT�����~~�����vi��������������������ñ�F9=HIAAEDBBDBVSNuo^\h��������������������������x���������}qh[QIFE;( *7D@;6201102/)''(.5<>@?ESgpoeC67:==<BLU[`is}�����������������ŘVJF=9520.025;?<500..5Uz}]:<D:,!"/C[Z8)" #&?DF9+#&"(..%$5Jh���~}��U2D\cgcbaefba[WZYXULKU_`[KA>>FSXUK7%*;HOPME@=;:ATw�c@;;<:85,&7DJLIJg�����z{����������Ĵ�����������kTPNC:69889:;=@AADXv�����������~m_eq|�������������������������������������������������������������������������������������������������������������������������������������������������������)f���������˼���������������������������������������ú��? 4PYWQJC?8540-,)&'$#$$$""#""#&.94o���������¾��������������`_���������������������½����[p��̹������kp������~v]$EcKJPIA8+%.?`���������p9�ƴaB;%-w�����t1+OSN=/+-..*$"  ')'%$(2Z��������������������~}y{|{wuxslmqnf@AtlC)$!!"##!
"/575\�ȿi)>YC&%=��������������������������������ÿ�����a$2H9("XqK+#" #!%*%@���������������������͑L����zTsvv��������������g5i�����������������Ǹ�������p~âCJf_KPhPDYi��������������������Ȏ$32>DAFF(3FCFA1*6@Cd�oH3 !)(&'+Av�s4%%$"'`{vqg]WQQPOPMo��������������������Qv��\/VR>5/*,/.**)+-/+)*$$Ky������}zzzwwrW@����kr��������nx�������}ty��dl������������{p��������vLn������������ſ������������Ŀ����������������JZ����~�~~����wi��������������������ò�G7?NLBAEFDBD=Ddiq{vbe��������������������������x���������}qcZNIGE:& *7A?;70/2233.*&'(.4:=@AIgx��mE99<>?<AMV\akr{�����������������RLE<820--16JONN;20//5Qou[@AI8-$$1D\T9'!!*Myq]<+$(&$.DDE+$+=Qj�����U7GXbdbdhhedf^YYYYTNLUdbYPGCDJSXRH3&.>HOOKB<:=:>L\yjI<:9:97.+9EHIKQh���{zpu����������ĳ����������gMOLA9678::;<=@@BDYx�����������~m]fq|�������������������������������������������������������������������������������������������������������������������������������������������������������1k����������˳��������������������������������������ù��>4LXVOID>:6220-('&%&""##!"$$#)*,&l������������������������[]���������������������¾����\q��İ��~���u�������pP"LkJIOJ@5( )1?]���������7�öY!'J������s/3T`\>/*...*#  "'''&#(7^��������������������~||xx{|}zxuutrj=EpoH,%!!"$$%1596e���Z2OP=#D���������������������������������¿�����Z5I7'!
#dvP+#! #!&("L���������������������ˉ N�����������������������g2q������������������­������g|��Tblh[USFQel��������������������Ƀ*3A`VQJC/+9?HM3+8@GUtgH2!+('''E���7&%$ %a~xoa[TQPPQPMs��������������������O}��^/XP<3/+-/.**,+,.,+-&Dm������}{zzysbM����~nv������������������zy��di��������������������vJm������������ſ������������þ��������������ÜH`����~~~}���pg��������������������ð�B9ANJDCGHFBBJu����w_h����������������������¿��|���������|re\QQID9# (9D@95/.23330,(&(-5:>@DPaw��mD899<<<DNV[bkqy����������������½�TKB<630-/5NjTYZ?31-,6OglZADC6-!$2FYV9$"&A���c@)%*),G\ZT..9IYr�������W4G_febdgd[cg`][[[WPNUgh_WQNSWWSK@.(3S]ZVJ><:;:AO~�XC?=;?<8,)8GJIHLc�weedcs����������ò����������gONJ@:998;;;>@ACBB_z�����������|icis�������������������������������������������������������������������������������������������������������������������������������������������������������,g���������ɠf^d_gq���������������������������������¸��;2MVTOFA@;42//,(''(&$$$#"#%&')%$n����������������������}sX_���������������������������[j�����������������vUL[FGNH<4*!(2Ad��������ˑ7|��[;�������p/0YlR=.'---*#!#(&'($&2d�����������������������|����yxwto]9SyrA*$""##"(1563h�˶T%@JT.%B��������������������������������ſ������T6G:'!(bpE-#!""%'#W���������������������łT�������z���������������[0v��ϵ��������������˸�{��Ǫar��lelslfZ[hlm���������������������u(4-1]s}m32Q[aS+(9BHa�xI/!)'%(/U��e/%$%!*dyypa\]VSRQMMw��������������������P|��J5VH;2,,.---*,,-,+,1$>jx}�������~}}zgP����nt������������}~�����{���\h�����������w����������uMn������������ƿ������������º����������������AX���}~~��~�����|uo�����������������������G:>RLB@CIEC@G}elvu_i��������������������������t����������ug[QKEC7#!(:D>85.-1232.)''*/49=?BO`r|_=678;;;CNV]cjqy�����������������QJB;640,/9eiC_a:0/..5H_eU<=>9.
%4J[S3$#)[�}�cC&"-63-.WipT-,<Pe{�������]3G[fhgec`cddaa^\\]SMQY[ZSNMOPOD;5,9^vuxeJ:99;=@ITWOA;>><93++;HKLGHZcLMFN_s����������ı����������}dSPJ@;899:9<>@ACAEc{�����������{ecgp}�������������������������������������������������������������������������������������������������������������������������������������������������������+h��������ѽ����������������������������������������¹�33NWVOID?96300,+)((%#!!!#$#&%&$-s����������������������}zoZ]���������������������������\n��ò�����t�������~pZ?FXKGJC95*'3;i��������ةq���hLt��������p+/NZK:/*++,*#  &'$$&%&.d�����������������������������|xrkhW4W~mF)"$%%#!'064;t�ɻN#L]=&I����������������������������������������Q!?I5'!"Q_@+%!"$)"[����������������������zS����z�|Ut��������������X:|�����~������������Ѿ�w��ƨaz��UQgy{wsusol���������������������k'4.Y���R(7OGGE,*;GLr�x@+"*(%%0FzyW'%%%!%c{xurykTWUQQX~��������������������R���N4SH;0*+-/..,,+,--01#>lrqry��������}eT����|pz���������~usjy��x����Vq����������vo�~|�������tMu�����������������������������������������¾�GY���~|}~������}to�����������������������B;@OLABDDBEBD=1&[�nam��������������������������v���������}tgYPKGB5!",<B=95--1244/(''+27=>ACOdp{}[7249:;=DNV]agnz���������~�������ĉLJA9521-0=g]NgR201.-4@T[P??D>-'6G[T0#$9l_rl[3("?KHKF7/ZqbE&$0Iew�������f5D`fnmdcdk`_egc_^[TGCGEFAACEF?83.*:j���jG;67;?DLaibQ@>:><6-)<FKHFEHGEJKNZv����������õ�����������bPON?9989::==>>DEEb�����������{lcju�������������������������������������������������������������������������������������������������������������������������������������������������������/l�������������ÿ�����������������������������������¸�~83GSVMHC=753/-+()*%##  &!#%(&"%s�����������������������{kY]���������������������¾����Zf��Ʊ�����s����{ia[ZO;SdIFIB<4( (2Bc���������«�Ʊ�����������h)5NTK=/*,,+)#$'%%%%'/j�����������������������������|smhbZ8WxmH)"#$$#")1713u�ʲH'F���������������������������������½�����K:D2$!!MaB)!
 # %&"`��������������������ƺo X����szjz���������������S:~���Ɲ{�������������ǣn��¡`��hMbksy{zwmX���������������������`+:U��O-"*)0=NF)*9EIo�`C.$*'').KlyZ-&&%#&d}yv~�yg\XUTW~�������������������~S���K4QH91)&*.-/-++*+/10&DysNHg�������vaL����|oy��������}i{vgiow}o���`s����������xx�{}�������lIq������������Ľ������������º�������������þ�@b����}|}|~����}qp����������������������~A:CQF>AEEC@@?Gju}�y]l����������������������¾��t���������zqfWMIF@6!",=D<74//20020)&(-38<@DMam��}Y6148879CLTZ\boz��������}���������RH?8320--6XQQT92.0--3>JRM@BOB+'5GXJ1#!65)MH3,( B]ZS\M3+MJI/'Gi�������a5I[enjdcea^]dfd_^[O945788:GPI?41-,3]��c@;799?E`wt^EA?>@>:1+7EIJFBCFGIKP[s����������ĵ����������|aJJE:667789=?BDHM\t������������jcjv��������������������������������������������������������������������������������������������������������������������������������������������������������-h�����������������������������������������������������z=3LXTNIC<6430-)'&(&$  ! ##!&''$)l����¿�����������������yiI`���������������������¾����[n��ů������~uvi\Yid_ib1!ZdFEJB<4' *1?g���������NJFI@;:7H������g+6LUMA1+,.+'##'%%$$#0p�������������������z|}���|xxsfdcbc]1QynB(" #&%#")165<z�ˮE$F����������������������������������������L<@3& )]hB+"!#!&%#f��������������������ùbX���~l]^���������������{D7����țv�������������ˣl��J{�Ydf_r|trrl_W���������������������V17D<AVV5(8V_eI+*:CKbqeG-%*&()0P~W$$&%"+i|yy��}ha\[YZ�������������������|P���A2RF6/*(-,,---,,/352( PvbSt������|wpaX����{ky�����������xiepw{rs���}`t����������|�����������pKx������������Ļ������������¸����������������@e����~~~}~���ss����������������������A=AMLA@CFDCCL}����n`i����������������������¾��{���������vnbUKGGD8""->B=:4//20/1/*&(-38<?CMUnw{nG63487:>FOTV\cmy��������}}���������PK@:63/-..7@;3121/./5>IRMBCL@%(5JUC,!
"  $)+'CUNQ[B,%413#=Qm����F:HYflfc_^]]_ddd]^YJ7,/259ALSK;40-('AVbcN@8255=DPQPLDGJJKKD:/=KKKGEDGDHJPZt��������������������}oLMQF<8;;;=>@EJSdnv�������������~gelv��������������������������������������������������������������������������������������������������������������������������������������������������������)g�����������������������������������������������������y8"9NVWQMC>832/.*('&%$""" "$$!"&($&r�����������������������|iLd���������������������½����Ha��î�����zejh^gv~����(QaGELF:3$ (4Bj��������h#$%#  ""A������e)4MVOA0+.,)(#"&%###)v����������������{|wuuwxtltvtnledeU-!Wwc;'"$&$$%*283<�ʬ?%P���������Ǽ�����������������������������C?G1% )Zb@(! !"&%"p��������������������ŷZ*Y���~he{���������������qA6����Ǘz��������ź���Ŝe����M��DoxvoRQ^ikz�������������������ؽL029l��iH,>[`R>&,;FOjzk?*#)''(,K��v1%$"2exvwunz{a]]UO��������������������tO���?6SC7.))+,-,+*)+2365)#1_ng������}zzvqo`V����yl|������������qmpntlrxz�{]r���������������������kBz������������ĺ������������¹�������������ľ�Ih���~~���}���mu����������������������|::CKLBAEFDDALziZ]lobk��������������������������q��������|pgYQIGGC4 "->A=:4/.0/12.*')-28;>DHZmkqd?3/236<@FORT[dmy����������������й}PK@762.,-///0/010-./6=HXVF@B1 '9LRC+ !$(,'@T;HL9)$$")0AMK9:E[hieaZ\^Z^]\b_]YJ7.0237BIG>320,&&0?IG=3123@FHHLKNTSVSQK;/BMKKICBDEHJLXl�����������ymqxthkljqoqopmeb^_ZY^ceir|����������������zbdmv��������������������������������������������������������������������������������������������������������������������������������������������������������*d�����������������������������������������������������x.4MXZRJFA;520,+)'&$"!#! 
##"#% #o�����������������������~nRb���������������������ý����J_��ƴ���������������q:P`JIMG;4% *4>k���������l+!$%$'D������a&6KXRA0,/,,'#  $'%$$ (p����������������~||vrnptpwzvrokaa^N$"[|a8)# $$##%(255<��Σ7+X���������ǿ����������������������������~;$EL/$ -Y`:%"#"!&%m��������������������˶R(Y|���������������������l@8����Ēx�������ų����Ɛ_����]�gE|���~rmv���������������������ٵG/4PwTRXD5:HJJ<)0=DKfx^:(%)&'(5y��Z&'$ 1gvwt�oa[VZ��������������������rX���=7UF5/+(*--+*(*/4345+'<ax��������~ztpm]Z����wk}������������~|tnzpk^q�~\z��������������~�������kN~������������ƹ���������������������������ſ�?j���~}~����~�~}s{�������������������ʿ�y><CLH?AEDBC?;Wqy��mbj��������������������������x��������th]TMGCB>1$/=@<97301254.+*+/8<>CHWlpzqD0-.149<CKPT]hny����������������Ϲ|RK<3210/.-.../00/.-.6<G[Z=42)'7JVC*#)+%)I24C@5! #'+/029>N[ehg]N[abcage][VK9012346;8551.+)&18AHA=/.19ELPNORTTZYQVPDBQUPVNHICEJOVZe|������zruxxw{vqqx}~���������������������������������xdhoy��������������������������������������������������������������������������������������������������������������������������������������������������������-c�����������ÿ������������������������������������ƿ��y2!<S^YNKFA:40-)*(&%#$!!!!#!"#!$$!+p�����������������������zmQ\���������������������¼���}Rk��ʸ��ξ���¸��kQH]Z:"U[IHPE:/#)4?o��������~a_ZTJ<* A������\)3OXPA1,--+'# !%%%#&h���������������|xvsnfc`\^bdf^MHPOM>$^r\6(""$##"%*185@��̢4&U���������ƽ����������������������������z:"@B.$ .W\:&$$$"%%"t��������������������ʲO$\~���������������������pA@���ͼ�u�������²������d����gztu����������{�������������������׳B39HG/2AE0=F@Y8)0>ERv{_:'%(''(/W��a'%$# 
-l|vy�ε�v^YW[��������������������pS���75UE4.)(*-/-,+.23475+!/i���������~xvro^a����|m|������|����~yv}pf`��yZz�������������}}�������hK~������������ĺ������������µ�������������ſ�Gi����~}������}}kw�������������������ʾ�v;;EQI@BEC?CDY����kR`j��������������������������x�������~obXMIC@A<0$1@@;7AD52>D<36:15C?FMLZnt��nD1/-..17>EIPZdlv����������������ʴtJE9320//-,-.,.///..-19AIB/,-('8OS<* %*)#1,+0+($%#%'***,-/239?I\dkh]Yaceiih`[YSI9.24234355747<BJLMRQMJ=@@IRW\TY_bed``hkomkkknlkiefipy����sriqqp|���������������������������������������������v_fjz��������������������������������������������������������������������������������������������������������������������������������������������������������.f����������ž�������������������������������������ſ��t18S\YOJE?852,)*&&&"#" !$#!"#$$&"'p����������������������yiS`��������������������������~Qe��������̾�pth]\[ajgK$NVIGMA7-"!)5Ap����������������Z;u������[)4LURA/-.-*$ #%##"0o��������ustz|ysome\VTQRXX[XKIGLNPA%(jy\7%" ##!"$+285A��ɚ0*]���������Ǿ���������������������ÿ�����u7#<<-"3]X:)##$"%#"|��������������������ɩ?,e����~����������������s:@��������������Ĵ������s����j���xvw�����tv�������������������د<59I^oBXJ(>Ol_8*4AGIl�a<'&)&&&5|��S'$%"9s~xy���t_UWT^��������������������oW���3:QC4/))-00/--123697)@����������{zzvdb����vf|���������twxvru�tz���zX}����������������������gR�������������º���������������������������ý�Oj����~������~}mz�������������������ʾ�v97CQI=CFEACDLXhgX^abj�������������������������x�������~pcWLGCBB?1!&1C>9D_O7?ZU>8NE3;A=NOGYu���nC,-.--.048=BKXbo|���������������ǫpMC6221.-+,./.-.//0-,17=?4*),()8LO=* "&**"#%" 
,=EDD@@?A@99;9<@GZflidbbbdfhcYWSLC=?BCCDIGLNTX[bjmlkihlmnjkrxxxw}~���������������������������������������������������������������������������tfhmy��������������������������������������������������������������������������������������������������������������������������������������������������������/j��������Ѿ�l`VXRa������������������������������������s/ 7QYXQJC=831/-')'&%#"!!!#"$$#%#'t����������������������}whP`��������������������������{?b�������ŵ�ZbnpjllnldM$IXDEI>4, #+3@p��������������ӯX.k������W$5KUO>-+-,+% $&#" 4v����������|{|}}zsohb\^[^feg`XSSUWQA (lv\3&"##!!#)252J��̕&!.]���������ƾ���������������������¿�����r6%><)!6ZT6% ##"%!"���������������������ȧ:/n����������������������u7A���Ǻ��������ƹ�t�����{��������������������������������������Ө336_�dMU4*AXZD1+6EGQp�X8&')$%&2\`N*$$&%$-W��vxupnkd\UQY��������������������nY���-8SA50+*-030/.237;;8*+]���������xwzzr`b����vf�����������zyto��u|����tY���������}���|~����|���^F�������������¹���������������������������¹};j���}|||~�Ī��}|n��������������������Ƚ�s7;ESJ@GIFBB?Aq�rvtgal���������������������ÿ���}�������|rgXPNLJJB2"%5??<HO43?NC5:G3079<DEJh~���r>320002343478AOYgu��������������ĦpG>3011.-,-./--.0./-,29=<3-)(' +:PQ7&! $()! !#$#('2CU]^bcca_ZWSLHJMP]gklied`^\YSK@LUW]adbhjkoqv{������������������������������������������������������������������������������������������������t^isz��������������������������������������������������������������������������������������������������������������������������������������������������������,g��������ѽ�������������������������������������������n+8OXVQKD>72/.+((&%%$"!" "%$##$!&r���������������������|{mP^���������������������»����Hf�������ê{Xovvtrrttrd*"SWFEF;1) (4Cw���������������e$!K������Y'9PZM;-,-,+'   $&"!.q���������|yy|z||ywmheefhikg`\UTYQM>-ryW2$  #$ #*383O��̒ 1b��������������������������������¾�����p4'@6) 8__:&!
"$"%"&���������������¼����¤/,w����������������������w8E���ı�����������Nr����z������������������z�������������������ؠ,5/163,,()2=1/')7GDV~zT8%()"%''&!!%%$%(/O���vxuuspj`WM_��������������������oW���2=S@50-*-130/047<?<9*%X�������~yvqrwrnQd����xj�����������zvi}�se|����tX������������x}����y���_P�����������������������������������������ùz?p���}{}���̡��~~|n~�������������������Ⱦ�o=>DLICBGGBB@\��gxyg\j���������������������������������zrk`ZWSRQJ8#%7@A;<7.245310+)-26;;?Hd���uL@>?=>>>?==@>;=@GXlz������������ījG<3/220//-,-,,-/..--15>>7.+-*" ->OT9%$()%#&/2532149EMTZbikpprrtuqpniea^ajnqpokmke_]ciotx�������������������������������������������������������������������������������������������������������������tdiq{��������������������������������������������������������������������������������������������������������������������������������������������������������,g��������������¾���������������������������������ÿ��r&!:OXUPKC=740-,*(#$$$###!"#"!##"o���������������������{whRc��������������������������Nw������ȿ��txy{x}����z-"M[GED;2& )4Bx��������]7172!!$F������U&9R\K6,)-.,& !$$%X��������riliippoge[[`a_]_\YZVKMPLN=(gjH/# "&#!"*372Z��Ŋ 1j���������ľ����������������������������m,'<6( AeR5$"!! "$�����������������������(4x�����������������ȿ���q7I������������þ�uEt��������p7XMG?363/1666Aj�������������������Ә-7,*')(''++)$''*9DDSvsQ7!)&"#$#%&%%#%'&1S��wwuwuqkdZQZ��������������������lX���.CQ?51.(*/10.05;@><5&D�����~urpkjig_=l����vm��������vm_]iw�l[v�z���u[�������������mvx�������[L���}��������������������������������������¹wDq���||~|������~}{e��������������������Ǽ�n@8DMICBDDDC>]fq���[_o���������������������¿��{�������}wql_\YXWUK:"$5A>:62000220*('+069<<AOgx��kQKLLLKKPPPQRRPTURTT`gq����������ɫ[B941441,.0/.///1010032;>8200,"#)4APN7(!! 
!"%)*,+8ELVSUTT`mqotwtvuy||}�}}}}}{����������������������������������������������������������������������������������������������������������������������������r`jq|��������������������������������������������������������������������������������������������������������������������������������������������������������,h�������������������������������������������������¾��i)&>RXVPMC=84//,)*&%%$"##!!#!#%%"!o���������������������~|xgPb���������������������������c���������������������v.)S[BFC91'"+6Gx��������S,)+& !&([������S':PYI5++//,%!%$!([�������{tsnmpqpngighjihhjggea]^[^XC0beK1# #&"!!$,673X��Ɂ#2n���������������������������������������g*+=5(!CaJ/#!!  +����������������������� 5w������������������Ƴ��k-T���������������SAz��������j4\TIC8:132268Dq�������������������ѓ&>INPVUMFAAGKMRRCCGTx�X7('$##"&'$$#&')2Y��}vvwuusph\W_��������������������k]���+AN>3/,)+01.+.5<@?;3$:z��}usqjjiebbc]Kl����vn�����vh}}cUco�r{������q[������������yksw�������\L������������������������������������������÷v=s���}~���Ţz��ye��������������������ǽ�j6<HQJDDGEDB:Y����_S_m�������������������ÿ����v�������zsvmhh`^YUC"%5==:73.01230+((+18:<:9<JUbj[UTYZW[__abceginqutvzywz���������ğ\E;:6856041578669=?CGFDKHA@@;?@HLPR[P?515388>@BM\fknrpsuu}~���������������������������������������������������������������������������������������������������������������������������������������������������pejq��������������������������������������������������������������������������������������������������������������������������������������������������������)f��������о�g__UTo��������������������������������½��h')>PXVPKD=741.,))%&%" "!! !##" $r���������������������}|ygLb��������������������������}b���������������������n'&SSDDA:1(%,6?w��������P,%))"&?�������N$9MSL5-.0,*$!$#"&Uty|�|wrsvqmosuxvqpqoomtwrqkfghjgbI4dgP-% "$! !%-872W���$4q���������������������������������������e$-<3%!G_K0$#!
'���������������������g3~������������������į��d,R��������������h8I���������gQnf\PNQ=79;;@Q��������������������υ6i������uikt���gACJc�|C4*)%%%%&'$%%&&)6V��zturqyz{qbRb��������������������fd��x0@L;2-**,-.,*+47;;83#6q��]KVabdc`ab`[Im����pl���������hkvw~zz�������t_�������������tk}�������WS������������Ŀ����������������������������·pBt���~}����~}~xg��������������������ǽ�j8=KVJDFHDBC>diHGGIS_l�������������������������u��������}�wsqhbba@!(7?>;730/0120*)))17:;70-*;LTV`cdfgjknorrux{~�������������������h]TPQOKMMKORSWZ]`efghdigdbfggnssqqlnj_^`dekomrux��������������������������������������������������������������������������������������������������������������������������������������������������������������nekt���������������������������������������������������������������������������������������������������������������������������������������������������������.j��������Ͽ�������������������������������������������i%#:UWUOJD?83.-,()&$%"!""! !  #j���������������������}|{hIa��������������������������|^���������������������i#&JODCB90&"+6C|�������޶�|spkhj��������J";SYL5--.+("###"!Jdjmnlljfcb^_ehmlgegijhhmqqliilnmkhM9ouH*$ "####&,677`���x"3s���������������������������������������d$,<3& JbJ-#!! 
)����������r�����������N>|����������������������_+V��ſ�~����¾��pJX���������su~rgo�}FAEA>Vu���������������������|:�������rmw����_@DGThL<3 *'#$&'&..%$%'(5\��vvu�����h[c��������������������de��r.>H91-+)+-.-+)045575!9t�x^W]eca__ef^VEq����nk������������|xoYs��v���qa�������������ww��������SU�����������Ŀ����������������������������µlGx���{{��|~~}|{|te��������������������ƻ�g8;NXJBHIDA?>F>4@DLRZm�������������������������q��������~|xujb[\R:&*:B@<95222221,*+.6:;;5.,/4FL\kloptusvyz|�����������������������}vwvsqpuxrq{zz{~}||}|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~o^mw���������������������������������������������������������������������������������������������������������������������������������������������������������/k�������������ƿ��������������������������������������f#%:PWUOKH?820/,*'$$%###"!!""! !%k���������������������}{x`?e��������������������Ŀ����tX������ľ�������������d$(LL>A@80&"*7D~����������������þ�������H)@UXG5,,+,(###"!
%Kcilomg\XUTNT]`a[WX[\[^cgknehkkihhfICvh?*""$%$"$.586f���q 6}���������������������������������������a&.>1$ GYC.#  %io`l}~{}yu����������Q?������������������ž���_/[��ü����������Zl���������Rd�{ht~WEPTIJdw���������������������q?������rcdjz�zlA=CCDD@=1 *&%&&)1Yn8!#%,:Z��zw{�����mbi��������������������cb��k.BH74.+(+---+(+02256"G��}fffgdca`if]XIq����pi������������}zqc�r}���m[�����������������������VV������������þ����������������������������ĳg?v��~}||{||||}||}wd��������������������Ƽ�d4<OWNHGGB@C>;98=DIM]p�������������������������q�����z|}vwqoiWNGIE9)"3EIGA>97779532214:?>=;8;<AJZlstwxyyxyz}~���������������������������~~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~|}{|�������������~l_mv���������������������������������������������������������������������������������������������������������������������������������������������������������1g�����������������������������������������������������e&<RYVOJD@:320,*()&&%%#! ! ""!!!j��������������������~{ukY>h��������������������þ����nEy�����»�������������c#(NRA?=6+"".8D��������������������������B+>T[E2)*-,*$"%"!!!$:k����~tj`RSY`ed`XUVSV[`_dfdc_\\ZX^`A@nlI)!$&%#$-584j���o"3~��������������������������������¿�����Z3B0# J]F-# !
Zo}��yux}�����ng������E"F���������������ƾ������W,a��������������vBZ����~����x�ͯpNS]q����hX���������������������h<ilohe[MMJMOPPI;>ECEED>1 *&$%'-]��g!#$+8b��~v}��w~��jhr��������������������\a��l-CK;5/*'+-.,*)*/0166%M���ljddfd`cjhccRu����ph������������~tq{mj����j^���������������������TX������������¾����������������������������űe@|��||}|z{|||}}}~xi��������������������ƻ�`7>OWOJHECAA>;98<GMP_n�������������������������r����zxxvsnidUF@?A@7&'?NPNIDEC@>;==<9;:??@DEVahmqx|}~~}}|{|~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{zxzyuttuuostvy}�������������|khmt���������������������������������������������������������������������������������������������������������������������������������������������������������0g���������ż������������������������������������������d!!9OWTQJD=83/.-+'$&&"""!!!"#!"&i��������������������|xphYFf��������������������ľ����f9l����ľ��������������\(NOA>=5*!#03G����������������Ŀ��������G&>SVC0+--.*$  %%"#'*7g���������zwvsqof`TNNEFSTXQJMSOQQTYR/Dmi@'$%#""%-479n�̼e$7����������������������������������������V3:.# !I[A)"""gv}{viehmswpqdi}�����w2$G��������������ʼ�������Q1b��������������`<Ei���{������N;J\����pBX���������������������[,QY]ZQQNUROPRTK5<DCEDB?0)'$$'*T� 
%%(9c��|u������~kjy��������������������Zh��l-GJ;60+*+-/,+*+..041!Q��heaadc^ajke_Dw����qg��������~��quuynb]y����me����������������y����~L\������������þ������������½��������������ëhM|��|z}}|{|~||~~vg��������������������ź�\2=PXNIIGCB>@<:8;CHJ\m�������������������������o����}xxsmkdQFE?A@@408HKTXWTPMRRNIHMLMOVWZ]ahny�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zmnoponrtuusvyyz��������������zeemw���������������������������������������������������������������������������������������������������������������������������������������������������������0j���������ö������������������������������������������`"#<SYVQMD>83/,,*'%%$$"!!  !""!!$h��������������������}xpiYDi��������������������ž����c4`����¼��������������^*QM@?<2) &16I�������������������������G(@OQB1-..,)$   %&#%',;���������ļ�������}pj__c]YXZ[][ZYXWK+Cj`@( ###""'/88;t�ϹX$A����������������������������������������Q5<.$ &PW?)!!!Zili_WTX\dmtxroy����yX$$D�������������ͼ�������~M5b�������������|U:5Sx�����~a��{0Hd`T���>Gp�������������������ٿV9aTXTHCA@::<<8.':CEDCC?. 
)'%%(,c�ד%$*;d��zx|�����jqh{��������������������Vl��f3HI=71,+,//,,-/-.040V��w`_ZY_^[bkiYM>y����rk���������{d]dm|xpr~����l\���������������}t{����}L_��~���������ÿ������������·��������������ɮ]K��|}||||~{|}~�vl��������������������ķ�\8<SWLHHFDC@?=;6=BCDWn�������������������������v����|xppm^WY[[WWZekquronkjijllnlnmoptz�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|qruvuvwy{yy{~~~��������������{jgoy���������������������������������������������������������������������������������������������������������������������������������������������������������-g���������ƿ������������������������������������������`$$=RWUOKC<82.,+*(%$$"!   "!!!f��������������������~zumZ?m��������������������ľ����b6W��������������������P)JI=>;3) %08H�������������������������~B'?OL?1,+/-*#!$&$$')8~���������Ŀ����������vicadeb_[ZWSOD+HjbA+ !$%$(0757w�˷S(M���������ɽ�����������������������������M18,!
(TX?*#"C[d\PHHKMS^gmheloqnfY=%L�������������Ǭ�������~K;d�����~�������xS3/Bi��|���z]��l1m�hC��@c��������������������ֻO+6&%))%&$%')&&%+;DDC@B;*#*((')5���|$%,<h��wxx|�{ywrpm}��������������������Sp��d2DF<60+-.02/,.1../1-O��ua[TUZ[_glcJ>Bz����jm���������~ulgn|}yw�����i`���������������vu}����xGb�������������������������������������������V\���~~{||}}}}~�vo�������������������ʿ��W3@UTJHGECA?=?:;?CABUe�����������������¿�����������������{z|zy{~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}|yxyxy|||}}~~}}}������������������������������������������������������zssuyy{||}{}~~����������������{iipz���������������������������������������������������������������������������������������������������������������������������������������������������������/k��������Ͽ�wspa`m�������������������������������¿���^%:RYVMHC<730,()('$$""  "!! !"d��������������������||qZFm��������������������ľ����d:[�������������������wI*LK@?91( "-7L�������������������������|?&=RM>1,,-.,$#%# #%%4y���������������������snlhijgbZRONF;(TrnH) "!""#'184;}�ζL)G���������ȿ����������������������������~I4:)!
,WY>("!#B\`VIBAA@IT_hcfdb`YQG2(L����~�������Ⱦ��������zI6f�����~�������jJ50<\�xz���ib��a1|�UP��Hs|�������������������ֶF01((''&&'(###%#-=DCDDA:)#(&'(+<���: $#+?m��uwttvssttqp|��������������������Os��b3FE:40-0110/+.02/.1-W���g^WUY^`iocG?C����pi����������}ytx~��������ki���������������zp}����tHe�����������������������������ļ����������fPZo����~}}|}~}}����sm�������������������¹��TAIVRHFIE@@A@B8?FELPfz������������½���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{xvrrrsrrruwy{}}}~}�������������������������������������������������������~xxx||�~~}�����������������zgks{���������������������������������������������������������������������������������������������������������������������������������������������������������,g��������Ϻ��������������������������������������ý���[';RXUMJD<73/,*))%&%! !      e��������������������~|yoZDi��������������������ľ����`6^������������������~sH.WM@A80*"$-7M�������������������������{=*=MM?1*+,-*$!&$""%*6��������������������zqnhgjjfbUNJGA8'!T{uA$  "#"!"&1846z�˶I )A���������Ⱦ����������������������������{H58("1Z`<%!
G`bSF?;;CGQ]bV^\WQMD:)*P�����������ư��������z@5c����{�������pYC4.5D^ds{zy^v��xJXT@g���cbc�������������������״>-,'&&((>SW>%%%$-=EEEDC:(!'''*E�ȽX"$$,7j��yuuuussrspiz��������������������Mv��_5IC:50,/00..+.12/.2,"`���gZPRSWVksfI7C����jg������������xp~��������gh��������������{|u|����tEc��~�������������������������������������nPZj|���}}|{||~�����vm�����������������¼����jdeic[TXXZZ]```eot|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~{zx{ywutupoljgccgimrtw{~�����������������������������������������������������������������~x{|~~����������������������zaiq{���������������������������������������������������������������������������������������������������������������������������������������������������������.f�������������¿���������������������������������ÿ���['=VZWQJE=630-+('%$$!#$! g��������������������~{wmWDi��������������������¼����^;_����������������{vwoI2JF@?8/'#/6I�������������������������z>+@OO=1)*,+*&!!'%#"%);���������������������{snegklidZOJHD<*%Xuf6&"#$%%"&2845~�̰K /K���������Ǿ����������������������������}E#77)!
4\W5# BUVH?:9;=ER[VMVURNLE=-*Y�����������ƾ��������{:9\~�����������s\D3-/8Phyvtub��ќ_CE_����hE]������������˺�����֮2-,&'),^���d<$%&/@IGHGG;*#((&2n��= $$$%)6e�yvwuvusttol|��������������������J}��Z4IC94.-.221.-0411/1("a��|f[NIKNaxvhRLR����jj������������wm��py����dj�������������z��������^8j��xce����������������������������������kPVj{�������~�������}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~{zwvtvsssqnheecbZ[eda`\RRZ_hqy|�������������������������������������������������������������������z{��������������������������vejr~���������������������������������������������������������������������������������������������������������������������������������������������������������1i�����������������������������������������������������Z%?VZXSJC=830-+)%&#!" ""! !!! c��������������������zxslZNm��������������������¼����^5a��������������||tt~|M-GD??8-&%09I�������������������������v;)BTO@1*+-,,&!%$!#$)A}��������������������~wsppppnh^NRRKA'%TmY7&"#&*+'*3829��˫E,H*7=%"+K���������ǿ���������������������ÿ�����y?"21'!
/KH1  5@><:78=AJQTHIQSUTURD,-Z�����������ŵ�������y~s><c���������~ubK<2--2AXiie^M��сNDAM`jjW39j��������ν��ż�����Ԥ-3*')-Q����qM"$%1EPT\OUY5%)%$(+(#!%#$$#',?kyxwwutvuutol{�������������������L���U2D@73-+/32101233223*#a���smd^btw|ysf^R�����kq������������zp{zo{����ag��������������������}nZ]��uct�����������������{xwplib`]YTKRRPQPQW^fx�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yqpolnnomijkjjfb[W[\RBP\a_]XMNX_lt{��������������������������������������������������������������������z|��������������������������qblr���������������������������������������������������������������������������������������������������������������������������������������������������������1l���������ƿ�������������������������������������ÿ���V)?WZVQIC=83/-*('&$#"""""!! !g�������������������|qigm]Jn��������������������»����^2c��������������������N,JF=>5,%%1;K�������������������������u;'=ML?/+-,-*$#'$!"$7t���������������������~yxurmjeb^b\O=$#Oj^6# !);>1-471B��Σ?Fzrd|_#%,P���������Ƚ���������������������¿�����y7!61% .OJ/ !9><<<8:?AKGHBAO]Z]b`G*(\�����������ª�������un=:`uspght|{qmgTJ?:1-*-6DQVSH3c�|0A]ohPJGHRl���������ϻ�ȫ������՞*3)&,5y�����X%$%3Tiriewd3&($$%&&%#%###$%-;c{zuvutvuutnf|�������������������M���V3D=62.,.00132214654*
'd����zzoiv~~wfPC�����oy����������������������{Ng���������}zyxxxwvtxwzy~����vcpyz|zyqnokjgeeaec\a_`_gfhnjmnmpuvz�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|~�������~qedfegjllkjkkjc]XX[ZQIW`ba^VNNWalt{���������������������������������������������������������������������}|~���������������������������rilt���������������������������������������������������������������������������������������������������������������������������������������������������������/j��������Ѿ��������������������������������������þ���R+BTZUOKB=:41-,))'"""#"!!!  !e������������������wicguq]Cm��������������������������Y1c�������������������u>.HG:;3+$$07M����������������Ŀ�������q;/=OL:+(,-.*# %(# " .s��������������������}wqnmkffgiok[L< "VoZ1" !'8XY6.26.@��Ϣ8Iq`p�a&1Z���������ƻ���������������������þ�����w;#81$/ON- !!!
 :<<>>::<>DGD=BR^[Y__]/-a�������������������sj<9Ub\W[gnj`[ZOC=;82++,/8DHGA;SdC<\������������������ʫ���������ӛ$3*&,:������?$#&8X]_WY\E'')$####$$$$##$$,@dyxuvstuuturi�������������������{I���T4G<52.+)+-040/13543*#K�{g[NJKd}��oT=I��������������������������~|ul~��qpsqolgmrssruy{}~��������s^^fklnwwxz~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��}}|zzzz|~~{||||}���������jceddehkkjkkkic]UU[WNHYac`]TLOXbnx~���������������������������������������������������������������������}����������������������������rhjv����������������������������������������������������������������������������������������������������������������������������������������������������������0g���������ǻ��no��������������������������������������T(@TZVPJB<731-,*(('$#"!!    "g���������������~yy����tZDo��������������������»����U6c������������������}o9.EC9:4,%$.8N�������������������������s1+ANK9+(,..*$$&$##,m������������������{pdZZ__[[_a[YVII?#SmW0#")EeV.+272D��̟1<`]w�Y&/\���������ž���������������������¾�����q6%3/#4YQ+ ""!
$:8=BA<9;@@D?:@PWRR_v\"2c�������������q�����}~f-7PTJQgg_UPQL@<8881-2327>CB<68PY]y������������������ɻ���������Ж"5*',9��y|�f!#!&8FHIIFD7"&($!##$%##$""$%,@hxwxxwvuvurod~�������������������}N���R3I=45/('*,3;3.775378/9_xo[=56Fn��|fXQw�����~r����������~zvtsmjnjov{���~zxzyxzxx|{||~��~��~��}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�|zxxzvyywuwwxxuvyxwwvwy{}}{}}~}��������~ndcdddhiklkllhb]SVXXMJ\dba\ROPXcmz}����������������������������������������������������������������������~����������������������������ognw����������������������������������������������������������������������������������������������������������������������������������������������������������*e���������ʮgt{��������������������������������������P(BSYUOKC;630,*(''%$#!! ! ! !g�����������������������t`Mq��������������������������T6d�����������������}zg:+DC;;2+%!,:N���������������ſ��������p3*@LE7+',.,)$&(%""*d�����������������|sf]YXYWQUYOGCDB93!VqU.#!%+D]G'+584J��˖*%Z�qy�P &1a���������ý����������������������������n0'1.#6WD)!
"';3?EC><:<<:12:FGEPZP6"3g�����������po}���{|{k+5INQcfYNHIG?;9874.5@<67;=<8-7v�oUigX���ofhs��������Ǿ�տ������΋'5(')-Okp~e* #"'8EFEFDB6 &($""#&##%$#"$%.>nxxyvuurwvsol��������������������zN���Q7SJBBB?89=PdLFPPMQWZZez~xj\^cjv{~}xt}��������|yssloifeg`cfbbcdhnsw|~~}|z{|z}{������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{}|}||zwxtwxwwvuwvwvtsqrtuussuuvstwwuuwxy{}{}}~}~��������~kdbcefghkkljkic\UXXVLLXbc`[ROPWdox}��������������������������������������������������������������������������������������������������mhow����������������������������������������������������������������������������������������������������������������������������������������������������������0g���������Ĳ�Ŀ���������������������������������������K*EWXUOMD<740,)'&'$"""!##!!  &e�����������������������}dJs��������������������������V3d����������������{wj:+IE=:3*$ );M����ٷ���������{qh_UTm���m6*@MD6*&,/,'#  '&#!$)`���������������wqg[SOJJFGIKA<83A=8!"RkQ-# '2QdF*-681N��͖$"i�jko8%(6h���������ý����������������������������k,%3."
4R@' " ,C?DJKID>><;<;EORQA@D=%1k����r�������znm������za(2ITY]UJBBD@9446411;B95679884m�ƙd7@R�¹�BC~��������������������%5*%)'0=L<'"$%$'9EFDED@4,'#"!#%#"%%%#&'0Dlzywwtvwvtsog��������������������tN���ccwoefea_[d{rgoooty������������������������������zwxpqrvwxuqptz|~~}������}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|{z}yvwzzzzxxwxyvwtsxwxwvuvvvwvutsuuqsssttutsuuwvuxyzzz{||~~~��������{jdcdefghjjkijid]YYXWNL_eca[PLPYfqx~��������������������������������������������������������������������������������������������������}lilx����������������������������������������������������������������������������������������������������������������������������������������������������������-j�����������������������������������������������������I.DSWSNJC<740-*&%!#$""!"! %g������������������������g>s��������������������������P,e�����������������|yi3,LH>81)% *9O����τJg`E+*#%)**':?a���h0(>JD6)&*-+("$$!!!%]������������xxqf\SNNKB?ENR]e`aellX0#SiQ-# (9T]I<5483U��ɍ##i}dhi<#%)3k���������������������������������������h+'4/$9O?$ ! !
<UMMONOSV\\_\Q\ijaV[_X*0p���qu�������|w�������xZ$2HQQMEB>?=87445454::5123689I��دS4:Q���w*G���������������������u'3*%%%'&&$$$&%$&8EGFEC@1"!*&$$$%$#$##"#'+5Kpzxwxvtwussmc��������������������sR���~�����}{{~|~��������������������������������|}�����z}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}�}|yzz{z{{yxxwywuvvuwvvyyxywxvvuuxwwvsuuuvuusstrtsrsrttststuuxwxxxyzzz{|}}~}�������zjdcdccgghjjlmic\VYYVOL^eea\PKRZfry��������������������������������������������������������������������������������������������������|odmx����������������������������������������������������������������������������������������������������������������������������������������������������������*n�����������������������������������������������������D,EVXRMID>63/+'%$"#$""!!  #l������������������������eGp��������������������������O1c�����������������|we23ID<6.)$!)6N����щu����knw~wumdhYh���f5/CKA4*'),+'"$$ "!!W{�������yxsqlgaZTQRQKHQWg������ye.
'\nL+#%4IPNTO<673X��̀ ,iw[XY1&'*8u������������������������������ÿ�������f*+4.$ASA+&%! "U_RV]`emppppd\gzyrcbeX&-w���kx���������������yxW%.EIFB=<<<953344587562/0156;W���l%>M`���Z;]���������������������h$5*&#$$$&$$$$$$):JIJGC>1 %5/)'$$##&%%$',5=Tuzxwwvuuuvtok��������������������pX���������������������������������|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~||{|}}|y{zyyyzywy{xxvxwx{zzvwzxxyxwuwwvvwvwxwuuvuuuxxtuttvwuuutttsqttrusrrrtuustxxxyz|z{{||||��������yhaacbcegihimljc[XZZUML_ddc]SOT]hs{���������������������������������������������������������������������������������������������������{kgoy����������������������������������������������������������������������������������������������������������������������������������������������������������)i���������Ǻ������������������������������������������J*BVWQNHC:53/)(&##$"!   
 p���������������Ƹ�������dHo�������������������Ŀ�����Q6f����������������{tc54C>;7.($ (7O����ʁd���������zgghTg���e--AJC2('*..'#$$ "![{�������yusomg]YRONNHDIPZlz���~wtra..YcH*!0RX]quO,35/Z��ǀ%PaDOr5#',8v���������������������������������������c%+1,#L`A&%$ "" ]ov���uqsqpribj}u^ZcQ&6|��|k|�������{������umxN!0@D<;9:98532224687530.//247X�ȻZ+S`X�ѥ>Tf�������������������վ`*2(&%%"#%""#""#(>MKIGF@5)?Q=0.+)''+-.,2:EOb|~vwvvrwvvsjf��������������������qP�������������~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~|{~}yzz{yxwywzxwvwywwy{zxvwxyywwxxxxwxwwzyxuwwuvwxxuwusuwwutvvwvsvuuvuttruvutvutsqqrpqqsqqtssttsvwxwzz{}}|{{��������whcbdadggghjkkhe\YYZXKJ`dbaXPMT^hu}���������������������������������������������������������������������������������������������������zigp|����������������������������������������������������������������������������������������������������������������������������������������������������������1l���������ȱ�}lQf�������������������������������������F,CUWTPHB;62.*(%%$%#""!!     )y���������������±�������cDp��������������������������S9f����������������}yvd>6EA:7.)$ +:T����ǃh���{t~�vY^XQK7h���c,/BH?3)(*-/)#!"%"!""%\z������{qgaaa_ZUOONNIKNP\dhdc_fovxe.+V]D)+Fe��c&-58+^���zPpdt`-#",:z�����������������������Ǿ��������������a'+6*!"Qb=%$# !
$dy�������}zubW_qyjcqwc45~��wh{�����tt����wdgvH3<<667986332003677531/0/048]�Һ_%Zu�ƴT-]c�������������������ټU+5)$%###$#"!#$#+BLIFEDA65OJ531/./0688>HQTaq�yxwwwxxywts��������������������{f}�����~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}}}|||}zzyzxxxyywxyywzz{wwxwxxywwvxvuwyxvwxyyxvwxxvwxvvzyywvxvvxwvwwxvwuvvuwxwwvuustuvtttturquttpptstsrtrqstutuvsvvxzyz|y|{|}�������xdbdddeggijjkjhe[WZ[VMOaecaYOKR^it|���������������������������������������������������������������������������������������������������xffq{����������������������������������������������������������������������������������������������������������������������������������������������������������/g������������������������������������������������½���D+FUYSMFA<53.+(%%##""#"      +z��������������ʾ��������[9o�������������������Ŀ����}P3f����������������|wwo@5HC;5.'!)8O����·]bq��rrwe[cZS2,k���a(0DI>4+'+..)! "(&##!$W{����~rhe[TSUTUPGKMHLRPYeinn}�����p++UfB)#2JaA#+8AE3e���w+[H2G-"&/>���������ȿ�����������������º����������`!,3* $OQ7$"!! 
#\v���������t\Xalsgy��m54w��rc~�����yxph����m[djB09:546665100002666430-..057a�ܸi/f���dD@RX�������������������غN/5*%#$"""#"$&'%1KOLIIDC:1356=@EJJNTUX]chly���z~�����}���������������������|����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{~~}{~|{{}}{}{wxyxxzvxxxzyxzxzyy{wwwxyxwywwywxxywywvwwwwzvvwwxwvvvuwxvwvwwwxwvwwxxuwwuvvvuuvuxvtuvuruvvvtutstuvurtsrprrstsppqqqpstqqpqrtuutwuwwx{zy{z||~�������vhddccdfhiihiihg\XY\XJRbca`XMNS`lu|��������������������������������������������������������������������������������������������������wdioy����������������������������������������������������������������������������������������������������������������������������������������������������������,g���������ο�ysohh�����������������������������������~E%DVXTNIC;53,*)($""  !!  !!!# )o���������˼���Ƕ��������b9n�������������������ľ����~K/e�����������������~�y54E?;7/(#+8V����хG_���������jY3-h���],2FJ?4+)*.+'" $&$## %Ux��|vnid^VWZXXVSUVW\hx�����������n)0VaE+!(:;=>>IJNX`iX2l���r$BPJ8(#'/?���������Ǿ�������������������ý��������W".2(!#MN/! " !
*p�������|~vlhiqty��~j- ;p��rf�����vnqk\{���_QX`<#099546322120236665441,..035`�ٯ{QBPK^zfG7R�������������������سK39-/)+'&)()*-1:L^^\`eceghnptuzyz������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�|~�~}{|zxxzwyzwwyuwyxz{y{{zzzyyvxxwyxwxwxwwvwyyxyyxxwwyxxyvyxwzzwvvuvvwxxyvuwvvvvwvuvvwwwxwwwwxvvwwwxwvtwwvwuvwuttuutvsuutuuttsuusrqrrqsrrrrooqpqrqqpprrsssstuxxwyyyyz||������teccdcdghhhgjkid[[[^WLPbab`WONU^jt}���������������������������������������������������������������������������������������������������xlkp|����������������������������������������������������������������������������������������������������������������������������������������������������������.m���������cr}���������������������������������Ŀ���}C'CSYTPKD=73.+($#$%#$!!##  !"## )x������Ż�½���ǯ��������g>o�������������������ľ�����I0j�������������������m*3D<72*%""*7W����Ȁ@O������mRQ6+m���[,0CK@5,(+-+(! &&""$!#Pqz{}}|xulinuuy{xwyx���������������s-3_`@+4_plmggmcderlU4p�о_1q�keU7'&.?���������ǽ�����������������������������T02)"#?C*! 
 9���~���}yv~zupruv���|l*%Eo{{op�����lhgho����cPRN%"098533100/20248763212/--./.P���yRFBF`j@,Ci�������������������եMMIGEE@EHMPSTZdmw~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}ywvvrsssrtsqtpvrsusvuvuxwtvwvyxwxwvvwywwxxwywyxxyywyyxxxvwuuxuvvvxwvvtwyyyyz{xxxvxyxxwxzyvuuvtvvwxwxxxvwvwxuuwwxwwwvvwvvvwuuwvvttvuvxvvuuuqstussutusssrrsstsrprrqpqoporqnoppqppqqsrrrstuuwwxxxyz{������~qca_abdfhjiihjic][[[ULU`ba]YNKU_ju}���������������������������������������������������������������������������������������������������vhit����������������������������������������������������������������������������������������������������������������������������������������������������������-k��������ξ���Ŀ��������������������������������¼���z;+ETXUPIC<42.+($%$#%$! !! !"## )�������²������Ĭ��������f<p�������������������ÿ�����J:s�������������������_(3C>70+& &.=V����ɔ|utf]c\[SECC@FX����W.0CJB7,),//)%"$%"$&#!Lrvwz|���st������������������������w,7^\>+<jrmdhibbbg`A0u�Ͽ`#`�dz�mJ*%.D���������Ǽ���������������������ÿ������R22("(EB) <�������|{|�}utpqx���}m),Fkpns������q_X\x����lD=.
!3:85520211//135656652/-./.',^eQMYccS@00Mnq���������������������puuwxwz}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|zzvtvsqonoigbc^_`_`cb^ca^`a_efdhihkmnrqtssvutvwyyxwwvxxwwwvvxwyxyxwyxxwwxxzxyzvwvvwwwvuwwvwwuwyzyyyyxzwvwwwxxwyxwvwvtvwxwvxvvxuuwxuvvvuuxwuvvvttvuvvuuvuttuwwttsuvuttrsuttttrstrrssspqrooqqpnpnpplommoqpsrssttuwwxyxwyy{~������}oebaaabfhiijjjic\]][TJUbaaYTONT_lv}���������������������������������������������������������������������������������������������������siks����������������������������������������������������������������������������������������������������������������������������������������������������������*h�������������¾�������������������������������������z:-DVYVOFA<62-+(''$$%$    !!#%(y�������Ŭ���������������W:s�������������������ſ�����Q<y������������~����{Q"2@;74,&  ',:X���������ƾ��������������V+1BF>7.+,1.)%"%$"#%"$Hlrsv|��~{z������������������������o&7]W9.<fcUVQTOJC>>>4<z�ͼX3�����xM%)1H���������ǽ���������������������þ������O"35'!
'C=*#=������|qfaaiy����n(.C^^Xv���{wlglt}�����zobU5$7<75420030/127:<:97:926325-Geh]SWzsO<=Sqrc����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���������zvppjhbebb_YUVTQLMJJEFFMMRQIGMGMTSVVWXZZY]_b^cdgghinoqqssttstvvwywvvuwwvxwvyvvyvxxwwuvwvwyxxywwwuuvxwvvyxvvwwvwxwxyvwxuvvxxyzwvwxwwwvvxxxywttutuwxuuutttvvtuttrttssttsutuuuwuqrsttsssqsqstttrtropnpopqqqopnmnnnommojompooqrttsuuuwwuuwyz|������}ldca``bfhihikjleYZ]\SJVbc`^TLOWbkw���������������������������������������������������������������������������������������������������rdmv�����������������������������������������������������������������������������������������������������������������������������������������������������������-h����������������������������������������������������w8,CUXTPLD>83.*(&'##"#!!  !"#$%%!%e�������ҳ�~}zwbfn{�����tX<q�������������������Ľ�����T?w�����������{sqw~�~jI!3>;73,'$ (.5Y����������������¼�������T*3DD<70,-1.*$ $#!#$ 'Uqzyy||xyvly�����������������������c#8\Y>*).!%(+(&&',184=�ιSD�����yN$*0F���������ƻ����������������������������N%34& +<7(!  
 "=u}{����~�r]JCO_s{|{g-BV_x�[J[o��������������q;,AC@>97>>;@A?;DJKKPQNQRVUO^_mxyveWVK>Rhv{z��������������������������������������������������������������������������������������������������������������������������������~~���������������������������������������������������������������������������������������������}{~}||zxxvrrvrvusprooomgZWMLGEJJF@CBCB=BDGGECGHHEKNPPRSUWX[\[ZY[^``bdcfhilmnppqssttutvwvxvvwvvwtwwtwvtwwxyvyuvvxxyxwxuvvvvutwxwvvwxuxyxxwvvxwvvxwzwuwvvwyxvsuvwxvwvssruvuutstvvvvvuusttuttusuvuuswtrprsqqqqpqqrssrqqopqpoooqqronmlmmnmmppomlonppqspprsuvvwwwvy{�����|k_``^]aggihjjkkd\Z\\QMYbaa^TKNWblx���������������������������������������������������������������������������y{����������������������nfmv�����������������������������������������������������������������������������������������������������������������������������������������������������������.i�����������¿���������������������������ſ����������x:,FTWUOKD>62,+)%'&$"""!   !##$&"%h��������Ǳ���shjr�����|wY4q�������������������ý�����I?������������tplpssj`H 28851,*#!(0=\�������������������������T+3EE=92,+0/)#"%%!#"(^qxxxyysnf[pmk|�{x��������w}�������W;XV<'"  $&&&%)-375A��ϴPH�����}B$+0J���������ƺ����������������������������F#41$ )<5) !
   C�{zy|ywuoVB?BLVfrpnkL )COg��u�����������������qUY_`b^`chiiklorpsux{~����������gUSRXt��������������������������������������������������������������������������������������������������������������������������������||~~�~~}||{||yyy|~����������������������������������������������������������������������}uwrtxrwvrojknnkmokiomkollnmknmllljmlih`ZRJIJJJFCACA@@=AFFGDBB=BJMNPPRSWVVVYYWZ\\]`bbghhijlnlmspttttuuwvwvvvwvuwvvvsvvwvwwwwwxwwxwwwrututtuwxvvvwwtvuvwwwuvutvuvwuvxutuwwuuvuwxtusrutsusustvuvwvvtuttusrtuvstssssorpppoporqoprsrqppopqnoopooonnlmmmqmnkmmljomopooopqrssuvtutvz�����ykb`_]Z]efhjhjmkg^\\YPMXbba]SLMXdmw��������������������������������������������������������������������������~vv���������������������lgox�����������������������������������������������������������������������������������������������������������������������������������������������������������.d��������������������������������������������ÿ������u8+DTZVLIC;61,*'$$$"##"!!    !#& *r���������η������ô��z}{b:v�������������������ÿ�����M=�����������uoqrmjhbJ!18863+*FM(9G).<320ALG$)0<\�������������������������V,4DG@:2,,--+"!$%!%&!'Uquvz}{ywtz��y{wylt������z~�������|W;[W:$!##%'&(.473=��̰H@����i1%*2O���������Ĺ����������������������������B%3.$+C4& "$ !"#!
&y��{qopmkfVI=BLTakk]deUIOW^m�������������������������������������������������������w]S@`���������������������������������������������������������������������������������������������������������������������������������~|z{zzzvuuvvvvvvvzz���������������������������������������������}yyyxywwywpvwusuportompnmlmqnnnkjkkeiljlllkmlllkmmkljjlljkje`ZSNLMKIFDCA?=>>DGJHA=;BGKOQTSSSSUUUTXVXY\]`cdeghkkklnrqqpsuvutuvwvvvvxvvuutvvxuuvwwvwwvwvvwwttutuvuvtvwvvwvwvuuuttqsttvuvxwuvvttsuvwwtuutuvuvvttstuttsuusurqtqssrtspssssrsnnpqoosqptprqponpoppppppnllkllklmkklkikllmmlnonoqrqprsuttvy~����~xjdc`[WZcgiihjlkg]\\ZPN[dba[QKOYcnw�������������������������������������������������������������������������}sm{���������������������~ngmx�����������������������������������������������������������������������������������������������������������������������������������������������������������+g����������������������������������������������������r0/EUYUOI@953,*'&!!"#"  !  ""!*�����������������û���yO4w��������������������������K;~�������������|wrmfV$.87735x��B���]d��K\�|Zi��]&(0>^������������������������}I-4DIB:3,+-2+$"$$!$& .czyz���~�������������������������\9XR3""$!!#&'+274E��ҰB(nutuy@!$*3[���������·����������������������������z>%60$0?8&!%"! 
!""$'#%%*S���uifbieed]aintuxwwxz~}������������������������������������������������������������w`Fp��������������������������������������������������������������������������������������������������������������������������~|}}{yuuyyxtopmkqtnqstupqw~��������������������������}}�~zuzyswswwupstqsrknnnopprtsrsqppqoopoolmnllqnjmjhlllkkjjjjlmklmklmklmlknkilgggaZTMMLKIDCC@=>@CCFFC>>ADKQTSSSRRTSTUXSWZ^[[`begehgjklmopoststsuwuwvutvvttuutuvsutwutwxyuvutvrsurvvuvsuwssutvutvuttstuuvuuxtvuxusqstwusurtutwvssrsutrsrtqrsqstpspsrropsqrsrpqqqmproonorqpnnqnmmnnomljkkklljijijkhhijkkkmnopqrprrsusuyy����|vfcb]WSWbghfiijje_^^]KP[b`^ZOJRYdmv~�������������������������������������������������������������������������}pp����������������������|mdky�����������������������������������������������������������������������������������������������������������������������������������������������������������,i�������������¿�������������������������������������s3.IW[UMH?:72/*)(##!"!"!"" #""!'�����������������¼����}sS;x��������������������������J@}�������������|{wvsmO!
-7783J��ht���n�̖W~��Y���F%)0<b������������������������~L*8FGB;2,+00)$!%$"!$!(Ynsx}�����������������������������S:VI,  " #'-352D��ѫ@4cjV1!$(/_��������ɿ����������������������¿�����y@2<3$  $FVT?9?6.).0*).)/9C\kpkpxz{}||������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{}|yyxvwwuvtuwvusqomnoliprusrpssuv}~~�}|wvxvtvturpnppqrssptqputpqsppqnnqnppllmlmpqrstprqppppoonnmkmnkmomjnhjkijkjikkkllimmlmnkjkjklijlhihfc[TMMIJFFECBBBAA?CC>=@EORQPQQQSRSTRUVWYZ\^^_bedhhhkkmonqqrrqrtstuttuttutsssuutwvvttwwyxuttssstttvutvvtuutuuuwwtstuuuuussrtxutrtuusuutsstsruruvssssrstsptoqtqsrrsprqrqrprrsonpnooonoopqpmmmmnnmklmkkkkkjkikihhjiihlkklkopnmpptqrtstwx|����}ticb^WRXejhihkllc^\]XNMZ`__[SMQYeqx�������������������������������������������������������������������������}v~����������������������|kjpz�����������������������������������������������������������������������������������������������������������������������������������������������������������-f�������������¾�������������������������������������n+-HV[SLF@9422-))&%"$"#"$ "!#$!
&w����������������ÿ������hBw�������������������������zK;y���������ztwroolhmmP!.9:94M�ji��:��O?��T9_T8$ '-6g������������������������{H-9FLB;3+*-,(#"%$!$% $Mjmty|�����������������������������L4H>) %/523D��Ъ>  !#*0_��������ƽ�����������������������������tF+AA2((('((2Mkyutyk]\cdVSTUWfk{�~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yywwwttrqnjkprssqpopnpolmmnonpopqnpptssrtsuuqrtqprllpllpolllnpqnorppsoppqoponoplomlnnlmpmnrqrrnorpopolnlgmlikllkkhjigijikjhjmijjikmkklhhjhihjhjffhaVPMHHFHGGHHGHD>=@@@DLPQSRQSQQRRTUSTVYX[\[]^acdegiilmgnpnqpqrrsprttsutststutuutuutwvuttusptuuutvutvvtstsvtsstrqsutuvvtttuvprqutrssttqsrnqtssssppststssqqrrprqoooqsqorqppppnolmnnkpnmnnmkklkmlllkjkjigghgiiihhhiihhijkmnmmnopprqrtsv|���}{rebcb\Z`dhghhmnkd[[ZVNJZ```[OIOXcnw���������������������������������������������������������������������������������������������������zfkqz�����������������������������������������������������������������������������������������������������������������������������������������������������������.h�������������¿�������������������������������������s+,GWYSMDA940/,)*()'%#" " !!"#%$ )y����������������ù������e7w�������������������������}D7p�����~{zuqruprpjkrlN 07:9313&=1-$"#!(-3i���������������ſ�������wH.5GJ@80+*.+'"$&$""#!1^uu||~~{y~������������������}|wz��v;7H;* "#(177:R��ϨC!
"$(12d��������ų���������������������û������t[X\WGCDEHOWforwy��~���{}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||������~xutnppqrrronnopqqnooprqqqsqopsqroqpqooonoptporspnrqpoonmlmnnjjlmoponppqrqopnoqpmoonnnjlpmmnnnqpqtqnrsppnmmmllllljillkkjghhghlhhkjijhjjgijjghkhhfghfdfibWSNJJGHHKLJNQH?==<>GNSRQQQPQPRSSSSTUUXYXY\^\abdghhjllmoopqppqsqsustsprrqutturrtstrptusssststutuuuuvvurrussrprrsustvtsssttrsrrqrsstsqrqrsruutsqrsrsrtspqqsqpqqpqppoppnonnnonpnnmlmnmmlkkljijjjjihhigfghkjhffhgihfhiklljmnnmqppnqsvw{���~zoedba^_eegigiklke\[XRKJY``^YPKQXcnx~��������������������������������������������������������������������������������������������������xflp|�����������������������������������������������������������������������������������������������������������������������������������������������������������,e�����������������������������������������������Ŀ���n./LZ[UME=750-*&()('#   !!  
!#&$#�������������������������f>z�������������������½����wD=o�������}zyz}zyytvtoJ.7860+'$ #(-6h������������������������xC)6BE@71,+/+($#%$"%%/h|{|�{ws|������������������~}���t;+EK=/*%%$$))+.189<CHa����Z9/31/6>@><ALNPu����������rg�������������������������||{wxzztywz|z����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xvvuwsuxtrrsrsrrtsqqtrrsrqnporsnnpomopqqpnoqqnppnqppnppppqqoqqnmmlnnkilkjpomnlmopmjmnpponnmmlllmljlnnqqonpmonoqonnomkllkjjiljiigefffhfffgggihihhhgeggghggiheegbWWUQKJHIIJKLPRNJB>::BEINQOQRRPPRSTSUWXVXWXZZ\a^dddhhklllmlnoopqqrrprrrrrrtsttustustrustsqrssusstttssrrqstvurrttssssussrqqqsssrrrrrtrssstqpqssropqrpprqopppnponppnonooopmmmmmlkmnmklkjjillikliikiiihfhhfhiffhhfggffiikjijmlmonomnnrv{}~~{xohea_a_efeghhikld[[YRJLV^^\UNIQYdny��������������������������������������������������������������������������������������������������whjq|�����������������������������������������������������������������������������������������������������������������������������������������������������������,f�����������������������������������������������Ŀ���n6.GWZWNC;742.)'))&'$!! !  
!$'%&��������ʹ��������ru�x���c<{�������������������½����y>=s���������������ysfD.795/''#!(07k������������������������uE*0BE>82,*,*&#!"##$#8hsrvz{}���������������������������u;!2GWUOMGBBCBABFIT[`i{������{qnqmwuuwz}�����������������������������������}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ztwxtuusqsrpsrpturpqrpprqnnpoqqkpqnnqppppppqpqsmorrpmrpnpqoopmlmlklmljkikmnmomlpnonmppnmnmlnmilnkkklnqpqpnqpnqqllmmknkkijklkhiiffeeghdeffhhghhhhihgedggedgebd_[WVVRLJHHIIHLOQXTMJ?;>@DJNPRSTSPSUVUVTVXVWXVW]][abcdfihikjkmnopnprpppqqoqssssrqsrsrtusrtsrrrssqtttsrssqqrsssssstsrsstssqqqppprrrqqrtrrqqooqrqqopnpqopqqoppnmnmllopoonmookllmllklnljlllkjkjjjklkjdiigggggeedgigdgffcghjhimmmmnnnlmnou{~~zunfabaa_dgeggijklc[ZZSIN[]\[ULJOXeoy��������������������������������������������������������������������������������������������������vgjs}�����������������������������������������������������������������������������������������������������������������������������������������������������������+h�������������¿��������������������������������ÿ���u9/GWYULD=842.,()'%&$##  #$&%*�������������½���{sr���`@z�������������������������{<E����������������ytiC.9<83,'% ").9g���������������ÿ�������uC.4IZJ>4+)*-&!#%%"$$
2Zkoquz���������������������������nD>FMXY_^\acegkow}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vrvvsuupprsrtsrrqoorqnoqqqqonpnmtonnonppppnoppolooplnpomqqoqolkkjlllmlkllkmnnmnmmonllnnmmklmkikkijgkmnnpnlpkmpqlnkjjljjjijlkihfceddgffeedbhghgghgdfgefebdeca`ZYXWTQNKHGHGHHKPVV\RDA>>AEJORPRTRSTRUTRTSVVVUVXY\^\cabffiiikklmllmopqnqpoqsprsrqtrqqtsrurrspqqoqrrrqssopqstrrsrrrprropoqqrroqqtrrqpsrooppmmpopommnoooooonmnnmmmnlmnmpnmlkjkjmkkhjjjjihgjkjiifhjijghgededeededgeededceegghikmlllmmmmouz}}{zwlc````addeffhhjkc[XXRJMZ^\ZSLJPXdox~��������������������������������������������������������������������������������������������������pdjs}�����������������������������������������������������������������������������������������������������������������������������������������������������������-f�������������¿��������������������������������ÿ���x<-FVWNJD?853-,)(('&%%!"$ !"%$(�������������������|v����X@{�������������������������v?C������������������ykG2;=:3,($!
 +-1m���������������ľ�������wG9OelZL;0.11)&()(),/0&'9Zgd`kt{{z|������������������������uot���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|wruttqloqqokmr����������������������������������ytturtrosqprrqrqppppnnqpnonmnoknqmmmnnnonmlmnonmnnnmpnoooooommllkkkkljgmkknnikomnomlloqoljjlklmljjknpnonmnnmpnmlnlkkkkjghjjiifdbceffddfeeedefdfgeddfgeaceea^\ZYZVTRQKGFFEFFGKKQTJHDA@>>CJNRTUTUUSTUVVTTTRTVVZZXX__adceeehilmllnnpqnooopqrrpqrtsrsurqsqqqnooqrprqoqolqoqtppooqpnpplnnpponmppprqpppqnlonmlmmnommppoommnonnmllnojmmmnlmjmkjjjkmjiiihghjiihigfjihhgeegecccacacfccbdaffefgfgkmmlkkjjlptz|zyytjbbbbabddegfgijkcZWWRIN[]ZYRJKQYdoy~��������������������������������������������������������������������������������������������������mfks�����������������������������������������������������������������������������������������������������������������������������������������������������������*e�������������¾��������������������������������ÿ���vA#/DUVPH@>851-*)'(&$%%!""  ! 
#'%,�������������������������UCy�������������������ü����xB?~����������������~se@6=;94,(# "()*d���������������½�������v_QVif\QC=7:=87:77@IQQX`hih`efkrtty~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|wronklicbc^WY]TT_]\[_a`de\abbk����������������������������������{tsqrrorqopqomolmqpomnnlnlllmpmpommmmmlmnnkmonnonkijnmnmlmlmlmjjkjjijhikklnmnlllqokllnpkkkkklkjjjjlmllmjmnooqnjknnlklkjihkihgcdddfecdcbcddeddddccca`dcbdca]XYXVVUWUOJGKGDDCEFHMJGIEEEA?BFKPTRVWWXTSXSSSSPRSSXUYXZ_^addcfhhhjkmkmnnponpoprpppprqqqttrpqrpqqoqqpqqrppppnpnnomlllmnppoommomnmmmppoooomnommnmnmnononmqnlknnnljkmmkmlmlklkkijjjlkhghiijiffgfigffhhgeedfedcbbc`abcbbcdedfffgfifikkjihinqx|{yuohb`cbaabedgehiihbZVUODKX[ZZRKJNXfov|��������������������������������������������������������������������������������������������������phkt������������������������������������������������������������������������������������������������������������������������������������������������������������.c�������������¿��������������������������������Ŀ���wB$1GYURJA;750,**''%%%$!!#" !#'%!5�����������������������~Y@s�������������������������o2:x�������������}yupeT3)7<<<4.*$!!
!#$)6[������������������������|usnqhba]`bb`abdlpqx|��������~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���|xvqryvvmjklhfebaX]]\_^Z\_ZX\VWZ[VW\ZX]`aaaaaadbn����������������������������������usropqmqpnqqmqqmprollmmnmlkmmooomnkkmkkmnjmnmjnmjjklkloppnllllkijijkikkkkmmmjkkmklljjnkkllkkmkjjjikmmnnmmmmnmlkkjmlklihgffhgedcabed`dabdcacdcdcabccca`_``\[YYTRVUWRKMLNFBCDFEHKKJHGGFDA@AFMQSWWXXUVVPPRSQUTTXUXZZ^_^cdcceihikkkllmmmppppprsssrorrsrpqrpproooopoqpooponnoqonmjmnmmoonkmmnnmonnmmnnmomlmlkkllmnmnlmnmjlmlllikkjjjjlljikhiiijigfefhhgefggggfefggfeececbbddcbabbabcebadeeffihiijkhgjntyz{xuofb`c`a`bdedffije]VTRIDMVXZYQJIOUcox|~�������������������������������������������������������������������������������������������������nfjt������������������������������������������������������������������������������������������������������������������������������������������������������������-a�����������������������������������������������ľ���zH+0FTUQH@<95-+**(&%$%# ""! 
!!$%%3�����ý������������������R6k������������������������h;'Lu����}zvoi[QLC?DJMH:2.79@DHE;.*+('"!%)&"#&%%&$$*1:Aev|��������������}}}{~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wogee]M=K[[a_]^ZU[kkgdge`]_c^][\[^\ZY\ZXXZXY[XTWWVX]`_`_^^dc_r����������������������������������vrrpoosolmnmnnojkmjjjjjljijkkkjklnjijilnmjkmlmnjkmmlinooomlmljjjfghhfjjjilkmliknkklkjmkkmkilmjjiiihjmmlklikmljkjjljhjgfdffidbdc\cc``aadedadcdcabbaac``a^YVYYSMQVUVQLLLHDEDDCBDHGFFHJGFDC?BHMRWYZWWWUTSRRPVSQUUUXWZ^\_`aa`dffiikjjlllnnpmmnppspmpoqonollopornqpoooonpmnnpnkmllmnmnmkkmnkonlolmnnlmmmijkkjmmmnmmjjklljllkkkjjkjijjhhhggaihgiigfcdfigeefggeefeeefdbbbccbbbab^_``ba_aaccccghhihijfejiqwwwutnc`__]_`bcdddfhhe]UQQJBLUYYXOHIPVanz|������������������������������������������������������������������������������������������������~gfju�����������������������������������������������������������������������������������������������������������������������������������������������������������2f�����������������������������������������������ž���zM)0DRUNGB>:50/-,*'&%$"##" !"""#$
<�������ÿ����������������S/P����������������xlbXMGA@IVekuwwfNROE:C>FFLLRZZZagjjjff][\YRMOLKKJLNIKQPQVY^hlu}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}yyyyxuvzrd\XUTP;4DT\\[WZXRbnieehd]bc_\[\\]\WZZXXWVUUVUSVXSTX[[\]Z^aa`cp����������������������������������nmqmknrmjnmlmlmkmjjllkkkjkkklmjkllkklkmmkkkkmnkklmmjinnnmkkmkiiiijkhhighjijmkjnnlokilljkmjjlmiihhkikllkjkjmlljjgilihhheddefbbcb`b```_bdcacccbbabbaba_`^YUVTPJMRTPPNNOKFFHGGFAADCDFGHFGFDDCDJNSWXXZUSUTRQQSSRSTUUUVY[Z^aa_beegijjjjjkkknjnmmpqlnonrpnommomlonollmmmnnnomonmmkkllklljklkjmlkmlmmkllklhhjijikllnnlklkjkjjkjijhjjhiighhjhgihhhhfeedehfdeeeffefeddcdcabb_`]^_`_`_^^__`bbbbaeggghihggjinstvttlc^\]\\^`adcbeeffZTRRF@KTXXWODHOXbowz~������������������������������������������������������������������������������������������������}fclw������������������������������������������������������������������������������������������������������������������������������������������������������������3h�����������������������������������������������ſ���{L23ARXPGB@:641.,,++('%''$$%&$%"##'@�������������������������dNS[[XSJPJABABGMIHFHU_dly��������~ztstyxz|}�}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~��{yssqtvvtwxpf^PQUP<0BRWZYXYVWdnfdfea]c\Y[[[[^YUXXVWVUVTSSTVYXWZ[[\^]``[^`q����������������������������������ompknokkmmkmmkllkjkiklkjjkiinmkmkjlnlkmmlkkimmjmnljjikklmkmnkjkjjjihhihkjijlijljjljjlkkljjkijjhfihgijijigjlikihhjigfdfgeddeca`^^^^b]_aa]_`_`__a_`ab_]^ZWVTKEGNRRMNNNPHFJHIIHCA@@BBCDDEEGFDEEGNUXXZTVUSSROTRPORTVVTVX[]^_^
`cccgiiihijjknknnmnnknnoqoooonmmllnnlnpmkllnlkmmmmlkkjkllkmmjiilkkllkjjjijkiiiiijiklkkjijjkihjjiihiihghigfgfggfgefdddehfegfdbbcbcbbbbcb`_a__\^`__]__^__ab``adefgfhhfeehkorutsojb[[]][\_abacdcecZOPNDALSXVTMDGKWbmuy}�����������������������������������������������������������������������������������������������}kgmw������������������������������������������������������������������������������������������������������������������������������������������������������������5i�������������¿�������������������������������������|P85@SXPIB@;86431212/,)%'(()(*19IV_n���������þ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~������|{wvorruvwuvvlcZQQSN;-BQYZYZXVXcjgfebb__\[\ZX[ZYXXXXXWUWTQSVUSVWXY\]\]^\Z]cu����������������������������������onmjnnklpljlkkmlkjjhjljgikjjmfimjikjijlkjjjkliknmjiijjkmkkmihihhhghfeghkihjkilkjkkjkilmlikmlhjgfhjhihhihgjjhjifiihgfdggddccb__^]]^`_`_^_a]_a`^_\```]\YXWTMEENPPNMNPLIHIIJKJJFC?>BBABEFEEFECBCGKQWXXZWTSSOPPNPQPRTUVUZ\[[_bbccfgiihhhkkkkmkimllkmononmnmlmmmokkmnolklmnnkkllljjklkkkllkkjlmjkiljjjljkjjjhjihjjjhfhllkhhhiighhghffjgfeggfdfcfedfgieeffca`addbbbbcab`^^_^_]^_]^^]]^`_]]bbbdededceegjkosrqnib[[\ZZ]^^_abbddcXNMMEAOUWVQKFHLW`ku{|�����������������������������������������������������������������������������������������������|eeoy������������������������������������������������������������������������������������������������������������������������������������������������������������2o�������������¿�������������������������������������~W;7GXWSSFEBFHFF=:>EFFHJORW]fnst������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���~��}���{wssssssuvrtujaYROOI6/CSWZXYVUXaeefdabb]]][YZ[WY\ZWXWVTUTQSVUUXTTWWWW\ZX\^bx���������������������������������~pmkjmjjjkkllkkkijjjikjhgjiklihklihigghhhjihjkijljhhghkijhmkhihdhgfhkggihfhhijhhkjiijjlmjhkjiihfggghigfhjjiiihgfffghfccbcb__^]]][[_`]_^]]\Y]^_][Y]\[XXVXSMFHLQOMHLMLIELKIMLNMIECA@@@ACEEEGEBAAAFKPSYVVWSSSRSSOQOPURUUUWWY\_`_bcdfdffghjijjjkmmomonnqnllllkklljklkmllkklljhhjjhkijkijiijjijjjhhjjikhhhfjigjehihihggiiggjhijfhgffejhdffgfededdcccdeefdccbbdfd`a`bc`aa^]_]\[^]]\]]\\\^]_^`aabcdabdffgioqpomf\XZXTY[\][`_`bd`VPMMCAQSUURJGILWaisyz|����������������������������������������������������������������������������������������������ybhp{������������������������������������������������������������������������������������������������������������������������������������������������������������+s�����������������������������������������������ú���d\`blnokrroolkijpuw}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������~~|}�~��}~�|zxtttsrsuusstk`YQNLH72BT[\YXWSV\ffeaac`\[ZZZ\ZWWYZXXVWVSRPSSRTYSVZZZ\\Y]]]`w���������������������������������~qkklkjkmjjkjlkiiihihhegihhjighihhifdghgfhffihghhggggghhhjkijjeehgfgefgheeggghggkiijhjjihijjikhhhfghgfhjjghjjfegdceddc_ab_^]Y[a^\[^[]^]]\\[]]\\\\]ZYTTTRICELPOKJLRKICGLPKONOMLMHE><<?@DCDFFDBBAAEINUUZVTTURQQMONOQOQROTVT[Z[`_abbcdfgehiiiilmnlmnlmommllmkjllkjjjlkkkkkkjifhhiighiijgfghghiihhigiihfghieeffhhiiiihjggiffghhgfecdhfcdedcdcdccdbbdcedcaaabcb``acdaa_a_[]]]^\[[[\ZZ[[_]_^_b_acddb
beegjorsnjd[VVVWXZ\[]]]^ab^SMKF>@LQTSPHFINX`jquy|�����������������������������������������������������������������������������������������������wdir|������������������������������������������������������������������������������������������������������������������������������������������������������������5v���������������������{s}��������������µ������obWaku���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���}~}�������������}~~~~~��}~}{{xuussrqtttutlaXQPMF60CS[[YYXVX_jjeccb^]ZX\]]XVWXY[YVWWTRTVSTUVUXWWZ[ZZ\[Zcv���������������������������������~nikjjjjiikifjifghgffeehggfgfhhgfggedgggggehhgigfdggfghggkgehfbffafgdfhfegfggghhihjkijjghlkighffggggffgjjefihedbcedbcb`ba^_]X\a_]\[Y\]^_[Z[Z[ZZZ\ZWUTTPLDEINNKJINKGGILLNNNPPMMPMGB=;<>@@CCEABCB@ABFLPTUTUSRQQNOKNPLRTORUVYXZ^Z_``aacecfgggijllknkjlllnlllklijiikkjkiihikigggiihgieghfffgiijghgghgfihigfdfgefhihilihcfgeeefgfedcadgcccacdacacaadccddcba``a`^^`ba`a]]^\Z[^]\[XY[[[Z\]]]]]bccacdcccbfjnqplhaYRSSTVY[][ZY\__ZSMKE?>IPRQOGEEKV`jqvxz{}~��������������������������������������������������������������������������������������������seks}������������������������������������������������������������������������������������������������������������������������������������������������������������r���������������|qllmnx�������������{yyrgacjkjry}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������|���~��}~~}~�����~�����������~}|~}��}z}}|wvussrsuvsusj`WURPH50BTXYVXWUWckgdcca^]]\^[WXXWWXYWUVUSQRTRUTUTUUVWXXZZXYaz���������������������������������|lhjhiijghhhgjgfhgfeecfjfgeeegeeghfhegfgddehhgheefgffgfgighggffgfcgedffeedeffhigggijihhghhijiffccfhhfegjhegdcedbbcaac`_a^__[Z^]Z]\WYZ[[[ZYZYZZZ[YXVUTULDCJKMKJKKKEFHLOMOOPPOKPOMLID?=>==?@BCDCBA?==BJPTTUTUQSQQMOPNPQRPRUWWYYZ__```cbaddegijkjjjijjimljjhkkjliijijighhhiiffgjgghhdhffgghghgfgggigfgggefggfeehhfefhgfgddbcfedccccdcacbbccabbba`aabbcb```_```^^___^\]\ZXZ\[Z[ZYXYZY[[\^\]^aaabdbcbbbjnpolg`XTSQSTUWW[YX[Y\VNKJD<=GNPOMFCEJT^hpuvxy||~�������������������������������������������������������������������������������������������nfku|�����������������������������������������������������������������������������������������������������������������������������������������������������������Ǖ������~�}yssvw{�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~}}}}}}�}�|~������}}|}~}~�z�}}�}wvurrrtttrtsk`YWTQF3/EUYZVWWUWegecb`^]][^^\XXYYWXWTUURRRQRRRSTVVUWWVXZYX]_|���������������������������������yljihkijghghgedfgeeebdgfeedcdecffbcgdbffbbefegdegghgefdgiggffddeeeddefdeecfgghhefiiiigfiihihgggdgggggfefffgdcdcbbbab`^\\[_]X\^ZXZZ[[Y\]YYZXZ[YYYWUQTRNFCHNOJIILJFEIKLNPONOMKNQMMNOKE@=:;==>ACAA@=;9<AHORTUUQQQQOMMOOOROPSTWVWXZ[]]_ababbdeghhgjhhhihjhijhihhjhiighgggijhhfehhfgfgfffhefefffeffffegffdbeefdcdhfedafffdcc`da_cccccddcdacccacdb`_bcb`a^``^]`__^\]^_\\\[ZYZ\ZZYXXWVXZZZ[[\]]^_``bb^_dekllmje^UTRQSSSUVXWVZXYTKHIB::FKMLJEDFKR]fnrtvwxy}~����������������������������������������������������������������������������������������~mfkt|�������������
����������������������������������������������������������������������������������������������������������������������������������������������œ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�||�~{~|{|~||~�~}��}~�}~��}����~}~}}}{}~|~�~|zxusrqsuttstsld\VTRE2-GTYYVXYUWeddhd`__]Z[Z[YZXYYXVTVVSSSRQRPRTRRVVVUWVVXYUz���������������������������������whigilegfeffeccdcbcecdbcdccbddcghcefdadddeeeddchifccffdefghgfeedecddeeeeeeghghhgghhiighjhhhcagffgbdeedecdgddedccaab`^`_\]^]^]YZYYW[ZXYYWXWXYZXYWURQQNHBCIKHFJJJEDGMMNNQNOPNNROPPNRRJEA;8:;<=>?@??=:;>@GPSTTURQNMMOOMOQLPPSUUUUWXXZ\\^`cdedfggiffffkigigighggfgihffegfhifhhgifhfeeefgfdfdeeedbcfddfefccebeaceddfdcdfedba`cbbaabbbdbc``bbdaac``_^``^^^b`^`_^\_]\]]]\][WYYZZXYYXVVWZXZ\ZZ\]]_`]`^\^aeklljie^TRPMOPRTVVVVWWWRKFG@8;DHLKJECEIP\elqruvvwz{}~~������������������������������������������������������������������������������������|jais~�����������������������������������������������������������������������������������������������������������������������������������������������������������Ɣ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}���}���}}~}}�{|~}z|~}|~~~~}�}~���}�~~~}zv~~~|{zz~{yxwusrsttutvtlc[UTRG31HUVVUVWSXgfdfb`_^[[[YW[XUXYWTUTSRQOOOPPQRQRUTUVWVUXZY~�������������������
��������������ueeghieefdddedfcccdddc`accbcddcbbbeabcabcabed`ceccadeddcdghbgfdceecdefgfeeeefhgghegihehjijecdeceeccddddeedceda`a^_`[]_][\\\\ZVYYXYZXXZYWVWWYXVUTRQRQJDDIIHDFIIEAHMQOOPOMPOMPPNOOOPPPKF?978:;<>??>==<=>AIPSSURQOOKIMNNNNPNQRRVUVYWYZY]__babfeedbggfighjfhggfhfhghfeegdghfhhfiffedddeecedbcdec`edeeeecadcabbcecefedbedcda`abca^`aa`ba^a_`a`ad`___^^`]^__^\[Z[_]][[\Y][YZYY[WWWVWWWWVXX[Y[^^^^\][\a`dhmnlhc[QOMKNOQTUSUUVWUQIDB<6:DHIIHCBDGOYeknorsuuwyyz{z}~~�~��������������������������������������������������������������������������������{lckv������������������������������������������������������������������������������������������������������������������������������������������������������������ƒ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~��}��|~~|{��|{~}{{}~|{~~{~{}�|{~~}~�~{�z{}}zw}z|~wz||y{{ytstttsttusi`[UVSH53DUXWXUUPVcegc^_^][ZZ[[]XXYXTVUNRTSOPPOOSRRTVTUWWWWYZ_����������������������������������wg^egefgdbdddddbc``bda`cc^_a`bda`acabb``c`acbbeedeedaabbdgfcededcceedddeffdefffgffhfgghgihfgebdeccdcbccdec`aba`_\^_\\\[\[]^ZY\XWYXYXZXXZXZYXUSRSRPNIADHHEGGHHFDDINNKRONPOMOQNONMQQRTRMD@;78:<==><>><>>=BGLOQSPNKLJNLKMOOOOPOUSSTVVUX[Z\`]_ccecdddfgghgegedeeffdgefgffgffffdgedbbdccbbcaacddcaddeccdbcb^a_b^^cdccccdcggfebbbb``ba_ba_``_`_ba`]^`\\]^^]]^Z\ZZ]\\[[\XXY\WVWVVWVWUUVXXUYYW\]]\\_^[Z\`afjjjhbYQNLJLNPSSTSRUSSNDBA<6:EIIIFBADGNYdjkmpqrrtuvwwwx{z|{|}}����������������������������������������������������������������������������{ifkw������������������������������������������������������������������������������������������������������������������������������������������������������������ǐ��������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~�~��||�|z~{|�~z{{y|}zy|}{z|}zz~|{~~|{}~}}�wzyyzz{{|zy{~{{}|yvtsqtursupha[UUQF11FTXXUUSMTbgfa_`^\[XY]\YXYYWUVTQTSQOSONRQQRUTSTUQUVVXb���������������������������������pbceededb`baacbac__bcabdbaaabcb``aabbcbac`aabcbbcgcabbbbefddfceecdgcaddedbbbddehfhecggfghedfdaddccbabbbbbaa``a_^]]Z]]Z[\Z\\WXZVXXXWVUWXVWXWXPNPQPNGDDFHGDFHHEBFILOKMROMPONMLKNNNPPRQTSJGA<9998:<<====>=>AFKOOONNPMLLKKMMMNPOQQRSTVWZZY\^^^acb``cdedefgfeddededegghfddcceeefdb`ccbbbaba`bcb_cacdcbbca`a`beihbdbcabefioromjgfcaaa`aa^_^a]\a_]]\\[ZXZa\\\][]WWZZX[[ZXWWXWWVVVWWWUWZXUWZYX[[][]^\\Y[^aehjie`VNKJHIKNRSSRSSPPKDA>849CHIHD??CHOX`ehjkmoqrssutuuvxyxxyzz}}~~~�������������������������������������������������������}��������������ycclw������������������������������������������������������������������������������������������������������������������������������������������������������������Ǔ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|��}{~~~|}~||}�~{{}{|}zyz{zy{|{|||y|}|}|}~�{y|xxz{z{}}uz|zx|~zwtsqrttrqsohaYUTPD.0GRXVSSROXffbab]]][UX\TWYZUTSSTSTROPOMNPQQRSSSSRRVUUX`����������������������������������k_cdacdabbabb`]b`^`ba`ca`b_^a`_`a`_b``ab]^bacccbceb`bb_cebdeabdcbceaacbccaab`efffgefgedhfdbecbcbgf^aababaa````^]\ZY[[YYXZ\ZZZWVZVSVVUWVTWWUTRPPNJF@DH
JGFCFEBAFHKMOLOQQONNMLLPNNOOPQRSQPNHA<:779999:;;>=<?BEJLOPONNMLLKKKMLNNOOQPQTVYYYZY\Z\[^_`ccacfccdeeceefffefgddea_bddcba_abcb`ba`abaa^aaab`_a`_`abbltvtpmmfabdotxzxvspkfdbcc_[UQWW[b^]ZVK@FEJQUZYYWSGIQTV[\ZWVXVXWWWUUVWWUVWUWVWWY[\YZ\ZXW\\aegjhc\RKJIEEIKNPPQPNOQIB>;717ADGEB?@CFNU^cehilnpoqqqqqrruusuuuvxwxzzy{|����������������������������������������������������}��������������zfdmw������������������������������������������������������������������������������������������������������������������������������������������������������������Ɨ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�{�������������������������������������������������������������������������������������������������������������������������������������������������������~�����~}��~}���~���~{~�|{~~{y~|}{}zxy||yz|zx{|{y|{wy|z|~|{}}|~}vuzyx||yx{zyz}{xutsrrsssosrh`[UTO@02FRXUTRQRZheaca]^`YVX[YYYXVWUSSROMPQOMNOORQRQSRORTRUVa����������������������������������hdaabdbaa`aba_]b_]_``^_^``]___`a_[`c`_`__a``eaabccbab`abaacb`ba_aca_bcccbceddffefddefefgdfbb`dbaeb]`adba_b``^^\]\Z[ZZYYWYZY[[UXYUX[WXWVVWUSRONMIDBCGHHDDEFBACLJMNNMMONOLLKJJNOQPPQRRSTTTQF?<7778787::=><;:?DGKPONNMLMKKLNLLNOMQPPQTUYYXWYZZZ]^^```cebbccdcebcdedddbdcccccbcb`ba`cb`baad`__`b`bb``]\````co�����}vnd_agmuz|{ywtqnje_R:8?HNQW\XO9 
$+4>IOPRL>26=CHNPRSVWUUTUVVUUUUTUVUTUUTWZYZY[ZWXZ]`cfgea\QKKGECDHKMLNNLMMF@=:5/9ACFEA>=?FNV\bfhjklmlmmllikmoqrqqrstsuwwvwvwy}~����������������������������������������������������������������wfglv������������������������������������������������������������������������������������������������������������������������������������������������������������Ɨ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{vvnmjiggnnnuz}z{�}vty{yyz}|y}��������������������������������������������������������������������������������������������������������������������������������������������������������~~�~�~~}|��{v~~{z~}{|}zxx}{zz{wvxyvwyyww{|xx{ywz~{{}}{{}|{}~{wxzuz~{z{yv|~{wwusrqpqrrqsqf_ZSRN@15FRVUTSOPXgdad]_a\YZ[YZZYUVYTSTRPOPMLMPONOOPRSPPSQRVX`����������������������������������i_]_a_a``]_a``^_\]a__a^\`__a]\^_^]_`^\\^__^^ba`]``_a``aaabbbccaab_aabcdcafgdddeedaceedgcbc_`cfab``_`_ba]__]a_]]]Z[\Z[YXWXXYZXUVUVYVSWVWUTQNONKHGAADFGEEFGE@BHMLNNLMNONKKMIJKKONONPQTVUSSTNE?;88766688;<:;:<=BGKKLNNNNLLLKJJLNNONNOSTVUVYWZZZ][]__`cdbccbcba_eccdcabbcbcdc`abab``baa``ab^[_``^_^_`]]`]_\cp��������{vicbgjryy{~|zwpfI4/,/4;BMOG.
!)2:?<2.045:>BDILPVSSTTTURTQSUUVTUVVXWWYZZYYY[\^deda_ZOIHEA:?DILIJKKMKC<:83/8@DEC@><>CLT\bdfggijkkkjiiikjmnnmoppoqrrrrqqswy{}}~������������������������������������~|����������������������vcfnw������������������������������������������������������������������������������������������������������������������������������������������������������������ǖ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xwthkjnpnncb^W[ZUXUVbowy{~}z{}yxyy{y|{{|{�����������������������������������������������������������������������������������������������������������������������������������������������������}~���~}~~�~|~|~}}}{~~{{|zy{{z{zxxz{z{zyvy{wwyzxx{zxx|zxyzyz}{z{}|{{|zwxzxv{{xxzuw{|ywxurpnoorrssnd^YSQN?04FRUURSPQYdcca^_^Y[[YYWYVTUWQTVROONMMOPPOONOSQORSQWWW_����������������������������������g_^`a]`a_]___`_\^_[]`_^^a[]^][]]\^]\\^[^^]^````^__^^^_a``_aba]_``^b`_`a_acbbccccb`cbedcacb`cddbb^bbaaaa]]\]]\[\[ZZYWZZYXZZ[YUYVTVYUUUVWTQNOMKGD@?CCFFEFFDAAGLLNNMNMMOONMMKMMMQNPROOTVSSSSPLE>;886656789:<;<<>@FIKKOONOLJKJJKKNOMOPQQSRSSTXXY]YY[^_`a```_cdadeb`dbbb^bbaaaa_aba_^_aa_``^_]^^___]^]^^]][[aq�����������{rjebjotz}�|}qdY@1++/2:3*'$  
$'*.1444457:=AFIRPPSSQUTSRUVUVWVYXZZ[WVXWW\_bdcb^WMFGB=8:BHGGHHHJH?:850,6?BCC?<:?AHS\`abdefhjikosrqrppnljhillmnmllkkmortuuvwz~�������������������������������}x{���������������������sbfpy������������������������������������������������������������������������������������������������������������������������������������������������������������ǔ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|uokaagjkjh`YZZY[ZXZ\doxy||{vx{|{yzyyz|}|{���������������������������������������������������������������������������������������������������������������������������������������������������~~~��~||~|||{}~{}~||~}z{{z{z|zy{|yyzywuyxuz{yuxyxx{zwwzzvz}ywz}{xz{zwyzwx{yxxxvy||zzyupnoonrqssnd]WRQN@05ERTTRQOMVacd__]ZY\\[ZYYWXVURTROOONOQOLNOMNQPNRRNOTTTa���������������������������������~b[_b[[`_]__\]_^\^^^_\[_`]Z\][[\[\_\\^^[c^^^_^^^^^`\\]_``__]aa^`___d_]__`bcbddddddfdbdcaaca^ca`___c_]``]_^^_^\\[ZZZXZXXYWX[ZUVZVSWVUWUSTPOLLIFD@>BFEEDDEB?@FLMLOONMLLLJIJKLKHKKNMNOPRQQRPRQNLE?;9767644688:::;>?GHGOMLNKJLJIMKKLMNOPRQQRPSVWXYYWY\]]__``^`b`ccaacddb`aa```a^aa__^``^^__[]]^^]^[]]]^\[][^aq�������������~wqkfeisy{}���ueTC3+)('))&'&$  
'-245245678<BFKNPPRSSTSVVUTTUWZWXVUWVXY\^bcb[TJBBA<78@DB?ACDDB>:86..6>AEGEEACCGQX\^^acdehhjs{}��|zvtoljggijggfccegikmnmosvx}������������������������������wpu���������������������oaeqz������������������������������������������������������������������������������������������������������������������������������������������������������������Ƒ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}ytplgeiiolf^ZXWWYWYZ^ftwxz{{zwz|xwxyxvz|||~��~������������������������������������������������������������������������������������������������������������������������������������������������~|}~�~}~}~~}}}~||||z||zy{{yyzyyzz|yy{zwyzuuwzwvyxwxxwwyzyxwxyz|zxw{}|zxzxyyywwyuxxwxzzw|{wupnopqrqqrlc\YSPO@/3ESUTRPNNVbeba_]\^ZZ[ZYXY[WTSSPNPMKMOLJMMLOOOMONMPQRPc���������������������������������|c[^_\`_^^_]\^_^]]___[Z\\Z[^[Z[Y\\[[\\[Z\\[^^\_^[]a]\^`_^_^_b^\``__`^```cfbbccdfbbdb_a`bdba_c`a__a`]_`__]]]^[\]XY[ZYZXZ\WWXXUVVRTVUUUTSPOMLHGD@=?DFEDCB@>AHNONNMMPNJLIHJJGKKILMONNOQRQPPPQPMSNF@=767742245689;=<?BEJKKLJKKLLIIJLMLLOOKNPPRTSSTVVWYZ\^[^^^`^_c``baddcb``aa__]_^[^_`]^`^][]^^_`^Z]]^]ZZ\\\ap����������������~snlkirv|�����xo]E5-$$&()**'#
%+/45644689=BIINOOSSTTTSUUUVVWSUUTWXYZab_[SJCB@;6:??84559:<<862..9@DMW\ZUPLKMMSWY]_abddfmv|������~{wrmkhed_[]^\`cegeeinquw{{�������������������������~qhn�����~}��������������nfgp{������������������������������������������������������������������������������������������������������������������������������������������������������������Ɠ�������������������������������������������������������������������������������������������������������������������������������|{��|y}}yww}{yy|}|x~�|~�����������������������������������������������~vsokhhhlmmd^YUVWXZXY]fv|yvxzxwvvwwxyyz{||||���������������������������������������������������������������������������������������������������������������������������������������������������~~~|{}}||~}y{~{x|}zzzzzy|{xw{z{zzvx{|xvwxyw{ysvxxvwxwvyxvxzxwxxuvy|yw{zzz|vswywxyxvvxuuzzwu{xvsrmlnooppplb[VSPM=,3GRUQPNMLUeaaba^Z\XWZXTUZRTTSRQOPOMMLJKKJMNMMJMLOSQSUc���������������������������������yc^^]]_^^^^\^[[^YZ][]ZYZZZ\[XXXY[ZZ[\[XZZZ[\\^\\^__[\]]]^^]^^_____`^]^_cca`bcdecbdcacd_baa`b`^b_ab\Z]]]_^]\[Z[\ZYWXYYYWVWWWUVUUVXUSTTRPMLIFCCA=?BEDBCCA>BHMNKNOOLLKJKIJIKKLJJLOPLNQPOOMNPPOOMMNF@:767533234789;;:<?CHLKJLIKLHJKLKKJNLMNPOPSRRUVVVWWY][Z]^]^a`]``_ccda````^_``_`a`a___^_a__]__^_\]\ZZ[[[]al�������������������vdjhlrz~������|jW>;0"##%'('$" 
&-2588786:;>CCGMOSPRSRRSVWWTUTSTXY\cb`[TKED@>;<?;0&03-/1576423=EL^fnkgf_XVSNPSTUY[^`bgnty}��������ztojgbZWXYZZ[\^^adippstxz}~�����������������������phq������{z��������������qfip|������������������������������������������������������������������������������������������������������������������������������������������������������������ƕ���������������������������������������������������������������������������������������������~�yursw~������������������������yz{}}xz~|wwz{ywx{}{v|}{���~�������������������������������������������{uonifhilolc^ZWVVXWYW_lv{yyyzyxxyvwyyyz|~|}����������������������������������������������������������������������������������������������������������������������������������������������}�}|}{|}z{}|z{{~}zz}|zy{yyz{{zxyz{{yvzzxwwwuwxxwwxxvwvuuyvstzwvtuwuwxyxy{wxyyutzytwwvsvwutwwxyyvtrrnkloppppj`XWSOJ9,4IPSQNMNOXb`baa^[\YWYWUY]VVURRRPOMLJKLKIJOMKNNLLMPPUXa���������������������������������yc]]]^^][W]]^Z[][XYYZXZZXYYXYWW[]XXZ[YXXZ[[[X_ZX[[ZY\[Y]\[]^[^`___\[]]_ab`abab_^ac^_a`aa``b`_`]\a_\[[\^___]Z[[Z\ZWXYYXWVWXWUXVUVWUVTPMMKHFAA=<=ADC@AA@=AJMONNPNNOLKLKGIIJIIJJMNNKMMLLLLNOMNNKLOLE@:5486631357798545:BFHJHILIJHJKLLLLIMLNNOQPRUQQUUUWZZVZ][^```cbcbab_^aa`^^__]^]^__^]^cklcad___^[ZZZ[Z[Y_j{�������ohu����������fr}|z~��������~uaF5"  "%'&#  
'05799779;;>ACHKMOPRSTUTSTTSUWW\bb]MIIGIEA=>B=56?>2(*-02347?FMX_ehijlhf^\\WPPRTV[\`cglosx~���������xrhc\YWPUVVVWZ_chimqtv{|~��������������������xw}�����~{|��������������ncjr}������������������������������������������������������������������������������������������������������������������������������������������������������������Ɩ���������������������������������������������������������������������������~{wskgcaa[^^cdegccee`[VS]iz�����������������������xz}|zw~zy{zxwzzzzy{{z|�~�����������������������������������������~zsmfbegjlme_\YWVVVVX\bmy|yux|yuwyxuwxyy{~�}|�������������������������������������������������������������������������������������������������������������������������������������������~}~}|~z{}|y|~|{|}{zyz{yz|{xyyyyzywxyxutywuuusquvttxyvtxvrsxqrxxvtsttuvwwuxxvwxuuuwxutqsuvsuvvuwyvsrqnjjmnnoqpjaZVPNH5,5HNROMLNPX`cda^_^ZYYYWWXYUXTMPPLMKIIIKLLKNIJLMILMOPSUe���������������������������������|b[[\^\[Z[]^[[[^ZZZYXYXYYXWXYXW[ZVWWXYYWYZXZYZWXYWWZ[Y[^YY\[\\]]^\[\]]]_`__^`a^_b`_`^_ca^^`]_\\]`\]][]\Y\\Z[]YWYXWWVWWWUVUWVUVVTTUVRMKKGECB?:<ACC@@@B@>GLPONPQOMMLKIHIIGIJJLKLLNONMLMLPNMMMMNMNNIF@85766433456753/.39?FIIJJKIJKJJLLKKKKMLJKMQPMPSQUTWWU[ZX]^_bgongec^`_aa`__]][^^]\\]_o{��}qica`[YY[\Z[[]es{}|~��uhihjnz������������������������}lV6)'! 
"%'%# ")/7;>;;999:=ACFILQQTSRSRQUUUZZWF16>BEGGFGGEFPWL:2.,*+,04:?HLRX[_dillkjhdb^URTSRZZ]ceinpv|��������wslgXVSQRSVX\_bfglmpsvx{~~�������������������������~~�������������zk`is~������������������������������������������������������������������������������������������������������������������������������������������������������������ŕ���������������������������������������������������������������������������{tgaXXWUVVUWZ]bcdedegda][\lt~����������������������~}}~�}}}}|xy{zzxzzwt|~~z��}��������������������������������������zrnibgghnpk`\XWWVUUY\bmv||wwyzttx{wtxxwx|�~|}~��������������������������������������������������������������������������������������������������������������������������������������������|}}|z|�}y}~{w{z{{|zxvz{xvywxwzxvxzyvvyuvwvwvwurruvrtvutvutsuusuxvttsstwutuvutvxvuuvwuvtouututvuuvuutsolghlnnnookbYSOOH4)5GPSONLMMV]bc^^_XVXZVXZWVWVSQTOJJKGJLIHILJILKLLMLOQPPj���������������������������������ye][\\]\[Z]\Y[Z\W[XVXYWWYWVYWVXYVUWVWXXXYZX]YXVZYXWZYXXYY\\Z\\Z\\ZZ\Z\\^]_`]\a^ab`__]```^`^]]\\^\]]\Z\ZZ\ZYYYWWYWYWUVUVWXVUUTVVSUSQOLHIBCAB<=@CA??@A?AJLNQPNQOMLKKJIHIHGJJKKHJLKKMKKKIMMKLLKLLJNJIE>:745532433554-./07>EIJILKKMJKMKKKJLMKJLMNMOQPPRSTSSYWV[]_iz��zvlfda`a``^]]\][\\[]ay������|tjc^\[ZXYZ[bjoponrwstspnmont{�������������������������\972,'# !%&(%! 
$08<;<99887;<@HJMPQQPOSTUWUN5 (*-6=BGIKMQSTPIB<93/-),016;AFKRX\_gjmpookf`\[YUVY\]]begnrv|}���������}tpg\UQQSVZ[]`begimquwy{~�����������������������|{�������������{hblt������������������������������������������������������������������������������������������������������������������������������������������������������������Ǒ����������������������������������������������������������������������������~ti`ZWVPSSTTX\`bdefggfb`[Wbq~�����������������������~z|�z{}~{w{{ywvy{tqw|{{w~�z|�������������������������������������wqmccghilmj`ZZWWUUXY_iuy{{zwwyxuwyxwwzyx{~~����������������������������������������������������������������������������������������������������������������������������������������}}~}}z||zvy|ww||xvzxwyyxwv{xvwzxy}zvtwwwvuvtuvvvvuttutvsutstuusuustttsqrsqtvvsuwurvwuutstuutrttsuvtuvvttusojabkmnnoni`XPLKI808HQRNNLKMVacb`__XUYYVXXXWXVRSSOMKJIJJIGJKIKLJJNLJKLNMi���������������������������������w_Y\ZZ[YY[[UX[VYYZUW\UUXXXUWXVWTUVVWWXUWYXVXVWZYWYWXUXWVZ[Y\[XX\ZZZ\Y\]]]\\^\]^_^^`_]^^^_]\][[^^[\[Z\[Z\\[[XXXWWWVWVVTVWVVVUTVVSSQOLLGDAA@;<>@A>?>>?CLPOQPQQONNIKKIHHDFIJGKJIIJJJJLJJJLJHJKLLKLMJJJD>:7255443244320.,/7?FGMJHHJIHJIIIJKJIJJKKLMLKPPRRQTUVYX[bq������xpkcbc^[ZZ]\ZZ\\ai{���������{ri`Y[ZZY]aeedhjosvwvutttsppr{���������������}x{����yL5KM>41.,% !$')(%$ 
!)19;=<<9879>?AHJKOOPRTUSK996/*&,.4>HJNQSSRPNKB>800.++-49>CIMT\achkqsqqlhfa^[WXY]_^ehlprv{�����������yof]SOSSUXY\^_ehjnptuyyz}�������������������}yz�������������yfeks������������������������������������������������������������������������������������������������������������������������������������������������������������Ə������������������������������������������������������������������������������ypc\WSQRSSORW^acdfhheec\Wbly����������������������~|~��}~~~|z{zwuy{xtwxzz{|~{z�����������������������������������}tmmgbfhimnjd[[\XWVW^hmuz||xyyzyvuyysv|zxy|{|��������������������������������������������������������������������������������������������������������������������������������~��||~~}~}}}{zyzyy||zx{|ywxxxyyxwwxyuvwvvxzwtvvstvtttustuvrptsrstssqstrotrorsrturstuuruvutwxwvvtsuutuwvpsuusuvvuuuuwn]aknnnnne\VPKIVXHEJRPMKLLLYed`a`\XYWUVWSTVURSSONMKHJKHGHIHIJIHILKIKMNNi���������������������������������x^Z[[[\[W[\YZ[WYYUUVWTWYUVWVTUUSSTTUVVVWWVUVTVYXWZYVU[WXYXXXUVY[YYZZ[[[\]YY\ZY\][_^\]\Z^\Y\^]^`]\ZXZZXXYYZXTVXVVXWWYXUUUUVUVWWRSROKMJBACC>;>@?>>???CLPORPPQRPNLIJIIEHFIJJIHIJMKKJIIHKJJILKLLJJKKJJJHD?;665562121220-)*08@EHIFEEFGJIJIIGHHHHJMLLKKMNPQRSPRUWZat���������uljea\\]ZZ\`eks{������������}ojd[YZZ[^]chjnpsuwxywutrppsw~�������������������i;*35@HJC?4-(" "&*(%!!
 *17<<<<;;<;=>AFJLOURPMJQMHC;50-+.4@HLMOQSSRRQJA:430-.269=CIRU\behmnpqqnkgc\\ZZY[`cgkllrw|����������|reOIJORPUXY\_adgjkpstwvz}����������������|{�������������vdeiq�������������������������������������������������������������������������������������������������������������������������������������������������������������œ�������������������������������������������������������������������������������}oi_XRSQPPRPV\_`cegefebb\Zev���������������������~~����}||{{ywywtwyyuxz|{uz}{���~���������������������������slifdeehlmhca]ZVSWZ`itwxz{ytwywuttvvuy|{x|~}z�����������������������������������������������������������������������������������������������������������������������~}{��~~~��~{}~}|}}}}~yyzyuvxz{xyyywvxtruwvvxwvuvvruxutttstqttsttssvtqqtsossqqrropsrprsrrttpptutrutrtwutttqrssprtopsqqttutstx|��kdjmmnljd[VNJ\~obVRQLIIHLYda^__\YZVWXWSVURRSQNMLJHJJGGIHGIKIJMJIJLMNTd���������������������������������ycXV[^\ZYYXXZYVXVUVVTUWUTVUQTTRTVTTTUTSWTVVTUWWWXUUUVXVYXTSTVWYYVWXYYZYZZZ\]\\]\]^]\][\]YX\[[\[ZZWXXXUVYZZWVXYUTXTUXWVVTTSQSUQRQLNJJEBCCA<?@?>>??>AJQOOQOPQQMMIHHFGFHGHDGEEGJJGIHHIGGIHIKIKIIJJJIIGFFE?7444433301431-+-17=AD?:=@BGFGJHGHGHHHJJJKMLNLMOOPSRWXZs������������yod`_[[^clptz|~��������������th^YT[\^`bghknqstvyyxutupprv�����������������lOA423-4=KH?72)"$'(('#!
!,5;>??>><;<=@EGJMNMMNOPNLOF@:1,+*6@DLLNRVWURNHC>923/--5:AFMUZ]bgjoqqrqnhgea^XZ[\bdgilqux}���������mC9=CJMPQRVYYZ^acgijmqtw|}������������~zz}�������������uffku�������������������������������������������������������������������������������������������������������������������������������������������������������������ė���������������������������������������������������������������������������������wi^XRRRRSUUVZ[^adefeffg`Zbq��������������������������}}}~|wvyxuruywtvyzxwxyzz}~�������������������������ymljcdgggknme`]\YWZ_dmt{{yzzywyzustvvux|{z|�~~������������������������������������������������������������������������������������������������������������������������|���}}~~}}~~{z|z{z{zxxzxtvyxvwxxvsuvrtuttvvstvwttwvsutrpsrrrttrsrtrqssprspprqnnqsppsrnpsroprrprusqqrrpusqqtsouurrtqqvuuuuuz����ynjnnljc[UKQh���{lZSKHHIMYb_`_^]ZZXYXWWYXRTQOPNKIHHFGHJHHGHIKKKKIHIMP[tv�������������������������������v_YY\[YZ[YXXZXYWSUWUTVUTTXUTSSSUURRUTRQRRTRRTUUVVURRVWWWUUXWXYXXXYXZYYWYVY\Z\]Z\]Z\ZYXZYZ\[[[[XYZWXYYZXX[XSVXVTUVTUVVVSTVSRSSPPLLIGC?AA@<<====>>@BJQQOQQOOOLJJHHHFGGGFFCGFFGIIIGEFGFHJIJLLKHIJIHGFFEEEC<96242431120.,,.015;;325:<ABDFEHJHHGGIJKJJMMLKMMOQSUU\r��������������~tjcbbcgjosuwy~��������������~tga`]\]bcehjnoqtuxxyzxuvsmr{���������������{k\NB<7402?JK91-&"&)*(*($ 
!,5:BCA?=<;=?BDEIMOOQPRSPPKHA;51.,3;CJMRUUXZUQMEC:232.49<AJQU\agjnppsutrmjge_Z^^_cfijoquz~������`)")19>CHKRUTWWX\_aadilotx|������������}xux�������������rgflv�������������������������������������������������������������������������������������������������������������������������������������������������������������ƚ����������������������������������������������������������������������������������znaYVTRPUTTVWX[_fffiffccbbmu���������������������~���~{�{uv{xsuuvvvyutvwxyzz}y|~�����������������������|rmkggggijklle_^_ZZ\dgmxz|zzyxyzzvsuwwvx~{vy}�}{����������������������������������������������������������������������������������������������������������������������}}��{}|{~|{}{yyzzzyzxwxxvsvvtuvuvvwusvxutvutrsstqsstqrsrqrpoqrpmrrqnppklssoqsrmoppqqropqokmpppossqrqpqssqqtuusspqtrrrvttwts�������slllj`YRNRh���}mWHIHJOW_^a]\][XVXVUVTSQROMNJIIHGFGFDGHGGHIJHJHGKMQ[ibp�����������������������������t[XX\[Z[ZYXXWWVUTVVSUUSRTUTROQSOOONNRPMPSPNQSSSSSSQRSVUTVWWVUWVVUVTUUUVSW\[W[YVWVVURQQRWZZWX[XWVVWYVUWWWXTSVUTTUVVWWTTRTVRRSOMNKJDB@CA>99><;=>?@CMNRPORQQOLLLIHHGEFFGHFEFHGGGHHGEGFFIIIJKKKIIIIHGEEFCCDB>95302220,+*,-/012574877679>BEEFFFEFHJIHHHHMJJKKNPRT]o�����������������{qkgcbfklpsvx|~���������������ukb`a_acegiloqtvyyzy{zuxwyx}���������������{o^QIB82.4<EB3-""*//,+($!
!#.5=ABA?>??>>AEGJOPQQRTTSSPKG@;4,./<FMPUY[\[WUPFA=643458;BJOW]cflosuuxvtrmifb^`cbeikmpuvx~��xX4+#"$*08@DINQMSUVWZ]]afglw������������zsns�����������obcnw�������������������������������������������������������������������������������������������������������������������������������������������������������������ǘ������������������������������������������������������������������������������������uf_[WSSRQRSTRX]`cfdggeff_ex}�����������������������~~|{{{zwuxwtwwztswxxuxyzzw{����������������������}qkjgfhihjonjfa`_]W`dkqx|{|zxuwzywvvwtsy~{yz}|{��������������������������������������������������������������������������������������������������������������~������|||z|~}z}}{|}}xy|zzz{zwvwxruvsrvvttvwssttqrtqopqmqqrrspqqpppnoqrpoqrpqronqrnorqmmqqmpronprpkoonoprrnsrprtrqrrpsrqnpsropssvwqs��������uijha[PKPeu����}p]MJJMW]__ZY\YUWYUVVUTQQQPPKIIHFFHHFHIHGFGGGGFHJHQ]fekx�����������������������������qYWYYY[[XXXVVYVQUVSTTSRSQPQPNOQPOONMQPQRUPPRPSSQPTVUUVVTUUTRTVUWWTSSRRSUSRRTRLFHEBA><CHMSTW[XVUVVWWTUVXWUSSRRVVUWWUUSTRSRSRQLKJKC==?@<9:;=;;==@HORSSQPTTRRIJJIIEDEEFGFFGFFFFGGEGFFGGEHIKLKLJIIIIGFEEDDDB>:742./21,(&((,./14;FJEB@;899=??ACDEDGHHGGIKJJLIKLNRZk��������������������{rkgcgihnrvx{~����������������vlfdadddhknortuvx|}|}|{yy|�����������������}k]UHC6658:6,&#!
$*./-+)%!"!#*8?AEFC@AAA?@DHMNPRUUUVXXSQHC<5.-4@JUXYZ[^^YRNHA>;7535;?BHPZbgnosuvzz{xupmhfdedegiloqtvvp`RG<3+(&&-37>CHMRPTSTTY^biu~����������}rhfn|�����������|k_em{�������������������������������������������������������������������������������������������������������������������������������������������������������������Ŗ�������������������������������������������������������������������������������������tj_\VSQQQRPNQTX\_cefegeeegku�����������������������~yy{|wtvwwrtyvpoyurtvx{wxy}|������������������zplkhgiijkknjdcb`_cfjkpw|{xy}|wvw{xuuywuw||y{~~~}~����������������������������������������������������������������������������������������������������������������~��~~|z{}||}}{{{|z}~{uwxtuzyvuuutrttqpwuqsutlstrpqsroprpssqpppooponnnpporpomomlmommoqlkmnjlsoklmnkhmnlnqpnqvomppoorroonlnpppprquutqv���������{phaVMKS`hqy����q\MIL[`a^]^ZXWWWUVSTTROPRPKJJFFGFEEGFDDFDGIEEFGHM]nrz�}����������������������������mYXWVVWYXXWVUVQTUSRTSRQROOPQQPONPOPONNQPOPPQQTTUWWVVZYWYWSQQSURQNJIHDDC>;99876325421248AFJQVPTWVWTTSUSUSOTJBDORSUTUVURRSRTQNLKJD=<==:79;;;<==?GOQUSSRRSSNNKHHILIHHFEHEDFJFFGFFFFFFEFIIGHJHIIGEGGEFCABCBA@>;73..//+($%&'),06>EILNMGC?;997;>ACCFEDEFGIHKJHJLMNUhz����������������������zsjdhjkprsw{}�����������������{nghfeeijkmortwxy|~~|||xw|����������������~tdVLF=66333/*)*,)&*31//*'%! 
',6AFGHFC?<<>AEHMPQVYWWYYZWSMG>74/1:GSW[^^``[WSKD?;6578:?GOVZchmpvyzz|zxutpjjhiefggkoomjfd]UK>4+$%*/4:ADGMMPPVW\erz����������wfXXbt~}���������wjcckw�������������������������������������������������������������������������������������������������������������������������������������������������������������Ę��������������������������������������������������������������������������������������xog[WUTQPQQNNPTY^_cegjjhebm|}�������������������}��~zx{{wuwxyuuwtqsxsrvzwts{z{y|��}���������������|xplihihghlnnidcacbciknq|}}{{}|wuwxxvvwwtx{{z{}�{}~����������������������������������������������������������������������������������������������������������������}���~�{z{{z|~|z|~|zz|{ywxwvyzvuwwtsutopuvtsstrovsrrsrnrrrqrplnpplmnonmoomnnlmnohhimmklnkjpnjkpjkmqmkoomnpqmoqolnrppqsqpolmpnloqonsrpr{�����������nQNLNSW]eny����udOQ]b__`_ZXXVUVWSTSSQQQNKKGDDFDDDDDDDEFFFFIIIIMd���������������������������������mZYZWXXT[XVWTSPTSRQRORSOOPMNNNMMNMNOQQSTSTRWSUXYYUTWPKGCA?:999864444110/...-...--+*+--/18?BBHNPRUQRUTRTSPI0'1>DKNOQRTQORSROMPKE;==>;9;=<::<=<GNQRSTRUSQQNKIGIOPONLJIEEGJMKKJGGGFFFDFIHGEFGGFFEEEDCCBACAAAB@;820/-+&$%%&'*,.29>EJOOMLJE?;;99::@ABBEFEEHHGIJLOU`s~������������������������wrkjlknruy{~������������������rjjiiiilnnqtsuxz}~}}}}{{������������������uc[SG>999:@IPLB%&.6620.)%!!#,7BGJHE@>>?@BGMPSZ`\\]\\Z\ZRJC;856>LSX_bddeb^TMJC>:466=BHNV^clpux{{|~�zvrofjhhikjjnnlmjgaRKA:3-*,16;@HLLQRVamu~���������t]OFShns~��������xi^alw�������������������������������������������������������������������������������������������������������������������������������������������������������������ŕ����������������������������������������������������������������������������������������yl`[URQRRMLNMOSXZ]_adfggeioz���������������������~z|}{yxvvuuwtqvysqrwxtrv}uuz~~~|��~���������~~wlhhghggjkllkd`aa`fimoty~|zz|{ytwwvuwwxvvz{yx{|{z|~��������������������������������������������������������������������������������������������������������������~��|}zyzz{x|}|yz|vwzzzxxxwxyytuwuuuusptrqttrqqqromopnmpomnpnloolkl
mmnnnnmlmiinmkjlpmmmmjlnjkomlmmlikkjlnmllookmnppppoonmjmkllrqmpsnnu|�����������~`QKLMPT[alv����udX^a\^^[Y[YUWVUTVSQPQOMLKEDDDDFFDFGDDDEEEGEEIHj���������������������������������nZZWUXWVXTTWTRSRRRQNNRPNNNJLOJFHIHMMKINJJDCA?;;:9758654241/0//0.,+,+)*(('(*)(')'''&))(++,.14;CINQNQSQPQQPB)!.9@GLQQPKKLKKMLB;:>;:9:=<:7:=AHOQRRSTRRPQOLIHEEETQMMNMGGJPONMJKIHFGDEGGHGEDFGFFDDBBCCDABA@??AA953/,*%""$##''(+-17<BGJQSTOJFB=989<=ACABCDGEIJGIP^ktz��������������������������{vlkpqtuvz~����������������rnkkklkklopqsuw{{|��������������������������xib_TKGDCKahP6!&0;852.*'&&%,=FGDDGEDBACGIPSX[_`__`__`^WPF@;<9@KX^aeiihcaZQLF>;7299>GPXajqtwy}�����}xuojffheijlnqqomjaYRB:/('-:FOVKMT^iqx{}����~t_JEO]dmx}�������wc`eow�������������������������������������������������������������������������������������������������������������������������������������������������������������ŗ�����������������������������������������������������������������������������������������xnf^XWWTSQPPOOMSRVZ]bdgfffpw������������������|~�{xz{vuwxxqowysopvusouwsvz{|y{~�����������|smjhiifhjnmmjb^_cbgknpvy}~yzzyvwuttvyxwxz|}{y||z}�����������������������������������������������������������������������������������������������������������������~|~~|y}{xxyz{|}{wx{ywzzxwxuttxwvuwvsturqqrolorpoooommmnqomlmppmnnnknppmonmlmnllnmlkmliknmkjjkjlmikmljinjjmnjjmnnlmnnmommonljlkjprinvpoqu~�������������lTLKMPTY_hr}���{oh`^`ZX[[VTXWTTVSRPOLKJHECEFDDEEEEEDDCDEDCGIDm���������������������������������lYWWVVVTSTTVTUURQQQNNOMKLMLK:14328754412/00010,-++***,,('((('''(&'&$%&%$%%&%%%'&$$$$$&'''(*.05<CFJNLONLLN=-(! *39CEG;8>AED@87;<98:;9:8:<?GPQQQSRRTPNMLLHFFCJ\WNQQPNQQRMMMPMMKKJHGHFFGEEFDDDBBBACDCAAA@?>@@==82/( !!!#%'*(,-26:@FKPSVXOJFB?>;::;:<@DFFHFHMVblosy��������������������������{vsoprtx{���������������yronorwwsrrorrtwz{}~��������������������½���~tlqrofd^XhneJ	
+ ,6=835.%!#&+4=CNPMMG@?AJSVYZbcbabdceafb[YLC<7>HSbhjjkkjhc\SKI?:9767@GPYbjqry|�����|yqkgcedhnsuuwxuqoh\QPMV]uw]MOQWdoux{~~����~s]GBIQYcpy������xfbepz�������������������������������������������������������������������������������������������������������������������������������������������������������������ǘ������������������������������������������������������������������������������������������qgc]ZUSTPQONMNOOSVY\_bfgdfu~����������������}z}xwwxutxzvsvvtpqssquqtssuwxzxz{{~�������~zrihfihhijnkib_]\^bflntx{|}{z~yvwyvwxyyyz|}{z{}~}�������������������������������������������������������������������������������������������������������������~~}|}}{y|~zxwzzx|zvsxyttywtuxtrsxututpoqqpqppopopmnpollonoqonooolknnnlmlljlhglmihkkkiljggijhgkiijkkgllhikjfjljijnonmklnnmnnmklmjfiolinolmqr|��������������s^QLMPTY^fnx����vja_YY]YUVVURRRRTSLKKIFEFCCDBCDDDEEDDFHECFJSp���������������������������������iSVVWVSTTTUURSQPRRNOOLIMLIE. ##%***(''()'**)('%%%'&%%%%$#$$##%%&#!" !"$#%$#%%%#$%"##%&&%)**-017=@DGEDFE@;96/)#" #,41%"-1423::85:>;778;AHOQQPUVTUQNLKJHGGHDXhh^USPRSQURRPPNQPPOLKLHGECCEDDDCCCBCAABBA@>@>??><72* !""$&(&)+-.138=AJMWZ]VRJHC@<;89;@A@BDGJPX`ehnrx~��������������������������~oopsuxz}���������������zvqw���xstsrtwx{|~����������������������Ÿ��������������Y
+	
+
+ %071($#'*+15>GLVQIFDFPRLHV[]bffgidghghfcZUK<AAGW`eilorolhaXOHD>;=:;FQTVditz~��~}yqnhd`fov|���������������YKPPXalswyz}����{lUF@INOXfv~������sccgp|�������������������������������������������������������������������������������������������������������������������������������������������������������������ę��������������������������������������������������������������������������������������������xrfa[YYSRPNNNLMOQRTV[fcgdgp���������������}~��~zyxvsw{yuwxvrsurqswtoquwvywz|xxz}~}|��}wqnigggejmmif^]YWY^ckmt||}~{{{}xwz}zuxzzyyz{zz|~}}~�����������������������������������������������������������������������������������������������������������~}{}|zz}zwyyyxxzwquwvuvwuvxxuuwvttsqnqusorropssnmppmjnmmmommmnijkllkkkkiikijjighjjjjhffiljhjlihjijjlighkejojhilkhijghkjjkmkjmljglljkmlloqq����������������}eQINRUZ_ekx����xhXX\ZTTWWUUTPRSNLLJGGGDCCCBDFEDEECDEEDGFGQr���������������������������������hVTVWTTVVUTQPPPQQPOOMKJKLJA& !#$""""##"$"""#"""#""#   !! "##""##"!!"#"!%#$#"""%##%$%''(*,,-148:9:=>@??A@<5/)$" #'-2224:9788;@HMSRPRSSSSQOLIIHDHMRj~{qibXSPTUUTVUUQVTSRPMLIFEDEFDCBAABBA@@@@??B>?A?>;5,  #%$&(*,-../4<AEJRU\[ZROKFA<<<<>=AFGKSY^_dhmrx{���������������������������{uuxyy|~���������������}��������|yyzxxz|}������������������������������������I
+			
+&(#!"!#%(+07@MNLKKNNA23=KX]bdijlmmllmmlmjd\SJBAMS`gmptx{wrfYPIE?636@EPYbovuvusuppoljfkmt~���������������}QIJNXbktvwuz����zjR@ALQMR`nx�����maajs|�������������������������������������������������������������������������������������������������������������������������������������������������������������Ɩ���������������������������������������������������������������������������������������������|pjd`]XSQRNMNMMLMNRVa^ddbdrv{����������������}{}|wvwzzuvzvtrtuoostvoltwtstwwvvy~||{~~|vojffiiijnmhcb[YWXZ`ipz||~|xx{zwxyyutwzyyyzyyz}~{}�������������������������������������������������������������������������������������������������������}}��~|}~|z{{xy|zwy{yvwxxuswwywyvwxxtswwssssnmpqnpqqmnpojlnnkmollmmllklhjkmikkjkjiihjjhgijghigegjgiijhgggfhihfgkhhihgghjhhkgfjkhhkliilhhjmijjjknlnp������������������oYSRSVW^dlu����zle^TQVVTVXSRRPKJMKHFDAACDCCCDCCBBBEDEGDHMs���������������������������������fQWWWVUTWVRRQORQPMNNLKMMKH<&     !"!!!   !"  !! "!""!!##"!!"! !"$""$"%%%&()('(,./158:=@ACDDDA:5.&!$&*29667:BHLOTPPRPQRPLMJJMKKTZckqwzypjbYWLVUWYWUXVVUSOOLJHEDDBBCAACAB?@@?>??>>?==<8- "" #%()(*+,.247:AHQV[_]ZURKEA?=<=ACGLQVX\adinsvz}���������������������������yww{}�����������������������������{zz|}����������������������������������tG
+	
+	
+
+
+ %(-1<HLMKLNLA0.++/BO[bimrutsqnmpqpqnd_VKCFKVdorw���ycUKD=67:>@FPfoonmklkhnkjgilw����������������}NGJLV`kotxy|����ziNA>HMLJScs�����}lc`gp}�������������������������������������������������������������������������������������������������������������������������������������������������������������Ė����������������������������������������������������������������������������������������������|wqid\YSRQNNMLJKLNMMV_acihfp���������}}��~}~�}zwyzxtvyuutvvqqtrpjrvttssssuxy}||}|sjjggfhjkkkic_[ZZX^afp{��}~~}{zzzzyzxxvy{}|z{{z|�||�������������������������������������������������������������������������������������������������������������}|~~}|{yy}{wx|{wv{vtvyvtvyxtttsqrvrqrssnoqonprmmonjknpkooljmnjillkjkliikjgiifehifgggheehaehigfihehhedihgfhgfgihfgghghjiimjiklkikiefgegihgjmjlq�������������������u`TPRWW\elv���}qaVVXWXWTRUROMLLJIIDCCDCDDBBCEFABBDDDAIQx���������������������������������iXWWWVSTVTRSOMOOLNMJJLKKJE;&  ! !      !!"#!  !"" !"  """##&&%%(*+,0248=?BDDEFFHD:3+%!&-.27?OMLOPOQSPPPNNMNNPTZ^`chkoqsyzuld^XSUZZ[ZZZXVTTRQONLIFDCABB??>>?>=>;>?;=<=9+!!!#%'&)*+-.02339>CKSY_b`aWOKG@>?>BFLPTVZ\_eijnsy~�����������������������������|��������������������������������~~}�������������������������������o;
+
+		
+$)-1<GLJKLNPGF@33/-29K[dmsuy{ywxxwwzwvun`XOI?G]r���tYICA=;;;<<<Ecnjjhjjjhiiihkox����������������yKGIJWajovyz}���}mSEAGKKIK_ny����zia_fo}�������������������������������������������������������������������������������������������������������������������������������������������������������������ƕ������������������������������������������������������������������������������������������������zske^YSPNNNJKKLLNQSUYaefeenry��������~���~~��~|yzyuxzzxwyxsqtuokktvrrpqnouxwwx}}~tlggggiommkf`ZZYYY]eiq}~}~�{}}}{yz|}zxy|}||}{yz~�}~�������������������������������������������������������������������������������������������������������������}}~}|z{xwwyvvz{xuvwtttwqrsvusssrrttppqrqnoonrrngmnkinmllmmlkljiikhijkhgkgeeggecfffhgfdhhgbggeefecfhfcegffefcaifechgdggfghgcgjgfijdcdddghgfkkjlx���������������������iZSUWZ^cks}����na[YVXURSTPPQOMJHHDC@@AAA@@A@AC?ADCBCJIz���������������������������������fQRTWUTVWUSSRNOMLMLIIJGHHC6%     !   !  ! "#$#!"$')*,-.058;>CEFGJJHD=6.$"*4>GKNOMMNOQNPQRRSTTZZ\^adgjjkqvxxrmgaUSUZ]]]]^[WWWVVTSMIFDB?>>>><=<;;=<<<;<8*""%%%))(--+./235<AHNW^bc`[XOKGCAACGINTVY]`cfjnsv{~��������������������������������������������������������������}����������������~���������|{vkZ6
+	
+!&,0<ILMLLOPMNQQKF>9026DZipwz~~���}{{ywuurk`OC98=SeG9/4569;;:79Gbmjhhhijhfhhhimx����������������uCFGMUaiouwy|����w`OFGLIGHUhv����yg`^gq~�������������������������������������������������������������������������������������������������������������������������������������������������������������ƕ�������������������������������������������������������������������������������������������������}vlf`XROOLLKMJMMLONTZ^cffcfr{��������~|��}}~�}xx~|wuyzvrrutpnqstpponnntwwwz~xsmjggjklljjfa[WZ[]afns{~|~~}|}~{y|�}zy||{z|�{wz~�~|��������������������������������������������������������������������������������������������������������}��}|}{yy{{vuxywwzztsvuttuuttwursusrruqqqpmqppmnqnlkokkloljllkjklkkkjhjihfioigihfdfgbdgedgge`cdededbbefd_bdebbcdefedegebdecdddfihdfhgdefcfhjijjhin{����������������������q\TVY\^els~����vg[UWTSUSKNOIHGFEDB?>>=?>@A?@BABB@ACIQ{���������������������������������U>GMWPUVWUSSRQQNPNJIJHFHIE6#     "!! !"   !"$&((()),-26;?CGHIJKKHC=4,%&07>DHIIJMMMNQQSUTVZYZ^_`bdhghjnqrytnjd[STY]^`b^^\\\^]WQOLGCA??=<=<===<?;;:6(  "$$%')*+*+.02026;=BMT[`dffcWROIABBFKNQSX]_beinquy{��������������������½����������������������������������������������������������ztptwvxoXWWLLL3
%-0>IMLLJQONISZ^ch_WG=633I^oz�����yurpolne^YE5/+/83/-144898747Hfjgfikhhihgghloz����������������mEDCJUahowvv{����|l[PKGEEFMan}���xf`_gr�������������������������������������������������������������������������������������������������������������������������������������������������������������×���������������������������������������������������������������������������������������������������yph`XSQLLKLKLJJLNPSX[dibdglrv~��~���~}�y|��|tv|ytswxurrtrommqvsppnnrsuy~~xlihjigjllgec_\VVXWbhmry||zz��~}|~{{�~{{|}}{|y|��}��������������������������������������������������������������������������������������������������~��~}�~|~�~}}|yxx{yvxywwyywuuuttutrstvrsssnsrpmsspnnnlnonjklmkmmklmmkillgikkgehgdcfgffgcbbefab`acfb^baadbcccbdb``bddbabedccefdecccefdbeedehebefdbefbafgegn|�����������������������ybYXZ^bdis{����wj[WTTUPMPOJIGEECA@??>>?@?@?A@@B@ADHKz��������������������������������{H3>?@>FKNUTRQOLMNKIIGGFFBA7%    !"##"%&(&+.27;ACGJJMOLLIE80'"$,3:AEHKLMNONORPTWWY\]]_bdefehikpsusmhc`URY^bcdfda_\^][VMLHDA?;:;<<<;><;97'""$%&&**)*--.0115:=BJRY\dkff`[TIGCGHHIPUY]`dgikpsw|������������������������������������������������������������}w{���������������ztmjklljY4&*/KQB-$
&04?KONNNSSMINZfs~��xrgWJ</5Ody��udKEC@=8660.,*+041/,.3468:957G`ihgijhfeddbdjnz����������������kCCEJU_hmstw{������viYRLFDEJXkx���ub_ahs����������������������������������������������������������������������������������������������������������������������������������������������������������¿��������������������������������������������������������������������������������������������������������wrl^USMJJJPLLLKNNNPS[`cedbhny}|����}�~}�|{{|{vw{zzvsrtrpnptwtnimqtqx||skhhghikllgba`^ZVX\bjnuy|}|{~��|~~}{}~|}}}}}zz�~}���������������������������������������������������������������yw����������������������~w{������������~�x}��}~~{{{{ywwxwuxyxtsutrstqqpwsttrqosqnrqpooonlnmlkklkllljjljgjkifhiffffgfeecefdbddbabcaddf_^cc`cdcaaababdbacb`bfa_`ec`cbbadcbcedcgfcbffccgbaehffjo~��������������������Ŀ���m]Z]``djq{����}n]VWURQSOKJHEED@=<::<>>>@@@?><?BBFN{��������������������������������P85/$#29AGLMOKMPOLKJHFGEB@2! !! "###$&),06:@CIMNOPQQMJ@7-&")1:AGIKMNPPQSTUUWZ\\^_`bbdegjkmptwvpmf`^X]dffhgfdcaa`]ZSNIFA=;<;:9<::83%"  #$&''&)+--/.1489>CJPU^eijjkc[VQKDFILOTVZ`cdhlpty|�����������������������»��������������������������������pZav}~����~}|yvsronlkkige[?;;ATXRD><90*#!(.3AMUZXSWZOHM[iw��������uhYRWit}sL%"
#$$$%)-10-+.36578846Ebkhghjgeb_]]ckp����������������iBCCHT`hoqru{������}qf^WMGDJWeu��sc`bju���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|sg]XLKIHIJLNONQOMPTY^bbbbepvw|����~}��}x|}ww~xssuuspssrtysjlqrvv}zrkffjihjkfc_]Z\YVUZ^hnswz{|{{��}zz~}x}��~|}}{{}{x}�~|~���������������������������������������������������������������{\gx��������������������x]dt����������{z�|||zuy{yvvxvtsxxsrstsssrqstqqusqpqrnopqnopmkkoljklifijheikh`hgfdgheeececdecfb`dfdcbda`ca`]``_^adb_bb^_a`_`e_\`b^^bc`]bcbabcaeeddf`_efcbac[bgcahjk���������������������������vc_]acfkq{�����rdYPQRPNKIEFD@<5,.479:>?>>@?>???GQ|���������������������������������^LC:/($#/9>DFKNNJIIGEGFBB=-    !!!#"#%&().4:AGJLNQTSVSOF<5( "'/5@FLLQPQSSUVXXZY\\]^`cdgghikoqrvvtoidb``cfhiiheddccaZVRNJD><:9:9893$"""$%%%'*,..00567<>BFOX^cilqmia\TQNJGIKQVY]`fhlorw{������������������������¼��������������vu|�������y|wjY?6\tz|�~yutsrpmnnnmlkihhgfee`Y^KFP_YQZ^\[YYVN2!&.4CSdnzwxrPEN\hy����������������}B! 
#)-01,+-27797;46IdigfggedcaX`glq}����������������d?BCHSahmqsu{������|uqocTJFKVcp}�|pe`aju��������������������������������������������������������������������������������������������������������������������������������������������������������¹�������������������������������������������������������������������������������������������������������������uj_QNKHKKNPRRRMMMNQT[^acdfjqz~||��y{�zy~�~y{|wsttursupqszslmtyy{zpkgchhijiha`^YWZVTT^emstx}~wwz~~yy}|{���~~~~~}|||~�~�����������������������������������������������������������������wiej{�������������������mU]ny~������~}}�{z|}}|}zvvzzywwutsuxuqtttstqqsvupoqpooqnnppmmonjkjlkkkjijjjghkhhfeffigdcecbcddcceb`dca`a`__a__]^][^a``_b___`__ba^\__^``aaabdcaaabccddb`adbbb`_\dcbcgfk������������������������ľ��}gacdfkmtz�����ydSUSPOLGEDB>:+&.369;;?A?>@?BGPw��������������������������������|YNQMC8-$ '17>DIHEEECDFDD;+ !  !!!""#"#$&)+/29?DHOTUWYY[WLB90($$#!"'/8<DIOQSSUUVWYV\]]__bcedegjlmnsty|xrnkfb_eiikjkihgfcca[UPGB>:89:71" "!##%''(,-..04458<=@CGTZ`glqqlhf^UPLIGJMSW[\cilqsw{��������������������������ƺ�������yhbdjknpc\TKF=@:434Snx�|urpmmkkijkjhfdbbbdeeindGAYiiouy�����Y-&,3DWq����kKIM[ix����������������y? !  
$*/20+,/39:<<:58Ldheefghge]\fflr�����������������]<?CEQagmpqu{������|wvxl^QKMUbq{�{me^_kv��������������������������������������������������������������������������������������������������������������������������������������������������������»�������������������������������������������������������������������������������������������������������������zn_QMJJKNQPSSPOOMKQUZ^bddefnsvz~�~z}~�}}�|}}~{wutuvvuurswxrnrx||ungegghjihd`^\ZYWWVX_goqrtx{ztwy|{z}�~}�������~{}��������������������������������������������������������������������qjgcq{����������������~ufdXemw}�����|~~{{|{z||zxxzzywytruvtqqttrqsoqttronpnnppmnpnlonkhmljihjfeiifhjjfddbdeecacc_aabcbaaaabb^]_]\_^]^[^[]__^_``\`_^]_a^^^]\^a_]_aaa_]\`aa`cb__caaa`]_ac^acdfo��������������������������³��uggghlns|�����{m]TROLGGC@<6$#*047;<=??=>CQw��������������������������������wHCFMNMA8.%&/5>ADFB??CCC:)  !!!!"$''&&'-6<DKQWZZ\[[ZTKC91*(&#""#%+4>GNPSUUXWXX\]]]^aabeefijllnpstx|xvqkhddfjlnnnmkiiihg]VSJD<<:6.   "%&)),,+-13459<=?@CEMT_gmrswrhd]VOHHHILOV^bhkorxy~������������������������ѵkagaWUQNA?98<431,1/0('-//4Mkv{{tpnkklihihgca```beegms]GW�����������n/! 
&-4CTn�|siVGFLWhw���������ú�����w6 !&)-1.+-28CKJK@69Peifghghg`X_efnu�����������������[:>@GV^fmqqsy������{xzyri[SQXclu}ymc]bmw��������������������������������������������������������������������������������������������������������������������������������������������������������º���������������������������������������������������������������������������������������������������������������zl_RLJKMPRRRSQROMPPSUY_adabhovz}}�|}�~}~�~~|zyxvvytrqvzvppx~{rkhefhhjkhc_]]XWVTX^bhovsrxxxwxzzyx��~�����~|~�~|~�}�����������������������������������������������������������������liledflppnuxvxzyxx}yxwsfce^]dr}���}}}z{}|yy{{zwwyxwvvrquwrpstoprrnsvpjnonlopmmomklnlhjkiihifefgeceeebce_bdc`acb`cbb`_`^_ba_^^___][[\]\[_^^_a^\[_]^\]]Z__]W^^][__^`^[\``_aa^\]^_a_^]^b`aeccfx���������������������������ż��kgglknsz������p]UNJJFB?<3"").38<?>=@CQw��������������������������������rA6;AFNRKB80$#)49:416>@=3 "" !##""$&),4<DMVY\_^_``^UG@80-)&&#"&,2=CLQTWWZ]]]^^_a`adffjkllnqrqtxy{|xtpnhchjoqrrrqplkjhd^UNJC<0   "%')*),,13347;==>@@AFKS\dintwwtkcYWMG=AIPV]agmqqv{����������������������ҪG711-*(($'(&)*)(*.--*&)-,1Sovxzxrmmkjhhghhfba`__defeeX=Ij������������t0! 
%,6EQZUP\VNFCJUbt���������¹�����t4$%&&&)/1,*,5E[de[@8>PglgfhhgbY]bdgnu�����������������V9>@IU_fnqqtx�����|z{ytmd^\Zbir|tja\dnx��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yj_WOJLLOORRRSSPQNPOSW[^abfioow�}z|��}{���}z{{yvuvrquxwuty|xpiedffhkiea]ZYYUTTX^cglstpntvtttxxvv}~}}��y���}~~}~��������������������������������������������������������������������wiklie[XSNJRPSY[Z]a`_aacdccWW]mw|}|y|~zy{|yz{|{xxyxwvvurttrqstqopqmotqlkonmmnmmmnjjkmjfifghhgdefffffcbbccbddcaacaaba`__]]]]\]^\YZ^X[]^[[[]Z]][ZY\]\[[[Z[\\[\_^\]]Z\`[[]^]`a_\V_^[_^\Z`][`^Y[i}�����������������������������İ��pkklprv{������saTLID@@=2!(07<<@CFUw��������������������������������m4(,28@FMOHD6) "%  +273# !!   "###(,5>FLU[]accehfaSI?60-,(''),2;DJOV[]]^_caccddffiijlmprrstwxz{|{wrnnijosvvwwvrpnnljc]WP@+!#"%')*+-123768;<>>@??FFRXagnuyxyrnb[SJ@CJOSYafkqvy|���������������������ӦC3*''*)&$$%$%&%&)+++'&*,+4Upsuwvpnkkihhfhfecb`_`deeaW17Pp������������n,! 
%,7EOPJTZTIFHFU^n~�������ſ������d+ ,6880))/.+(.;Snrn^@9AQaggdghf]W_cdjnu�����������������K;=@IU`ilmlmt~�����~||{tpkea_chpyqja_dmz���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|shYNHLMNRRRSUTUOPNPPSZ]^cbchp{}zz}��~�~{}~|{xvttz|wuuw|yogdcfhhjib]Z[XWWVVY`dfknrmntvtpsuwvvy}�����������������������������������������������������������������������������������nijilie`WQNQRQPPRUWX[ZY[_a^ZYX_pu~~{z|{z{|{y}}{yyyxuuwusssrostpmopnlqqlkllmlnoklnlknnjgijggihfefdcdebbbaaaaaaab__aa_^^]]]__^]^\[]]]UY[]Z[[`^\[YY[\]]]ZXZZYY^_^YYZYYZZYY]\]^`[Z\]\\Z]^]^Z]`\Z`hz������������������������������ƻ��~onprv{������wdSHBAA<1#*05;AFQ|��������������������������������k)%-4:BJLSE9-#%+(  !""%'(*29EOUZadgjkihg^QC<40/+(()*19DKU[\^_bbeeefhgjhjnnnnrstvwxz{~yuqmqux{{zyywtrtqokfXE5( "%&'*+/02467:;=<?=BCHKINV]hryzwxuk`SJGFFD?FT^dmptz~�������������������џ=-(&%&&%"#!###&((*+*$$*,,2Vsuuxvponlihhijgecba_`dfd_P?FPt��������¹��h'""$++.24/6GPUYZ[UST^`^dpy���������������^-
!0L__T:*+/0.).>\qyw^B7@Wbhfdjhcca`acjmv�����������������K??BJT^hkliir~������~}wrolgadhlnlf_\am{�������������������������������������������������������������������������������������������������������������������������������������������������������ž��������������������������������������������������������������������������������������������������������������������zgbSLJMPRPPRQVUQONNOSTW[^cdfnuw{��~�}{{{}z||xvw|yusxzvjhbchgihic]\YZUUWX\behjmnonqroprqttvx{}��������������������������������������������������������������������������������~jgikmihc\UPNOPRSRTVWWXWY^]]ZWTPgny}|{||yy|{xyyxuuyvttwwsoqnnprqmmqpmmpnlmonmoqnjmmjlnlhfhfcghgbedbabcb````_^]__a_Z_e_^_^\]_^]\^\[\ZYUXVWXYXTV[YW[\ZXYZZZ[YYY`YWVVUWYYWY[ZZ]_]YZ[\\[[\[\ZZ\_\_ad��������������������������������¯��xqtvx{}������{dODB@<6,*((&&.5@Ux��������������������������������i"!*1<DJIE?6-#!"#$#%(1:EMX_ekilpomkcYJ>930.+)')/6BLQ]`a`dehhgijklmlmmqqsuvwwy{|��}{zzywy{}}||{zxtuttohZK?5# !  
!#$'(,003568<<=>>BCFHHKLO_nummnmg^SIFHH8.-6AMYaiowz�����������������Л=-&$%&%$"$###$%&)))'"#)++7Ynqtxuqnlljhfhjhdbbb``dfe^MB=Ix������������c&"" #-C\^b^P38MW`mroont���������������������_+'Elvqa<).1/-*0>ay�|[@5>Ycfe`igb`^__ahmy����������������{UHEGKU^gmlecq�������ysqplljjiec`[\en{�������������������������������������������������������������������������������������������������������������������������������������������������������Ž���������������������������������������������������������������������������������������������������������������������}teWMNMNOTTRVVSQPNNJNQV[a``emsy||�|y{�~yy}|xvzzvtxxtjedcgjjjfb^\ZXWVVZ^ckhhkmmloqplnqqqsyz|z}�~�������������������������������������������������������|onqsqrqstxvxzy|}{{}~���ulhlmlkhc]TRPOPRTUUVWWXZ[\`^WUQ\gsxz{zzx{zwuzxvuwywuvyvqprpptqqpqqoonmmkonlmpomllkjkmhghhdehfdeebaccabbabb^_]`b_]^_`\^_\Z^\Z[[\XWXWUWXWWWYXZYXYWWWYYYYYZZXXZ^YWWVXZZYWZ[ZXZ[YXZYX\[ZYZYXX[[Z`ac����������������������������������ȸ��xyy{~�������jUIA?@C<764+'3Nt��������������������������������d#*6>BFLIC80)  " !$$$(.8EQYafmoqqtssj_TI=62..,*+/8@MT[`ddhhhjlmmlmnnpsruwvxz{{}�������}z{z|~~�}|zwvvslaVJ<.  !  
!""$',/.24669==<=?CDHHIIKZlnbba]VRNIFDB940.,38GU`jou|����������������Δ:+&#"$%#!""$$#%&)**%!$)+*6Ynsuvsonlljhhihfccb``bcfgaQ=<Z|������������`# # )Bp���uW/9O`w���������������������������^)*Orzzg:*.32-)2A]{~wU@8@Wdggcggfc[XWahmy�����������������cURPS[]glkbfv����������|vussrpmmkb^YYfo|�������������������������������������������������������������������������������������������������������������������������������������������������������Ļ����������������������������������������������������������������������������������������������������������������������|sj_SIIMPSUWUUTTPPHGNPSZ`aabirux}{y{}�{z}}{x{zxwvxpi_abfhhgea^\\YXWWZ`eehefkikmoolmqurqvyz}���~������������������������������������������������������nW^cijjhjknpruvwzyz{{~����|sjkjmljj_\UQPQRQSSUWUWZZ[]_[VNP_jsxzxxy|wsuzywwxxsuwurpqopppjmpqnlnkhiknmlnnnnnkijljjligffeddec`acb__`^___^]_`^\]^[\\\Z[\YW[]YUXYUVVVVWXYX[YWVXYXX[XWUZWWV[ZWWXUSTSUXYXXWYYVXYWXYXXZYVXY[Y[`bi�����������������������������������̽����}y���������oVD=DHC?><8,
+
+
+
+"Fo��������������������������������c) *6;LHKIB=1'   !  "%'*4BOW`hrststtrmk[OD;:60*(+.5?IT_bcgjkmlnnoprstttuuxz{{}����������}xy{�����~|x|{uk^TE6%""#$%$$()+.125779<;=>ADFFGMO[kh`_^TQNJGEA?A@A;62016BO[enu~��������������ʎ6*&##$&$"""#$%''*))% #)*)7[qrsuqnmmkihhhggeca``bdff`X:=NWu�����������[  $ )M~���rR.=L_�|rq|����������������������Z&/Vz��h9-/31-)0F^y�{V?:CXeggdffdbQY_dhnz����������������ecbaa_cfiidkx����������~xwwwwtsrj_^\]fq}�������������������������������������������������������������������������������������������������������������������������������������������������������ú�����������������������������������������������������������������������������������������������������������������������{q^POKOSUVVYYXUROMQKOSX]accjlu{z{z}|�}zz}xwvuljedeehheb`]Z\YYYWY`fjgggjjikoponostpt{xy~�����~����������������������������������������������������jTU\bgihijknopuyzxzyz~~���ysmjijlld`[WRPRSTVUWVZYZ\]^`XRNWamvyuuxzwwzzvuvvtsvvqoqpnnpomqpnjlnmmkonmmnlmolkjlkhijgdgfcbbb__`a`_^^^]`^`]Z[]__[Y^^[ZZ[WXZ[XWWVUWVUTWWVWWVUUUVVWXSUVUOSVWSSXUQWXSVZYUVVXWWYWWYYWYYWUWYXX\^_p���������������������������������������ȯ�������������pYGFHGEB?@7(
+
+:o��������������������������������g6*#+7?IOOMG<3(  !#"%',6@RZbjpsuvxwvvpdVK>:641.,/4@KW]fiklknprstuuuuvwy|{{~�������������}����������{yxwqdL:)# "#%&%()+.//3689<<>@AEFGHLM[kja^[UQNKIF@;>DGJFC?9423:HYdnw|������������Ǆ.*'&&&%#"%#!#&).1-)%!$))':`qrttrpmmljjiifeb````adee]H-22N}�����������T! #! )T}��{mP-=OYj`QQi|����������������������S&-W~�|\:0012*+4Gb}�oS<9E_hhfdfeaYV_dgkq|�����������������oikmlklmmmpsz~����������{yzyywvtlda\^et��������������������������������������������������������������������������������������������������������������������������������������������������������ù������������������������������������������������������������������������������������������������������������������������xqh[NLNQUYZZYZWURQPPOQUZ_bbdmttx}}z|��zxwvsuqmfgcggggca[XZYXSWWY_chkggjliilqpnnrrqpuwxx}��}~�������������������������������������������������������~[KJR[aeiihimnpsvxvxx{{{~��~zpjihjjjgb]VQRUVVWVXYY[\]\`ZTQPXerurx{ywxyxstvvsuwroppmoppnornmllmmmmnklljilkhhjhhggheeeabddaaa``^^^]\__]^][Y]^_YZ]XVXWWUYXUUVUSSTTPTUSSRTURRRSVWUSUTQQVSSRUVRQTUTXWTSVVVWXVUVWUUXVTSWVVXZ[]v�������������������������������������������û����������{bPKIHEBB?3#			
+9k}�������������������������������mH;.$*8BIOPOI>1( !   !"#$'))6DSahjpuwyz{wxpg[K?751./.+2>LX\flmpqrssuvvvxyz{||����������������~������uhg_baP?4:;/" #$%*,-0102479;;<?@CEIJLL\jj`\ZTPNKID<65:>JMOUKEA<45=KU`mw������������~2,&$&'%$#&&$$'-440)%#%)*-;^prttrpnlljkhgd_[\``_bdge[E5>G_�������ý���P!!" !,[��zqhM.>PZhokkm����������������������}Q$/V{~tY5/121+.5FevylR=;E]ffdedc`YXYZcjr|�����������������qpsssvvzywxx{���������~{zyxxxyvpgb[^dt��������������������������������������������������������������������������������������������������������������������������������������������������������¹����������������������������������������������������������������������������������������������������������������������}��zdZQPRUZZY]ZZXVRRQNOPTZ\^bbfioy~|z}�|urnppnieceghhgd[ZXZYYXUUW^eimkggjihhoqnmnrrnotwxz|�~��������������������������������������������������������yUIJMQYaikhhjklpqstvwxxxz{}��|tkiijkkigb[USVTYYZYZZZ\][\YWQLQ\jrvyyxyxwtsvwuuxtoopmkqsoosrmlmkkjllmkljijljjklhhhggfffbaec`ca]\]]\XZ\[[^]XY[ZYXYYVWYVTTXVTSVVRSTVSTTSRSTSUTRSVURTUTRQUQPSSRQRSQSVTPTUTTVURTWUSTWSTWVTXYXajx����������������������������������������������˿���������}iNIHGFC>2!	
+4h~�������������������������������lNF<3*! 0;FNSVUJB5)!   #$$%*002267;AGSceablsvvwvtxpkaL>327:76524<IT`lopqstvvyyyz|}~~�����������������z|}���sM=?99876FadVI6& !#',/2667778::;>@BEHKIN\if]ZZURNKFB>87;=?EKXZ_]WIC=<9@Obnt|�������Ƿr6)%"$$$$$%&&%&-34/*$!$))(?dusuvrqmlljiigc^[^`_^acfe[J>>Ff�������û���M"#!#.]~lrmJ1@Sc��rr����������������������}M 0Zx}x[4.///,-4Fdy|sN:=J[dffgdc_YWW[cmq}�����������������v|�~~����~~�~|zxz���{yyyy{|yqhaX^it���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|{��|ri`UQTWY[\ZZYZWUSPPOMPSW_b`dhqzz|~{wnhhmkfddeehfec_WXYXXXWWW[ahlnmihijiknnkmoqpqrtvz~����������������������������������������������������������xQJJKNSeuypihiilmoqqttuuuwwz}}wojfhhkkid`\VWWXX[[Z]\]]\]\[VROTdlvusvywsswvtrtuolomknrqknroklnlillilnmjkmlkjkjccfffdedabc`]`]XZ[XVV\\WXZZTVXWSXZWWXWVSUVTUTSRRURSQQPPSPSRRQRSRQQQSOPPQPPQOPSSROOSPRURQSTRRUWURVUSTSTVYZez}��������������������������������������������������������ƿ��MJLHGHD=.
+5by������������������������������iIGBA;6,!/>IQV]XN@6."
+  !!#$((&%&),,-*,22269<BFFIFFFJUaov_NI[khcbdeia]WKB9114>LJA91/=L\fkostxyz{|}��������������������~wxyp[8#&&'())L{�|m_R>- "!&-189<=<=:<>>=?EFJKP^mf[YWSQOJFC?AKWKE=@LXagjdXOJEE<CN[jx��������k6)$$%%'%&%%&$'.23/)#$)*,?issvxsqmmljijhd_Z\__`aadb[D16Dl������������C "!#2c{{�fB6DSc|zf]i����������������������|J 1^~�yR4-00.,-5Ig�|mK9>L\efdeddd^XZ^dmr~����������������}y�������~}}|{{xsnjlv��}z{yz{|zofaW_jt�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�������}���z{��||{md^TSW[YZ[\ZZYWVTPOPOQV\]_bipv|}tldcjic_`deehe_YVVVXWWVW[`dkommnnkiiknljmoonnsutu{}|{~��~~�����������������������������������������������������pNJJKM[~��~oiiijkknprrrtsuuvy{zsmhgfhimkf_\\[XZ\[\\]]\\\^\ZTPLXcoorwysqtvtrrtqqqpmnpqmlqoklnmjionjlnkhllihjlcVV]aceea_`b`]^YWZYWX\\XYZXWUZ\VUYYWVWWUTSSSSPQOQSOOOLOPPMMRQNPOQSRRSNQQONPRQSTQPOORQSQPQRPQUTUWbaSTTQSY\k��������}y~�������������������������������������������������ͼ{HIIJKKIF=+
+4ev}������������������������������hAEFEDC@3+#"2@MUZ^_PG:/%  !    !#%')*/5ALNRI=ACEE@@AGLLHOSZ\\ZUY\dnkjgW11AQI>@FNHCAB=3,&&+@Yd^RF;4:PYaixyzz{|}������������������������{wwslZ7! !"&',P|�����nXB$#"$&(/58:=?AA@AA>BDEIMP[ke\YURPOKHFFXk�zkfdhmz��}�~whgjo}���������\('$%'%&$$#%%#(.0/-("$)+.@isswwsqommkjigd^Z\^__`bde]G@Oe|�����������t= !"%7h��~rX?5BP]d[\bk����������������������F6_��xQ1,/1.+,5Li|{mK9=L\fdeffdaZR[_gmt�����������������y}������yssmklljgd]VVn��||{|y{}}zngaYakv�������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ��������������������������������������������������������������������������������������������������������|�������~~��~~��}}�~xui]WRVWY]^^\[[XXXWSQOMQW\^agnvrk`ab__\bdacec`ZXTVVWWWY[agkookkmnkhilmiipommlsysu{}|z}���������������������������������������������������������cGIJLOa�����rmjjkjkorqqsstuuwz|wpjfdgfhjkd`[VUVY[[^`]\]\]][UQLPZilsyxtuwustusqqsonopjimniimljijlkjjjihjhhgggR=EQ[be__c`]^_^WX\YUWXVTUUVWVWVUWXUUTTSTUPPSRNQSRNMOOOORPSSSTQQSUTWVOUWVUSPNNPPLMNLNPPNMQQPRWTYhxs_ZYW]eo�������wsqpv������������������������������������������������ʯnBIHJMMLHF:'	
+6ivz�����������������������������eDFEFFEB@>4-"!4DOXaab\MA6("$*'""&&%$%#$&&%'),.244977:>AERhw{xgXWVYULRNY_^ajp{wvmjmqm`^WE$,5+$(&#$%$''%%7Mhf\UNIJOLTn�������������������������������}xwunZ5!%%)My������wd8"#''(,/36;@DEEEDDBCHJMQ\ke\ZURRMIHIRn�������������������������������Q"$#$&'$$$##$%(+./.'! $()*Geqsutqpmlkjihfd_\\]\^`cee\FHVl~������ĺ���p; "#'9k}mf_=5ET^ikuck����������������������|A!!:c��wO4.13.+.5Ml�~dI;;L_e_dffc^U[aeinu�����������������v~�����t_\\XUYYYVROJOm��~|}}}|�{ph^X`ju�������������������������������������������������������������������������������������������������������������������������������������������������������þ���������������������������������������������������������������������������������������������������������~���~���}���~��{}~��|{ypf\XWX[]]]\\]]]ZXWSRQQTY^adfc]^XTQV]^acbb^[XVVWVWVY^diknooljkmmilonhlonmmnuury|z|~~��������������������������������������������������������wWKIJMRn������qmkjijnpoqrtuwvwy{|{qfbdeghigc^ZUPTY[`__]^\ZZ[[RNMM_jsvstvussurorqolnoeVWbiijljijkihmolkknfidXP@17FV]`^ba]Y\\ZY\[XWWWVUVVUXVUSUUSRSURSTRQSSPOQPOQNMLOPMSa^Z[^^^_accbffcaZPMNOMKNNMNMMNNSS[fd`h~��xkciox���������vpprx������������������������������������������������ȩeEJIHLMMLJE6#	:erw}�����������������������������dIIGHHIHGED@5,&(7HT\diidVH:,    "7QF433688;;:8:;>@BFIJJHHIILLKWs|��lefkleehflnllounfadca_\ZZUP<"+1*" #'& )I`e[XWVRRSc��������������������������������}zyxm\3#'%)Kr����h7'" "$%)-0127<??BGFB@EGJLR`mf\YVRRPMGJ`����������ý�������������rr�����A""#&(%$$#$#')+00,$ "%()-Hdrrtuqpnlkjiigc^ZZ][^`ddcW8=Ki�������·���m6"#")9n�sfrc64FXfq�w\p����������������������q9 
!:i��sG0.140,.6Pl|xcE:=K`fffefc_^cdfinu�����������������v�����kXZYVWYYVSPOIPn�}}}~��{pg_[ajv�������������������������������������������������������������������������������������������������������������������������������������������������������ž��������������������������������������������������������������������������������������������������������~}��~���|z���}~�~zz|~|{}xlc_XUVZ\[\Z^\]\\\[URQOQUX]][VNDILQUY_`][UUXVWWVXW]gqpoqsonpoomjnlinonllmpsrsvvvy}~������������������������������������������������������iLKKKLW�������tmjiihmmoqttuvwxz|}vmccdcfhehb\WRRV[][__`]\Z\[VPMIUfoportrrttooqrnkljSEHU`djljijjhimlhjlkgcUE:-(+8ITZ^_]\[[[X\ZWWZZTTWWWTVSTSRTQRPRPOQNNQPLNNLKNNNLKNSexullllklnmnnnpnjcVQMKKLOKLOLKMPS[pzwqs����{vtz����������xpqsy�����������������������������������������������ĜOFHHJJMMQNLC1 
+:crw}�����������������������������cLLKKJJKMMILHC=2*! +<IUahmmdWK;* !###! !(&" 5u�jLKONNPRWSROKILLIOOWZW`]`abnx���gikljgeigfgfffbbb`a_^\\Z\WO9,2)!$(&&A\]XWWYTQQi����������������������������������}{ys_,!'%&9_mplnpqoZ?50*%""!#%'+.028=?>@CA?ADEHKRbnf[YVQSRMDP_�������������������������LLe{���6"%%$#$$$$&).0-+%  %((-IgqrsuqpmkkjjjgaZXX\\]_cccSA>Eg������������i2!!)<o�wzw^77EWiy�[Wq����������������������l9!=l}�sE/-120*.5So~gE4<N\dddcdb`abdfipv�����������������u������kY^^ZYXVTPPNEPr��|}~���|qg^Zalx�������������������������������������������������������������������������������������������������������������������������������������������������������Ľ��������������������������������������������������������������������������������������������������������}~��|��~|}�~~}~|z{z{|}~|yvtmaXUV]\\\_^__^^\ZVSQPKRWYXVMFDDIOSZTVWUUSUTWVZ`htvquxtorwrmlonijoplkovpnrttsu{}}�����������������������������������������������������{dNLLLS_��������rjiiilmmorssuvwx{~|unebchgejga]YRSWZWZY\[[ZZXWTNLN[gopsttutrorrpnmkdK?@JWaijjlhfeljheijffaOC2&##-;IT\]]__[WX[WTWZYNOUTQQQMPROPPSRMMONNNNOMOOMKLLKNNO[r��}yuqqstqrtrrspmfYPGJLKIIMJJMPQe���|~������yz�����������xusuz���������������������������������������������Ͽ�JEFGHIMMQQRL?*
+
+<gqx������������������������������`MJLLLNPOOOPPLMGA6,'"*=OZhpqsj[L<0%!$&'&&$  %))%#!$'X���{pmhfqrnehegjmmmorrw�tnmlqtx��vaglkjhiigggfegdcabb`^]]\[WL5+0&! &*(%""%9ZZYWUWUTTg���������������������������������~}|u^. $(%%:[iommmmgXIB=;6/)##&((+-24:?ADABBA@BBEJMale[YVQSSPITd�����������»��������¹�~I:?Uo�Y)%"%&&$#$#%$'-0-*#$()/Norquurqnklkihe`XVZ[[\_cdaT:1Fo������ɿ����g) *Cq�~sY68HUkubP^l����������������������i1#Dm��lC,-11-,-3St��bD6:N]deeec`]YX^_fnv����������������t������h^``[XVVROPKFTv��}|}��{qh^X`mz�������������������������������������������������������������������������������������������������������������������������������������������������������ü���������������������������������������������������������������������������������������������������~���~���|�~}~��|~�{y|~}y{}~{ux{re]\[ZX]^]]\^^_]\ZZVSQPPRYUOJE@DFILMNQTUSTRWZ`mwxsszztpuvsnlomilnomnquonsrrtx{{��~~��������������������������������������������������x]MJKOUk���������}khgihhjnnqqrsuuu{|ysjdceeeffca\WVWZ[YWWVZYW[ZXRMMR_ipuqqtrmpronmmi[B:;>FW_bhhddflhedghgf]RE5'""%0<KVY\_^XVVXVQZbZMFMPMOPMQTPOPPOMMMLOOMMMKLLLMIKNMKXo����}xvststtutuvtpe\QOKIIIIIJLMRh�������������{}�����������zuss{��������������������������������������������̹}EGFFHJLOQRSPL<&=fqy������������������������������cNNPQRSTQSSUTRSTTNHA3) ".@TakpwrfUF3)$! 
"#%'(*)'#!#$'+++27c������������yvyvoonou|ykihgggfenmcikjjijggegfedbbbaa`]\[[ZSI4+/$!',/.(!$<XWWWXVUUTe�����ȿ��������������������������z[)$&*-*+?XkmomonfVLGFFD@83-)$#&)*+.138>ADDDCCAABGKQflc[XURUYWPYj�����������¼��������ù��bSL==D4)($"!$'$"%$$$%+00,'" $'(2Oprquuppmllkihc_XVZ\\]addbU3;Wu������Ⱦ����c)!"'Gs�wqjQ38IR_oplls����������������������m0%Fn��p>+-//,,-4Rm|qYA6=L_fdedb_XTRW`jmw�����������������r������e[\[XUVTRQPHAWz��}}}~���wme\Wblx�������������������������������������������������������������������������������������������������������������������������������������������������������º������������������������������������������������������������������������������������������������������~~��|~�}{}�|{~~{y~�zuz}{vruytnkf`ZZ]`_^^^___`]\[XVQOLQRQOJCC@BDGIOSTQUVX^ny|wtwywtswwtqnopppmlnrtomqspptyxxz}}|}���������������������������������������������������rTJIJORs����������|mihhgijkmnoprqtwzzvmfbdegfhif^]XZ]]\ZUQWZ]\^^YOMMT`cdgkopnqqollmdS=889;DOV]]_bdadaa`cdc^YK=-%  &0@PX\]][ZW[XUaj^MCGOQQNQSPNRQOMONLJLKKLMJJIKJJOIKM[q������|wuvuvvvxzywoi`TLHJMKJJLJPf���������������������������xuy}�������������������������������������������̱nAFFFGJLOPRTTQI8#?jsz������������������������������eOVUVVVWSWWXXYZYWY]YRC92* "$5PbjmicYI7-+(%#"!
!$+0.+($! %$%)38Bf��ř������zuplihcdggfl__acaba_]ghehikkjggggcba_`b^\Z[YXVXWRG3,-% $-4=5( ;RYXTW[XUVl������¹�����������������������}~�}uR**7;?BB>G\kmponlgYOJJJIHFE?<6-('''*+146:>BCEEC@@BCFJQinbYYUTV[XRYk�����������¼��������¹��mgcUH:530( #$$#$%%&'*//,&  $(*3Uwuqttppmkjjkhc]WX[\[\`ded\USOq������ǽ����\' "*Iu{snkU37L[q��wmu����������������������l+&Hr��m?,,.0++/7TlrjWA6<P_gggbba[[\behox����������������r�����}^UWVTUURQQMFEYw��~~~���vjaX[cm{�������������������������������������������������������������������������������������������������������������������������������������������������������ø�������������������������������������������������������������������������������������������������}���}~�zz��yz~~~{|~~zz}zsx}}zxwvvuuwqha^Z^^^_]\___\\^[YUROMMPOOID>=>BGJKPPT\cn{}yvvwvtxyvsstpnpsrlfkplilqonpruvuy{z{}���������������������������������������������������fMHIJO[������������ynjgiijklkmnoprsswwqidddghhjidc`abcd`[X\^a^_^]UOJKSRTYbkoqrommlh_L96676;CLRWX[]]^__^^_`a^TF9) !+7IT[^_^^]^\]bd^RHGMQMLQPMNPNJKNJFHKHJKMKHJJIKOILQ\q�������zxwwwx{{zzwtm`SMJGHHJJINbw����������������������������|zz}������������������������������������������ʦ`BEEEHJLOQTVXUTK7$Cks{}~����������������������������cTWYWZZYY]\\^^_a`abffb]UG8,"#/FU\YOD=910,,,,34(# %,..+)*'"$%')+=f���w{{xststpmhgfca`dfbbabcbbc__`deghljgce_^`^^\\^\YUSROSRSWRF0./& $/>ED. 
";V\[gUXVSTk������û�����������������������~|wiN0;JPRZO>E`jnppnlgZPLLNLONMMHD;762-+-148<>CGDC?>ACFGMUfl`ZXWVWVQLOk�����������¼�����������wUdbUE943/' "#$##$%(+00,$$&)*2Ypqqtspomkjhigc\WVZ\\]_def`J/Dw������Ǽ����T$""-M}�{zmT0:M^s{abct����������������������^-%Js�}_=.-//+*/7VlqqX>4=Q`hhfb`bb]^adinx����������������}w�����{WUWUVVTRPPIBAVy�~{}~���~wkaU[en~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~���|z~�{{�|v{~�~}~}z{~{uu{}ywxxuttvvqomgda___^]`_^\_`]\Y\XUOQRQMIA?<=@BFKLZeny~{wxxuttvvsrutomqspkknnkmnpnlortuwyzx|����������������������������������������������������[KIJMQe�������������|nhjjiikmlnpqrqruvrkfcdhhgeiieddefkihefggfdbc^UOJONNS\ejonlmlie^E653355;BJSWZ[^]^^_^]a__^OE3%#/AOX^aaa__a_`bd^OHIKMOQOLMMMLNNJIKKJKLKLLKJKJIFLPYn~��������~zxzz|{{||{zocYOGFGGGGN[jx�����������������������������}{������������������������������������������ǝUBDFHJILPRVX[ZZVK60Kjrz{|~��������������������������bT[ZZ\^]_``bbbbfghkmpqsqiXF9-$ ':COLE836.***+:NNB4'$'*-./10.+%! 
 (,/<i���wwvrpprpmljigbeefdffdecdbcb`acegjlfb_^[UXVXXWX\VPPRWWWXYSF1 /0%  )3=EA4 #=Z^xzebekdn���������������������������������}xsbK/=B?HK5/A`nmnpomeYRONNPRQNQPOMLJC<884568=DGIDAABBFKLSeha\[[ZWVRPYq������������������������j6JH>:720+%#$##%#%&(,//& !()*3^oqrvsqpnmkiigcZTUXZ[]_cbbP&,]}������ǿ����O!  $,Qx�{xmP4<OYcVU\^v����������������������\''Mqx_;,,00-*0>[r|qW;5?Rdeffda_^Z[\`hnw����������������xy�����|YXYZYVURSQG?A^{�||~����ylbR[gp������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�����������������������������������������������������������������������������������������������������~||}~}�~yz}}}|�|y{|vpx{wttxwrrstsrstokfc`]__`_^_b_^^\_[[]]YQMLGD?;:<BGRenv{{xw{ystyxwuuvsqpusolnnljoqpompoprvwuvz~||���}��������������������������������������������xUJHJLUq��������������ynkkjkilmnorssssvtokebcgefhjhfddflmmnmmmkjhhd\UPOONOQ]emnlmlkg[;5300136;CKRX\\]_^_^]`^a`[M>-"!(1CQ\abba_a``accZNELPQLKLOLJLMLHJHGHIIGGFIIKIGDIMXmy����������}|{{|||}|}{rdZPJGDFHMS^hx����������������������������������������������������������������������Ͽ�F@BEGJKKMSXZ_b``\H=Ukryyz���������������������������~c\]_`abbddfhfjknmnpqstsqqj[J:-$$%*08A;0.,'%*+(1EMQA.()'(0345640,*$ !  ! 
#'/Fi���svtrpppomnkkkihgfffdedbcaa`_`dehiib]a\[\XY[Y\]YYUUZ___bbZI/!0/$%.2770'#=[t������nv���������������������������������{ukaK),-$?B17I`nommlmi[TQQQQRTTOSSUSTQPKJD=:@BCEHEAADFHJMVchb`bc_^^VT^v������������������������e2CA<96.))$#$$#(#$$&*+*%  (**5aqpturqomljiigc[XZXX[]_ab`TBPbt������ƽ����O !!"+Uz|wqfH.>NUblnfbx����������������������Y *Orz^3*,0/**.>YrzsX<6>Raedecc`ZSUY_hoz����������������|y�����uWTW]\[[YXQC>Fa|�|z}~~��ulaV\fp}������������������������������������������������������������������������������������������������������������������������������������������������������ſ����������������������������������������������������������������������������������������������������~~��y{|�{w{|{z{}{zy|~yttywusuwvstvvttuvsqokcX]]^_ac_^``_a^ada_YSONLG?<<ALbpvz|zv{}xs{~yuvyxrpuwqnmmliimojkppmlqutrvyzx{~�����������������������������������������������pQJJKP[}���������������xnmkmlnnooqrrrrvvtrjdccdfhjmjfgilmnoqqqpnklid\PNOONPV_imnlkieO5430//114<DMU[\_b]^bc```abYJ8)!"*6HV`cdcb`bbadhcSJJNOLKLLLJJIHKJHIKJGFGGJKKIIIJMZlx}�����������}|}}~~~~~}tk_PGDIHJMT]ix����������������������������������������������������������������������̺}@BCGHILNPTVZadjkjh\hrxxvs��������������������������~b]_adefffijnoppnqqookda[TNJF:,))$!+65.*(&&+-'$33/,))!".488:8553/*%!
!'(,Fl���suurpqnmnmkjhghhefecaba_]__``cfjnmgfeb`fefhcee_denkmrqon_I-!22$"(*+-.*##?Vz������`}�������ſ������������������������~{tjdJ,4CJWRF@Ifnqopqng[SUSTRTXZYVUXYZYZ]YUSONOKFGGAADGHKOWel`agb`a\VRXp������������������������_8EC<95-('$"$%#$#$#&(++' #'**:atrtvrqolkjjihd_[[YY[]`cefbQLc|������ƹ����J  "-V}xqlfG4>Q\q�~be~����������������������T#(Pwx]/(-1.()/>\t�uR:5AUeedcab]SRX^bio{����������������zw�����sWY\ca^^\XNA?Hc�~}~�����wmaZ]cs}������������������������������������������������������������������������������������������������������������������������������������������������������ÿ��������������������������������������������������������������������������������������������������|~�}{z}}|zz|z{}}{{}}{wvxwvvwxustvutrurrotxri\a`]]``^]^aa_afeda[USMOKE@@O`wuy}{y|}yst|{vsvvslmrromnhigjlmlonmloqtruvwtv||}}��������������������������������������������aIILOSa�����������������xnnnllmmlnqpqrtvxuoha`cddhkmnmqtssrrrstppnljfZRMNPPQXagjica_D540..,,+/4;GPX^`a_bba`b`bd_TC4'#1?NZbdcedfdacge\RLNMLLLJLJJIJJGIJKIIHHIJIHHIKHNZjuz~������������~~~~~}wm_RKHFHILSZft����������������������������������������������������������Խ���������ʱm>ACDGIKNRSW\bgpw{{yyyqp}��������������������������uONX__cbcfhdbb_\WQOLH@<=700122-+( &11+)('&.9'!!"%&%".579=;>?<71-+% )2Gf���ovtonlijmjhgddbba`^\XWXZ[[^`_cdkz}uxzxz�zw~yzwnyx���~vrs\K- 3/#%&*.344+%!@[y����}ua�����������������������������������~vskL2DMLNKA8DgopqqoldWSUUUWYZY[\[\]_^__`aaab^ZVOLEDEHHIPYfi]^]YYYUMFUt������������������������R6IB;84-('#"$##$$$$'*,+("&)-;cqqstrqpmjjjigc\YWYYY]`bcifTKh~������Ź���yE !#1\}zvujD7@Q\{nOMd�����������������������O
(Uu~zU/*-.-'&-C_w~nN84@Udebca`^XX\`ejoz����������������zw�����qYX[Z\][YTJC>Fg��{|�����~sjaV[er~������������������������������������������������������������������������������������������������������������������������������������������������������ÿ������������������������������������������������������������������������������������������������~~���~~}�}zz|zy{|}zy{|{ruyyutvwvqqvvsrrrpprvxtpnof_^_^``__^`cflkdZXTQROKGHX`jsy|yu~~uoy{xtsxwsqqqpnoqjhjkklnqlilspmnuusrvzxwz}~~����������������������������������������uYHNUXZl������������������wqommmnnooppqruuvsle_bceijmu���}vtsuvussrqmibZPNNOOT\dgXFJD4..-)**)',16@IRZaabca`bbaccc`N=.%!")7GT_dfghfebcdhf[TNOMMLILLKKJJHGFHDIGGHFGGGHGGM[jstz������������������{rdYNGFFIPU[hu��������������������������������������������������������ڼ����������˩dCBCFHIIKPRY_ekt�������~qz���������������������������n46JSQOPOMBB>:454/22----.,-*+)''%&./*+(&1Wc, !#$'& &3988<AGHED@83.*'##&*=_���rusojeeb__cb]\\]]]\WUUWZY^aa`cj����������������~����{smgYE*#2-% $38:=BF@5'AX���z��c���������ú��������������������������|rL19;:<;51JhpoqoolcYPSWZ[]\^`_aabccefggiljnhc]WLHGHKLP[ika]YSSTPHDYx��������������������ǿ��O9E?:76-)(#!$$##$$$'.0/&!%'+;erssusqnlkkjigaZWWYZZ\`ab_B4Kc}������ø���xD "!$2b�{iA4CQ_jVJZe��������������¹�������L
,Xw�yQ/*+.,%&.C^wyfL96BVfebba^\ZYY]dip|����������������yz�����mWYZW\\\]WOG@Ji��}|}�����}tk^T\jt�������������������������������������������������������������������������������������������������������������������������������������������������������ž�������������������������������������������������������������������������������������������������}���}|}}yy}�{v{}|wuzxytvwuswwvsrtvurtusntxxutyzwne_^\^`^_]deo�y_ZXSTXUONMPU_jtyxy|vvx{wvvwwsrtrmmmphghjihkmncfjomjnspqpsuunsvvy~����������������������������������������q_elsup�������������������tpnnmnpopppprrrtuoia_bdgjmt����}syvvuvttssni^VROPONVYU9)3.&''('('&%%*,3?JS]cdbbbcbcdbba[I9+#"$0<MYbgjiifdeffje\SQPMKLMLKIJJHIHFFIHGFFGHGGGHL]gnot|����������������������}vjYLFFGJNU\fr������������������������������������������������������������������ƜTDEDCEJLMPSX^elv�������������������������������������l',XK7202//---,+*''"  !"%&(%!"$'"%.-())J��F" "##%$")7<=<?CKSWXUQK;33/)&#'*)4W���vupmfadaa\__W[Z``^[\]]`]`dijeek���������������}w}~zrnkeXB)$30% /=;;>BB8-!
"E\������Y����ǿ���ƿ��������������������������ynH*8@AKC00NgnoonprfYNTYZ\acefdeghhjlmmlppqqoljf`WSOPPNZii`[XUSPMHHYx��������������������ǿ��K7D><62+)'#!%'#"$$$&,-.% %%+Aiussvspnkjkkjg`ZWWZZ[]`a`V:5Hb������������s?!#"%1e~�}s_?1ASZ`hmlg��������������½������wE.Xv~nN,(+.,&%-A`tthI57EYdeb`^`\YUQYdjp~����������������w|�����jS_^_]_d`\RC<Lm��~~~�����|uj]P[hu�������������������������������������������������������������������������������������������������������������������������������������������������������ý������������������������������������������������������������������������������������������������~}��|x|~{yz~�ywyxyxyz{vvvuut}ywtuzxutsurquxvss{{yrg]\[Z]`bdis�}a]d^UPYZXVTINWalsx{ztx{{vvyzsruspiinhacihfgiifeimolmpqqovvurpqquw{}~���������������������������������~����}}�������������������������{qqopopoqrqpqqqtwsmb^`cgjq�������yyxwvvuvsoj`WPOSORQF3')&#%'&# "!#&,5?OX_fddegfeedc`]UF6*##(3@S_gjjjiihfglng]ROQMLNMKJIILMJHGHGEFHEGHHGHI\gmory~�����������������������{n_SFDHKOT\dnz�����������������������������������������������������������������ƐPGFECDGKOQUY^dlx�������������������������������������g%Kc<,*'&%%%%%" ""#"  "($$./)-I�{K<  "#"&%)9@AEFHNWfsztfJ:<:83.023>Jz�~qsqmeigcgadbcbefd`aenjmpqrurjem�������������tvvyyvtprnfUA''42%! .51.023-%"C_��xe��eT����ļ����Ŀ������������������������ymE0?C@F>(,PjorrqqpcWNTX[^fllppsqrrqtrruvuuvwvtsplje][X_jh`[WSQMMGG\y��������������������Ǿ��F>E=864,)'"#$%##!%%'-.,!
 %&+Cmxrsusppolkkjg`XTWYY[_ab_WE67\������ʿ����o=!""(:j��xn]92CR^ry|bg����������������������n@4cx|qM/),.+$#.AavzgI87H\bccdb_[UQV_ekpz����������������w�����m^a[][\^YNH><Nn��}}|~����~tj^T\gv�������������������������������������������������������������������������������������������������������������������������������������������������������ü������������������������������������������������������������������������������������������������~|�}|{}~{{�{xyzww||zvvutrrvxwrsuwvpprsrpwtqrv{}tg_X\\Z\\_agjg``m{mRQZ[\ZZVZWX^gpvusxyytuywqrtrlfjkedfhfedfieeonjkoonpttrroqrsssuvz}�����}wxzxw}�{{}||z{{{{|}||~||y{~������������������������������wrrrpqpprqqrsssurmg`^aeks�����������|zwxywrlg\TPPQQLA6.*%!  !"&05BOZaefdghfffca`[RB2&#)8HXbjllkkjhfmppaXPOOMKIIHGKKHGHGDDFHFDFHFFIP\gmnouz�����������������������|rbRHFIKQUZaku���������������������������������������������������������������Ѿ�GJHFFEGJMPRX^enx�������ż����������������������������b%47*&!!! "! $)$&,)(*<YRI7  ##"'%".FQVY_am}����~[D@=6547;=><i{novyutsruszzw|vvtqtvw����zyytjgo������}��yqz}}zytquldP?$&2/#4f=%4*+)$B`��k��za\������������������������������������yi>-706M@..UlptttskaUNSX\bkqy����}{wtxz{{|~}||~}|{{soijlgb`[WVSPICZ~��������������������ƾ��B=C7875-*' !%%&#$$##'+-& &$)Fmvsvvronmljiie]SPVX[[^ab_W9/Tx������������k9!#$*:q��ykY:3FPWpsWWi����������������������u<6a}�nC-(+,(#&/GbvybF48G[bbbcb^VPQ\aaho{����������������x}�����h[XUVWVTOIB<=Op�|}{~����|si\Y_js�������������������������������������������������������������������������������������������������������������������������������������������������������»�������������������������������������������������������������������������������������������������}��}}~|yx|}uuvvtw{{yuvurqqvyrruxvusqrrsxwtty}|qeW\__]\X[^\_b__cttTKXZ\\Z[]`\Y[]monqxzyz|wtstsmjihbbekfceihfcjlhfjljimrrqoostqopppqtxz��zpjebbcflmmnorrrsvwxyzy|}}}~�������������������������������yvssssstsrrsqqrsrme`adjx����Ĥ��������}zvrmcXSPRRNH>3+'"
!!',5BN\chghihhhfdaa\L=+!$/=N^jlnonljlnoqk_TKMLJJJGHJHEFHGGFGFGFFFFGIP^gjlprx|�������������������������tgYNGGMQSY]eq�������������������������������������������������������������ηuGJJIJGILMQTY]dn{�������������������������������������d)0)"!# "$$  !)+$%*&$*CcVG, #"")(%A`kz}��������hD;;5348:932HY_p����������������������}wvuhfq�������������zy{|uyridh`YL<%'1,"! :��[XL,)''Ea������sZ����������ú������������������������zuh?16=XP9/3XpruwwsnbPPSZ]dku�������������������������mda`_[\ZTJG`�������������������������G@E8873,.*  &&%#"##$&,*"#&(1Kovsuuronmlkjhc]TSVY\^_`ab_[k�������������i2 !"'<p��wnX33IX]fkeej����������������������w<:`xs_?*(,.+%&.F_tpZ@49J\cecdb`YWZ[W\ku����������������u������bVTRTUUSKG@:?Otzz|~}��yodZW_hv�������������������������������������������������������������������������������������������������������������������������������������������������������»����������������������������������������������������������������������������������~��������~���}}��{|}|yvz}~zvxyupv|zxtwtporvwrtwtorwqoqvxuv|~wka[Y_ca^YTXZ[^`][[[USWY[\[[[]^]^^Z^diquuvyzspuunihihdcfhcbdhhdfljbfkkikoonmmrtronooopnrt|}zsjba_aadhilkknoprtuwwxyy{}~���������������������������������yvwwututvtsssrssojb`di|������������������|vogb[UYVND6-'#$-7CT_fiiikjjjhddc[J8(!&2AYflnpqnmonprpfWNKJJHGHGGDDGHEFHGFFFDDEFIO`hjloqtw�������������������������zm]MAFJNRVYcn{������������������������������������������������Ά���������˫eMLMLJKKNOQVZ`hm|�������������ø����������������������`&-' ),$  !()$')&&1QcY9*!!$()(Fp������������a:8@;74761-.4@Sr����������������������unolcjp|��������|zyvsqnnha]]ZUTTL;!
(4* !K����>)*&)K`������lQ���������Ľ����������������|wtxx|��{vob8-6HN:7/4Wrqv��wpbRNUX\bkv����������������������������gcbdhe`^YTVl�����������ſ�������¼���><>744354+! &'$"!##$&+' "'(*Ipspuvsonlkjjic\VTWYY]^`belw����������ý���f, !")Dr��wlS32EYj��zgl����������������������p69`nnaB()-.(#%-I^jkU@3:J]ghdefg`Z_\cqy|�����������������r������`TVTT[UNIE:9?Oq~{z{{}~�vnbUR]lx�������������������������������������������������������������������������������������������������������������������������������������������������������º�����������������������������������������������������������������������������������������}}��{{}~|}||z{|{|z|}xvyy{wwyvqrtttqrwvrntzqlotwx~}sh_Z`a`c`[USVY\__^ZXWWZ[_]\]\Z^]\^__^Zdjkntxvonrpkhhgffdiicbghefljfglkghmqonmorttqpppppljkrtrojddb`_`ehkmklppssvxvwzz{}������������������������������������|{zwyxxwxvvtttrqjc`bo����ϸ��������������~ywnjicTIA5+%%.7J[bijjkmklkhfhdUF1% "+:J]ipssrqpppqupaSNMMIGIFFCECDFEEEDGEDEGGIP_gjlknruz�������������������������p`NEGJNRVZbk{�����������������������������������������������������������ʢZNOPNNNONOPV\`fq}���������������ƽ��������������������W".$'3!")Dc:##!$&(%!')'(.LdX?3"#%(**Ku������������`DOg[GC=845307Tw���������������~||||wuwwvslahoptwvuptskjcdhd`Zda]`][WVTK=+4,""!F���f#*+&(La������tM���������������������~ztomeec\Xbt�{wtk[6,<A;?:/6\pt���xp`SQTW[ckv��������¾��������������ǹ�eedjrfbgd_cr�����������ž������Ϳ���{8;;7578:4-!%'%"!!$%(+(!
"%)2Jlrottrpllkjihd\UVXYZ\\aahq{��������������d, ""&Ev��vjR46FTk�pgbn����������������������b/!Cg{yfA**,-)"$.H`nr\@48Mbnswwvtsvyr~��������������������u������bWVVWXVPJC;8>Us�~||}~~��vl^SSam{��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������w|��{|}�~}}{z~|{wx|vqu{wttxxuqouropuxtpqwuklpuz~{rg^\^_`cd[WSTSTUX]][ZWSUX[]_\XXWWVXZ\^\]`]fquqprrlihhhgghgcfhhddggdfjjfdhnkhjlqsrrqpqnnnligknnoigedb`^aehknmmnrttutxz|{}}�������������������������������������~~||y||ywwvvvuoebcu����˭����������������~~yu[PLE=3)!"'/9K]hkmppooomkiebR>,#"'4CTcnsuusqssrvvk^SKJIGGGFFFFFEEBCEFCDHFEJU`fjjknqsv}���������������������������saSGFJOQWYakv����������������������������������������������������������œSPPRRQSQRQRV\bgr~�����������������ϸ�{����������������U#) .Wsnv}����6###$()% '*(&0Pla[3"%'',b�������������QTw�jZ[UF?94/3Ru��������������zw}��{}{{|xtnlpqnjmmjihgfgdehmeoc__`\ZXUQJ9+1+"! X��sU-*+&+Lb������iQ���������������t`]VNIHA=884846;Wv�zqmiU4,=@EJ;*:_su���xo`NMTV[blv�������������������������ŷ�ceelnjomib\p�����������Ž�����������z9;<789:83+ !''%##$$#))'!
$'*1Olrpturplkkjihd]VVYZ[\]acgorx��������������\&"" 'Dw��vlN.7EOTJJ]bp���������������������}^*!Ck{xd=''--( ",LgzuW?2:Tiw�����������������������������~q�����_VWVY[XRKB98AWy�~z{}~}~�{ti_NS`mz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~{��~~~y{~zv{~zru{zvsvzrmtxwvrssrpsyvpqvwskpvy|}mb]\abcgaZVTSSPWWUWY\[VSRRRRVQSROMNPSV[]^\]belkqtnjkkgbafgdeggeeggfeghfdhjlfgnostrrrrpmmnljihijmljihfdbbdgkonnoprrtvyz{z{{~~�������������������������������˺�������~}}}{yxvusmiir���־������������������~zshUROKE:/&!(1>Scjprrpqsqnmlj_P=+$#(;P`lsvwvutsutxumUJKJFGFGEDDGFDADFCBCECDIVaefhkmmqty}���������������������������sbRFFKNSX]dmy�����������������������������������������������������������IOQRTVXVVVTV[ajs�����������������ػ�z����������������Q#%"I��������^,"#&&*)$"(*'(6^plU# "%./2k������~|{y}�xKJibTLG=32-*+9Sn�������������~���|||zx{vpjinqomonmljggfhq��t�i``^[YWUTJ8,2*#!#J|���?*.'
-Qh���\isiQ��������������yB&(/,)--,.222059Vz}xnjdS06@EIH7-<^sx���vk[JKMT[dmw������������Ǽ��������������ghdggdff^TRg�����������ļ�����������v8?=877972(#)(%$#%%&*)%!#&(0SnqqtusonmjjifbYTUWXY\^_cfiht���������Ŀ���Z"$#!)Gz��yoQ-7DNROP]^v����������������������e+ Gnzzd8#&--)"$/LmvmV</:Vl������������������������������zr�����~bZWX\^YTL<78A]y�}yz{}���|ti^UX`n{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~����~��|z~�~|~�~yx~{xy|{wux{xvyzvor{wsrttpoqtvqquyrjmuyzsg[^\[^ab_[VSRQPTSORXY\\VQLHGKMGJIGDDFKOVZYZ\\[]ckokikkgaehhdffdbikgbgiffgikhhlrquvsqpponmlkjiiggiklkiiffeddjnoorqqtvvwvxzzz{}~�����������������������������ئ������������~|yxwtohq���˯����������������~zwo]NUTPJB5+# %-8FVbmrrstutrppoj]K7($&1AUgrwxyxxtvuw|qLIJJIGFFECDGEAADDCAABADIVafhhikmpqvy}���������������������������vdMHIJOVZ^djs�������������������������������������������������������ηtDMQUXXZ[YWWX]bit������������������׷�{����������������I#$$S��m���M;,%$$%'+*##**%)3TjY9% "&/RNFo�����wx|{wwyl>&4>5+&&$!"!&8Wz��������������~zztspmmfea_chklmnkjjkgghh�����]ab\\YXVTH5-2)"!(x���z;%+) ,Rh�����xmX��������������{<))()+-/./.-.13;V{tnjaN-&.=AD7#@bpw���thUFGJPW`ly������������Ǽ��������ÿ����ehb_][^^WNOi�����������û�����������q9?=565642(")&##$%$%((##$'0Vlpqtuqpmlkjhe`YTTVWYZZ]`debu���������ü���X# 
%H}�}xeH.8GSYbcegz����������������������g*"Hl}z^5#'-+%"%1PmurV91>So�������x~���������������������xp�����}bZX_bbZSG;68>]z}{yz{}~�|sg]SYbl|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|���~~��}�xx~{x{�}wwz{wuvwurpuxsnotspnprpnrtrmps{xod]W\\Z_b_WTPPPOQRPOV]bgaXTMHGEDDGDCB@?CFNTW\[ZXY^ekihjiihlhdgjfbfjkeakhgcfhigglquvwuronnomlkljihgfhjmlkijjgehlnoqrsuvvxxyyz||}~~�����������������������������Ӻ���������������}yrfs���ë�����������������|vtj\VUVSQI=0'" %-4>I]gpsvxwuusrpojXB/&"'4Haovz{{||ywxu]AFLMJGDGFDEFCBDEABCCBCEKVbeghjjkpstv{����������������������������u`PGHLRWY^aju������������������������������������������������������˯hDINW[]\^_`a^^_hr������������������յ|����������������L##"857��X;83,$$"'))"$++'*7^jR4##/\bK?f�����r{|ywwrd=+6;6,&"! !#<Xs���������}z{xxmojigdbae^___`fkknnljjhhghm�����[_[[\XXUQH2-1(# =GEnr.42)0Pm������f^��������������|<(()())./210105;Xvyqlg^N)+BEC?3)Beqx���uhXJFGIPZiw������������ż�����������Ĳ�fga_[^de^WZm�����������ú�������ü��k9?;367970&!('%#""!"#$"$&'8[lprwronmjiigc^WTUWWZ[]^_cahy���������»���S# 'Mvyqk]D,;KYr�}ol|����������������������a'&Mt�wZ1#',+&!%3PqzoT;4@Sp������}z}|}�������������������yu�����zd\\fjgaXH;68C^y~z{||}�qd\PXdp|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}�����~~�|y~�{vw~vrwzwrrvytoowyomorrppqrmqttrquyypd]Z[\\^^^YRONNQPQRRS^gqne`YRIEA@AAA@B>>A?FLTWZYYXX\aeghhhigechhdchhgchjjgegighkmnvuttroopmnnmlkjkigjiigkolnkiikmpsssuvxzx{{{{{{}�����������������������������ќ{}�������ø��������uiq��Ҹ����������������}{ysfWWXUVVME<+%  
$,4;BQ_mtwxwvxxusrrhR:0%%0?Whsz|}|zwoT8>JPOJGEDEFC@BEDDEDCBDCIUcdghijlnpsvz}����������������������������vbNHINSUZ^dlu������������������������������������������þ���������ƦcMFJS[_`ceeedbain~�����������������ղ������������������K!"!#5_��PCLZe1$##&)("&+,'*9TjZ:!&@P4.7Zy����w~|{yxrc;+6:5+%"$&&"%>\irwxwsospmfdofdfhheddddebaa``dhlnkllhhhgic�����Y``\ZWWTQH4-/&!"9g��|DnK(5Rq���z��ia��������������z6%((()-=CD?3./4?Xvvlha\I/8=3582+Egrz��|sk[OLIJLU_p������������÷������ú���ï�gga^X^hid`dt�����������º������˿���h4B=55884.&#&'$!"!#&(6_onqtqmlmigffb`ZVWVVY[^^_a`hz��������ſ����Q (TtuhheE.;GZyl^a{��������������¼������\!&KoynW/!&,*$&2VmvjR63@Zq������~{����������������������vy�����{gflklmmZC96:Cay~zz|||~{pd[SXcl{������������������������������������������������������������������������������������������������������������������������������������������������������ÿ��������������������������������������������������������������������������������������~�||}�}vu{~|xwzvtvzyuruxwsrsxvtrutqqqrpnttrswyrg[[TU\\^_ZUPNNLMPQPSY`gptnjie_QE>>>>=?>>=>>BFHNRVY^]Y[_cceggffhiggiiiiijjhhjlfgkniktutsrqonmmoonljkkjjhechnnnnjiilnprrsuvyx{zxz||~�����������������������������āvnz�����������������WY��ʶ���������������}yxunaVWYY\ZSND4*#$,3:?L]krvxy{z{zwuurbM;.'+7Kbpx~���~zwlN;<CPPLIBCEEDCFD@EFCDECCNYcdcfhhlmpsrx{����������������������������t\MGLOSY^ahov�����������������������;���������������������������Ý`ZPKNV]chhikkkjko}�����������������ӯ~�����������������E!!#>wǳB*U}k.$$$&()"-,*$)=orV/#+@?13<V|�������~{yr^6+:<3)%&)-'#)<Xgorqqoonljijjgkqoiggfefdbabbbeinnmlihhgggg�����[b_\[XVUQF. -.%" -{���jqh9)!6Vq�{g���\f��������������y3'%%&->[dbH3124:Ztsnie_H42037?3*Fhuz��|sn^WWXYVV\dv�����������ö��������������dfc_W[fhe`ao�����������»�����������g5?;45895-&#((%!
 "  #$%5]mnnqqnmjhgffe`YUUUVYY\Z\__l|�������������z7(Z|�woa=0=JU[^LTc���������������Ż������Z (RruuX.!&+*# '6UqwmM73D\t������������������������������ux�����iinnomcR>87:Fg|~yyz}~~~zph[SVdq}������������������������������������������������������������������������������������������������������������������������������������������������������ſ�������������������������������������������������������������������������������������{~}{{|{|zvv|}zx{{tuy{tsuxvnrwwrostronqqllntstuxrbVWWY[\]]ZVRPONNKNOTX`gntrjjmlfVNGA=:;=<<<;;=?AFJNSVVYYY[^cefehjihiligiolhegjlciomhptuvusqonmnnnmlkmmkjhecgmnnmlijknpqrsrswwxyyy{||}~���������������������������ń�||����������������{*]��ǭ�����������~���||vsj\X[\\\[YVM@3(!"+5<FUdjqrsw|}}|zxzvm\G8-*1@Wiw}���|yn^J?=HNLJEEDCDCA;?CA@CDBBNZacegghjmprtuy{����������������Ŀ�����������qYIJLQW[_elpz��������µ������������Ⱥ��������������������������Ͽ�dd]RNRX`kprrssuss|�����������������ӯ}�����������������F" (c��7:cB($"%())"*8/)%(<]_O6& .?DCFFW�������}yo`1)9:0*'')(#"*Iarrsqpppnnlmlljv�}hgfecccbb`aacjllmkghigfhf�����_`^]]YWTNE+!//%" +o���VA8.( :To���ePq����µ��������v1&%%&/SgbT51247<WstojgeK(6=CIF,)Khtz��yql`YX\``aehr����������ö��������¾���gfb^WW^^YRWn������������������������b4?;45862,#!&&"#"&<cqmqqommjheceb\XWWVVXY[ZZ^cp|�~���������f@*^~|skZ</>IYlyf\g���������������ú������T)VszuR* &+*#'6\qxsL44A]w������������������������������uz�����|ioomkf^I:65:Kh}|yyz}|}|qfZPWeq~������������������������������������������������������������������������������������������������������������������������������������������������������Ŀ�����������~��������������������������������������������������������������������������|}~|{||�zwyzwrxyxrrwwrotvqhntqmloonhmortvvq_SVSRZX\\ZVRROMLKIQTZ_fjoslcimlic^OH>;<<;::::<=<@BEJLOUWUVX]_afklfglicfjnjehkmlklrmnuwvtutrpnmmmnnoqwxrjgdcgllpnpmooopqstqruvwyyyyyz}~��������������������������������z|���������������L%y�������������st����{xvsh[Z^^^^^Z[XK=/& 
"+59E^konnoqvxz}|||xukXE5-/4G_s{�����}vkVH;?MNLHDBBBB?@@BA@CBBDMYabcefijlnssuwz~����������������ʹp`jqw~����dPJKNSX]bhnu�������������������������������������������������λ�nljdYPS[gpvy{|~}~������������������ӫ|�����������������>! !(i�?#1'%$$(**)"14,)&):bfY8! 1ISZQ=\�������~}xnY2)5:1&&&$ "3Yiorsrrppmlkkknu��vggfebacdb_`dhijlmkkjifgff�����Z__]ZZXRNF*"10'" $c��yH11.' 9\v����ojat���ʿ���������m6'%&(,9??743659=`uuoki`G&6BJJ>1.Qmt{�urkb^^bfhirsr}������������������ý�����{jfa\WTVTMEPo����������ĺ��������Ŀ��_2?955772-$!&&! !$&'Ahooqtrnjhfddda]YVUVUWZZZ[bhr{�|sjxyqlcVG=,+_ztlcV9.?MbymUVf���������������¹������Q+RryiK+"&*)#':[u~kK15E]z������������������������������wz�����xnqokgc[C754;Ki}}yy}~{}�}reWOXgs~������������������������������������������������������������������������������������������������������������������������������������������������������ü�����������~������������������������������������������������������������������������}}�{z}~zz{~�ztvxxtuywtsuyvqstqnmttmmnqojirqrsujWRXUSZXZ\ZUPNOMKKLNV\fhjlooknnpmmsm_PA=;;;<;;:8::;;>?BEJMMNPST]cfgejmifinladlnnosurprwwvrqsronlmnnnnx��zmkffjlmnorstsqrstuttuvxzxxxwx{}~�������������������������ϩ���vpy|������������46��ҷ�������}yzurtzz~~{yxrd^[\__``]^^WI:+& %,5<Ldpw�}qiilptvxxyzweS?0-0@Or������|sfSC=FMLHEDDDCBDB@@BA@BFN[_adffhikmqqtxz|����������������ȉ-).48CN\lzp\QNMMQV[_ekr|��������������Ϻ���������������������������������ζ�qrpnle[U^gpz�����������������������Ы~�����������������9!
 (GQL;*W:!#$$'(++"(++)&,;^aR1!/GO\_B3b������}}ypdP/+460)&"%B_lpqpsrrolkjikmr��pfffdcccc`_bfikllkkjhghhen����|\`_]YYWRMC+#32'#"-gkYoP+/.(  =Z{�����w`q���ʼ���������i5*+'+223:@BAAHF?d{zsnh^A+9DJLD3,Rnsy��zrmaabfhootz}����������˾������ļ������yffa[VTTPHBTt���������̺�������������X6>846760*""'% ## &''@gnnpurlkjhgfda^YSSVUVXZ[_egnw�zkPSK@<;20,".aypkhX30AO]de^bf�����������������������J/UppbE(#(+*#!(;Zv~fI15G_}��������|z��������������������t}�����rkmliecU@635=Jm|{xy|}|}�zpbTOZgq�������������������������������������������������������������������������������������������������������������������������������������������������������º���������~�������������������������������������������������������������������{���|}~zyz~}xz}~|xxzzwux{vss{xsrusonswpinopjiopqrqgYSTTSUVWYZWROMOMKMNV^fhilooihoollmqnf]TKD>====98;;9;=<;;>AABHMOW]`cglliflnhflpmnpwuqquuutssuroomnonmn��npnjknponqrtuuttuuxxvxyzyy{{z||}���������������������������Ś�����umfjx���������u'X��ϸ��������}{vrsmtz}{yxnb]__aabaadb_WG6*" #(/7@Ukw���znmlllfkpttrj^K8319o���������ztdPBBKNNIDECBABA>@@?>?EQ\^addcegjnpqsvy{�����������������v&-,+)*(*09CSXUQOQSX^cjr{����������������̿����������������ͻ�������������Ȯ{uvxwvqk`\`hu�����������������������Ъ����������������|8!'Q��^rhC!##$&*-+! &*)'*A_kP. )HVZXM95h�����~zvtpi_L0*460($")Ccnrsoqqronmllllq��jdbeddda__aafjmlllkkhfgfgs�|��ha_^^[ZVSND+&4/'%#)08b�J*,,' !?_{�����zOq���ȹ���������i13FNPPT`_e^gdeREi�zsmh\?,=LRLD21Wnt{��{smbfhmqtv{����������ǿ�������ļ������xhga]XUVTJEWu���������ı���������þ��U9>64882,'"#'$%!
!%&*Biqorsqmllhfedb]VQSUVVWZY^bblw}zjSXTLKKB>2%-i~vskR42BN`u~ubh����������������������|D2ZmpjF)!'*(""+<XxwaF37I`�������vwz��������������������s�����ofdegfdYJ@65<Oo}{xx{}}}~umbTR[ir������������������������������������������������������������������������������������������������������������������������������������������������������������������~����~��������������������~������������������������������������~��~����~}}yz}~zx}�}ww|~wtwxxstx{rnrsqnnstpnrsnikprsn^TQRQPSTUWWUSMNMPMOTX_gkdglmjgjnnjilnkjkdZTGA=<<9:;98:::9989;;@EHOTW^ekieikmljnmkmtuqquvutstttrponnnmmr��qjpmloqppoqrtwwvwwtxwyyz{z{||||}��������������������������а�������v[HXx��������O(x��ҵ���������zvqgv|{ywvlb`cdddedcefc]SC2' "',28CZn~����xtsnljjihkkh^M?;<M����Ĺ�����zqZKDGORMGDB?B?==>>=>AEPZ_``cbbgkmnptww{���������������˹f&*'%'&%$"*AW\ZWRPSW]`ir}�������������������õ�������������̳�������������Ʀuvz|}}|{vk_Ydt����������������������Φz����������������w6!*w�mJ`U@!"$#&*,(&'&$(1QeQ).=S[TO93o��xvumnsok`O4-881*%!
)Mlnqrqrpponnmmklx��mhfeedba__`aejpnkkkjiggffhqhfc]__]\[YURN@(&3.&#!#,9lO%(+-'#A`|������Ww���Ŷ���������h3Qlplluvvphc`ZHAl{wpkeW;+?HBB<+2Xlt��xqpknsx{~����������������������¼������sjgb^WSRRIFVt��������������������ž��Q:>8443/,' #'" "  $'.Fhmnqtqmjjgfedb]TQSVWVXYY^bdn|�zlbehkgbXK4'3bwttjT0/BLb�q_Nl��������������ſ������zB5atulF&#(*&  )?`oq`C36Jh��������yz{��������������������s�����cXY_jptkYI95<Qp~|xxz|}}}~vm^PR[hq������������������������������������������������������������������������������������������������������������������������������������������������������������������z}��}|�����������������������}����������������������������������������|zxzzw|~zwyywtuxvrrv{toqutqpqtpnqqlimrqus]LSQSURRUXWRMLMOPNTWY[hkhghjiiklonmnnolnplg`TKC>;<;:9996779679:;?BHKOW`eehnmkkmolpt{xtvxwuuttttrqonnllnv��pjklmrtrrrrstvwz|xuxy}zy||z{}}~���������������������������Őt�������oNJdx�������56���ˮ������������zx|zxxxthcggfhihhjjied_R?0$&-6:Dat�����|wtqnnkjhfdc\QDEKY��������ƶ���lVD?KTPJCA>@?>?=>=?AFQ[^``ccegklnnrwxz���������������ɱY/-)%&'&##*DY_^]ZYXW]^hq}�������������������ʨz������������ĭ�������������š|{|~�����ukcep���������������������̣z����������������v0%V_sqrF !" %+,)&('&+BmbG+'=PM[L28r��}yyxyyyvqdN.1=>5,&#"6arlppqqoommonkjlu��gfgfcc`a`_]ahklmjihgfgfecbcc__^^\^]YWSQNB''2-%$! '('$&*..'"C_������rIz���Ǵ���������f8Ue_RTXRXQHB=:7AfvungaY:1?;;B8,3Vkt���ytt|�����������������������������������rjfb\USSQHGXw���������µ���������þ��F9=7673/-'"& !!
$&,Idmmprpmkkfffca\TQRUUWXY[`chu�xkis|xrc=0* 5j{tlaR-3BKSRE@Vq��������������ž������y?9`wygD&#'*& *@ctra?25Ik�������������������������������r������XQXiorviO;33;Ts{zxzz}}���wl_QQZhw������������������������������������������������������������������������������������������������������������������������������������������������������������������{�}y|�}}�������~�������������������������������������������~��}���}}}{yz{ywx~{xwwwxwxvvttxxoouuroqrolnplfjopneVLTTMPVTTVYTPMKLLRSZZ^enkgiligipnmkpqnlmoolijgZLE>=;;;;8855559:89=>@CHKT\bkllknnmlsvwttvwtrstuutrqolnnln|�ljijlquutqqrsuwz}|zzz{{z}}|}~���������������������������̽xk������zaFAOg{|}���i&T����������x������}|wyxupffllmklkmlmkkjf^M;+#%069Ogx�����}voponkjjhge[QLMQW���������������}hgcZPB?>??>?>>>??ERY\^aa`chjjmnrvv|��������������ˣI41,'%&%#$-JY`caa`^X[_fo|�������������������ΐ\�����������˲����������������������������umiq|�������������������ˣ~����������������t/ %_����f"  %+*( &('%*=OJ:( ,5EN=2:w��������}skaL/4EC7-)($"(Eptnrqnpnpnlmljjlq��gghfeddba_]aghimikkjhfgfccccb`__]]ZZWSPL@%)2+%%""%$#'*.10(#G^�����wQUz���Ű���������`0897,).+1378758Eetslg`T:2;?CI<*3Ylu���}{���������~}uswystwxx�����������������qiea[UVSOFFVx���������ƻ�������������F;:56972-& % !!
#'+Hfmmrupljifedc`\VQRVTVWZ[_`kw�xmn|��|vP73-"5ivkdaK-3BKMLJGXn��������������þ������t;=dtxe=%"%)% )>]miR;19Im������������������������������{r�����{TOPZ`oeI=401;Vt}{wzy{{~�uj^PQ\hv�������������������������������������������������������������������������������������������������������������������������������������������������������������������}{{~~}���������������~��������~����������������������}��������}|�~}}{z{|zxw~{y{}ywwywsrsuvsoruqlnqpmhlliinqofUHRQMPSRSWVRRPLMPRWY\]iolfcikgilqliknljhlnkjlqoe]TJ@=;::8;977<??<<?@>>@FKS[ccfhnnnnruutuuutuutuutqqonnnmq��xmmkjjosuusrrttvy~~~}~�~������������������������������Ѷk]fooiVHA55>M[cMp���R'|��ٹ�����xzxwyyy{{zyvtohlsqrqrrsqrpprol^J5("%-08Uj~�����ypkmokkkigec[TOLT\��������������ø���{OC?>=?==<>>>>HU[]]__`afhimprvy|��������������ʐ231,'%#"!%5S^dfeefd_^_dmz������������������ڿmj����������ʸ��������������ξ���������������vsp�������������������̡z����������������o* !1y�bG9>9 '+*&"()($(,-0,'$4>FD09w����|{zvngfaN07TG1,,+''*S�yssqnoppnllkjknx��fhgeedcbb`^`fhhklkkjifhgeeba_^^__\ZYYUOJ@$+3,&&! "##(+/21)%Gb����sliW{��������������]1/,.1/,+538AB8;IhurlhbT83?@@</&7]ot���{}���̳�zpld^TMU]__\`k~����������������okf`ZUSSOGG]{���������ĵ�������������?<;569:5.'
"& "&,Nloostoliffcbdb_XTUUTTXZY]akx~}wnp~��oI985% =lzmgaH05ENX`fbly��������������½������s6?iwyc7"!%)' !(BZf]P:08Kl������������������������������yu����ySLLR^qPA;3/0?Yv~{vy{{{}}zri]MS]kw������������������������������������������������������������������������������������������������������������������������������������������������������¿����������}�{z}~~���~~�������������y~�|~����������������������������}��~|��}xyz{yv{{ww|~yvuxwroqtupquupmpsqljklmnonbTPQQLNRQTWVROMNLQTYYY]bikhehjfgkmkkmnmiilllhkqnmnmaUI@=:9::9:;@BBADDD???ACHHP[_djmnmsuwwtsstvtttusqppnomls��qqpljgjotvsstuuwx~���������������������������������������ѧ}kXZTQKA:>UbQLAK����<8���ԭ������upnptmv|ywspjnz{yzyyyxwwvvxusm\I1) &+,<Wm�����unmmljjigdc`[WSRTZ�����������������Ī{=>?>=<=>>?><@KV[[[^_``cfhmoqtx{����������������z"./,(%#""&=[cghiijhebbdjz����ɥ������������ئT~���������ҽ���������������λ�����������������}������������������Ś|����������������m)"".\`aa~�D  %))&$''%#&'%(,'"2GUF.=|���{vuuonnnfL/8D3&%'%%'*g�sspnoqnomllklo~��iggfccca_aa`cghikjiiieeeefb_^^^_^\YXZTPM>!,5-(%  !(+0:<2$%Fe�������W���������������Z0.1;L?8;KPX`Q7;FhtqlhaV4.4664.)=apw���x~���٪p`VZYIEAIRWUQYk}����������������lif`[UXUOGF]}�������������������ÿ��>9;76895.'
#&!"$-Qlmotsomjggdcca_ZWUSTUXXV[aft}|ukly���nI=<7#"Brweh^F-6GSi{�tt}��������������ü������p1 Aiwxc9""(($"*>Yc]N;/8Mj������������������������������qj~����yUMN\m\DA<5.0C^xzwzxz|~}yqf\OU]hw������������������������������������������������������������������������������������������������������������������������������������������������������¾����������z||}~}|~��}{}��~}�~~�����~}|���~���������������������}�������}|�}y{��xuxywwvyywuw|yusvxwoortqpsutqppomlmlnonkZOMSRMNPSRVWQNNLKNTX^Z^cijhhjjhfhomjlopkhknlkimogimqh^XND>;<;;;=AACEIFGDBA?=@DKTY`glpsssuxussuussrrpqqpnomnx��pzunkijmqturstwwx|�����������������������������������������̙��|wpgTBMenaDCFt���[)R���£�������{vuxur}{wuphax�xz}|~|}~||{{{{wn[B0% $'-A\o�����yqnmmljihfca_\XSQQW����������������ξ�c@@?====<>?==CMWZZ[\_acegilnpsw{��������������;d!)('(&#"")Aafhjmmnmlkjily���ѫv������������ȊY����������ӯ���������������η���ŧ�������������������������������ǰ�~����������������o$ , -x����{0!%*)&"''%&'#$),&%5U[B1<�����~}}|}yxnJ+54) !#-{�zpsqpqqnmonnmlp���igfdddcb[_^^fkjhhjijhgfddcab`^_]]]YWVQQJ= 0<-'%  !$(0:GJ6$*Jf������{W���������������Y*.9Vc[YalokoR:;HjsojgaT3)5?CF;*=_ox��{u���آ_TRTPGDCENOIIYj���������¼������jic`ZUVQNGG[����������¶������������{?=;66763/'#&$'2Qhlossomjggfdda_^\WUSUWWW\`fu}rbXgprk\JB<1 EqunlYB(7GTp{vngy��������������»������j.!GfzrY6##((#'?[_ZL7.:Ni�����������������������������`h�����vTNPidE;=;403Ca}{xyyz{~~}og[QW_gv������������������������������������������������������������������������������������������������������������������������������������������������������ÿ����������yz||y{~}{yz}~}z��~��~���~z}���~����������������}~�����{�|yy~|vwyyutwzwvxz|xrtzxsptuporsqmqtqllnomnmgXMORLMNQPSWWSONNOQWYX\cehnhehmjfdiliglolhhjlkhhljbenngce^SHA=::<>@ACGIGGFGFB;<@DKQV]fmqqpuyyusutrsqqqqqpppop��}{�yqljhkmrsrttxvxz����������������������������������������Ͻ�������gQ^h`OHEM\QUO3,v��ٮ������������x}~~zzwnbex`U[`fnuy������zmV<.' 
#*@Zs����vqmlljjgfed``^VPLOe����������������ɵ�L@@<<=<<>>><>@MWYZ[[\`ddfhjnpsx}��������������ɱJ"(&%%%'$%/Mgklnqqtuutrnmw���ĕ}�����������ɳue����������Ρ���������������ͽ�����������������������|����������ʨ��~����������������h$$*-h\G>WfA !#'+)&"''&&%#$(*&"7OSF4D����������|xnH+12( $4��{ttssrqnnonnlln|�gefedcfcbaa`afjlkkkhjgeefdcba`^^_^\ZWVRQK: /7,&$  #"&0;DE<( +Qd������yS���������������`,0;\kgcclieYA:=Kntpjh_L."<JRJ/%B`pu|y����֛aRPOJECCDDCCGTf���������¾������hjc`ZRTSOGH_������������������������w9;:457640&!# $'3XimnsrokhggfeecbbbZTSVVWY^ciy{q[NOHIRPJ=6)!Ivxri^>*:HR_joUaz����������������������f*!HkzuY1"#'&"(B\]^I7-:Pl�����������������������������jMg~����uQNVgG<657/.3Ba{~zxyz|}~}zkeTOXahw������������������������������������������������������������������������������������������������������������������������������������������������������¾����������tw}~|z{}|yz�~~�~��~}���zy}|}{��~~~��~��||������~z~������~|~�zz~�}yy~{xutz{vx||yvsxxsnstpjnrsmlqtnjkppnmaRJKQOINPQTWWTPMNQUUUX^bghhifejlhegmhhiklkijkkllmmjinnlhjrj^VOG<9:9<>BDFGJHGEE@=<=AHKN\ejntwvwwsssrsqrsppooonq��x}}yommhijlmostuuwz����������������������������������������̯�������wX_\frj\TNHDD56��⹣������������{�~wtrlgprI??<=>Oetz}������|iK6,'"!+@Xw����xrnmkkjigfecaa]WPMTu����������������ŬyE@?>>?<<;<==>?MXYZ[[]^acfhimqtx|��������������ʤ5%'&$#&)''7_nsttvwz|}~~yxy�������������������gg�������������������������������Ⱥ������ѻ�����������jpx}�������循��{����������������c!$%%.$nw3$#$%),(&&+)()'%%),'#:KPD/K���������~{tjE+05)!%;��vutrrrpnonmkmlknjdgeeebcaaba`beiljjkiihhgfdbaba_]]]^\YZTPJ9 /2*'$"!&,4>AA;0&*Nf������lY���������������`,1:KOBA=>B>?=:8Smuoif\K15DA:/!'Gcsz���{����Ҕ]LMKGBCCEBBABOg�����������������hkcaZQTSOHFb����������ø������������r6;835744,$"#"!
 &4Xlqrvtoljhfeeecgle[SSSUTW^emz~zoYMH@@GH=75+#Ny{kfb>*9JS]pNa~����������������������e&%MozzY0#'&!,FZYVD4-=On����������������������������|FJg|����sRNSXH:300--4Ei||ywy|}}~|vldUMUbmx������������������������������������������������������������������������������������������������������������������������������������������������������¾����������w}�ww{|xw|�}{~~{}~}~���xw|y{���}~�����~~���~���}}��}}��~zz~{zz�}xxz{yssuxvsvywtstvtpqsqmlpqqnpqonnoplj`RGKSOHKRQSUVTQONPUXVTciggiihdfljggkmihllklknlhmnqijmpkhlpohgf`VK?<::9=?BFIHFFFDA?;9AFIP[diwtrrtsutrspqqopppopy��u}zvrqpkihjjmpqpuwz��������������������������������������ʤ�������`esq~trnhh]UP.S��˦����~|�����|x{{xtqjiucA=<:868CJUg������|]<0-& !-B_}���~wxrllkiiheeea_ZTNK^����������������Ϳ�bE@=>>>;<::<=<BOXXYY[\^``_hhmptx|��������������Ȍ'('%##&&&+?gx�����������������������ǰ{w���cu�������������������������������¹�����Ы�����������m`iz||������滮��z����������������_#"!"('Z:&:A%%&&))%%(*+./,,23."4IX>)L�������|vqlbC(33%&A��{uuurqpoomljlighhhgedeedbbbb_bhjmljkmkhgeedcaaa_^^\][WVRPG5 44+(&$'2<<;630-%-Oi�����wMb���������������W'39F?46;@CHFGB?Rrxoii^K/-,&$$#)Gfu���{����ѐTLKHHGEDECCEBUk��������¿�������kjb`ZUVUOFKc����������µ������������o6;735752)" $!! "&7UkqrvvqmjhffecejneYUSSUUX_emz{qXNJBDIIB;9)%Tuwii[;,=NXtxKO_�����������������������_"%Ss}yW.
$'% !.EQSQD3/;Pn��������~������������������vR?Ph{����sWMOQI>40.-.4Eh~}xyz{|z||tjbSJUamz�����������������������������������������������������������������������¿�����������������������������������������������������������������������������������������{�~wqtz}ywv{|z{}~}{}}}}~��~zzz}~��}~������}}���~~��~z~�~{}|z~�~z{|~{vx{ywuuvvwuwxvssssssrrnnrsqporqmmrplg^MMLPPILQURVVRTSQRTTSXdkiggjjhhmmhilokgjmkjkmoifnolfjlkjkookjmlhbYQF><9;>>BDFDFGICA=<=>AGLWaiklqstttsrrqoononnq�}z�{rosslkjiklmlnsvw}���~z����������������������������������Ğ������gi���vtynkk_YE,��׶������~|vrrsqx}~zytrjjtV>><988761.3\nv~���Y<3/,& )?_����{����zsjhgeedb_ZTRHe����������������ǵ�KA==><::<:9;;=ERVY\YY]]``dfhkprv|���������������u'&# ##! +Ht����������������������������ʻ���q���������������������������������Ŀ���ɽ��ustwvtuurk\^ixvx~|����⸧��|����������������_   "4LGMWV^{�W%(%%))%#(-3:;89>90 2HE71Q����}yuuurmjbA,1-" '@��yvstsronllmljihhheggddcbbcdc``gjljjjkkiggfddaa``^^]\[ZWTQF5#24,)(&.:;5.*,.+#1Qn��zNYXTi���������������R,4G[WRU[W^de_G@Pouojf_J*%(056.-Hds���{����ˊSKGIOXUJHGDEF[p��������¿�������ije`[VVVO@Ne���������Ѿ��������ɿ���k5<70566.'!"$!
 "'8[loquupmjhffedfkngWUSSUUW^doz{mXQN=DMVZM@,%YvrkbQ8.@KWjRCQb�����������������������W&Tw�vO+"$#!.FSPNA2.;Tp�������{������������������fJ@ALi|����kQKMQIEB=60.5Hl�yuwxx{z{ysj`PLX`l|������������������������������������������������������������������������������������������������������������������������������������������������������������������{}yttyzzvx{zzz|}}~~~}||}{zz��|w|�~}~{y}��~��|z||||~�|}�||{~|wu{{xtuwxyuwxvtuusrtuspjnsqnlpqllmnkg\SJMOMJMOPTUTUUTSSRTOQbjgdeikggkmlgjljffkkjkmnmimommmommmrrnlnnlkjgaRIC=;:9>AABDGADB@=;:<ADJO[diprvutsppqponmlls����|zqlppnnmjlllnoptvz���{}���������������������������������˹������ok����{xtpwq`U7B��Ƨ��������|tqqrdpz}{xsohjjK@>;:7433+$"('/<WcdWE?:42,%"'@f����~�����}hhgfeda^ZQKOq����������������ð}D><==;:;<9:;<>FSVXZZW\]^aceilnqx}������������ɹb!%$""%%$"*:Zv������������������������������Ӧs�����������н�������������������������ò�~cbgnmfdc_]JRahjosuy���߿���~����������������^!!/j�������v/#%#$),%#*.6B>;8:8($/AE?6S���wzzzxutqhA*-*  'E��wussrsrpmlmljgjkifffdcabcbacbcgkllkkijhfhdddcc``^][\[ZXTPG3$25.+)(0;/+-*+/*$3Og��sh\XTm��¿�����������K.8]sjnmkhnegT>?WqtqjibJ13>DRJ2,Lhs}���}����ɇUKIQi�vh`UJEHXp�����������������jjd^ZTVTLDPh����������û������������e2:82576.(!$%! 
 !&:^nmquwrmihgeedfmpfXRTUXWX`fozykYSK6>Ued`T4'[sriaR60>KT\acbh�����������������������S)RuyfG'#&$ !.JVNL?0.;To������}w|���������������sTB<;@Oj|����mOKIJIOVPC1/9Oq}ytvxyyz{ysh]PNXbn|������������������������������������������������������������������������������������������������������������������������������������������������������������������{wwvxywvvyzwwz~~{yz}|zy{zww}}zxy�|y}~}|~~~{|�~}�~~}}}{}~}|{~|z}}|xsx|xttywutuytpmvtonssppopomklonjnojaWPNQQLIMOPPVTQTUUSSTQUaegeegmkginljgkkhfhkllmolkkqolmpojmptokmomjkllga[PF<98;<=@@BADDC>=<<<?DHMZeosutsrpppnnnllox��wrtxqlmmpqokjlnonoqt|���|~���������������������������������Ȭ�����qk������tp{�sfT-^�ܸ�����������}{pit{{zvslfoaCC?<950/-%$)2DLE>82-,*Fl���~x�����qcfgfeba_WPLY����������������̾�j@???<;;:98==<>ITWVYYZZ]]_`dilosw|������������ȯN$###%).+()4GX^^Zddowyz{��������������������١u�����������˧���������������þ�������������������{sYR\fdkheo���ྲ��}����������������]!" 7��odUI=0$ !$'+"$*,1401482"#5TQE5V����������{uc@).+!
 %M��ststrrrqnmmnkkikggeeecbccbaabdijklkjiiiifdddab____Z]\YYSOH0$36.-+*9_kqf/+/+"6Tp���uiWk��������������xG+9RZYYQKIHJH::B[qsqmlgN16GTTA-+Mhs}��z����ƃUJJ\����z\IHJ\p�����������������dia^YWVUMGNh����������ù�������Ⱦ���c2=624881) %'# "&=\onruvqljhfdddflleYTVUVU[elu~�{jVI?59C[^d`7,awsohP3.AO^t|thi�����������������������O,TpueH'$'$ /IRNN>11>Tiy����wqs}��������������]B?<;<Rk|����kMMJKN_iY9+/9Qs|yuwwyyxzxrgZNNTao|������������������������������������������������������������������������������������������������������������������������������������������������������������������|utyyvvvvyvtw|}yxwz|zwy|{|}~{z|�}||||{{~zy�~y}�{}��{zx{|yuz|zz{|{wstzwrruyvvuyxsrtvqlqxqoqqqnknsrmnoh_SJIKPKGMNQSSSRQTUWURU[cefgfimieilmikmljgiiijlnmjjlmkjknljlrpklmnkklmmkkk[QJC=:669;>?AAA>>=::;=BGTcpsrssrrqomllllp{�~oqwumnroqqomjknoonpt~���~����������������������������������ƣ����ua�������ko�vgH(~�֨�������������zxz|{xtqhhrQ@FD=70,-) "9O^cYCBD@5Tv���|������fffgdc`]\TOMe����������������Ȼ�O<@@>;=:88;<::?LVXVWYZZ[[^`dgjnrw��������������ɣB#%&)4IM=0.6GSX\X\UUZ^_bhf������������������֒v����������ұ����������������Ĺ�����������ż���������iZenptstx���า��|����������������V!",?%!$)("$)+**+283,#$;TQB1W����������{u_8'5/ #'S��xvssrppomnkhjlhjgdeeedcdcca]_chmkmmnljkigedb`__^_^Z]ZXVQOD/$56/.)3v���e/,.+!9Tq������fo���º���������vG(47553/054445:@XqvppolK'5OD;7.1Qlu���z�����|UHIa����zWMKIZp��������¾������|cia^YXUVMFOi���������ͺ��������Ǿ���]4=62484+& $'$ #(;^mnqwtokjhgda`dkkdXUTTUS]jt|��ziPC>:84@O]J(2a{vsmQ00@M]wrb^l����������������������}L1ZqyjA'#&$
#/KQPP>/1>Wht|���ysoox~�������������dD@<:9So~����cQQQYjfK6.*-5Qq}xswxyyyzxoe[MLUap{������������������������������������������������������������������������������������������������������������������������������������������������������������������~{vxwuuuxxwvx{zwx{}{xxzz{|~|{{|~yy|~zxz|~zyz|}yz~�{y{�~zyy{{yz|wx{~}xtxzytuwxutvx{uovwrmowrlottplmrqooqi[RKKPNMNNOOQUUSQUTSPNOZdfgdegklfgjjhgknifdhgfholljjnjimnlllosmlnnljiomlkqma]WQF;52579;=>?>=;9;89=CQgqsqsqqqommlmlkq��ykxyuopmkppkkiikmoopt����~���������������������������������������wJ~������t_l}�ujC5�������������������~~{xrqbjiFTgRB7,(+&$=c~�ymopj_av����������ggefec`][SLIq���������ٿ�����ĳ�?:>>=;<:99::9;@OWVVXXXZX[^bdhknrw}�������������Ǖ5)*.>VcS@329IVYb{���v]VZ]e�Ķ���������������̆v��������������������������������������ź�þ���������`Wiorwx|����ߴ���{����������������N! !! #''"%)+*09:1,)%&@LO=/\���������zrlX57_I2*',014\��qvvrsoomlmjhijifgeedddddcb_`_dkonmmlkjkiffc`___][\[YVUSRLC,$66.+*;���~I+./) 8Yv������ax���Ž���������{D,//-,,,.02368:@\syonnfD,@?+47/2Vmv��}|����xPFJf����eOPMJZp����������������|dgb_ZYWRKGQl���������ʻ��������Ǿ���U8<4354,#"%(!  !'=]omsusoljggebbelndWTSSUW]my���{kPFGFF@99A5 1fwvrcG.1AKQOJU^n����������������������|G/Yuvc<$#&%"2ITTL<,1>Yiqty}|zqeaepuy�����������_>>:7;Tn���|]PTYdR8-+**.5Tv|wuvvwxyxvmdZNMXcq}�������������������������������������������������������������������������������������û�����������������������������������������������������������������������������~uqstywtvwyzyxx{yxv{yvv{~|vuz{zwxzyww|~{wxzyxz|~|y|�|{||zzz{zvy|}xuw{{vrvywpqvxuoowuokrskipvqlkoqomlcYOMJMNLNONOQRVTRSVRQLU[`cggacgjgfijkhjkkhhijjjkomlnpmklnnkkpsnjlokkmonkilqkfhg]RG=864568:;;:99<:98AQjrtrrrspommmljmu��fj|yriffjmlkjhijjmmoy�������������������������������������������}\i�����uO2Bewz}m8J�Ȭ����������������}|xslbmeh�rb^Q?3.'"
&Eh��������js��������thgfgfda\ZSLQ|���������Ͽ����˿�i<=><;;:99999:>BMSVWWXY[[[`aagkotx}�������������Ą,*)09=9/&'->P\g����Ʒ�]Z`p�������������������uq��������������������������������������§������������RSaagmv�����ܯ���~����������������K!  %&& %).4AB4***$ $9HI>4]����~{|xvqlfUDf�xPIEHL>5c��tuursnnmnnlkjijfgfccecbcdb`_`glommlkliiicgfc``]XSUSSQRSRL?*&73+,)0^���_+./* 9Yt�����aYx��������������w>,.//..,1469:;EJauyrpmeG6;89?>25Sow���}����pNGNv����dTPOM[r����������������zgid_ZWWSMFQp���������ʽ��������Ž���U5;545/'#!&(""!"(B_onsutqkhhgecchqn`UTSTUXanz���zhSOV\ZRFD@6"6hxteZJ-4EKMKKX[q����������������������w@.YjbV<##&#$4IVRJ9,2AYmsvyyxrh\diikk~����������X6845>Rt���yZTWWG2240)+/;Vt{wvuuvxyyvmcYMLWcp}�����������������������������������������������������������������������������������������������������������������������������������������������������¿��������������xopttrrsvvtstxwuyzzwsv{}{v{yxvz}{wwx|{yy{yvw|}|xy�}zz~xvwyywtx}zrquzwqqvuqqottrotvtooqoimtumkoqpmjeSKJNNJHLKLORSSRRUUTNPX`ccefdcfijjjlkjklnkjklijmnnlknokknplilnokinmiknqolmpogknja\VL@:75654799:;;<:;?Xmssrrrqnmlklllnz�p]ktrkhkkjiikjhijilkr~�����������������������������������zhw������zadh^Q@<Vr{��l/c������������������|zvrj`m[ox��zvr^I@-(,!,Hr��������Vr��}������lfhhigd`\XOKa����������������Ǹ�R;<<;;::977898<CQSWWVVY[[]_`bdioty|������������ʽc"&&()&#!!$0DU^������׮Y[bz����}l�����������ʲhs����������î�������������������������Ȼ������������qL]ejns{�����֪��}����������������G 2* "%%&)0;C4&$'& 'ESO;0e��|xvuywtookZI��|YOJKG91l��vturqnoooomjlkjhhgfeddeca_^^`gknmjkjijhheeb`\XLNKOOOOPNNKB'(84++*D����;+/-(  >[y����aee|��������������wC<6968:7>=>BHORUczyusuiK3>JPOC44Yov��������kJDWz����`USOI_u�������������ÿ�uehb]WTSRLCRo����������¼������������M/:632.%! 
'*% #+BbooqsuqjhifdbdhmjaXSTTVXap|���zgRQ^kqaUSN;"4kxsYbF(4EMU[a`fv����������������������v=2Xa_R6#!$##3JTQL:+2C[oy|}|yqecjfcjo����������Q7534=Ps~���u]TSKEHJGG;/1=Xvzvuvuvyz{ukdXKNWbo}��������������������������������������������������������������������������������������Ⱦ�������������������������������������������������������������þ��������������{rqssrtttttuuwwx{|wvtz|zyy{yuw|~xuvxyttwzvuv{{vty|{ywx{wutxxuvy{wrsvyvtvxtqtqsqtvxuqrspmmqqnilqqmh`VMKKMLIKLNOOQSSTOTTRTafcefhgdfiiikmmhhkmkfjplgjnlkknonhknqmknmojnnkknonklnkjimkhgkc[LB96422445688;<@Eeqpstspomljjlmpt|}hdikkhimnniikgfjjjlmr������������������������������������hgw������}���|��������U/���������vw�����������|uncchJSatu��~xtgifT.&$ 8]���������A}��~������cgqrlgda[SKQn���������̺�����¯{=<<:;<<::8;:89<ETUWXUWWXZZ]`bdiorx~������������ʳK#$%$%#! "&4KYd��߮���Y\^�ó����������ཋ����h�����������ʩ��������������ʿ�ȿ������ö������������yckw|}������ө��|~����������������E/:Ch0 !%$$'094$""%#!+BVL;0j��~}{}���~{u_?fjH;850*(.t��wuutqnnnlpklkjfggfefddcbb^^^`fkllljkhhheef_[VSOQNNOPNLJKH@$*84+)(;g��yu_6.'
!@^����w��cz���Ⱦ���������zWJGHEEFGEBBEGMGO`y{vusl?+>IMD?.0Vpx��������jKG\z���jSRSNMau�������������½�refb^YUUSJCUr����������º��������ü��I29224/'#"'(" #)Gdmntwuokhgfdcchqn_UQTTVYcr}���xbNL^qyqa\O6 ?o|mhdC*4GPcyojx����������������������n75Y`_Q5#!$#$4KQMI7*2G\q~��{sidaagos����������|E4124?Vu����sZSPR]]c[Q=01=ZtxvstvwyxzulcVKOWco}���������������������������������������������������������������������������������������ż�����������������������������������������������������������������������������yttrtwvuuvxwvtx|{usuyyutvwyvvzyvttwvrttvuvz|vtuzzwxwxwtuwywwy}zttwvvuvxspsuromuzvrrrrniknmkgkpnh\PHIMLJKLONPRQQTRUSPNRdnhcdghdbfiigkkkhhkmiejmljknkjnsoiknnmmqpkknpninqokimojillkgijhd]UG;52/1123569<CQgqpprrpmjjjjnt�tegjihggnnnjkjggjjikov����~���������ϯ���������������������cox�����������������~��BC��������xrz��sky������~vl`d]@?:CNW_itt���pTQA(En��������_O��|������xcz���|rcXOIU|����������Ž���˾�h?==::<;:;989:9;KTUWUSTVWXZ[^`dinrv}������������̣:$&#!"! !%6IXn��b{��U^d�Ǭ����������ڟw{���n�����������Ě��������������Ƚ������������������������dp|���������Ѱ��y����������������C5\c(H��-  #'.0)$"$%""-@RM91m����������u\-26,&&$""$.r��pttsqqomlmkkkiggfeefba`a_\\\_dfhikjkihhgf_VSUTOPNNLMKKJHF< )95+**@����T3/'"BX�������W|���ż���������|ZAD>996;76846>=Caz{wtqjD,EA<?5(5Ypv{}{v����ޮcKGUu�iSMOPJTdt����������������phca]XUUSK@Qq���������ʻ���������Ļ��G7724;91,&&*# )Ohlnvvtpjfeccacgli]UQSTWYcr|���xbMGZnwqh^L9 Crzroc@+7DNhwkYXx����������������������p46X`ZP3 "# 
#5JSKB4*2Gap����}qdedckru����������w<1/04A\u����oTKIRTSVMC5.2A^wxutvvvvw{ulcTKNWbp}����������������������������������������������������������������������������������������Ż����������������������������������������������������������¿�����������������|vrovxrrvvvtrvzwsruvustxxwtuyxstutstuvwvz}{uty{xwxxwpswwvtv{{vtvvsprvuonsvpknttsnnpqlklmmloppg]NJJKKGJMONMNQSSUSQPNSYnkhffheaeiifhlkjhlmmjkolijpmhltskgnolkmrmijkpmloqljjlmmlmlhjliiihaUG>831/11359<C\jpsqpqnkikkjp���zdeijjeecdjnolihfhiklqz��������������Ϝ��������������������ybov�����������������}�n.]�������tq}��e>Pu���~�{siagR:?GLGMQMJZmtwsps5!.Pz��������Eh��z������nu������`RMI]����������������ȸ�N?=;;9989865699@MVVVTTUVVXVY]`chorv{������������ɐ,&(%$"!  (;M]���m���Sal�ɩ����������ܹ�����s����������ֵ��������������������������������������Ķ�gv~���������Ѵ��{~���������������}>"P�jH:!Q��Y& $(+-*&%%%$$.E]P23q����������~tX/13,($  $/v��srrspoppmlkjihjhebcc^[]]\ZUW\`acfikjjhjfaQSSUNILIJGJKIHFC7!*65..-O����U*-0(#E^��yk��kT|���Ž���������v@120/.-.,/11389DdzzywrkD0<;;@6.=\rssuwx����ݬgPESltf^PKOMM\gw����������������rjec_XUUTMDSt���������ø���������»��@4725CJE<%&)" !.Lfkntvsplggedcdgjh^VQRTTXcp{���u^IFSfqrj\Q@' Kv�}td=*8EMZaOO^{����������������������k19W\VG1##"8JPHA3(3Far}���vorpoorv����������k620/4@_u����nNE==@MMDF9-1@`xxuuwvvwxyri`PGPZdq~�����������������������������������������������������������������������������������������Ź�����������������������������������������������������������������������������ypnqsrqssqptvwsrttrqtvxvrsyyurvtoruvuut{ztrvyuprwwtnrwvrrvxwtstsrpsvtqttroostsqpqokhpmllmonfXLFGJIGJLJMPQORQTTRLMXbfkjjhghegihgglojhknkikmlgflnkglqnemrokmpoikoqnlmnmlllkkkmlgekjghije]VN>850/0239=Ffloqpnmljjiijm���[^gjjhgdaejmpokihggjnu����|���������̵���������������������harw�����������������xqG*w����{ptpr��{NH^w����zqfddH;DO]q{lcXTG@FPa="#5_��������nI�������d}�����v[QJIm���������þ�����±@<;<=9878968::;BPVTUTVWVYYXX[^dgnqv}�������������~%'(#$!
!,BR`��������vYaj���������������ƺ���u����������Ӧ�����������������ƾ�������������������ȱ}ew���������ӹ��x����������������}9 $Qz`UPh��[.! !&,.2.'&&%#!,KY?23t�������~}�qU465+%"#3���tqqmlkkgjjjif`ffc_YRSTZY[TOTY\_^cfjjkidbSQVSMEGKKLKNOMHF>1,;60.+4X��u?'/0)!$G]��k���^]|��������������z3../.-,,-/1269BIey}yxvmB4EEED7(;]opmjnv����۪eRGRklfYGGPHP`i|�������������ľ�kgfc_XUVSMGXw���������ǽ��������Ž��~B4614@IG4!$)##/Mfmotvrokgfebbdilh]TRSTTWbq|��w`IHL_enkd_K%#Ky�yb;(8FQ`qqgj{����������������������i.8TVPD0#$ #9NOIA2)2Ias~����|wutrqsu����������]./.-5Dat����hKC;B\]MSJ3.3Aawxtqstuwxxrh]OHO[eu�������������������������������������������������������������������������������������������ö����������������������������������������������������������������������������zttrqptvsrtwusrsurprvwsorvwtrrtrnovvsqv|uqsutrrrtvtuwvrrwyxutvxrpqurrsvspnqupmpstlkoqmlmpndVMIJIJIJKMLNPRRSQPMJPZfkljfhkga`jhfdllkhjonhinnjekmljlqnmlpmlmqpjflomkmmnkkmkjjlnjehkjigghfhh[LC;42//18>QjmnpoojggjghijmjZ]hijhfgfadjloonnjiilpx����~���������¢��}~�����������������`fwz��}v~������������uX-F�����vqtx���x[]dix}}�~xq^f^D>K_hffu�p[6+,&'Io��������Q^��{������yl������dXPIP{���������������ʽ�iAB=A??>?><<<@=>ITVVUUUWXY[ZZ\`bglqv�������������i%*&$#  $9JT^��㻔���ea`x�����������������پzx�������������������������������������½�����������Ȱvly����������Ϯ��y����������������}9-+3\���E (-02-('&'%#0JK>25z������yw|�pU045(!!"/��|pkllha]^abdeZXac_UJUYYZUQQORZ^[ZZ^eihidYPTTNEFKOONPWTPIE=/1A6..+E���^:,/0&%Kc�����{n\��������������v6132/,.02:<7AIP\p}xzzm@,;:>A9-Abnpnpuz����ڦhVNTjkgSBHJL^ck|����������������ngeb`ZXVSLH[z���������Ż������������{:7733:DA+$'!!
%1Lioosurnkgecabdilh[RPTTTXcq}��~w\HFGTcnmg_>$!N}�|sZ6'8GTs}rag����������������������f);URMC-#$%;RQNA1+5Mar�����uoljnns����������X./-/6E`t����dH@=QV@DI5,,4Caywrqrtvwwwpg\LIOWct�������������������������������������������������������������������������������������������ʿ����������������������������������������������������������������������������vrpooqsrnnsvqptusnnstrnmtusorwvrruwtqvzwvtxssptvuusvwsquwursywtoqrrnnsupnmmpljorqlmnnloih`SNIKJIGHKLMNOQQQONLNQ\dfjnhfjkdabifgiljhhmnkjmqmjkokikonkknogijnliipoggjnmjkkkhilmjjlkjijhiikjcZRH>71/17?Yjmmpnljgggfhiigc`cfghhhggcdgnoqqliilms}����������������{~������������������_ox�����}|}��������qF$]������������wcaacmxz~�tm\eTA@a��yV:<IQD</%/V}��������Ds��v������iq������[TMH]����������������Ʒ�aKIIHGIJIHGIJKKLRWXXYYZZW[\_\^`afjov{�����������̴L#*)&#!!2>MWa��՝�εlZ`Z�ȳ���������������شhu������������������������������ľ�������������¶���ĩljz����������ɪ��t����������������w4!+*w��P8@@"!(,,*()+,+($4DJA4:y������}���|rP,33(! 
$3��m_chnnb^^YW\\XY[[N?ISUVSKGJOSSUUTTZbgfgXLUURIGKMOOOTWURLD?..:2-0-@`jke?.22''Ld�����pT���������������vA9?ABCENU[ablpqu}�|{}|qA.45>H9*Eeqtvyyy~���١gZQTag_ICIR\adjz����������������ljgb_YWWQJI]~������������������Ƚ���z87725;D@+%'" "%2Okloturmjgdacbcflh\TQSTUXdt}��~s^DAAJ\hheT8!Rx}shT1+:GN`YEFa�����������������������a#=TPK=*$$(:SRO>0*4M_nz~���~m_]aehr����������R/0--6Hcv���fB9?K4,1/)*-4Gf{wrqstvwwunf]JHOWdr��������������������������������������������������������������������������������������������ɿ���������������������������������������������������������������������������tppopsqonrutoqwvpqstroquutrtvuoozyrquzurtwvpnpuuspsvsmotusqtxupnrpnnsvtplmmnlksplllmlklhZRKHIKIGIFKOPPOPQPMKIVdggilkiknhafffdgjifgjmjillihjkkhgklkhlmlgkkmmfnolhlmllmmljjllkknmijllkhiigc`aVLC<512?^imoonliedcbfghfd``eghiifjljlprrrjikkqy����~�����������xow~~���������������sct|������������������o;)y������������w`[]_cpuzzqc^cK=@k���n`]L?@G+ 6]��������lK��~������h~�����nXWOJ]����������������³�[SOQPPRSSTTWSUYYXZYZ[\^^\^`_[__`dins|�����������ͩ>,)*-& !#-8KXp��Ӽ��`S[_b�ͥ���������������ԧ[j��������������������������ȿ�պ�������������ư����Ģfm|���������羧��t����������������x33U�@OUkf" &+'&%'(-/."5L[E3:y����������ynI,./&$! %9��YWdloqkd_RKSXXYWRD;JY\VPICERVUTSPQX`fffXUWUQLSSLKOOVYWUMH=.0:1-.+Lw��q-051()Lf������{W����������������susuyyx{~���������}|}�s;#,;PH((Ggsv���}����՝d]XNQXSJO[_a`ce{�������������º�kkfa]XVVQHF[���������Ͽ��������Ⱦ���t13503<JA*!&)# $5UkmpuurnihecccchnfZTPQQSXft~��rXE?=FU]d]G0#Wttf\N2,<IPV]Xce�����������������������[ =QPJ>( !"
(AZZT>/)3O_llt{��~k][[[bq����������W.0--5Fcw���~c<5:70----+.5Ilyupprsuvwtlc[IGOXdq~��������������������������������������������������������������������������������������������Ⱦ��������������������������������������������������������������������������tpqrrqooqvurruxsopsrnosusoqutqnnvpopsuqntxtmouwrqswvonoqstvywrqpqolpztnnmooinqsjilpolj`ZNJFEIGEKMLSQPNQONMLLVfhgmkihikheeeefdfjhfgjhfimlgfikhgjnlllnmjkmmlllplhmpoiknlifjkjiknjdglljhffeefjcYQK>36Bflmlmlkfca^_bbddb\^abdgggippnnqtrnkmpv|����}���������������~o|��������������diw~������������������rEA�������������w]WV\glix~wo```C<=KUgnr��q�|`@"$Ah��������Qa��{~�����a������f\SMQf���������������ʽ�uSSQRRTVUVWXXVY\[ZZZZ\\]^_`abcddadfkox�����������͘10XoJ*+0"!*=NXw����Į�i``f�͘�ʨ������������Кe���������������������������ƽ˿��������������̿����bt~���������Ѭ���u����������������s1$_��m!3YzO" ')%$&*.83+"1CPE2?|����������wkD(0<)$#$#&.A��OO_cffedZLGLPYa`WHDS^SQOJFGUXVYSPRU^ei`UTSRPT_VOPPQY[UQKG?+8>2..+D���b6NO/&  -Pj������zV����¶���������������������������}y{|j+;NJ8)+Fgv���}���Ҙc`\VVVVW[`^_`^d��������������÷�lkfa\VWTOGF_���������ν��������Ⱦ���o06535=IB) ('!$4UinpstqnjhfedbbjlaXTQRRRVgv���~pSD@AKV]ZPB2$Xppd`M2.<LXgupkh�����������������������W"">PPI='"!
'BQPJ:,*6M^ddhruvlXRPPOWm����������U..-,5Jhx���z^43888<;?;1.6KlvsnosrsuvrmbXGHQYeq���������������������������������������������������������������������������������������������ŷ������������������������������������������������������������������������rmoqoonotsrqrurlpqsomrqqpostqnoqsoqstqou|upoqxtqswuqlorqsuxwqkmqpmkquokopmjgrpjhmnnml^VOJHIGGIJLLNQPOLPNLNOWiheijjeeiifbdcbffhiggkkfhpqlihiigjnmjoqonhjljjjlmhcmoihlnlighjkjikjfgihhihgedhicf_YJA@Lhmlkjgfeeb\]`^ab`]^]^aeghopqqqrrqroosy~����~����������������x���������������]mw{������������������y3^�������������~j]_nzxz}|ui[dU<:655-:K`k{�jB -Sv��������O��}������nn������[XOMXv���������þ����ǹ�fRTRVTVUUTUYXX\\[\[Z[[\_aacbddcfddgiov����������Ʉ<t��=I��E,DSWg�����ͺn\_k�Ȕ��u��������������k����������ջ���������������ǿȼ��������������Ų���ǽ�cv���������ѿ�v����������������m.&y�^2M,")(**-7;:.$ 8SM5'<����������znbF)D�Y6/3;OBJ��WXT]fhfcXNKGNZabUKO\YOUTLGKRVUSRQUT_hiZUSOQRX[VOPNQURMKFB:,7=3/03S���frO.' !-Rg������t_����������������������������������yzxs[15KE@;,*Fgw����{����͒b`]XYY^a]Y]aace��������������³�kibb\XWTPEDb���������ͷ��������Ƚ���i17437?MA' '&! &7Ymnquuqnkhfdd_biicZRQQRQZjw���|nTCACKXZUN=+)]smgbJ--@NZ{|gRe�����������������������Q!%BSPL>%""(CLJH:-*8M\^X`ghfUIGIJLSk����������>-,,,5Mkx���yS63<JPWQSE2,5LnypoorttuvrjaVHIQYct�����������������������������������������������������������������������������������������������¶������������������������������������������������������������������������pmornmopqqpqsurnprpptuqnoturqpssmmssqnoxxonorspmpsqmlqrpqtvrmknqnllrromnkkmlplikpoli_RLFGHGGFGLNNMONNOPKKP_iebeghfehjjgfggfhjhhfkmignomjikjfglojilnmkhkljglljjlomdinmjijjillkkijjhghhigchedcheaVQR_knkkhgefdb^_\\Z]acc^]`achknrsturrwstux���������������i������������������u_omqz�ry������������\/|��������������}���~}ysf[bI<621,($"+)@S_P'
#;c��������kS��~~������kv�����z\WLIb����������ǿ����ï�UPTVVUVWXX[ZXZ]^\[][YW[]bccegggghiijnu}�����������a=��4[�ڐY5HS^a��ӻ��gX]w�����i{�����ʸ�����{w����������ѧ������������������±������������Ͼ����ɶ�iy�������|����̵�x����������������n(+c[ ("%(*1:E?6+%!+-+'$?�������}wsk_B7��jTSOUX@G��NRepnnmjbVOBS]`_YRXa\[[YSLMRVUQVXUUaifXPLJOOXZTJKIHIJGGC?9* 8<2//9����XaU7/& !0Rm���pn�q^����½���������������������������xwukV&+9CD>1/Jiw����x����ˏdaZWVX__\Z\`beh�������������½��iicb]TVSOFFa���������̺��������Ż���g2:435AI;%!&&&7UkkorrqnieeecbejqfYRRQPQZkw~��{oTFB?BLOJ;-/_vkd_F*2GWb`YKSk����������������������~K%ESNG8" !+BOKG:*+7M\]\\ZZTIB@CGMXm���������j2.,++5Rky��tE06Pd]YSL5*,4Oktplorttuuqh`RGGO[ft�������������������������������������������������������������������������������������������������������������������������������������������������������������������������rppqoqtrrqqtsrusrpmqtsompsrropqrlnrqokourjmpproopsolosrqsutqnorrmloqoopnkkonnjlomkfUIHEDFEEHIKPMOMNONMJKQ[jjeghiifkklljjhdfiihfilhghlljhkliehllhilnlhgjkllnojknokimojhikiiknmifligekighgfdfgihdbabfjmlifeee`\\^\YWZaddb`a^_ejlrvvursuuwxz����z}�������Ϯ���PF���~~������������jZH3@L`hkoorw|���������M8����������������������}yp^[\?630*(&#! & &Cm�����ý�Vk��{st����xn������m]UHWu���������������ȿ�sDEPVXXXWYZ\^^]\__Z_^\ZW[aefghjiikkilou}���������˻IY�u8�ߜ�[-BLT[k�������]\^�̻���������ü�}m���sy����������ɜ��������������������������������˺����ȵ{l{���������Ҵ��{����������������m'$>2#.$(+5>=.#$$ !!4,! +[���yxzuwyxtiB:���pbOQO>I��UkxsppnllaTPZ_`\ZUZ[\[TXXPPSW\_^YUZdheXPMMQTUSGFHGHGJJID@7)$9;21/2Xw�zl4+0.'!
0Tn������gg���������������������������������ztqmeO'3FE?:/2Qfw����u����Ɋe`[XX\__^_`_`eh�����������������ehea]YWRNFIg���������;��������ù���e1865:BM=!!'#';[llquurmhecebbflo_TQPQQR[kx~��|mSC@@ABA70$3^skbYE-8Sjnliabl����������������������zH(EULF6"# *?NJA7*+<Q``]YYTRK=:>GOZq���������b0+)+-7Vkz���qC.3B>31/,))+5Qptnmqstrstph^RGHMZet������������������������������������������������������������������������������������������������ɽ�����������������������������������������������������������������������tsrnoqrrpostrpprqmknpnklnpppmoqolnrpmnpsrlorqmortqnprrppuvsqorplknoomnqnhinmlkmkhfYNICDDEGIJMMNMKKKNNJJS^hjgfggjiikjhfehdaddggfjkhgkmjiimlkilmljkkkjjljimnkfjomijlmlffhjgilnkiifegikjhiiedgfffdgjghjklhdeefb[Z[\ZYY\`bcca^\bfjouvuustvxx{���~y~�����������qKW|�~��������������[Y?&&$%+<M\dmouw������x8N�������������||�������}xkV[O:5/)$"!-Ru�����ú�Q}��zslquupht������bXTOW����������������ŷ�W,9GUZ[[\]]^___b`aaaa][Y^`fihkmnpoqoopqw���������˯=b����t�G1?OZh������bY`f�Ш������������sm���n���������������������������������������������̯����Ʈmm|������|���ƫ��{����������������h& !3?.%)1@UqU'+-30)#"""'9piT_su���~{������xkD<���p\ILF3O��urqmlmllke_X_a\[ZXYZQLT\YSSVZfg`WWakg_XTKIQSNKGGEHIHGGHD?6(#<>101=z���n.+1-% 3Sl������kl����������¿���������������������xsqniQ483011.3Riy���~x����Ȃ_`[VW\^]\[^``ee����������������}dfca\XTQLFLe���������ͼ��������·���^/763:EC-!& (<`nlsvuqlgcccacgllfVQONPSYlx|�{hQ=B@;971* 2`phaYD(7b{t|nbVm����������������������wE(EULF6"
*BKFC7(/>Taa_[ZZUH>=DGN[q���������\.,*+1;Um|���i=,/51+)($%',7Sntpoqqqsttoe[PEIOYdu�������������������������������������������������������������������������������������������������Ⱥ����������������������������������������������������������������������tqoonpppompspnqrpnjlopnmnsqnoqnimprontsqopsrpostpmksunmpsrooopmikmomklnkgjllllkmaOJFBFHFFJHKLKMJLMKLKGM^fjgdbiiheimjhgliedfgghjjegkpmiiolijlmkijkjhkllkjlojfmokkklljikjkjnnmjhiefhjhdhjhbdhc`adhifgjkjgfgfc][\\Z[]\]^_`cda_aejoquuuvvuwy|���~|������������il�����������������~ZV/$%"!#&')29M[k}�����g(m��������������}~~����vqfTZF93/$ ""7]}�������r]��woy�~vyvvpn���j\XSOd����������ź����±�2",=RV\^_^^abbbcccaccba]Y^ejlnpqrrsttuty~��������͙1_����n�{".@PZx��Ы�l]V\]d�מ����������׾�����k����������â~��������������Ǽ��Ⱦ������������ǧ����Ģlr}�������|��漜��z����������������j%!!Vt�������L')-+'%" ""E������m�����������{j<;zt^H8,,('M��wrmgfhgijke^`b_YWZUQKQ^_X\UW_eh^T]ilg_VTLERTODFHIFBFFIGF@6&#;:.21;����W*+.,$ 4Sr������wt���������������������������������~||wtP7464*-+/Snz���|�����~ca]YT]a\YZ]aack����������������}efb^[WSRNDKj���������ʻ�������������\0731442&!&"! &BclltvtrlhffedcgoqdUNNMOS]mv|��ygMBE=9032.(8esj^U?,;XcqrYT]p����������������������v?)GSIA3 "-AHIG4'+<Tc`^^[YUMHFIJP\u���������U.,,.,;Wn|���e<-13/))(%'(,;Tmrmloonqttof[PDGOWdt��������������������������������������������������������������������������������������������������ǽ���������������������������������������������������������������������qponqqroppvtpqsropqopponnmkqpomkqrsnpuqjjqplhkqojjornmoqrpnpqmgdhlokknlegmmkjjf\KDDCCECFGGIKKJMMKKGGNU_cegecijghinkghhkgeeffdflh`elkfeikhgfkjhhlljiiliklnljjmkkklkhjkkiilnjhhifdfhhfeghedefebcfjghgiifdeec`YXZYY[]]]\[\`ddcbchossrrtvtx|������������������}������������������m]O!       
!!$%,7Mfq~o=0����������������������xp\ZfD72;7/)%$&Af��������_o��~vt������{unmjca^VQUq���������������ɾ�r+!".@LXabcacddeffhgffgf`YZainptuxywxyzxz~��������˄!A���WM_(-ASXf�Ũ~���_Y_m�Д���������������ə]{����������������Ǽ��������ĺ�͡�������������Ǻ���˿�ft}�������z��㷧��t����������������a$!!/��������c/*,,'''#"#%!T�����}Q�����������zc8(<6-! #"#V��ohdea`adjlhdcd]XWTSPU^`_VYUVcieYWgmkg]XRFGRSMDHLJEEKOMKG?5#%892313g���f),-*#8Xt������nu��������������þ��������������������uK39>0*-%.Rlz���{����xba]YY^`^[Y\abbj���������½�����z`gc_ZXUTNGKl���������˺�������������S.841441,#"&  '?ckmsvspkgfeeecekfYSONNPS^mw�ynb`gZXZXZXO=;drgZV<*8SfumN]ds����������������������o:*HLGB4" 0CHIF4',@Sa\Z[ZYWQQNMOU_y���������N,++-.?Zq~���e3,02+('&%%(-;WnomkoqqtqpmbYMBDNXfu���������������������������������������������������������������������������������������������������ƺ��������������������������������������������������������������}�����rqoprqnnrtpnpqspjmonlooomlnupljmommnqtrnmpnllpomkrrplmqrppqpojjlokllllifinnmgcZLFCBDDEDGFIIKKJNMGFGM]aekkffijhfhlmhfkcaehieccgjgfkiefikmjhhjhijmkkjjkijmkjknkijhjjhiiihhjjhghgebelghijhcdhgdeffecjhhgcdedb`\XXXYZ[^_^\[]_ccceilqrsstwwy}������������������������������������dZH  !""$*0;7 C���������������������~zvm[h^<?KXJFA80'#*Mn��������`����yx�������~zpd_\TOW��������޾�����Ż�a.$&>OYafihhggihhkhgiieb^^emrvyyz~|~~~����������n%f���}yrP.@Q_�����Ψ_Z^|�ɐ���������������ʏ\����������´���������������øм�������������������ʾ�_z�������s��߷���u����������������\" "3��XI;+&1 -/-*+,+)*,'Q�����mj�����������zf;)43'(]��bfecbZW^ehfdaaZUQPRY[]^VUYV[ad_\dllki^XQJOUPHDJKJIIMROMH>5$';8230L����2#+-)" 7Wx�����ox������������������������������������~tL$&'"$&'2Wn|���|{����vca^ZZ^a][[]_\al����������������|ijd`XWVTPEJo���������ʵ�������������R683.4:?:#"% !!"*Eajntvsolheccbba`\VSNMMNU_nx��~}����������Y!>ivgaT6(:Sd|odcbu����������������������l7.IKKE2!!
 2BOK?2(.@U^WQSTTTRTQOOTcy���������E)+*+1;Zr~��f1--*''&%&&)-:Vmolklpoorsn`WMDFNXfv����������������������������������������������������������������������������������������������������Ż������������������������������������������������������������������rmmppomosrmnqspmklqnmqonlorsmkjpommqromppomosqnjnsrkknpnlpqokhimjghlojihjjjheVE=CBDCEFGHHLJGJKHFHKU]bchjfbglfbekjhfgideehhdceiiemljehmkhijjihiljjjnlhgjjghlkjkijjghhjhikkiiigedfgighjhdbdeb_gieffjggebded^Z[XWXY[]^^_^]]^bfggiknnruvxyy}������������������������������������ZV7"   !!R�xs�����������������~zytgWiO:CXcSTRPH?:1-(!3Yu�������nt�����zwvz}���ujd`ZRPa����������ο����´�R6) )=U^gijkklkkllmmllmjdacjqx}����������������ϺTR�������Y"4IYb������ԫWXa�Ͼ����������������Ʌr����������Ӳ���������������¿Ƚ�������������������Ⱦ�cw�������|w����ķ�}����������������\!!*J)$&!0/-1996770* 2Qz���}y����������u_8(82&$^��\gcg^\ZaedidbaYOHCHQPTVUZVX]_c`cijkibYWRRWWODEFIJKKMPNOJ>4"+?:330>j��zI**-)#;\y�����|lx������������������������������������vS""!"*-,7\n|���{x����oba\YZ^`^Z_`_bfq����������������wfifb[ZYTOEQq���������ȸ�������������M07127DQ5!$ $  *Hennttpnkhfddba^\WTQLLLOVcnw~������ľ������S ?mpg^S7&9YnwOSTWx����������������������g2.JOMB.!!1FMJ?/).BUZMHJLKPPNMJLScz��������|K1,*+1?^v���}d3+*(%%%%(*+-=^oomikonnqri^VJGJQZgv�����������������������������������������������������������������������������������������������������ķ������������������������������������������������������������������qkmonlkprmmorplmnmnqsommjqrollrtnlqrqllqqnjlpmkmqqojjlmklnpokhlkfhlmlkijihgaREABCBBDFHFHFEFGFFFDHZgf^bgifdijcfkniffhigjlkecfhifgjgefkki`gnjhhkkiimljikihflnjjkkkjiljhgkkihikgbeihgfiigbdjdbfkgdfhigeb`bed^ZZZYXZ^^^]\``^^aeghikllquuwy{������������������������������������wYM("#%-61#Twfjodlz������������~|zyq_[eC9GYR:@EJNPPKD<+$?f�������b�����������|xytuldc^WSSq����������ɼ���ɻ�oG@4(!
)?Tajnqqqqopqqrqqrmjf^doy����������������ϯ=6r{{���j%-@L\m�ۿ���Ԥ[Y_�ܨ���������������ܿ�����������̥��������������˾��ƺ�������������Ŀ���ƶ}j{�������zz�����������������������W  .8!*;a?%60.:C:563+#"Q�����mp�����������z_1+62&'a��`fgibbbc`hid_bZOH?BAEMUXWU_^agglnnmcYVXSX_ZOBHIKMLJLKKMJB2 ,><22/@���wD,+,) !>[z������f�������������������������������������~Q#-68>;09[p}���x~����pb_[YZ^_[WZZ^^ct����������������rkjdbZXVUPDUs���������ǵ��������º���J47237FG1# !!,Kjmpuurpljheca`][XTPNKKLRbmu|~||����������z<Cnpf\O2':LbU?EKX{����������������������h02LQMC-!!2IMH@/'0CUVIBABEFEFGFIQa{���������]5,,-2Hfx���|\5))()(&''))->[lljjkmooqojaXJFIOYhv������������������������������������������������������������������������������������������������������Ŷ�����������������������������������������������������������}�����tnrnlnotomorrmkopkjmooklnrqkloqnknrqojlmnminolmoponnikjkppnkjlnidilkjkjjifZJDB@DBCEFGEDGDCFFEEEJWcedcfhhgkkgbilkgeklfhijd`bfdeeehhehljgglnfijmhgnlkijhghimlgdijjhfijccmkigijebhigfhhfdcdebbfebdgfdedb`dddb_]\ZY]_][[[[^```dfijkkpuvwux~�������������������������������������eXG###7k���],'i}vxwodXUbfs|}������~yxxlZ^]<9KW>#"#-<AMUM2,Hj�������~i���������������|uogaZUV���������������ŷ�[GE<0&",BVgqqqtvuutuvuvvqqlh`it~����������������Ҟ0J��}uosm,5@N]z������ޅU\g�כ�̝������������ײx����������ٿ����������������������������������ǫ���Ưzn}�������wz��ݻ��~}����������������M "'Llefr~���A.4/,1.-,,)$" h����SO`�����������xX/.7/" 'm��`hhedcd^bhe^TZVNG<DBHW]WKY\\`binjkhQPWYY_b^TELORSNOKJILKB5 .E<42/Fmg`vS,+-'$@^~������c|��������������������������������������N0?DDC728[n~���s~���ܮma_YVZ_`[ZYZ\^bt����������������njjd_[XUTNDWu���������Ƕ�������˿����B55/07DH/!$ #/Kemmswsoligdba_^[XRNLJJLP_ksyzxu}��������h>!Hoog[N/(8EJKIDQ\}����������������������c-5PRMB-"
!4HKH>.'2FXUE;:>?=<=@DISc����������`3,++2Efv��}Z1*+,,**+,+'/A^lklkljmopoi^XIEHKYiw�������������������������������������������������������������������������������������������������������ĵ������������~�������������������������������������������������uppomnpolnoplintqmkmnmjlprmimppklotpopponmnsqnoopommlikmqmiikmifhjlkhiigd[HACCCBADEEFFDDCCBDDFMZggdeigfdgihccjjieeihfgiheceffghjjhjkljijkheikjfhlihhjidehnhfhlkggkjhilkhhiifcgifdeffddcdcceecafecba`abdba_^__\[^__][[\\``acgkmnptvuvvy}������~������������������������������`V? $D�ν��yQ!H�~s~xrsobgXaqlwtwx{wtvdWcM39QS6#!'7ML%4Wu�������ox������������������zqlad�������������������RJFB:.#$0Ibrvyz{{{{zzyzzyyvqiiq{����������������ύ3��������*/CO_��������nU_s�׎���o�������ƾ����m����������ױ���������������Ƽ��þ����¾������ж����Ǫpp~�������u����Ȼ��{����������������O! 7��������c*(/*(('(*.*)%!j��mB//b�����������v[1.60"!-w�{fgiidf`^jhaQOUNPIFIQ][VUY][Z\gjgfdWOUWX[acaXPLPV\UPJHFIFB1/E9330:?Qu�J)*,' %E`�������jx����Ŀ�������������������������������|G-.015//7^o����r����۪k^_ZY[`^[YZ\\]fy����������������oiib`\XWSICVw���������ŵ�������ɾ����;05127@F2#$"-Nhlntvrnlifdcb^\YVRMJJKNS^lrvvumblrywn^TF6!"Mknf^L,(8CKPS[fm�����������������������\'8TVMB(!"8ILF<-'4EXT@::=>;;=>BJTe����������T/,--4Hhw���W0+-24238=1)/B`lklkmmooprh^SIEHO_ky��������������������������������������������������������������������������������������������������������ĵ��������~}�������������������������������������������������vqkllmnnmommmptqmlmoolinqlijprnkoqqnpqnmlmmmlnpoljmmifilomhilkgdgijjhfdbVH<?DE@ACEDABECABBCFHM[fkb`bffdehgddgihffhjhjjjhfgffghjiggkiggikhdfiiiiiljghjgdhkjefijefkidglmihihc`dghacffcbbccbceddehdab_`_bb`]XYY\^][]^]][Z\_abbdhjosuuuvwy~������������������������������������y\G*/{�;-\�nU("7|��jhtww����ucpNPfORX`ehiVQZ>36QO6"!8S?">ez�������c���������������������xrv���������Կ����ʾ�nRQJHB8+!
&9Wp~����}�~~~~~|ytnlu�����������������u"44>j���W/FS]|�����ܝWY[}�Љ���~������ϧ�����f����������̣���������������û�Ʀ�������������Ȱ����¡ou~�������s����Ŵ�{|����������������O >���tbM<C3"-*&$'0730.%+g���upku����������~sW-.4.!#,t�qddhieabjjbTIJMYYRQTWRPS[^\XUXb`]`ZQPRUZ`fddYPMQW^YPMLHFCA.0>401/7S|��qP,-'$Ca�������Z{���»��������������������������������{M!167<7.=_s����s����٥h]\YX[^^ZWWY\`bu����������������mihba\XWQJCWv���������ŷ�������ʿ���~>5711:ED2!$!,Qflnttpmigedb`^\ZVQNKIKNT^ksvwsiPBOUMBD:4/"#Qsqi_I+*8GNUi}oq�����������������������Y#:RSJ<%$8IID:-'3I[T=9<<?==??DLTh���������{5*)*-6Jgz���|W+-379<?XD,).Danihillnqole^UEDGO]l{������������������������������������������������������������������������������������������������������������������}|}|~~����������������������������������������������soknomopqpmlnrrnlmnlklnroghlnmllooklonlkjkmlilpmhinmhfinnlmkkjgjliihgc^NC=>BA@CBEBBCCCFB@CEFKYdgd_adfeeghfggikhikmgijjhgfffdeihheegiefhjffikiiijjigjifhkmgeiihhijfbhjkhfgeccffdbceecbbcbcfdcfjjfca_`abb`ZUUWX^b``__^\Z]__bdehknquvvxz{����������y��������������������������h@*!<_GEbT$%G������tkinulRsmq|k[N97<D@7KJ51:LK='(IM/)Jm�������~l����������������������y{���������������Ǻ�eTUNKGA6*!",Iq���zwz~��������~zsin����������������Z05_�ϫX"2GV[n��Ǫ��iV]a��Ǉ����������Ώk{���`�����������������������������˵��������������ʵ������hv������q���׵��||����������������I 7Z2&",+*/8@611-!*u�����}~�����������uT,.70'!
#-v�jbhhjgggi^UHDFPa`][WSPV\^]UV[^``\ZZVSUT_ffg]UNOSY]YSNKFCB>./;2221R�����F+,&%Ea������rU~��������������������������������������K1EFLG7-@[q����w����٥h`][[^_]XVWX[_dy����������������migc`ZYVOHCWv���������ĳ��������½��z235118DE.!# &3Njnouurnifcd`^_^ZVRNIGIKR_jquwqhTHPRKD<733+&Spme[D)*;GO^�nbk�����������������������T!:NKF:""9GHC7*(3I_U>9;=>>?ABEJUh���������Y-**+.5Mjz���{Q+,3;AMbN-(+2Hdmjhlklmopld[RBBGOYmz����������������������������������������������������������������������������������������������������������õ�������~{|}~}{~����������������������������������������{�����nmlpmjkprpjiloollkkiglmlhfkmnloqnkknqmlkmlklmnlhmnoiikllljlmhfjlihhfc\K?==?@==EDDA@BCBB@BFIQ\aiib^dhgfgkiffhgfdjji^eigdeegdebgfecjjgfgjihjiihkkjghijfbikgcghgfeghfeghggggdbddddghdbbddcbdecahhfca]_abba_[VUWY]acda`_]\]b`bhggjmrstvxy|����|����r���������������������������S,% F_Q(0cU"(Z��������~tk[Vfgutoeb]]E(&0L@2,1FLH5&%;[G!2Vu�������ry��������������������v����������������°�[ZUSPMG>4)! &7hy]DGT_jmx||������|tr|����������������7%Mj}��jN)(7JVd�����Ź�\Yb�װ�����������������vU�����������������������������ƴ��������������ƪ���Ƚ�iz�������k���ӯ��{z����������������I);R+ Ae&",*-8>4'&'!
1x�����x�����������rP+191(!"2��h^hggffe\ONHEG`gc__\]``]XSX[^_`]Z[\[ZXZdffcWNMNOZ_XRMKGCB=,-;4226i���i3'*%'Ga������nU���������������������������������������H3CKHB:(@as����x����Ҟd^^\^`]ZXXYYZ[ez����������������iigc_ZXTNBDX{���������õ������������v7640/8F@'!#%7Tgkotspnjgcca_^\XUQKGGGJS_jptupeYShkeU=57?<&WrmcXA(+=HOha?Eg�����������������������Q =OKC8 $<IJF7)&4H]S>9:;=<?CEEGTj���������G,+**,6Sny���vN--18O_L5+'*4Kgmjjkjkmoqlb[QBAHR^kz�����������������������������������������������������������������������������������������������������������ó���|{}}{y{z|~������������������������������������x�����okgnlkkoplhilmpnkjjlnpnjhlnnoorqljnrqkjlnkilqnjionkhimnlkjliccjkgfde[H?=?@@==AFDBB@@BBCDFIU_bfhdcbghdehigdeifcfjjhfiifeiifdehgeeghiggkjhinjdfjkgbhhgfhjhd`ghfimggfiggefgb_cdcbcgfaddcb``cdbcedca^]`bba_[YXYYY\^bcaa^]^^a`cghjjlnptvy{~����|~������������������������������D)# FlfO3$W�=$3w������������whaWXXYXcX2&6Q>1+*<JOF?935F^T,$;^z�������l����������������������x������������������r]\ZXXRMG?6+$$*?:&#$*3;IUispou{|�yq|���������������m+U�������8->LUo�������{WZi�ڠ������������ĺ�¯su����������Ǫ��������������˿�ɽ�������������������ȷ�i{������{j���ά��z����������������F4}�,B�V)2,071(#""!'9a����z}����������}oQ-.4/$ 7��gaihfbcbUMKFDYdc]\`^``\WX[]]^_`_^`a^[YYbcc_XROIR]_[QOOGCA=.2<4431Mn���{K+(#'If������n\���������������������������������������A0RG@A2(Fcq����{����ԛd_[]]\\\[WW[\_d�����������������ihfc^[WTOED[}���������ž�������ƺ���p654019L;%"!
$5Vjlousomkhedba^[YUQJHIGJQ_hnsrmd_j���iA57@5.]pjaY>(-<HT_D@Pl����������|�����������}N =MG@6$>MOG7&%6JZO:469::>A?BHTl���������K-+*+.9Rnz���rC-/7I\L50-*+7PiliiijkloojbYLBCIR^m|������������������������������������������������������������������������������������������������������������ö���}{{|{yyzzz|{|������������������������������������z����qmkmmmopojimnmlmlmmmppljimmmmppmjknonhhljjilpkijnlgijlmkijiegijigdbVE=:<BB<=BDBCA@BBBDEEIR^`ehfdbdedbehhffffffhihfhigffhgedffdcdghffgjighjhehhgffiifhjigdghdehjbdfigbdfda_bbcbefdeeebabcccddddb`]^``a`^\YZY\[\\]]_a^___``efhjjjlnrx{~�����}~������������������������������q5)#  $Y��|g^vJ&@����������������xjbSGG=-,)>N:.(#(>LLOKPXVO0*Gj�������}n����������������������xz���������������Ʒ�dc`^YZUTMH?4+&+5-$"!#$!$.0:HYitqr{���������������fTz������u'-?RZ^����NVYs�ٓ���������������έn�����������Ĥ��������������ɼ����������������Ѻ����Ʊ�n|�������yf���ʨ��z}���������������C!G�E6a*7:-.+%#!"!!&?�����^p����������|nJ(.5-";��mhgghgf`WOGN[gf^RW^^^[VX[YXZ][__cjh]WSU^aa_VQNNWZ\UNOKGB@<*;@3230e����l5*("+Je������q`����ĺ���������������������������������A9A9A>1-Deq����y����іf_\ZXXXZTQRYY_g���������¾������cgec`ZUPNFF^�������������������Ź���m052-18?:$"#  &5Xjmouspnkgeecb^[YTPLHFGJP]hnrqldbo���oH4170-[vm`T9&,=HOLEDOn���������}z�����������|I#>LDB6&AOPH7&%4NaL712524;=>BHVm���������O,+((-<Tp{���pB/0=Z>-/-+(+8Phlhghijmong_VJBCGP_p~�������������������������������������������������������������������������������������������������������������õ���z{|zvwwyxxxxy|�����������������������������������{�����spnlinpplijpollonkjkoljijnnnlmlkilmkikkjjloqnkjnmkjlmligjjhfijhdb`RB69:@A==BCCDBAAACBB>J\bfgjfffeeddeffghhhefjiieglheehfedeecbcfhhhggiffhjhhkgdfhgegiigdghfeffechjhcafeabdccedfeeffcbbceaacbbc_]Z[^]`a^]\][\\]^ZZ]``__`_acfhikllmqv{����~~�������������������������������b0-!%K��~t^8$"'V�������������������|to`VJ@HG5(!
1P?6AEOL8)1Uu�������s~���������������������v����������ȿ�������hecba_YZVPJ?3+++'#  %.=GRht���pqzvv~|�����xW>W[`lxw9,AVZj����Ȼ�UY`}��~���������������͡l����������ڽ���������������ž����������������ʹ����ï}n}�������xc���Ȫ�������������������}? ;I0_l4!<8,+&$""! 1�����O:x����������~oG$2;+! =��sjiffeb^ZRXdehd[PT[ZVV[[VUVZ[Y\cdbaXSPT\a`\URORTUUMJKKHD?5$=@1233j���p6%+*&##+Mf������ng����ʿ��������������������������������|<=CALB0-Jgv����v����͐a_[XUWZXTTXYZ_f�����������������hkfda[VSLD@a����������ǽ�������Ĺ���k16523:E< !!(:Ylnpuspmjgdeb`^\WSOLIGHKS_iorqkbXj���tL.0911`qgbT5&-?IQRKCUu���������}|�����������sC%@JIF3&BSPH6'):LYH1*+-./68;@GWq���������[0))*.=Tp{���n@05A<,*,-+(*7Rijeejjkmnmg^XKABHPaq~������������������������������������������������������������������������Ŀ������������������������������������ŷ���|{xzxvywwxxy{����������������������������������y�����tqojionnkhlnnjlmmjkmonjjopnnoojkjnlkkqmjknplkjjmkiikmighligghid_XN?96<?<<:?DEDDBA@A>;<K`fdejjebdfdaefeadhgfbfihffikhagjhfhgfecghffijhfgihcfjhceigfcgfdefigcdefehmjddeiebefaadddbddca^acd`acbb`]\[Y]^^_``]\][Z[]YYZ\]_a```cfihjmnmrx|�������������������������������������M7.(P^RHtg(%3r��������������������~zzsZFQF1$#G���gRO7 "";`|�������l�����������������������w���������������ɾ�wkheifeb`^[UM@3,,$! 
6Rb��hv�|yt{y���vNTw}}�N/FV]������П\Y]���~���������������Ŕq����������Ӯ������������������ʵ�������������ʯ����ĥpo}�����~tj���ƪ��}����������������{:7wRb�]4"9;,,((&##$% 8���Z5@]�����������|oF%8C0&#!#(,H��shmhfb`aaegjfgc[ONUWWWXWSRVZVT]\][XVRNSXYWVTRQQPNKEGKMHD=3"<=331/`���`/*++*(&0Me������m_����ʿ��������������������������������v899:;40,Mdv����y����ˈ__ZYVY\XUUYY\_j��������������´�bifc^XVTOEEe����������Ÿ�������Ĺ���e062/2>H: ""(;_klsvqonjged`^][WTNJHGHKRajpsqlaYj|�oT;8<*4epf_Q5#-BLSZ[Ody���������}�����������t=(CKKH1)ANNF3&*;QUG1'''(*046<FWt���������e6*)*.<Wn|���h=.176<8=92)+7Tghdfjkklnkc\THA?HR`o���������������������������������������������������������������������������������������������������������������÷���zyzyxxvuwwyy{z}��������������������������������|�����slkkimllkllmmmnnllpqpljnnnkmrqlkkligkojghnnidfjlkgfkhddhkjhfed`WI?:9<>=9:>BBCEBCA>;8;Meeccdheabffb^cfdefhefghhfehjjfdiifhjhddgjddgjgceijhcghfbhhgghhdcegfeddceehhebcfgccee_^bdccceca`acbaeccc_]\\[]^_bb`\\\\\[\[Z[[\_db`bdfjigjloty~�������������������������������������v?6* &e~���I (@���������������������zyrUPT?/&N�ǹ����P`5'Gl�������zq����������������������{z���������������ķ�nacilkjjikfc\O>2-# )>`��|�����������s]Hr������H7JUm��˗��ݚQ[\�ѷ������������������r����������̢��������������̼�ʵ��������������ɱ����ğmmxv����|qi���ǧ��w����������������y7!L^I�7I"<;,/13/,*-,$6���q~�|�����������{oC:hv[E75789O��sljghcgkkkhfhgbVLPUYZSRPSZ[YSU]\\XWXTQROOQRSTQOMKJGLPLHD;0 8:243;k��e61...,%3Oh������hb����ɽ��������������������������������q:2,32201Oky����y����ǃ`_YYVZ\XVTXZ^`k��������������Ĵ�ejea\XUSNFGe���������ν��������¹���c073/1<C3!
'>`lkqtpnnigfc`^][WUPJGEGJPbjpsskddqzqqj[GF:$8cmd\M2$/=GWhgjr{���������������������r=)FPJ@-(>KI@0#*:RUE/&%$$'+/29ERs���������d4)(+0?\r~���e7/2>PQTOG4*,:Wkjfehiihjhc]SD@BJTbq�������������������������������������������������������������������������Ľ�������������������������������������Ż���}wwuwvsxtwvwvy}�������������������������������z�����riikjmlllnkklqnlloomjjiljfflnijlkffgmlgghlihegkkiiiigdghgfffe^QE?<;>=<:9>BA@ACCA=97<Q`ddcdghc`egebeghdhihedkieefjhggjhfeif`bfhidehidcdhhefhfdehdegjeacdfd`ae^efeeccccaaded``dfbcde``baaadca`a]ZZ\\]_`ba`]]]\\Z[ZZ[WW]abcdfghhjjkqu{�������������������������������������c49$#(d���b!!-S���������������������zxnIRQ5<~����������*/Pq�������g�����������������������u�������������������UEDOX[bimoqokaPA- #IRA:/"#3m����Ƿ��������pP-06[x��{-$:PVu�ޥW��܆O[a�֯�ĕr�����������Ƹ�n���������˻���������������ʻ�Ƭ��������������ĩ���ǿ�_KXnu|tx�|ok���ī��x����������������v2! AO��NP3!86.4<>9322-!F������u�����������}oKj��vZJFG<4T��rmf\^^_fijjdheaWOMRTMLLPY]\VOS]`_XZZWTMKOQQQOMLJKQOMLIFB9/ =9/64?MRh�c;51/1-%5Tm�����{hj����ȿ��������������������������������i.-468:32Qkx���zv����ā^^XVUXYVPPTZ\_n��������������°�fjda\XVRMGMd���������̼��������º���_+52..5:& $A_llpurmljfeda^\[WSNJHEFJTbkqtsnhmzuNPaOLM:%;inaZM/%1>EMchgdw���������������������o7*HMHC/$8?>1$ ):RYB.&$$#$'*.3?Qu���������N.+*+0?_r���d3.6EMNKE:,*.;Yifbdghgghgb[RB>CGQbp~�����������������������������������������������������������������������������������������������������������������Ż���uvuvuouuvtvwy{|����������������������������������rijnjhimnlggmpnklmllhjkkjimojjkokfinliglnkgiiiihilkeegihfeec[MD>;=?<:<;<@B@ABC>987ARddfdehihbdhdbcffdcijgefkgefggifhhgehhd`dhfeegjgehifcafhdbggdcihebcffdcdfeiggededebcedcdfbaaabb^bba^bdaba^ZZZZ[[^ab_\]]\Z[ZZX[][[_bdefgefjkllqw|�������u������������������������������V88 '_����t$
$.h���������������������~zucHUG@�����yx�wjv5"9^y�������e����������������������v���������������Ƚ�k(#%'*-<J\nwxvmfX3 2g�����n2#<z������������kE($#En{�� &=QZ��ܭ|���nU[k�ᡛ��q�������������pi��������������������������Ǻ�Ź��������������ĭ���Ƽ�NMdmnmiy�{jn���°��w����������������w3">q�Q10!;7.5;?3-,,'J�����|l�����������wiQu��dRE?;0+W��njf^YWU[afheea]WOIUWVVRV[^\QKUeec``_aVKQQQPONMKHOPLNLID@;0$A817:BLRws?54313.&5Qq�����~_p��������������������������������������_-8A??<13Phy���u{����{^\WRTVVWSSYY[`s����������������~cjfa\YUQLBIg�������������������ľ���]161/.0, (Bbnmrvsnljgdca_][XRLJHFGKVcksvsolv�q;6?BLG<*<gl]YH.&2AHLVZY`z���������~������������f3/KMGA,%*'  *@RWC/'$ !!$&-4AZx��������r8.+*+1?bt����a1,188=<;80*.=[ifeegihiig`XOB=AGPan~���������������������������������������������������������������������������Ľ�������������������������������������ǽ���}uttrstrtvwwyy{}����������������������������{�����pijkhgklmhfjmokljlkklmkghlnmejnlgjknifekkhcgjidehie]cgih`abXD?>9;?<87;=>BA@@;845;G\bjgabefdcchg``efbcehieehfggghihhihhfhgcbec_adhgbehgddggbdficeeigccghedfhgehffddec`cccba]aa``baa`bab_ccaac^XZ[[Z[^b_[\]\]\\\^\\\\]_acfggghiknosw~�������v�����������������������������}C6-#E|y��e%4u��������wv�����������}xqWJY>P�et�κ��y4(Eh�������{o����������������������|z���������������Ĺ�A#&;Yb_^mxk4#I���ƿ��,&G��������������]9%0-F{��P*CS]�������^WXw�א������������sv���Xi��������������Ż����������Ż��ſ������������ϼ����ƶ�J`polb_}�ygq���®��s����������������r.
#".,$>8-11-%!#"!=n�|��zv����������ve@7JK@6,# $*_��gia]WSMU^bcda^[VJTa\\WT\]\THIWolfgiiaTPURQQMJLIJMIHNKHEB=3#;94<CIJIL@456676/&4Ws������cl����˿��������������������������������L,430.01;Vhx���uy����u][WRSVWVVYXXZ`n����������������zekga\XUSMBNh���������̼���������ü��U071/./) *Cdolqtplligeda`\YUPMJGEFMWcksuqmp~�uI,./?KH&<ekaWE+#5CKSUT]a}����������������������d/1JPH?+.AQTB.'"""#'+2A\z��������X*++,/5Fdv���~[/+1<IJMN@4*-@Zgd``fhhihg`VLA=AJS`o������������������������������������������������������������������������������������������������������������������������rsrrrrqstuvux{}���������������������������z����~rmnlhinnmhinnminllijlkhegmjhgloidhjifehkhfehgeffhhdefjjfa_Q>>=<;<:789=@A@@>9445=L^beha[affb`gjd`ehgcggfffeiedgggddggddfifacdacbdeecdgfdehhbeihefffcbcfeacedcchfbceda^dcca_^``bcc`aa`aded_^`b[VXY[\^aa][\]^^]^\]\\[\]^`aehghjkknpty~�������y�����������������������������m2," 5f�w��U'I���������]]jjrvy~xz��|xlIML8X{x�՘���z,.Qq�������n�����������������������{������������������~-#5MT/.RgT$ '\����k�|*S��������l���÷vX5Cdhp���A0HS[~�����؈NX\������������Ȼ�yy���c����������տ���������������������������������̼����űzPampg_h~�yav��޾���v����������������q*"'@8,-*&$! ! 
/Z�����u����������~uf9%293)"(f��iieZVUS[`defba]QS_a_[VY^[VSPK`mmmljj_TSWTOOLMROMJKNQMGEB:/%689IR[OC?:78:;<82'8Zx�����wiu�������������������������������������y="#$-0/:Vlx���sw����oYYVSTVUXZYW[[`l����������������wfke`[ZWSNDPk���������ʽ���������Ļ��S-52../*"" +Hdmnsurnmigecb`[XTQNJFEDJWahprpmo���^<86ESG$BknbYE*%5CNWai[^�����������������������c+.GNG=),CTUA.'""#"#&*3?\|��������X0-/02=Tg{����Y0,:LUPPK:+)-@Ygb`_dfdhhe]UK@=CKTbp���������������������������������������������������������������������������������������������������������������������������spqqqqpssrswzz|�������������������������y����rpojejnlhgknjfjmmkgjkifhijhfkmjghjjfegkhfeiifghifdehhje`\N=?>9:==85:>>?>?<5145CT_cefc^`cfdceha_bgfcekg`begeddeedcgigcfjiebfcabdfeeffffdgfdadf^_dcb`cddcacdceee``ddb`bbbabab__bca__a__aba\]a`XWV[[\_ba[XXY[[[]\ZZ\[\]\\_dijkklmnqu{~������}~�����������������������������V/'D�����&".X�����������yeXV]W\w��}{wb=JB.]��Ѝd���~ 7\x�������j����������������������}u���������������Ǽ�\ "?XN!-?.!-r�´IC�d/d�������hz���ȩqN@|������;5NSYd������PTX]���}����������ж�����i����������չ��������������˿��˺�������������˴����ëw`bikebs�}v`|��ۼ���r����������������i' " !/=+E6,,*&"!! Q�´��k^����������~uc9%28/% ,h��plhc\VWY]cfgb[NU`\Z^XRUWSVZTVbjhhiieTSRSSQOLT\SKKMUUNDA?8.&58AZjq_G?:9=@>=7.%<Uv���cnsg}�����¾������������������������������s1+25;91>Sjv���xx����lYXVSRTVYWWXYY]j����������������xdiea[\TSLEOm���������ʺ���������ú��N/52./00- !
,Geinuuqomigeca]YVSQMHEDEIV_fmnjjl���eMHHO[E#GjmaWC)'6DP`neOb����������������������a)1ILC:'/CTT>,(&%# "(3?Z��������a;46;@P_n}���}T.(021//-+()0B\fc_bfffifcYRJ<>BJTcr������������������������������������������������������������������������������½��������������������������������������º���vopqpprpqruwww{~������������������������w�����tolihjjhehlkjiooljjmkggkjhejpnigmmgdejieeflg`cfgcejlhdcZJ?>=<:;<:66;>><=;616<G_i`dfca`cdd_`dd^`ded^bgbbfghggeeddgihfhhgecfecabcdcdgfccedbbadcdefddfgddcecaddedacda^`ec`_ca]\abb_]_`^_a`a`a`ZUXWYZ]cca[WXXZZ[\\[[[Z\^_^_dhhkmlmnqv|�����������z��������������������������F) "':llsp"0h��������������}qgo���zxsS?M;-2i��U����L%Ch�������|t���������������������yz���������������õ�A "5;JSG&+"2���zm��G<o������pn����ɝjGAbb_w��W!7MZcVRT_`_UVZXe�ֳ���������������ɼ|y����������Ϫ��������������˺�ɯ��������������Ȱ������fYScgiku}|s\���۽���t����������������e$ %DFCLQ]q�b.B9...)#"#!!]���XSPo����������t\5&380$ +l��suqlbXY[ZW_b[KMc\XY`QNUTQ[\Y\bea_]\TOQPROKJM\]SOVYXSKCA>9*'88Eb||oUAACCA>>7,'=Yz��z^o�e������������h����|q�������`k����������o))?C?>6,>Xl{���uy����gYYVTXYXUSRTW[_l����������������ufjdc]ZURJCSo���������Ŷ���������ù��J151039C," ,Kjkottqolhfdc_^ZWTQLGDCFKS_fkkifh��z\VTJTX=Okk`UC))7ELcdRLa����������������������{Z&5KLB8$0DXW=,''%""(2A]���������v^ST\dift���{M*((&%(('&&(1E]fa^ceeghgaXRJ?>CJTaq�����������������������������������������������������ſ�������������������������ÿ��������������������������������������ü����rpqqnnorstsuxz|����������������������y�����rlijkjhgjlkhjmpoiinkhgjnjgfkmighkhcefiedfgge`hecdikhb^UC8>?;:<;86459=<:736:CN^bY]fd^[^`a``cdcbegdddghbgifddgfdcfjhfdjhdbdecb`cbabefcceedcdcddfggeeihccddc_ccaa`bb__`aa_`a^Z^b`_]^___dbab`a\YUUWYX`ed^[XWX[YY[Z[\ZX]_^_abdfkolkpty|����������k��������������������������p5%;\�����M%Ay����������������qy���{vmLMM5-%%c�����~""
-Pq�������q����������������������w������������������|-$@sqlfa@ # >�������.%Et�����~S]����Æb9EWA #$(&=]zxh\TSWXVUWZh�٠���������������Ҷq�����������ǘ��������������ʸ�����������������Ǭ���ǿ�VIcxromt||rW���ܺ���v����������������g$"P{������v,0B7-1/+%!!!#Z��sfv|����������}w]2)581'".o��usrm^Z[YOQZ\ULZ`STYZQNMOZ]]\_c_VRVUUURROCIJT^^USUYXTJDB?8*'45FYy�|lRJMGA?>5*(>[{������_�����������i\{zpmfdx�~anwkSk����������]$)32.//,@]l}���vy���ܰcVZXUYXXSOPSX[du����������������ofiea[XUPJCSl���������Ĺ���������ø��D37/-4<:%! !.Kkjouuqnjgfdc^]YVSPLGDBCIR^ejhfbl~�lZSHCKA2Qqj]U?');ELY`]Zf����������������������{U7IKE9! 1FVT:)&#"!"(2E_����������~vx}}mgw����xK*&&&%'(&%'(0E_e`^bccdfe`WSG=>CLV_r����������������������������������������������������¾���������������������������¼�������������������������������������½����qlpoonpqrrrrux{{��������������������v�����oiijkgffhihfikjielmhfilmgfhkjeegje`ihhfhggdfgidbejhd_U?<=>;9;=:53277:9647>DP`cb\`dea^`aabefdccfeceiiediedbaffcbefccfjgcaddc_aee^dgebeifedddacedcccecabcba\ca_b^b_bcbbbab```ca^]^_`_ad_^_`_[VTTVY\adea\XXXXWY\ZZ\ZY]_^_addhjopnrty}����}zzuv��������������������������^*! Nw�����g"-Z�������������vqlWX~��~zufEQI/)$A�­���oU[6]{�������l����������������������u���������������Ǻ�g=2C����rO%*.+_o����M +P�����e,X�����zZ9MT;(&++Dk����rdZWTWVZu�ڑ���������������өp����������Ҽ���������������ɻ�µ��������������ĥ���Ż�O\o{{qar|yn^���ۻ���y����������������b (x�}uoO1(/A0*,+&!! !#d������x����������}vZ0&982&".m��rtvhW\c^VVXTN[ZWUZ`UMIJS\Z[\_dc]]\]YSPRLFMPXXYRRUXWPKID<7)+68EVi��~qh[PA?<4(&?Z������xa��������ÿ�jfrvd^]av�tTYidd|���������m9%#135<50BYm|��~v}���٪eXXUTVYXSRTTV[dv����������������nghda[YUPHEUt���������ĵ���������¸��=36.-4;8.1Pgipusomjhedc_^YTSPLFDBCJT_glligr��y]D7<B@7%Tog\T;&,:EQazdSg��������������������zR<NRK;"3HXT4'$! 
!&0Ea��������������}lkx����uC,'&&(+,'&%&0E_da\abdgfe`XSG<=BKYbr����������������������������������������������������������������������������������¼�������������������������������������������yqnooppqqqrssvw|�������������������{�����qhhjihedhhddhiggjmjfilllfgjhgehjhacigghfdcbdgfbaee_ZN=7;;:79;970/2556359?FUcic_begc_acbbfhe`_becbbfgbbfdccaddceegddijfdegd``addcefdafhdbdcbaeedcedb_bbbc_bdbbdcc_ceda_ca]^\b`]]`a^]]]]__^]ZUSSTY_cccbb^YXYXYZYYZY[^^_^_dfiilpsuxz}����|z}}|~~}������������������������M'")m������p%&5p��������ka~��j\S]|���~ysVBT<,%<bv������M$Bg�������wv����������������������{w���������������Ķ�]N,a����[@ %=/ 5UXb_XQ("2[������rew���ŸvU10>*!'J|Q1EZw������qUYYYy�ς���������������͚b���������ü����������������ķ�����������������¨���ź�QZefe\b{}wh_���ݴ���z����������������a!#@=apkN#+>/)&%#!!!$k�����a`����������|tW2*88-"!0u��roiY[cdaXONQRURMS\\SJGLVVSTX^hlca^\WOONJKOQRRQLNPSROOJA95%-7:ITVl���{hVC@<3'(AY�����^{����������ch|r[XX_nyaKR`ew���������gB.'-AABB3+A\oz��rr}���֤bWUQPVZYTTTWWYcw����������������lgid`[XUOHGUv��������ο�������������736015AC.!
!3Sikousnmkiebb_\YSQMIEAABKXdlppnmu��nE32;EQ8&Vmj`V:%-<GSgdKKk����������}����������wO=STI7#2JWO5(%"!'1Fc��������������}nkx����t?*%&',21%%$$/Hbb_Y^abefd]XQD<<BJVbq���������������������������������������������������������������������������������������������������������������������������������nmpnoqpqsrssvw|}������������������y�����rkkljiehihhhiiginkidjmjedhjccehgc\dffeggecadfea^`]WG;69<978::530.03226=?D[jlgb\aeb``ccabgd]_bcdb_dfgbdhfdfffdceeefiigdfehdacaabecb`efbcccccdfdddfca_cb`a^caa```_`aca^\`\Z^`a^^``a\]^]_``__[VTUUZ_ddcaba]Z[ZZYWWXY[]]_addgihkotwz|����|y{}}~~������������������������y8'&",:itn��h )?��������~����|~������~vjJFM/'#%E���x{w�M+Mr�������r�����������������������w��������������ʿ��UG#S���YQ<*8"#A_kpg^O-#6a������������ƧsQ+2PN[���:7LNTs������u]V[���w���������������ǍW���������������������������������������������ɶ����Ĵ�`ed^X`s}vee���ٷ���y����������������X <���~N)2>/(##" &s���^:8_����������}uX/+75(#4��qhYT[`ebYXWUSQMIT[\SNHLTJLRX^gkhdd]VRNMPRQSRNMJFJROORH>80!.9<KRVc���sf\FB=3('E^������y=d���»����pR\f[LOSYa]OGT`k�������yl\?62(-<DD=2,F_r}��xm����֟\USOMSXXTQRWWYd|����������������khgd_[YTOHG[w�������������������ȿ��y965//6ED*! #4Vjmputpnkhdbb_]ZRPNHC?BDM[hnqrpls�~\MLEFID))Zpog[:%.;EQXACWo����������}����������tH3GF>,#)5IUP5+'$"! 
#)6Je��������������~lk}����r=(&%'34)##"!/Lda^\_`acb_\WOA<?CISar����������������������������������������������������������������������������������������������������������������������������������wqloooprrrrrtwxz�����������������x�����okkhdfjmgeghgfgkljifijecfifaaggfbadefdgigbdefd``^RD558:9559840.../015<>FYdgec_]`b`]abaabebacbbbabgebcfgddedccedda`fgecdabaaccbcfebdedbbcbaaef`adcba_`a__ba_^`_`^bb`\^``]\`a_]_b_\``ZZ]`___XUTUX[`cc_]]^_`][YXWWYYY\]`befgjijnruz}����{z{}}|}}������������������������b2+!6XZ[��O")L���������~|�}�������{weCNF("!J�ַ����y'1Zz�������r����������������������|y���������������Ź�sW>8Udlgi\-') %+4@C2&>m������������ȖoGAm�����C%9ONZ{�u�����x\Z�״���������������ӿ�l��������������������������ȷ��Ǻ�������������Ŷ������w^mxWXjq{|s_f����ɽ��w����������������U 'z�@FXH+05,'$%" )t���V!0g����������{vU+,75'&7��{[SRX_bjd^[VX[XRSS[YURQQNJLTY\jrjfd]TOJNVWSTRMKHDJMLRLC=7.!/8=IPPh��pea^JB>4(&G`��lp��O6C]dv�tjbV;7KDBJNPJ=GR]YZvwyo_^XTEC??8'&AG@7//Gcx���~o����Ԝ^VSOOUXUSPRWXYf~����������������ehgda\WSNGEVz��������ʾ���������ǿ��x4/3/.6E;% %5Wlnqtrpnjhedb^]ZSPNIDADFN\gnrrmhjyibjrfN9+ ,\vwmW3#.=DIG@KYq���������|~����������qE&-,#',/8NYQ5/,,'#"$'/:Li��������������ymm}����j9#!"")(#!!!2Ka]Z]__bb`]ZUOB=AELVap~�����������������������������������������������������������������������������������������������������������������������������������zoonnoqqnppqsst{}���������������y�����rkgddjlibcehggimliffhiffjhcbelgfefdgeeigccfeca^^RA727::748:50-+-/136;BLcdche`[bbbb`aabbcecdhebacgdcbddd`bcca^bccccgffgfdbbcbddefcceb__ab`_`db_`dbaabc_^ab^^^``ccc`]\`b_]]^\[]^_]Z_^X[^]\[^WSRUX]acb]ZX[_``[WXYY[YY]__bfghkhjmrvz~����~|||}|}~}������������������������U3.,n���o %1e��������v`ots`r������~wqWCL3$!
E��~��ΪX"=e�������|w����������������������{y���������������´�`R61Ne{p\E# )Iu������������Ąe>@q���nK'$APZ��_i�����a^�ڦ���������������ҷw�����������ά��������������ȹ�Ƴ��������������Ų������jViiYnmo{{qZj����Ͼ��z����������������Q3�f$Efm+.3-(()$" 1z���N)6r���������xoP),50%'<��iPZ[bejlf``^`dc\VX^VSRPLFEOYVVfrnje^RNJO\YQRRLIGIKMNQE?=86).8<ITUm��]QYUHB=5()Fb��i��gL<A_bYXcdc^[R?:F@AGJJBETX\QLXWUQRVUQJJJJ@15;.(./0Ke|���}t����Җ]WSNOTZUQRSVU[e}����������������kigdc\WSNHJ[~��������˽���������ǿ��t533//5F>%"'5Xlnqtppnihgc_^[ZUPLGBADGQ[fouuoe_b`m|�xL)#*_neTB%"0?DJMJMWr���������}|}����������o@"0;619P[R:312.**,,1>Qo��������������ylo}����j6!!! ##!"5La]Z]]\^da_YSMB>BGLXdu��������������������������������������������������������������������������������������������������������������������������������������xpnopomnonqrtvz���������������w�����qjgehkiddfgkjkihjgffffhjibbdikbdfeadbffb\`ca\ZYN<52287558953.,,.028=BPfeehea`efd`ace_`baa`b_^]_bc``cdbddbaddfcccihgehhedbbd\`feb_dca[^`ba`ceabcc_^cea]_a]Y]^_\^__\[[]\[\_]]]^_^]__\[]][\^[USRUZ_cca]ZXZ\\`^YVY]\[\]]`deggihjlqx{����zz{{|||||�����������������������{M2)#>���xK!$?y����������{sfg�������{uoFB@+$!"O��yGt�D'Fl�������o�����������������������w{���������ʻ���˿��NO)/>?J@#/T������������ϹwZ2""%AfmnV)FQWt������w��Wi�ӕ���������������Яu�����������ŝ��������������Ǹ�ť��������������������£cVaah}hi~|nVp����̺�}{����������������P2lJ4b~O%,3-,/-%")$1s��Z(,;{����������ymO$(2/#$:��dXadgkmkjcafighbYSRQQRRKKLQQJPdmjheXROOZdYSQPJEGKKMOJ@><;=)08=LUVu��\GNLFB<3(-Ic�����nZ>FgkforphcbXPXSD>>;FR\`\[VR\^YZ_`\XTUQP@21301511Ldx���zv����ϋXVPMKOSUONQUUZ^}����������������ijieb\VRNGBZ��������������������ſ��o163.05C;!"'9Xjmpuromkgdb`]\XSNLIEAAEQ_lvyvp_OVan{�qQ-"EI<*"1AGNWWOWt���������zx����������k;
+#3GE<19QYP:665431015@Ur��������������xkp����f1"  "!! #5P^\YY^`_ba_XSLDAAHS_m}���������������������������������������������������������������������������������������������������������������������������������������{oonmonnnonqsw���������������w�����qgfkmifefhhhgihgedeeeeggd_chif^addacdeb^^ab^[VI;434895369701/--05:?CSdcfkhdabhfb_bcb^aca```__caffccccdgidacddaachfdegfbcccb`adbabeb__ac``bb_`bdb^^db^\c_XZ\]]\^_^\\_^\Z^^^^`a_]`aaZ[\\\_^WTRTX^aca^]\YZ[Z\^\[^`]]^__adeijhilouy{�����x{z||{|{}�����������������������oD6&>����u<"'R����������������������zua9D<)" @�ƣv]ye0Uu�������g����������������������}t���������������ȹ�iKG .D+$-!&,$!6^������������ͪuU,!@tn71MUW����֯bf�`R|�ҋ���������������Ϥs����������ջ���������������Ź������������������ɾ��ƾ�[W_aorarzvlSz���ڴ��zy����������������L&H5%I\[9/7-.3-&! &-%)@\g/ ;`�����������|nN!&1.#"5��dZajkpllhachkif`PIOUW[[VSNKIHRbc^^YVTOT\_ZTSMGCDIKMIDB>;D<&17;PTYs��^JHIFD?4&*Ie����lcM=T{�~�zuppk]V[S?=<>Xafgffghhhhihca_[YTO@/4<>D:(.Qdv���yr����͊ZUOLIOUVONPUU[`�����������������ehgea]XSNF@[��������������������ľ��j051--6B8!'9Yklournljgeb`_\ZUPMIDAAEQ^iorl`JHRbk{~x`2""1AK^rYQUw���������|z���������c8
+ 1EMF<4>R\SD?=<<:757;FXs��������������yio�����_/"""!$4P_ZY[`bbbc`WXRGGSXcs�������������������������������������������������������������������������������������������������������������������������������������������ulmnnnnmkpsu���������������v�����mchkjfdcfhfcehfgefeeegjfb`gkha]deaadicabed`YRF9013676448960000159?FXeddfifcddedbcabccdcaaddc`ffec`cbadda__cca_bdeefhhcbfecdcba_acc`_`a`__a_\_c_\^_b^[]a][^^_]^a_[\]][[\]\\_^^]^b]\[Z\\]_[SQRVZ^aeb^[[ZZXY[[[_`bb`_``adfhjklnqvy}����}{|||{{|{}�����������������������Z53 !Errhj]!".a��������yWkwvtv������xuV>J9#!"D������i 9b|������}t����������������������yw���������������²�TE6"- +/!3h�l^O>3!!%Al�����������Ǻ�qF$012LXs�ĳ���YWXSS��˄���������������͘p����������Ϋ������������������û��������������ǩ���Ĺ�bdbin_i{}viT~���Ϣ��|x����������������I'7m�s,4?+,,*'%(--$+U��_Ic}�����������xlL"+4-"#:��XYaeimmie_kiiie^QGLQX_^[WRQLIL[YTXZZVRUXYXUPHECCFKKIEA@@F9 37>OUYp��VGIIGE@7&,Ge�jMFGFC?]����~xrheYQOKCEFUcfmrtsrpnmnlha___]ZQ>*4AEA0)7Sfy���so����ʄWSONQWVTSRSTVZf�����������������fhhd^YVQKEGa���������Ϳ���������ľ��g*33--8D8!%@_lkpuqmjieb_]]\ZTNLHC@@AHQV\YTH<AQbn{��h6
+#4CL^gOQ\z���������}z����������c4
+
+#.BLNH=4?X`TOJHDC?>;:>EXv��������������vls�����[+"!#$# #5S_[X^``chhc`gabjw�����������������������������������������������������������������������������������������������������������������������������������������������}kimlllkoru���������������v�����oiihfcbefgefghgffeefhlkdcfjhcafhcacfea`dc][PA8103432249;9621257;?L]gedegeeehedcce`bccba_bcc_^bcb_`cbbdbbdfgdbcfgbdghdcdfb^`c_]\_ca_`ca^__^^]`b`[`da^Z``]]a`_^a^[Y]\ZZ]^]\Y]\[]`^\]\ZY[^^WSQUY]^`dda^\ZYZYYWX\_dd`_]`cegiijlpuwz}����{|||||z|~������������������������H.)'c����g%;r�����������ul^j������~ynHDG0!!%j������J&Hj�������q�����������������������w|����������ƿ��ɽ�?>,+> ,&2��������zvQ*Jv��pkpruy�����j?$:NSawcWv��bXURc�ھ����������������ćy����������Õ���������������������������������������ĸ�e\ha[gov~xhX����Ͳ��w~����������������F$a��t4 ?A,)'&),0.&=������ou���������~wkC(.3*!7��N[c`glkd^eikiie_PFLQX__^\\XOKQ^WSWYWVVUY]]VQHABEFKLJD@DEB7 17?OW\t��\HHHHD>4&.HXdOKJIECF`�����wplbYX[ahjkoouwyxwvurqqokhicbYSH:,18>=56?Rhy���su����āTQPNNRUTRQTVXWg�����������������fjhe^ZWQLEIb���������ν���������Ľ��c+42,-8E7%@^jlqspnlieb_^][XSNKGCA?<9:<A><74>Map���m;!	
+	%6DLVbd_b}���������~{���������}c0
+	%/(:KQMG;5CZ``\XUNHEEB<>GZs��������������sfq�����Y*##%('" !';Webabdkpvx~�������������������������������������������������������������������������������������������������������������������������������������������������������umnmmnonu���������������o�����ukjgddefedefffefgedeghb\ehf``cdb`abea_a`ZVL=52145420159;97457;=?L^egedgfdcefdbabc_^ada]^cb`^_a```aabefcbdffb`deebdh```da^[aa^\]````ab`_`ba`bb_]_c`\Y^^]Z[^[\[\[XZ]\YX\][][[\]^][Z[ZXXZ\ZTQOVZ]``acb_]ZYXXWVVZ^ac```bdfghhgjntwy}����|z||~|{}}����������������������m;.&#U���x@%I|�������������gy������{ya<FB'&x�����\.Ut�������p����������������������|s���������������ź�d6J))MT$8& A����������].T��~mmruyvquzyzvc6#1(<INRWVi��mVVSTk�ݪ���������������Ӹuw��������������������������Ȼ��ȼ�������������Ϳ����Ĳ|IUiYfrjr~vfT����˭��x�����������������B!%l��:$$$FG-++*.1/(#=������au���������|wm@)15+"8��NgXXfng\_hghhgd`SJS[\^__YXULGM]YTZ\Y[[[][[WQICAEHHLOGDFHA1!58@SVUz��[IJIEA<3(/LV\UIDHOLN`�����xvvrrsvz|}|{z{{||{xwvtsrmifc_YSJ7&(?FF<4:Qfu���tu�����{QQOLLNSSPNPSU[f�����������������^ifc_[TPJFKd���������˺���������ļ��_'50+.:G4%D^jlprolkheda_][WUPLFCB>931268632<Tjw��ud8"
+			#6DNeymR[����������{{���������|b+
+	
+,/+AMSMD73A\bd`]ZUOKE?<=I\u��������������sho����U+$&)+))(#%'3Kjrtu��������������������������������������������������������������������������������������������������������������������������������������������������������������vnmmnmu���������������t�����pgfdaefecbddccafgcabgdbbhhc`egd``bca^_]ZWF82-.1463/257888878;::L`fggfehfedfhfdcda`cda^`bba``eb``cb``bcb`dda__cb`acfbced`]aib^_ba]\ab^\_^^^]_`^]__]\^c[[Y\_[^`^[^`_ZY\_ZY\[Z[\][XZ\ZVWY[WROQVZ]aba``^ZYYXYYYWYY]abcdcdfghjhimsx{����{|}{|{{}~����������������������_2.! C�����Z 'Y������������{z�������yvW;O6"#B{��|vQ!8a�������xx����������������������|t������������������OMd;gB.A!#Hjp�����ô:4\��{grwz|rtvwyytV3-bV#"(+ALQQRj��UTVTVXk�ٗ���������������Ԭky��������������������������Ƹ�ɵ��������������˻������vUa`bqjfy|t]U����ǩ��u�����������������? 1tf?<BC')NG,/3:8.&$"2Y}����u����������|vlD)23+@��a`RYefTL_fhfghd_X^_^__\XPOMGER`XVYYWV[]^\_XUNJFEDGKOLHGEF/!5;ESVZz�nRJJHEB=1&1Scvykjlor\h�������~~~}}}~�~}}~|zwxutrniga^YVO5,:JD7+,@Wix���vz���߽vSSOIKOSSOOPUT[h�����������������ahdb_[TPIEKd���������˼���������ļ��[-50+0=F3)H_klopmmjgca`^[ZVSNJFBA<5.+,032//>`pxrd`S4'			
+#5CMcdQIa����������||����������y])
+
+	
+ 46"1COQK@35FZeeeeb\SLE><=H^y�������������rgr�����O;6:;AFGLNQ`iz���������������������������������������������������������������������������������������������������������������������������������������������������������������������xsmloz���������������s�����qhdbbedbdffeegihfbbefbcfhfabfg_\bdca`ZUOC50--02551/2789999866?Ocghigegjgghjfdeeebahe^]`aa^`ba^``b`^adc_bec_adebacedbeca\`db^[^^^\__a_^^]\[_`^]]]^]__`[\[_]Z[[[V^^[WY\\XYZ\ZY[^]XZ][X\[[SOPSX\^_`a`_]ZXZXYZZVVX[]acghfffhjllnuy}����z|���}|{}}~����������������������H0."Xx}���5#5l��������^Rsvqcr������}xlJ>K0%s�������Y#Jp�������s�����������������������y|�����������ÿ�Ļ�~KgV Gi(!==J����������!">j��yo~���urvryztQ5^�0&Ii]-BOTSV��kJUTTT[w�؆���������������͡t�����������Ħ��������������ö�����������������˹������nWb[Wa\r{sY^���ⴟ��x����������������|;! #/<GVM+PA,5=@4" !$$C����|u����������}wkA&14(K��aZWW[UORccacbbaZY_`^a\SGAABBGQ[WTYWSW]_^^[XWSOFBCFKMPLHGB/!4<FWYZgo_LKHHGC>3)3Y��������Vg�����~�~~�~~~{{ywusqrsnhga[WSK3-;:133;EVgw���y{���޻qRRNJJPTSPNRTW[c����������������aiec^XQNJFNk���������̿���������ĺ��U-5.*1AE/)Meilqrmljgcc`][XVSPJFBA<5.'*.0.-0Cah\LFGA2(
+	'8FOY_cbh����������}{����������wZ%
+-I="3JUQE;17J[ghihe`TLF:6;JXz��������������yv������tnlps{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xjkv���������������r�����nbbfjfbbhjgdehid`^becadfcbaehbY\cd_\YWJ=4/+,/12410146669975->\hkihhhihhhggfdeed``cd]]]`^^^beb_`de\^cb`eie``ba`_`dcaaba^]aaZX]_][\aa````_^``_^_`^[\a^]W[\ZSYZY[]_]YXZ\WX[[[Z[\[XY]][Y[[XOMPUY\^_^a`^[XWYZZZWWWWZ\_dgjigfghklqx{���������}{||{~���������������������zA1'H��}�U'I{�����������zm^t�����}{w`AEG'"Nap������I*Wy�������|����������������������x���������������¸�jFqN$U_!+F3.���������ǂ#Dw��ut����npvw}�nI3aR.h��S7MOSaz��jPTRRS[��ͅ���������������ǘ{����������Ӽ���������������¶�������������������ý����fgngaky~||rU]���ժ���s����������������w;!#2-?XN+M?/7<2& !=�ĸnIge����������vh<$.5$ L��O[c]\\WX][[]^_\V^^_^ZP?>>><?GT[WUY[X]^_`YTTOOLGBEFGKQLIF<,"4=EWX[mtaMMIIID<2)3`�������hPk�����~~}~}}~|{|{|zwwvtppnqqlgfa\WTH4-:AEEA?=Zi{���z|����kPSNJMSUSPORSV^j����������������ydkha]WTPIFOl����������ƽ��������Ĺ��O.2.*/=D,)Icjlprnlliec`_\WUPMIEB@;5+%)+++,4Ie`LB=?@3&
+	(8DTqwi]i����������|{����������wT 
+#=W5"6JRK?4/7IZikjgdcXL>38AYk��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������rz���������������p�����eX`ejd`\cc^[`ed_^bdcbfdbb^dgc][deg]XTI83.+,0131///257657545B[bdhhecgiebfifddee`aacd^`bc\^ab`\]`c`_ba^^eeb\`a`]]aca`ccbaaa`Z]``][^c^^`a`\\a_\Z]]ZZ]\Z]TZ\YZZZY\_^ZZZXXVWY\[YZYTWY[YXXYXSKMPW[\]_^a`\ZWVY[[YY[ZYZ]`dgilkggiimsw{������Ƥ�|y|||~���������������������f4( D���~i1 +Z�������������~m�������|sV8H= -Qz�M :_|�������|����������������������~y��������������ɿ��PYy62cO3P,0����������a'O{��om{�r_kv��gA>[UAt���4,DPQd��Ҥ{aUSQQ_�ڽ���������������ѿ������������԰������������������Ǹ����������������¿�Ľ�ir{|���~|ynV`���а���u����������������y;  &Nyu��[*D<.50' !!D��|-3cp�����������xd:"/5#T��M_hggbUUYYW[ZXUW^^Z]UD:<:::@LVZYVVVUYXVQLHDFDHKDDDDGGEIF9*';>HSZc|ZQSQLHF?1)8d��~r��YZUi������}}|~~|||zyyyyxvttsnnnlnjcb]ZXSF2->HGA<2<Zl}���vy���߳kTSNILRUTQRTTW[m����������������leia_\XUOFGSn����������Ǿ��������ĸ��N.5.*/99",I`ilrqnmlgecb^YVSPLICB?<4*%(&#$'0S`WMB?E>1%
+	(8CKVVOSk����������|{����������sN
+
+
+	,S]0 2CIE;1)6J\hmlhfcYG:4:Ry��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������s����~[P[cd`_[WNLSW]\\eeb`cgfdbcih__bfb\UPC6/-(*/110/--/34333206J]^cgieehkhfikjegkid`eda`afb^]_a^[Y_``a`a`\Yc`abcb]^aabaafecca`^\a`^\^`]\]`]\[]^XY[]\[[`^Z\W\Z[[ZZZ]\XZYZVWZXXZ\^[XV[ZXXZ[[XUONSX\[]^_a^[ZVVXYXY[]\\Z\^chkmmkjjgmtx|������ȱ�{yz{|����������������������P(+ #Yl:X�o-"6r���������cfyyy�������xlL=H- !""$#Jl�������|�����������������������{{��������������ɼ��RswObY&*%Cc,EP!I����ɿ��ϹB!2W}�}pr���lgqz��y_1=gm����77CRe���ʢ�vURQSg�ݭ���������������϶}�����������ɛ��������������Ⱦ������������������˴������fu}����~{zgLm���ּ���z����������������s6!L����t7'G=--*% #!H���ox�y����������~wc8%-0!!Z��Q^hqm[JU]WW_XPOW]SQXK<779?BHOVYVNPRSPMHEDCBECJNF>AEGDDFB9''6;HV]kwwhbf]QIE?1(?e��]Z�zc`Ok�����~|{{z{|{zxywwwvvsqomlllmic`^[UQD3(<G==:/>Yk~���v|���ܯiTQOLOTTTTRPRW]r����������������mdfa_\WSNGFVl����������ǽ��������¸��M08/+088. /Ohjnrqomkie`a]XTNKJGB@>:1)$%!!,HUSK?@H7-	*:BFC@KTj����������zz����������pL
+!<bY-)8@>3.+8K_moorqeYG95Es���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������s����zQES^ed_UC;>CGKNPQUX]df__aiic\^ba[WP?2-*))+--,,*),/1///./9Pgaaggfdhjfciojfffie__dc^^__[[\`_\]\_``aa__cdc_``c`\abd_```c^c`]Z\`]]Y][Z]^_^]^a[X^ab\\_`\[\\ZYW[WYY^ZUZXXWWZZYXZZXWY[XUZYWWVSPQVYZ[_```_[VTTVVVY[\\\[]^`fknnnkjkpwz~��������xzzz{{|��������������������|A63>XS}�P&>z��������MNsuu�������|tdA?C)1ZJ,')Qu�������v�������������}�������~z���������������ŷ�{j�����ngy{gSP2.+YJ#c���������x#:`��zv����qps||yvS./d��zPp@.6GUd���¶��vVRUq�ܟ���������������ʪ|����������к����ż���������ź��Ƽ����������������������ky���~}{ywcJr����ƿ��w����������������q1$ JV:"09N+G7+*+&!!&' T������w����������}ua;#1-'d�HZhkeJCTYS_]SLPUQFIMB757@ILLS\b_WVVSLFDECJKJHMOE@ACKHDDC;'&29L[\f~�vneXKE>2'=e��pm�jdaMk�����{zyyyyyyvuvuustuqponmlklgd`^^YSG0.=86=36C]m���{o}���ڭbRPMJLQRQMNPSYbt����������������vaid`\VSMGDSs����������ż������������D,4*,1?M;!/Rgimrqolkifb^\XSLHHEA><:1%)?I@@9;:-%
+		*;BDB?FLm���������}rx����������pI
+0Vl^-!*...**7K]inspmbQG;54Q{��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������o����vR;JY\]ZG8477:;<>>EO[bbX\`ef^]_\XTN:2*$'(,/.-+*(*/0.***.>Vicbdfgfcggechjfdbeed`bb_\^`^]][^^[^`__`a^^`ba^]`a`\`b`__`c``^`^\\^][[]_^^_`_]^``[\^\ZYZ^[Z\]\Z[Z\VZ\YXY[YYWY]XWZ\YW[ZZXYYXXXWQOSVXZ\]`_`^[YUVYXVX[\[[[]]_eilorpllpw{����|��{{|{||���������������������n@4&/d�~�y8,\���������{��~t��������{uX8E@#:}���jH<'*$4^w�������|���������������������}z���������������·�q����Ⱥ��ŵ�����ca8)|�Ƽ���Ž�o'Di�tu���sruwz�pK&.\mcHGP6"7HSXy����¸�^RXz�ה���������������ŝm����������������ǯ���������Ķ�θ�����������������������lz}xvurv{xgKx��������p����������������t/%%0AAW�b';/-00.**+-."Z����~lh����������~ua9$0*$g�yN[c`QDIRPPXVMQXSMKMH@46<INGMWgrh\[YUPKIMQSXULMMD@AGPIFLI7%(3:JY]c����xm]MF=1'@d��{i^ZgcTp����~zxyxyzyxwuuvvutsqpmkjljjhc^]]XTG00<=?B:4A[s����u|���٫aVRNINRVQFOSTXax����������������ocib_[XTLHIUu����������ż������������>.2),5JL-#1Rfjmpommjidb_^XRMIHEA?>;/$!'<SVLGJEFAA3		
+,<CJMFBOq���������{qt���������mB?cpX-#,*%+)(5N`ginh_VJC<2(5Ju�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������l����xI3:ITXXJ:35667996>KZ^^^`aa`[[_XSL60+'$(,//,)'((*,-+*+2?\dd`acfgd`hhfikjdcefddbdcbcba[]^Z]\]^_[Z_b_\^a^\`da_^bb]^^da\^aa`^_`\Z[]Z\^a_^^\]\[]^]Z\_[Z^^^[\[\]Z[[WXZ[XXXYYVXYXWZYZXXYZZYYUPPUVWY]^^_^[YXVWXXWYX[]Z[ZZ\chloqrnpty|���vy���|z|{{|����������������������[0&E�����+!3q������������~v~�����|vnK=B1e�й������1;e��������������������{}�������z|��������������ʾ��j�������������δ��+4��¿������o+Pr��uu���|nswvyzqE1k������K;MU|��`y�Ÿ�TNW��Ή�����������������`����������������ȭ���������¶���������¿������˸������zmuk\`[fu{t`H}�����ì�x����������������k1$8w���,--,5:<8/.-+X��|[Qjw����������}v_5&-)&h��e^XYIDRVQPTGKQTNMOPJ@<@GPD=N]loj\YXTMTXXXTUSPNNGACHLHNLA4$&29L]\az����rRLG>1+?f���}���zHo����}x|}|zxxxxxyxuuuqomjfhlkifdb`[WUD/2=BDA63A]y���|t����֢^TPLLQTUOQTWTZ`t����������������megd_ZURNJHUu����������ź�������ƿ��{9/0+*3C?)$6Tghkomkkifa``]YSLHJGB==:1$#*?^|��}{{}{wZ+
+		
+->JQX^^ey���������wsx���������~k=#IdlX)!1DN4,&*9P]c`_[VQE882*8Is�����¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������m����xF20?MTSJ;66687665>IV__ZYYZ]^]UOE0+)%%(,.-,'&%$'()*+.4Geleaadfggciihhlmhdghgcdge^^a`[X[`_\]`_]Y]_`^]`_]_bea^`a`]^a`^^ad^Z__][ZZ\]`___^_^^^[_][\]\[]_\VRYWWXYZYVVYYYYXYXXZXVY\YXYYZXXYWMOPTUWZ]^[ZZ[ZWXWVWXXY[]Y[]_aafjnpqqtvz}~���gm�������~����������������������A#"$"R�q%@}������������ji�������{wd<;=)![�|�����ǆ)Pr�������w������������|}�������}{}��������������Ⱥ�}o���������������ĆB����������F!6_z�rt�phu{x{�c:@{������6%>P]��pTz���yHR_��Ç��������������ξ�t����������Ƕ����Ŷ�����������º�������ý������ʶ���ƿ�wnqZR\]kuyr`N~����Ĩ��z����������������k'$ ]��uQ;$/-,6?2+&&&$!Z������{����������}t]1#.) (u��k^UQLVZ[TVNGMPHGGLSKDJMRQGFS_krg]ZTX_b_WZVVUOOQJEKKNSLI?6#'6;O\^l�����iRMI@0(Dp�������iPr����}����|{z|zxuvvuronkjjjlkhc`^][VF,(7B@:42Hcv��~oo����ם\UOGMQVTUSSTSXdx����������������hcgc^[WRNHHWx����������ź�������Ž��{4/2,+3C?*#5Xjjmpnlkhedc`]YTLKIEA<<<3$$2Y����������a$
+
+	 0?JZuxnjx���������{x{}��������|m:#AXaM'#JfVF.$+;O[]QOOQK:450,;Pw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������i����zF,-7BMNH<43212024:ELRVTONPYXSPC2,'#$(*+,+($$'''&(*-3Kaphcadigggikfbhkheeffedffc_^`a[_`aZ]_c_^\__^]ba_^`a_S^]\[]``_]aca_]_^Z\]ZZXa_\Z_]Y[]_^YZ[WXU^]YUWYXW[\YTXYZXVWZWWXYTVYYWUYVUVSTUNOQSU[Z[ZWWXZZZ[XUTWYZWXX\^`c`ehlppsvy{����oc�����������������������������r0#?n('S������������}z�������ynW<?=%/f�wbQ^���74_��������|������������}�������{y���������������ö�p�����������������_N�Ľ�������('Adz�}en~�{iguzvws[2=W; 5op%+DSa�����Ĝ��MOj�߱���������������γ������������ϲ��������������Ⱦ�ÿ���������������Ʃ���ż�lolfcjcevvkUR����ק���t����������������e' ( KhmFB^=!/--/,$ "$_�����zm����������|qZ,&94!%/u�|j]UR][XTRRBHRKFBEKQKNPRUWKMX^hjc[SUagcaXWXXTMKKGFOSOOIH@5!*5=S]ao|����hUMH?0,Hr�����xdZY~�����������yyzzxwxutuqnmlmmllge`ad`UC-.?;4636Ifornoop����ԘXTRHKMTSPMRUQWc~����������������kffb]ZXSMFDT|����������ú������ʿ���v313++7D@($7Skknnoolhfd`^\XRLJHFB>><.$!"'7a���ü�����\#!1?H\n_VUu���������~y{��������|h9
+%6HO?"=_RK-#(=NZSEFJF:1130-Be�����������þ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������i����yI,+,4AFF>3.0/-+,14=IOTKBAHOOM<-(%%$'*+-+(&%%%'&'*.;N[cheaachegiieffkjgfhheddfd`_b``]cea[\^`[Z\`^\]_^[\``]]a]XZ]ab_a`b`_aaZZ[]ZXY]\\Y\ZWWZZZXZ\XW\_WXXZYWZ\[YWWYUSX[XUVWWWVVWTQZTYXWWTOQTSX[]]\YXVX[\[YYZYXXXXY]^`ddceimptvx{����wcg�����������������������������a)#/a��������zgvz{y~������}xmJ9B5!Z�Ǫ���i43Aj��������������������zz|������zw������������������i���������������ѳ>#y�������ƿ�*Ig�{n~��{lpx{xtyU.Ja^I8^�f2KUZ����ʚ�ÇISw�ࢠ��������������˫������������ͥ��������������ǻ������������������ħ���ø�cnpihi]m|zjTR����ա���y����������������d& '!e�}QqW9,.-*%#(h���}eL\����������|tY+Em`@2++159x�~nd[bj_UKJD;KMFGEABSWSKNRNIS[\bf`WTWaga[XSWWVPKE?GNRLJHH@5 -5AY^`k{�{}xiTMH>..Dk�gi��gbef�����������}{zzyxxwvutromllookif_`b]RB31/$"(03Mflkmmqy����ЖYROHIOSQNNQUUWd}����������������khd`\ZVPKFGY|����������¹������Ƽ���t312++5EB$'9Xkjnrpmmkhda^[YQMJHFB>><0%"%,2@l���ƾ�����_5"3AIQH9AQy���������|y{���������|e4
+$)*'-9*$16B<#(=PZP@BGB736850Iw��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������h����vI&&&)5?HA3-/-)((*0:EJHD=9=@?6,(##!$)**+'%%#$%&%)3BV]cfgeddfgfjkhegkjfdfigcghgd[ac_\___[XZ]\YZ^_\\\]\\^a\Z]b_[\^^\_ba__`^[VX[[XX\ZZYY\YYW[[\^[[Z[\ZWUWWWVXZWWYXWYVYXVSXWUYWUWWY[WYXWVQPQSUWY\^_^[VUUY[[[\ZYXYYX\`cbdefikorvy}�����xpg_y��������¼�����������������L&"@~���������zume]~������|ud?;D-S������G&Lp�������w�������������~{{������}y��������������Ǽ��m�ȵ������������О#/����������u*Ml��sv���unw~vypH&Gs~����$#9RUTx��ŕk��vKX}�ې���������������ơx����������������Ȱ���������Ż��ʿ��������������´���û�esqholnz{ylMZ����֛��y����������������c (&``lZagF+-(&&$#$##\�b;)%*\����������|rYB|��cI>;<96u�}rjdmhaWKJEEIHLOF?CYYSKINIMV]\[^YUMSZ[WPTRWSSOJ@EKPQMMLH@1,3@[adq��himcUKE=-,E`gs�����bd����������~{xxxwuwvuuvtspmmrqgfd`_][Q?10:6(),4Ncikmnqt����ˋUTOKKSTSOPQTUWg~����������������fefb^ZUNIDH]}������������������Ƚ���o111--;KA+'<]giorpmlkjhb_[WOLKHEB?=;0%%)05>o���ž�����iO#$5B(%4CLNZSXe}����������xz���������x`/
+
+'5<-77.&&"$("$'=QZN@FGC=:>?<<Jo����������������þ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������i����}L&##$(6A>2,-.*%! %0?EC?6342.,&$  "$*)('#%%##$&,5G]aadedehfddgifedijecggggcegca^cb_\c`]\[]]Z[_`___`^]_`^Z]^]\Y]]Z[^a_^Z]]YS[\[Y\\ZX[\ZX\^\YY_[UVZYUWXXWUVZXWXXXUWWVUSTXVTZYXWWYXTVXUTRPQRVXZ[\^]\YWTUYYZZYYZZ[Y[_cdcfdfkprvz|����{xwumddn������������������������y7$$!%9EWj2'O�������������wk������{t]7<=#%FpU?_|�;0Yy�������x����������������������{w���������������ø�qy�Ôv������������z.��������ͺB1Ux�nv���ypwzrs}e8!8q���y");PWo�̴�����_PX��ь���������������Ùx����������в����ý���������¶��µ�������������������´�irjelrqvxxfK`����ә��uy����������������]#'!O���x&+.++*'%&(*( 0,#)11/`����������}oWO���aI>:858|�zpggljd[ZXPHEDJQNLSc\TKEDFQX]\abYUOORSRQSPRSTQLGKMQTSNHB:0*4BYafx��jei`SKC:++G`�������EZ���������|zzzvvwtvuvtwvvnlvysfhgb`]XO?-4=2)+)2Peimlmqv����ʊWSPNLQUSMMQVVWd�����������������cefc^YTNJGH\~������������������Ǽ���k03.(-;HC/&>aiinrolkihfa_\UPLKHEB?;:1)),03<t���ſ�����zV! >]e<)"%6GP^pkln����������}{���������z[,!?Z\LH>3*$'<R[RKNKHDDGDB@G\��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������h�����J)#! 
!(22-++,(+<AA<61-)('# #&))$ !"$$%'-9Kc`^acccehfb`eheceiggghhghhjfbbcfeab_]]]_^Y\^_]]^][Z\]^YZ[]VX[][[_c`^_bb\X\[YYYYWW[[\XWZ[WUZ[WVY[ZXY[WUXZXVWXVTTWYVUXXWWWVWVXWXWUUXVPRQQSTVZ]^^]YXVWSUUZ\YYYZ][\_ccdghhkotvy|����xyzxvrk_`jpy����������������Χ��p6/ 0h�����~0d�������������tu������|xnK8?-L�������0 =f�������}����������������������yz��������������ȿ��k��¨�������������P,���������v 7[z��u}���wo{wpqx[0 ,GF/&';RZ��������mIR\��Ɗ��������������ͽ�}����������Ƣ���Ⱦ�����������������������������������{ith`_ciswudOd����͗��vv����������������Z#&%p���s7,-/383---,&&'4PYQ<b����������~qQ9`hM8-+'**:��~jallld`hdUJKOV[XVY^[WLABOSY]]de[WUTUVUYTPQSSRKIHOWVPGB@;-+6C[cf|�b]cWMKF;,+Ny���ZjqJH\��������}z{|}|ywutwwvtvtx|urrke^ZWO<*-111238Qglljkoy����ȇVTMMNSRQMNQSUZf�����������������aifa_ZUQLGKb������������������Ƽ���e*2.'+;LF*(@ajjptnmkhfba_]VNKLJEA=>:2.-/05Ew���ſ�����xL (PsmRB''8DNjxdYc|����������{{���������xU'"[zY[PD?7))=R\WSPNMJIIFCAFZy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������h�����T/$ $)))(+&"3=BA<4.(%$!
"%&'#"$*.;Sgd__ceddfhgefghghkifgikihlkhcdgeb`bc^YZ\]VU\YZ[_a^][_^]YZ\][^`^[]``[]bb\Z[\YWXYWWZZZWVX[ZZW\[Y\\[Y]ZXTVY[VTWWWUTYXVWXZUUWYVVXVWWXXVRQQPPSUZ[^__[USTUSSXXZ[Z[[\\^acedgikloux|�����yyxxyy{wwtijlnqx������������ ��T0,C������h&"8v��������~{��{t�����~zve>9=)a������^%Hj�������l����������������������}x���������������ƻ��i�ƾ��������������5#g���������m #Ac}�~h{���zvwsmqvQ'#&$)BVWu��|~���pMUi�൓��������������ɲyx����������������ö�����������ĺ�������µ������˼������xlvra]cmvys^Dh����ѷ��yw����������������U"''WXu|hE("01/8?8/((&!%0_��_9c����������zlL*%3/"%A��x`kqnlbaeeWPRV^b^Y[^]YPGJQRX]`feZVWXWUVWTLKOPPJEGOTLC?=<7-,7H\chy�}bYVNNKG<,2P���ZRYZg^`��������}~~}���{wstusqw{~}}{xrje\YVO@)+9A@=3:Sgjmooru�����}PSNOPRQONNRTUZg��������������´�bgfd^ZUOMHMc�������������������Ƽ���`&4.(.9G?&(Cdhkpsommjhc`^\WPLLIC@=<93../16D{���Ŀ�����tI,Nqqg;#+9DIUG??T}����������{y���������wS% A_WYJ@B6,&AX[WWSOLKHJLC?Nb|����������������������Ŀ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������c�����X8+# #"%'$ )9BGD>4*%$$$$%&$" !"'.;Ufc`bcccdihfefhgehkhgdfhggikgebbdb_]b`][\^]ZY`]^]aa\\_`\\[]^[X]^\[Y_\\\^]\[[XYYYXXWZ[XUW[[WZZYWX[ZWVXWSPUXWVWXYTVWYVUYTRUVVVUXXUUVWTTSQOMOVXY]\]^ZUSSUVVWVXY\\\Z[]cdcefimptwy~����yxxxywxzy{����{usjno|������ϲ��|B(#2noV>) 'E���������qqooen������{wq[=>;$?YK[{�},.Ts������yt��������������������{u���������������Ķ�lt�����������������$,��ľ�������)Ln~�wn}��~vwwtrrnJ#$#('"-ETU`�������{NUr�ޢ���������������Ʀiw����������������¸��������Ƚ�ż���������������ɶ������poubWcgnuvsYEs�����̿�yz����������������S$& N��u[A1%-,-22)#! 
&6r�zP2f���������ykM%&6/$ D��ogusqj^_gdWSWT\b^XXZYZSLKQSV[aj`XVVYUTWRONKLOOICHIFGA==A?2 /9I]dh�cUOONID<-6X��������_b��������~������|xwsstuyxyyxxtqme^\XP>-,AH=1*9Sfjklnqx�����{PSPRQQRRNKRUVZg�����������������bhfc^[WPNINf����������ƽ�������ƽ���Z)6-)-;I=&+Gehkornmmigda^[XRNLHD@>=9/**-+.L����ſ�����nF(@VS?' )6BHD;<E[|����������zy��������~rQ#"2.DD03CD6"*CTZVVPMKJJLNNTdx��������������������������ÿ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������a�����_@5*! &&!$6@HIF<2'%%(('&$ #'0?cdc_\egb`cgfddhidagiedghgfijjfbddda`ac_^_a^\^_^\[^^[Z[]]ZY\[XYY[X]\Z\[]^][\][WSX\WTX[ZUVXYXWXYYVXZXXWXXUTY\WXXXXTW\YTTVTTUVVSVWVWWUVRVTPONOTXW[]^\YWTSTTWYWXXZ^\Z[\`bcdhmpsvx{����xyzxwzyxxx}���������ywqqy�������h3$(Udb_XMI""0\������������rjv������{wqN4A3!9���uq��.8^|������n�������������~�|}������{w������������������c��ο�����¤������t3����������\.Up��sq���}szxurvj? 3MSWNP92HTY|�������oJXz�ۗ���������������Ģs�����������ƭ���Ͼ���������Ƽ��ƾ��������������Ǽ������eqn[^eejtvoZMv������¢vx����������������N%#(x�'%B<$#*+++$  (9m~g;-k���������{nM$'3-!!A��hospogXae]TOLR[a]XX[YYUPOPSU[ce\Y[X[[]YMLKHKMQJCEFGIB=@BC2/;L]eg}��gULOMIC:-2Z�������zXm������������������|wruz{vuuqrtunb_][VE226.  (;Vhjkmnov����sPSKJJMSTNQSTUTk����������������}]fdc_XSLKJRh����������û�������ż���V+4+'->I= ,Gdikpronnjfdb][VPLLHD@>>8.'*-.<W����ſ�����k?#$*1$*:CEA==E_�����������ww��������}oM!
+#%(((=JJ=#-DY[WVQPMJJKOVf����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������]�����eI=/$#&!3>HMMF8-'&$$'%$ #$*5EXcda__dd``bffggiedfihdfgheflljcaifcbbb_[`a`YVYZ\Z\]ZZ[^^\\]^\YZ]^[\`\ZZ^^YZ[[WUTWZWVXXVSXVWYX[YVVZZWYZXWYZZ[VXYVTUSVUSSURSVTTSUWUVUUUTURMMMPUVXZ]^\XTUTTTXYYVW[^^\]]abbcgjnsvz{����xxxwwxxxwuz��������������~������X&">u�w��o{h$:t������������������}{vj>8?.$f������q!Ei�������e�������������|z�~����~|~��������������Ƽ��c��Î~������������M@�º�������5 6[t��tt}��{vxvtpwc7!=������M8OZw�ϲ��Ǽ�LP\��҈�����������������}�����������ë��������������ľ������������������³������hrle``]fwukONz������ʠtw����������������L!.mQ0$4c\E[I$)*+%" &<qkQ.1p���������zlG"'4+(F��rsroj^S^[XUONT[_[WYZWWWQNQSTY\]]WZ_cf_SLKHGKMOJCDFHF>;>A=-#2;M_iw���ueVONLG7,6^�����recg��������������������ytwyvsrlkpvwkb_][XG*%#*>Yillnnsw����pRRJHFGQQOPSUWZn����������������zafec]WRLKMRj���������˿��������º���R,1*'.BI8.Ifjkqroomkgca\YUPMLFBBA@:1+/7<D[�����������j@ +;DC><<G_����������vy��������~lI '0*-AKL6 0@WZYYUSONLOR`����������������������������������¾�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������a�����hKB5'%#,;GLLLC6,($$%&"$)*3EYacb`aefcbdfefghfchijecgffgjlhebgjfbed`^^bb_\[^^[\_]\Z[^][]`]Z[]\Y\\]VWW][XV[XTUWWYXYZXVXYXVY[]XWYZXTX[UVWVUUSUXUTVUTWXUUXVSSRVWUTTUUORRLJMMNTVZZ\][URSUUUVWWVVZ\^^_`acfbfhmquy|�����yyxxxxyywqman������������������{B#%N����[ez7%H���������povyka~�����|zrZ6>A(!L�����b+Ot������zo������������{}|�������}y���������������Ĺ�ot�ξ��������������3!d��������$$@az��sw~��vrtyvts\05~�����t, 
?PZ��������]EMc��Ȉ��������������λ������������׸����̷������������Լ���������������Ŷ���·�hspbRT[r~ujQS|������ȝv{����������������I  &Whmk�����6#&('$!%<`iY62m���������~xlD#'4,!(O��{tqniXTZZXWVV][_ZYZXVVVQQQRRUX[\Y]ficXPQJBGMNNIDDFFB<;>>8+"1=Ol|����|n`RLKG7+5^��fXXWepl��������������������|{wqopmpqyvkhdb`[I"!$%+04A[hklmnp|���޴lUSLGGJRPNLSUV[n�������������ſ�vaedb]YSKJINn���������ʾ��������ù���M,0*(-BH0!-Jfjlqronmlgdb^WSPMMHECA?<65;ADKc�����������e<!.;DB@<?Kd����������wy��������|mH4DMNPVM+ "'7HWY]ZUTRPPU\o�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������c����gNF=- 
+#"(;EJLOK@4+&&#&$$%)-5H]bcc`bhgdbegdcdhhediifcfgedejjfdfjheceda`db`[^_`Y[\]\Z[[Z[X\\[Z[XWX[[XXYY[[X]]WXY[XWXZYVXXYWSVYWWVXXVUWWRWWXVTXXXUUWWSVXTUVSSQRTTTTTUTUWPJJMOPUVWZ[[XUSRSVTUWWXXXY]aaaaaeegimrvy~����}wxwwwwvxwssvon�����������������q7*&#JdB�s:J: ,X���������}|vhet�����|xpP6E?"7�ȥz78A4X{������lz������������{|z������zz������������������c��Ѽ��������������h��������Å(Hh~�|nu|~s`gqzsuuQ,FpN(6>D*(CPWo��v���uOQe�߳���������������̶������������Ф����£��������Ⱦ�Ⱦ����������������ȳ������fxwaX_isxshIZ~������Ĝn|����������������I !*y����xfY-%(%# $;`yeH:p���������~xjC#(4,'V��}srp`MQSVV\[YZ[`YX]ZTRUUTRPPRU^\Yaec_[XTPMKLNLHECDC><<?=9,#2<Po�����xpdVKJF7);b��������m���������������������~zuprtuuvxsqnhfefM)0=AH>7B[iiklmo{���޵lUQJJKPSQPOUUU_r�������������ſ�w`fea]WQMKKTm���������ɽ�������������J,1)).@D3! .Nfgmppmlkkgca_ZUQNLGDA><<:=DJIKc����ľ�����a2
+
+
+!.<BA>:?Kh����������zz��������ygC
+6Oad]W4 "%$&+.1<HTZ_]WSQONPYd{���������������������������������������¿������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_{����iNJB6(
+ #7BIKNQNA3)%#%#&)-:M[^bbbafhfdddecadhgdhhgghkjfglmighmkfced`]_`_][][WU[\ZWY\]XZ[^[Y]\ZWZ]ZWX`^YY[^\TVZYVVUXVTXXUVWWXVVXWVY\VWTXXTTUYWTVWWTSTVSRUUSSTVUTUXVUYWOJLMPOVVZafd[VSRSVVUYXXYY[]acbabdefintw{����ywxwxvwvyxx}}~x�����������������](0$<c���a1 4o��������������������~zvj@7L1_���tj|n "Ae�������h�������������}{������|x}��������������ȼ��b��ǯ�������������p/����ǿ����X /Ql��yhnsnT5UnuqrnF,JXK0(&**!,GSO`�������{LOp�⠕��������������˱|��������������ʸ���������Ǿ�ȶ���������������̽������}juiUcinuxtaF]�������Úr}���������������|D""/yiB1$$)&# *Mu�h:3q���������~xg=$+5+)_��ynrnYNONNY`YVRYXW[[VPMSTTSPMNP`d``[^cdb\VOKJLJHD@@?>=<A><-%3<Sfv����rj\SMKI9(<j�������lS�������������������|~}xvpu}{y{}xnhkkgB1:DJE84B_ihklmt~���ܱdQQMKLTTRTTTUW`q�������������ý�pdhfd`ZSOMLYo���������ɾ��������º���H.0*(.;7&!/Rgglsrmmligd`]ZTQPLGD@=;<=?FHHLi����ſ����|_1
+
+"/?CB?<?Li����������zyz��������vfA'K`cW="$)0459>CIQZY^^cZYUOMKJSa������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`�����jQHC8*
+.?HIKOQI>0'$"$(/=SX[_b`adhgeefedcgffegkgeiklhfgmiddihdcefc_^^\^\\]VTV[[YZ\\ZY[\\XX][[U\\ZXZ_ZVW][YYXWTWVUUSVYWUWXVWVVWVUZZUSUVTQVUWSQUUSRRUWUSUTRUWXUVWWUVVSNKJMLPTYdw�~lZXTTTUTVTWYXZ_bddcefegkqwy|�����xvxwvuwxxy|��|��}��������������Y25C|�{[V!#E���������������������}yt]4>?&`������\(Or������xh��������������~������~zz���������������ķ�tm��Į�������������O@����������#"5Xq��q\jmgA'Pjklnc='JefkmfeK 4MTWw�������bGV~�ڗ���������������Ȩw����������������ͻ���������ǽ�ĺ���������������ɷ������tlsfcpurx{u^Ea���������l}����������������B#"!.&(%"!,b�{Y-3u���������|wj<(-4("-j��rpqfRNLPP\]XUNVUTZYUOJIRUTMJLThjb__adid\XYTMKKG@=<=?=?><:,$3>Sbgw���kYQQOPOB6?n������jPU}�����������������zxywwxsy~~|{��vkjfdW8+8E<136E_ghkkms����٬cOOLINRSQSQSTW_t����������������qcjfd_WSPMJ\q���������ɾ��������¹���>,0*')*$  #1Vgimsqmnmhgc_]ZWSQMHFB>;=<=BCDKh����ľ����x[+"2ACB?==Pn����������}xy��������xe>-?B1 #.:EMRNWZ_aefcefc_]WMD><AU���������������������������������������������ÿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������a�����dLGB8/#	*<FGLNQNH=0(%&,7JV]`cabefgfdgfd_cfdbehhfeghhecfffbdiifdgfa^^^^^[^]Y[`^ZWZ\[YZ^ZYW\]XWT\]WU[[ZXZ\^Z\YWXYVUVUWXTWWUVVVVVUUVUUUWTTSYWWTVVUTTWVUTTRRSUWVTVTTRUSPLJKPMRW[s���p`XWVUUUTWXY[acghffgfikrwz|�����yuvwvwvyvw}���������Ƕ���������vD-,"H�{gsJ%X���������������������|xoP4=6(]�ʺ����81]|������ny���������������������{~������������������`|�ʽ���}����������0V����������!">_w��h_jm\86aropn[29tu|��s7 :NSp�������sPQ[��Ԏ���������������Ơj���������������������������ļ��Ż��������������ɹ������ppqbajnqxyo\Dd���������m}���������������|@#"$&%#*U��xN)9x���������~vf;'/3* 
/p��lnjYOQXYSX\UJHPSZ[YTOKJMUSNIH[qre`_^ejc[XZZSIGDB?<@B@@==9+'4@Sbfn��w^RMR\cmrF?c�jh���gVT}�����������������vvsot{|{||{|��tmf`\M61>9(/54G\jijknq����֧aRPLJPSSSTSRSXbu�������������½�rbjfd^WQOLM[r���������ȼ������������|4,2+)()"! %5Ueimqpnnligda^[WSPMHEB>==;;?BBJj����ľ����rF#!6CDB@>>Pp����������{r|��������xh;!"(5CU]``bcbb_adbb][WRI;66:BS�����������������������������������������������ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_�����_KE@60'		'8GHKLPOPF8.**0:EQZ`_\^efdbefeb`cdddgfffghfedeikgfhifefhc_bc_\]^_ZX]^\XW[ZYZZYWX[[[YZ\`[YY\\Z[_\ZZ[YTXXSSVTTTTXUUTVWUTUWYVVYXTSUWTTVUQRTUTTTUTRQPUUUVTSSTUQMHKKMMPU^������q`[VVUUXWVX[_bfiiggilotxz~����~ywvvvuuwtt�����º��������������d82)0n���v76F0o��������������������~zvgC2@1!V��������= Ah�������h����������������������|~��������������Ǽ��`��ǡ|�������������Z����������'Gf}��cetqM7JmpmkkQ+&UmL��o1%;NVj�������zTO]��̉���������������n����������Ŷ���������������þ������������������Ǵ������fpnZXhmuxwlXDh������о�v~���������������x<"%&&"/]��sH&@��������|we5%,1**q��nmaMRX]^VUWKGJNQX]\YVROPUQKDF\mnc`]]gie^WXYNGEEHDBDFEA@?8('3@VdefmjYW[bm���CC`kx���wpjZ������������������yvtlu��~||zz}~wrf_\P:25.0::8I[hhhkmr����סZRPLKOQQRSSUSVcr�������������¼�pagcb]WQPMLYt���������ƻ������������|6.1*&''! !
%6Teimrpomkigd`\ZVSOKHFA<;<99=?@Dl����Ŀ����i(#5BEC??@Pq����������yr{��������ye>%-2=HV]cbcb`ZXWXVTTPLJF?549=ALc������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_����\D@:40)!	"7FLLNNOQKB:4027>HRZ]^addbfhfcadfffjihgfgheefffefjigchgc[_db^[]]\X[_]ZY[[ZZ^\ZZZ\[[YZXXWYZZZXYZWWWWTTWVQRWYWUVYXVUWWURXYTVW[WQSVTQSUSUUTSSTTUTSTWWUSUTRUUROLIKJLNQWg�������saZYUUWXVUZ\_fjjigilpuy|�����xywuvxvuur������}svspy����������U35"'P���N7W/"G~�����������}|�������|yr]55>+C�����k��b+Ut������vg����������������������}{���������������ķ�rh��Ƭ����»�������i!^�������ã?/Oo��|mz�c:0MbfdimK'?md���Q&=NRVUd������OPe�߸���������������ɻ������������շ������������������н�����������������������epldkvuwyvlQIk������л�u���������������z; ''%!(Hiuh@#@����������{w]5#-2)+p��sk[MY`a^URRNLNKLTY\[ZXTTSOH?I_qmbd`_fgb[UUPGCDMOJHLOHGB>4#*9EYeefa`bit������:Bg����Sbvn\������������������~~yv���|x{||~{sjfa[X>2;@DD<7M]ihillt����ԛ[TOLLMTTTTUTVUbx����������������obhea_XQOLM[v���������Ż������������v5//'$&&"!" #8Yhinrromlggda][XTOKHE@;:978==@Fn����Ŀ����R$5AFC?>ASx����������xqw��������}hG5244589?EKRX_fedgc`\YSNKIGGFDC>866:==;Df������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������a����~]@@=70)"
+
+3BMQOPPMQMG>635;=KUY`ddacjidceggceihfdgigfdghfdhjebdhda^dfb_a`\ZY]_WZ^^ZWZ\YVYYZZ[XVVVVYZXWXYYXWXYVVWVUTWXVVWVVUVWVVUWTPUXWSTVTSUVTSSTURQTTTQUVVVTVTRQSTROJJJINQR\q��������sc\VUUVVTW\agjlmkijouz~�����wxwvtwxwusyyimtw|~�������������{C20P��~d@%#$X����������VAP_mu~����{ynR599*b}M�Ѿ���v)7`}������is������������zy|~����{z������������������by����������̿���BC����������@3\q���~qZ9.6AJZeh@'Lc���xJ -FQOPOSr����yHVt�᫘��������������ǵ������������ͩ����ǡ��������Ƚ���������������������������jtmeefkpwsgOOm������ϼ�q}���������������q8!''%"".RmaC'@���������{x\3 -3')q��nfSQbaa_XWQOPPIHPVZ^ZXUVUQICFesmdca_`_YQPQGDJSZTRPTOGGE>3 -<Lalkpw������º�~6Fr�ƑKDPgZ^�ƿ�����������������}|��|z{}|}~njcde`Y</?IBB;4M_ikllow����ϗ\UOJLLRTUTTSWXaz����������������kcgda]WQOLP^x���������Ļ������������q2/.(%&'" "$!&9\ijprqolkgebb^ZVURKGE@<;;7:?A@Ku����Ŀ����N&6EFC?>CVx���������~uqx��������|mXSSVX^baekjikfcaa^ZYXVPKGCB@?=8129=?<97<G_������������������������������������������������������ÿ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������^����~\A??8.($	0BOUUSSTUSOK@9877ALS^c`_difaegcddehgdcfheegigffkhccjga`ebb_``]XY\Z[V[[[VUZZWVYXXYZ[YVXXXWWZ\\[XYVWWVVUUVWTTTUTVTVUTUUVSXXXVWZVSUUURSRUTRSSSSQWTROXVQRUTVSNIKJHLPT^x���������ueXUVVWXZ\`eikmmlorvz~����xwvwttuvunruw}����������������l70()VcDS_E+j���������TSuzrlrw���zueB2910dqx�Ƒ���r$#Im�������c�������������unppv����}y|��������������ǽ��]��л�}}����������*J���ZT`p���C =dw�����vbH33.3R`U5(Q}��lo)!0HPOPQTy����kIX}�ܡ���������������Ĭ������������ŝ����ò��������ƽ�ȳ���������������Ϳ�������gupb_\cpxsfGOr������η�u~���������������r2$'&%!
!+Jm`C/B~��������|y]/!,1''g�ycdSUea`e`]SPTMIHLRXZZXWVTQJCIhwkaa[Z[SPNLLIJT]ZVRQPKGGEB7"1Nk����������ù�y3C���JFIORKd�Ƶ�����������������{}{vw|{|yjecfdcT8/?<6:84Pagilmmx����͑SSNHILPSTRTTTV^~�������������º�edfb`[XOMKO`z���������ź���������ù�o./-(&&%"  !(<^kkorqolkifdb^ZWUQKHD@>@A?CGGJY|���������F'8EFB@>FWw���������|snt������vsmgggggidefe`__\\[ZVUVUTRPIDA><4-19<=><88:;D_��������������������������������������������������������¾������������������������������������������������������������������������������������������������������������������������������������������������������������������������]�����W:<?<2+#
+*?PYWWWWUUSQJC<46:?KW[\cifcchhdacggfehhfeghgdcghedegd^bcb^\^\YVY[YWY\YVVUYWX[ZZZ[\\VUZZXYZ\ZXXZZXXYYWUUVVTSUVXXXWUSSTUTXYTTVVPQRUUUTQUTUVVRSTWTSTXURUWTUSMGHGIMQSa�����������uaZXVX[]\_bhknoprtw|����{wvuwvvtvxt{��������������������^/* 0Zej}x/ 8y��������wx����wv|��|xt_;49,/w���|e���s.Xw������xg������������~ssux}���|z���������������Ź�sb��ҵ���������wz���K����{t{���$&Amw������rcQG?;IYQ."A{��u?#3JNOQQUy��đQN[��ו���������������è|����������ӹ���������������ż�Ÿ���������������˷������~itqZa`huytfGTw������϶�v���������������o-"&%#!!+FocG.G����������{xZ*"/1%.n�lcYNT]\Zbe]UUSKGGIOSXWYYSOPG?Reqh]\VUUMMMNPPT[\YVQMIFHGGD9#$Q�������������ø�u8Mz�QBJHIKRd������������������{z}}zxwz{y{vldbdbaO64>07<:;Pckikmqy����ȊSQLIHKPQSTSRTYg��������������÷�gef__ZVQMMOaz���������ú���������õ�j*/.(&%$#&?^jkprpnlkigdb^ZXSOKHD@@DILQRSZg����������}D
+,<GGEBAGSu���������xqkn~���|ysnplgedcc`[\YTVUTTVUWWWSRRQPKGD?5-1<>?=>?=74;Z�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������[����~U59A>4-%#@R]ZZ[XWUWUOKA;768BNV^djccgheaafgedfjfbdhhddcggeehfb_a_\\^^XVX\\Z[\\YWY[XWY]WZY\[YVWZYVW\ZXWZXYXZ[XWVVXWVVWUUXWVSRRSSUVWUTSTRSXWTSWUUTVWTSWTUUVXVWTUUTTQLGGEJNOUl���z��������p_ZYWW[]^cfknoqsvy~�����vxvvvwwuwz~���������������������F((){���K%R����������������~z|�{vqV47;&V��LW����C9c~������kn������������|yttw~���~zx������������������fu��ɠ{������������_(����������o.Pr~�������zrh_X[jN+<���q@$;QNNPONZ~��SINb��ь�������������������������ǥ���������������º��ú��������������ȹ������unsaUjjqwxqa@Wy������δ�v~���������������r)!%%# !+Svh@*L����������|vW+"/2"2t�o[NLQWUYee`XUQLIHNRUUTTVQOOD?Rhrga\WWTRPRPSVZ[\XWRLHGIGFB5!%v����������������^.McjZJHHHY^j�����������������{xvwxzwtsusuxsjecb`\K.2>>@>9<Tbghjmou����ǆRRLGIMPPRTSSSZl��������������µ�`hgdaZUQMKN^z���������¹������������h(0.(&'$"&Dakmpspmmjhgba_[VRNJFCA@GMPV[\]f����������};#0BMMJIDJTo��������}qmkmruspkjib_]^\YWTTPPMPSTTSSSRSSTRQOPMID9.7ADCDDDD@73=Z�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������^����~X48@?7.% !:Q^\][ZXWXWSQIB<458@IX]aaceeb^bfb`ffhdaehhcdfgfdgieabb`[]b`YX^_\[Z\[ZYZXVZYZTXY[ZXZ[Z[Y\[[\ZZVXX[YWUUUSTVWWUUWUTUVVTUWXUTXUTVWWTTTVSSTUVUTVTTUVWTSUWWUURLHGHJLOXw�������������s`[YW[\_bfhmoruvy~�����vwwuvvvuw��Ʒ���|unt����������o8(%&P��tC-c�����������������y}��{ujH173"7q����m"#Io�������`�������������zzxx{����|v{��������������ɿ��_���Ƣ�������������@$|�������ɬK 
4Ws��������~ysmikrD%7����nQ'?RNKKLKILLAFLRj������������������Ǽ�y����������������ʹ���������½������������������ɵ������nrucdlmrvyp[BZ{������α�x���������������j) %23;8$&$&/`d5)P����������}sS' -* 7y�lWNLMUW^eb^YUVRQPTYUTWUTRSQHARisic[W[XTUXXV^b]]YXSLHKKHH>/1�������������ȿ��<-MizsLELVqji�����������������zwtrsttusrnrtrjcded[C+/=A?<7<Qejjlmkt����ǀPSMFIOSSTTSST]l��������������³�chee`YUQNKQ\}�����������������������c-20)'&%!)Cajmqsqmmkjfba_ZURNIEBA>DINNPPNX����������}9#0DSYYUUZ`gr~�����{vskjhfggbZWPSTQOOPOPNOOMLNPQOOONONNONOLKKF<33CFFFGFEB>514Cy���������������������������������������������������������������ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������\�����U/4<?:1(!5O[_^Z[ZXUXWVPIA:56:?KT[aegc__feacggdbhiiffeeffffe_bfc`^`aZWZ_\[[\[[[ZYUV[ZZX[\\YXXZWXYYXUYYYTVYYVVUVTSUXXUZ[XTTWVTUWYVTWXUTUSTQQQNRSSRTVUWVUVXVTSTUURTPIFFGIMR]��������������j]ZZ\^`befkoqtwz~����}yvwvvvvuqr����xttyzxv���������^/*#+k}}�|9 ;}�����������������x�{ua<27+"M��µ��J-Xy������x`�������������zy{������}v~��������������ǻ�wg��ջ������������ʢ"=�����ĥ���0#?_u��������tsqkkrg=&Pw`g~�J*DQMKJKKHHJJLOUr�諛��������������Ŵ}z����������������ɹ��������ȿ��н���������������ħ������orrUSefmvvlZB^y������ί�y����������������h% ,Jqt}\#$&# -H|�a7'Q��������}vP$ .* 8��cLNHFR[[`\YUPZ[YVWRPSUUTRSQKDRlsjc`_`\\_`abb`^[[XQGFOKKH8)8�������������Ĵ�X./Q���jPSu�kx�����������������xvttqquusrortqjddfcU>)*>C;88>Ufhijmqz�����xPRLHHOUQSQSSS_m�����������������gifb`YVTQMRa����������ļ������������^(30)&'&!
)E_jmttqlljgdba^[WQLIFC?=?@?=ACCT�����ý���s?1.($&+4J^cefehlqruxz{xxpjcd`\UVPVMPQLOMMLLIJMLLKKJKLKKJIIJHHGHHGEE@79AEEEBA>;983-+2f����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������W~����X-+4;91*#1KY__]\^YWXXYUPHB8358AKUbhfbaefaaefecflieeddecdee`]eea__a^UX^_[\\^[[YYWWYZXXY[ZZXXWWVWYUX[ZXYUY[WUWVVTUWXVVXXVUUVUSSVVTSVXWTXZVUSTRTVVSUVTSSWWVUSTTUUUUMGFFEJNQf���������������}e]]]`ccdehlquy}����wxwwvvvvurgiy~������}}���������L)+(Ohit'%R�����������������ou��}xrT368%(��ig��qC<g�������gf������������|y|������zx���������������ö�jv��з�������������~P���}qc[w�t'Iiy�������moomnu[2*V]Jl��(.KQOMLKIFGIJKMU��⟫��������������ë~�����������ʲ��������������ż�������������������§������fso[aooswtkSA`�������˭�v����������������e&G��o2%%"Al��W8+Q��������|uL""-, 8}�SFLFDJPU^[XNLW^]YTKLPSUTPONICSnskhhffbhdedd`\ZZXUIHLNKJA3%C������������Ʋ�bD-5W���zv}��W�����������������wvttsqrqpttprrqhfeidZ>+77.+29AUfillmq|���ݾrPRLHHPTRTRSRQ_p�������������Ŀ��`jgc_YUQMMRc������������������������T(2.*()*%!+Ibilstolkjgca`^[UQMIEA><<:89@EI_���¾�����oUMF=70,+058CKUcijlmmopnnnnmjbba_]]XVVUPWWTQURRRQMNNLLKJJIJHE@??@>?@@BBBAA>=@BBB@;:51/-+('.c����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������[}����Y*%,572.&+FW^_^_\Z\YVWXUOG>6228AN]da^deb]aeeddijdcffeddeffbdf`abcb_[^]][\]YYY[YWY\YVSXXWVXVTWY[ZYZ]ZXYZZVUUXXTUVUUUUXVRWVUUWXXWWWVVWWXVTUTTTWXVTUVTSUWTUVSSSUUSPIHEFGLQSl����������������ra`_`babehlqvz~�����xuuusrvwwxukar�����������������o=1,b����U-j����������������vp�~zunG2:5 
9�n#\���-!Ly�������`}������������}{~������wx��������������ɿ��_���ɧ�����������οW)Oh0-Qox�������xnpootuS)/WYK��H2MQOLJHFFFGHINZ��ؐ�����������������������������Ȫ��������������ƻ�ˮ���������������̾�������hunfjmmpvsjLFd�������̪~v����������������_  1<QT>,&%" Fu|lL32U��������|~zpI('/+ ;vNGHBAFIOZ^\YSY]]ZOIKPPTSKLMIFQejgdfdb_c`a[WWTRMIH@GHDGD<1&K����������;�z]L?.6Tzy����XJ}���������~��~xoqtusppqonnnlmnngfiif[=/10+&/9BUehjimq}���ݸpLPLGJQSSSSTTU]p��������������¯�^jheaYSONLSf����������ʿ��������ȿ��T&1-)+299+!0Kgijrunlkihea^^YSPLIFA>>=<AEJNXn����������sga\YSNORRZ_bhilmnnlkjjeccaZ__^]]^\Y[\\\\YYXWWYYUTVTRQOOPMKC>7546:;??A@??A@A@A?@>962.)%"!'R����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������[}����Y)$',11/*!%>SZ\\a_][ZWWXXXPF=635;BOY_bdc_^bfedfigefgfdehfdcbee\_cb`]\]ZXY[]ZWY[YX[[ZWTZZYZYZYZ[]WSXYYVV\YVUUWUUXVSQUXXVUYYWUWYWVVWVSUVUSRUVTTVXUUVWUTVWUTUTUUWVSOIIFHJLPZ|����������������nebbbbbeimruy}����xwvuusuvvw|�wlez���������������e7;,1@KV@E���������r|{|��zw|��}yse<0<-/T4A���b+^�������w\�������������~}{{�����}y~��������������ǻ�~b���Ŝ�����������Ϯ21aiWUPD7+&5Zo|�������vnomouqG$.Poo�sJ!8KMKJJIEFFGJJQa��ΐ��������������ǿ������������Լ�����Ű��������ż�Ƶ���������������˸�������ktlc^_cntofEHg�������Ȫ|v����������������] +PQD+&&# 5ME`VL6T�������}|ykH%&-, ;x|II@>CIHMXa_\UV]a\PJINLRSMOOJIN\b[YUUVSMRUQNLKGEFHFHBBC@:5+O���������ɲ�h]YN>-5Pggr���oYX}��������}{~ukbiqtrppqoljgbdgheelkf]:/48/,48BXikmnmo���ܴkQQLILPTTUTTUUZo����������������]jiec[TNLNTg����������ɾ��������Ⱦ��Q%1,(4Ldj?#/Pgiksunmjhfcb]\XTPMJFA?@CHPYdjoz���������wqkhkdilphlqmollmkijgefda``\_ac__``\\\]^\\[ZWYZXSUWTRRTQPNI>82/0456<=@<===?B@A?>><961,'#  
+t���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������X{����[-$$%)-,*"!8OW[]]][XYYWWWWTNF<8579BNX`c_^cegffgiedffdbegda^^c`\`b_]\^ZW[[`[Z[[\Z\\\XWZWXXYYWWU[XTVXYXXX\VUVWWTXZXTW[YUUXXUUUVTUUVUSVWUVRSUUVXWVVWWWVWWWVTUWUVXVRLEFFGJMQ]������������������odcbceikntvx}����}wvvtututqy���}mkn��������������V46!"S���������~qsmllmlhnz}{xnU235$!H��I>m�������ee������������|||}�����|u���������������ö�nl���¢�����������̕9���������z1">_q�|v���zpnoprpf=#@�����T%?OMKJKHAEFGJLPe�ۼ���������������Ż������������Ϩ����ʵ���������º������������������ȱ������xlvod_^amslgBIi�������Ǫxt����������������Y!$C`[B$&#'9Y{yO+Z�������}~~wjF"$.,9xxPRMIOPJMW]^YNT]`YOMMLIMQLLLIQT^dVQOQOKKOOMMLHHHHJMIC@@=<;+K�������̵�na^\WM<'5Xp����xhkd���������|{{�~qcbjsvrnppoljfb_cdaeihbT6*394;;:DXgllklp{���ۯdPOLJLNTUUUTUW]o�������������þ�{^hfb`\UONMOj����������Ⱦ��������ǽ��J)/*+Cg�5 #3Sfjlssmmkhgcc`ZXSOMJGC@AIUbknsv|���������xspnmpmnnlmnjiihijggfdcbcb``b`a__^_^][\[[[[XWXYVUUVUSQQPMKD>8511210267579<=@??<=::;964-*%#$$F���������������������������������������������������������������������������¿����������������������������������������������������������������������������������������������������������������������������������������������������S~����^2#""#%'&",GTV[[\[YZZUUUWZUPF=637:DQ[^^`ccaaggfdfge`bfecaa`c]]aa_^a]YZ]^\WY[ZWXZ\YTXXXWXZXUVY[XW[[YVY[YWVWUXXXVWSXXXVUWVTTUTSSWYUVWZXUWYYVWUTTUWWVUXWVXUWXVUWUQKEEEHKMRg�������������������|mebdhlnqsvy}����xuvstuuutp}�����yljy�����������uH2.
(d�������������|wpgit~~{vlG-97  %S|�������^z������������|zzx}����~|u��������������ɿ��d���Ӳ}�����������u!V����������'&Ggv��qagpunlnnrpp_4!:fv���=,HRMKGFDBEHFHLUx�㰖��������������ķ������������Ơ�����������������������������������Ȯ������tpsgVVR_lvnb@Rp�������ťyz����������������W! =fmY0#&" #7\iaF.a�������}~wkH!%.*:w~d[XY]XOQZXYYQV``ZRRQOLNNH?GOWYcaPNUVPILOSVWSJHGGJSMF@@@=8'#W�����Ϲ�ub^]]ZUK<-:c����]Ypmb��������~x{��{okqzvootsomjgcbec`bgh`R9.9>AF=:G[gijlmq����ڮgORNLONPTUUTTXbt�������������Ŀ�yZhcaa[TONMTm����������Ǿ��������ƽ��B,/,0Lu�t, $3Rejntrmkihgec_[WSPLKHDCBP_elssu|���������zusrnljggecefeeefegeeecdabb`````_]\\[\YZXYVUVSTUPTSRQRPMOKEA?;6133/)&'),+166:899::8799941.*+,@x����������������������������������������������������������������������������ÿ��������������������������������������������������������������������������������������������������������������������������������������������������Px����Y0#!  !$%$'BQZV[[\[YVSPQTYZWLE:345;DPU\abbcdgffggeaabddcbca`^`a`^^_\Y\]\WUYXWVXYZWYYWVX\YZVY[YYZ[USUZXUVVWTXXVTVVXWWWXXUVXXWUYYXUWYYVUXWUTTTVVVWVVTXWTWVYVWVUSPJECDFGKSp�����������bf�������zjedhloquxz~����utuvustsrogdx�����uh�����������l62,'E15u��������s]����������}yrf=.>/29"/`�������xV�������������zyz~�����|wu��������������ƺ�|Z���Ͱ�����������κQ.EQu�������-Oky�~lZcffhjmloolS/4oh. /DLMJGEDDEEDFKV��㤥��������������ı�����������ñ���������������ƽ��ѽ���������������Ʒ������lro`X_[cmun\8Or�������ġv{����������������U.I_N+%'" #0EtiN4h��������~wjC'2, A~�bRU^bSOWVNRSRWb`TLKOSONJ@?LWXY^YMOUUOIJO]`^QGHHHLSNFD?;82%T���Ͷ�ud\\]XZYWSC/;e�PDMa`W��������vvy����urvuttuxqlkhcagga_adg^P7043786:I[chikpu����ب]NQPNPQSVTSSSV`v�������������Ľ�t]jfa`\UOLKTn����������ƽ��������ƽ��>,0-/Km�[%!
%3Sgilssoliheeb^[WRPMJHGGKWdhmquvz{��~}zyxurpkf`dcbb``^[_abb``bbab_^ba]]_^ZYXVVTUSSSSRRRSRNTTUUTUQQJC?=;85682(!!##%*/13569864787499745B}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������R�����[/$""!!"!!";MWTYX]ZZURKJOY[XSMD73369EQ\`aagffdffebbcdcbbdc`]bca^^^^\Z^ZXXZZYYZZYXYZWUXZ[YWX[YVVZZVRY[VVWWXWXXXUVUVVVXYXXYZWUWXVVWYWXTXWTTVVWXVYWVXWVWXZYXTWYSSPHBBDGKL[|�����������m���������xjdgjoqvy|����}tsuxusttqnlhek�����zv����������]35$,[t^1K����������~����������{wsV65>(7s�_M<' =r�������g_������������{yy|�����~zt���������������·�ii���ɴ�����������ϫ1N{�������J7Xm}��g\[Z^bd`ehnjK&+R80FKKFEGDDEFEFP^��ۛ���������������äm�����������������̱��������Ÿ�˽������������������������kspfcd]aprmY;Tt�������ġw{����������������R!@cpV%&(" &?t�i:3i���������~xeE$8C9& 'K��ZGIMHLS[NLNMNTZWGCGPUSNJJOUVTUTQNQTQNLMUbfcQILIGMKHBD?:60$!R���{ldeb]__ZZ[]aL/;f��KDJNY^`�������{rrx���zors|�yturjebbaed]\\baXN;-+)*09<J`jkjjmv����֥`PONORRRTRPRRTay�������������ü�laida^ZUPLLRp����������ż��������Ż��8+1,*Dld8&4Vllmsspnkidba`ZXSQMLLKOXeoqrvvuvssrqoklnppnmmkgebb^\\ZZ\[\`^]_`]_]\\]ZZXXYVSPPNOKKJMKONUXY\_cdfd]OC?<::94484+#!$#"(+02131135699=?AAEHw���������������������������������������������������������������������������������¾���������������������������������������������������������������������������������������������������������������������������������������������V�����^/$"!"! 
5KWSR[\Y[XPHGMW]\WSK>8325<GQWY^ddcadebaeea^`bd][aea^][]]^]]XYY[[VVZZWWYYWWWVXYXZZYUXZYXX\ZZZYXXYYWTTUWVTWXXWWWXWUWWVWYZVXWYXVWYWVWXYVWZYXX_]ZZX[XWTNFDEEFIMf�����������������������phgiprw{}����wstvvssttsstvrj����������������|I,.8|�Z$'_���������}z~}y�����{xqH288",������xifK"Jy�������[u������������zzx|�����}xx��������������Ⱦ��Vy����������������ɔ!+q�ľ������)"<`r~�y`PC=JLLNUbmhE ## 7QLHCCECCGIHJQg��Ԑ���������������h�����������������Ȱ��������Ĺ�ȧ������ý��������Ⱦ������lvtffdaivulPA[v�������ĝw}����������������L%MuwU%&! 'EvpK)3j��������~|vfF3[qY<0,-/2P�aZRDITTQHFIJKSRI@FKPRQRWTVWSMKNTOUURQONT]`\QJIGCEGDBB>:5-$>^QDO]bbb`aigcbmkK-?g��cb_eoma�������slpy��sahpu�~lptj^\\`X^_YWX`\VQ:)-/1<@<L_gkklpt����Ԣ\RQMPQRSRSTSUW_w�������������¼�lbhecaYROMNVn����������ż��������Ĺ��6.0*)4B6%$8Vkkmvtnkjheb__ZWTQPOR\hnuwwwwuonmhec]^`hhnmmmmjhfd`^\[[\[\^^\^ZZYZZXTUURRRRMLJGGE@BDIRX`fiostspePA>;87::23740+--'$'-0132458?BACHN^dap�����������������������������������������������������������������������������������¿�������������������������������������������������������������������������������������������������������������������������������������������T�����]1#$"!   .FUWVXVY[YRJDHSZ[ZVSK=6215<CNU]bddefeddgf`_aec_bed^[]\Z]_\[VX[\ZXWYYXWXZYZXYX\\YXWYZXVYZYZ[[ZUXYWVSVWWSTXXWWVUYYZ[ZWYZZYY[XXXXZUZXYZZZ[ZXZ]\Z[\[XXWJBBBDHJKl������������������������ljmpswz����vvuvutttvvwyzyw��q}������������p4%$=L:6q��������~\glo_n�����}yvb;19/7���������70Y�������uY�������������w{z�����{w~��������������Ż�T������������������lI����������*%Dhw��t]J:9D@@@Meme?&&'#"!
#9THEBCBDFJJJJQx������������������ĺ������������к���������������ü�Į���������������̿�������kuujlmnvxsgO@^y���������p}����������������M#RtnS"&%  !(M{zN*<k��������~}vhBT��x[E=:71Q|wc_WNXRNGBBBFKMTGCFLMGNVZWURLEK[ZUVWTQQQU\\XNFFGCDDEFDC>51%/65<P\_bbmz|tmosmJ,?m�������|Z�������nlpv{z]R\hvtgfum]X[]\T`[SNXb\WP:,8==@=?Majkklnv����ӝ[POOSSTUTUUTSTcz�������������º�pbhec_XSNMNXq����������Ļ��������ĸ�{0.0+'*,'"   '@_ijmrnjffdb`]\VVXV[]foxxxwvvuuqnliid^^]bekjlmljjhdcba`__^]][W[XYWXWTSQPNLJJHGEB>937AM\glrsvyvumYE;:996574643114/*$!"%+15689;=DEDDFPc~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Q�����_4&##!!! (@OWVQRVVXUMCELV\]ZYSF<5005:CP[^cdffb`deb_`a_[[_b_YZ^^\\^\XY[[[[[Y[YZZYYZ[[ZX\\ZWXZXXX\ZXWYYZV[\YVVWXWWZYTU[[WWYZYXZYYXYZYWZ[Z[XY[Z\\[[[Z\\[[[][WYQEDC@DGMTn������������������������|pqpry|~����svttuvttuwz�������±���������`'%!.GM8 E����������uzoq������~xrV.4;*!(+^����s9g�������jg������������{yxy�����}{s���������������ø�kZ����������������̶FW���������� *Lmx��rZJBEDC?<Vfq_7#AVTSPNM 'BOIEBBBDDFJJL[��簙��������������²������������ϱ�����½�����������˽���������������ɺ������jxocfjiouteMC^~���������m}���������������G&XojP&!*'#  )LxyN*9m�������}wdEY��jOB:0(!Mpo]\TVWNKFCACFHMRFBEJJJSYXUKJEIQ_]XWXVUTUUXYRIDGHEBAGIF?>83$,54=PY_dl~��~xsoeF0Es�������eZ�������}vuuvnQJThocWis^USUWXW]OFI^dZTK2))().38Mbkkmmnx����ΘSPQRTUXWUVUTSXcy�������������»�jdieb_XUOMPVq����������ú��������÷�w213+%)($"!"!
"-9Nhhhknjeeeaa`^]^abekmrrtuutttsuutqmlid`_`bddeghiihebc`cabb^_][\\YZYUSOLJEBCDCB@;4,,3CR`kquxxyvrhVF=:95.,//0-,//1/-***6:=>>ABFDDECD?Ko������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������O|����b1&"#!!! !"8MY[WTTUWXOE?EP[__[WNE92/15;DQVaffb_bec__``\^cd`]]^[]]^_]YZ]ZXY[[XXYZXT\ZXUX[XYW[YYY\ZZYY\ZYY]\YWWWVVW[YXZ\ZXX\ZYZ\\Y[]\ZY[]]\\]\[[\\X^[ZY[^]]][YQB@??BGLR^s�������������o~���������zursz}����}uttutttusq�����½��������������R&*7x�u9&Z���������}���}�������|ujH,78"5^lPe�P!Ix�������^p������������zwwx����|{z��������������ɿ��Zi����������������̥/A������Ž�=2Wnz�~k[PIEFFBIammT0#S������u+CKFB@BBFGHHLTv��ᡡ��������������«|�����������Ƣ�����������������������������������Ǹ������}kyk[]^alrtbJ?a�������Ѿ�l}���������������{DAciT!('# 'KvrH,9v�����{��~tb=4JK>0& !RvsY[YY[VURJGIFAQICBFKMMRYUQJHILVe`]WZ^[\WUTSQNMIGDCFIIA?<61"*25DOYchy����}vl[=0Es��kacllf����������~zjFB^ote`jbMLURXVRLFEUb_[SL-!+9Obikknpw����ϒUSTVZ]cb]ZVSTZg}�������������ú�khjea]XRNMPYq����������¹��������ö�r.04,%('%! !"#%%$&(+;M^ksqqrrnkihhgfhefklopsrrqsrqpqqusrsqplkieb`b]\_adeba`_]ab`_^a`\[[WWVUOJA=876>B@@:0'*4ATcmsvy{xtocTE?;6/'""###%*+(*,-/;HF??>@CDABBDB;?Uv�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������O~����_3(#$$$!
0IY^YUSRPWPH>?KV^^]ZSIA50.15:GT_ec^`dd`abaa]ba_[]_`]]\^]ZY]\WX[YXTUWWWPUUUXWXWWXYWXY[[YWZ]ZZ\ZZWWYYYXZZX[[][YZZZY\][Z]`\Y[]\Z[\[]\ZW\Y][Z[\]^]]]XK<?@@AFKNLXr������������������������}uvz~����yvssttsrts������~rgg{���������w='(T~}T1o�����������nKq������~yte@/915R;P�����)T�������zQ�������������{yy{}���{t|��������������Ƽ�F}����������������Ǉ0������Ǿ��= ;_nx�zhXMGEIFFQ_ofM,&>OKh���R4HLFB?CDFFGJO_���ۓ���������������§y����������ҹ�����ʽ��������Ľ��Ͻ���������������ƶ������wnxm]]]\ajn^HJd�������л�p|������}��������|B"TonJ!'%!"$Ljl?5:t�������{ra3"-7+  U�sZYZ]\^ddXTOQIMHECEHJLSVSOOMLK[g_YY[`][WVRQONLIFBFGHFAC?7.(15COcty����|uh\8-FjwNFJYm�uo�����������{hLVn~{da`SDJNLPOG@?ESZ]ZVJ-'9Qailjlp{����͍UTWZaiq{l^XUSZg����������������dfjdc]WRNMPYo�����������������������n,/2+%''#""$$'**114=JRf{�|{zxvtspmlmlnlnppoopomlnppppqrqprqspnljedc_][]\ZZZYWVWYXXWWXUUUVWXVOC84234>A??8.',6DXdlrvx{vtnbP@?90' "#$&&3DHG@:69=??CEA>47@r�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������T}����b3(&$%%%$# &<V]^VQQPTPKE=ER``]ZXTM?60005>LW\]^acb^ce`^]ba_Z^a_YV\\[XZ[ZVYYTRRQRNKMMNOTTTUTXXVWXXYXY\ZZ[][\\Z[[[Z[\[[\[[]\[X\]Z[\][Y[]]][^\]^^^\\^]Z\\_\][^]TF>BA@CEIIFKYm������������������������uvz}����xtttsssqss��|ledkmsz�����������d1&"&]�}RB�����������ZGq�������|wqZ61;)F���z��ƒ'7h�������oY�������������~|~�����}yx���������������ĸ�mP����������������˿d B����������4#Ehs~�|dQE?A>=?LeoeG$0���P6HJEDC@AFDFJNv���ӌ��������������ž�{����������˭�����Ǵ��������Ĺ�ϻ����������������ð������tuy`K`\PSek\=Ng�������ӻ�n}��������������|>'Jgq: &#!
#&Db[IC=u���������~s]2.:-'a�pQRY\aeif_ZWUSMPPJJKGJPROLNLLNU`[XZZZ[ZUUUTNIJIE@CB@CEE<5-'/5E]y��{y{{upql\7,FY[X_p���cg�����������{iMe��}f_YK@B@EMIA=>?LS]^ZE)(=Pbknkmrz����ƇUUZcoq�w\YVV[f�����������������chkgd^WPMLOZt�����������������������i-22*&'%"!!%),-16:@CKT_hqx��}~{xvusqnpplllnmmnnmlllllkklnpoppppoqnkjhfc`ZYXUTPLKJKMPPQSTTTWXZ[_VB85336>AA>81-0:HXbkostvvztfN@?5+
+	
+#*8FIIB:558?@FB>956Cv�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������M{����b6)%%%%,,*$!1P[_ZUOPRSNG>@N[^^[[VPH?83/27AIOW^``^\ac__``^Y\``[XY\]Z[YYYZZURSRPOKHIFEINPRRQSRSTTUUWXZZY\\[\`\[[ZX[YZ[[Z\__\W[\ZZ^]Z]]]\[\_][[]_]\`^ZX\_[[]]YN=<>?ADFGCBFLWk}���������������������~uw{~���~vuuwvssrqoppouvwy~|~�����������V&(4q}m=%V���������\]��|�����~zviI.49%%�Ż����RDq�������Ze��������������������|xr������������������]f����������������ȯA N����Ľ��ǌ+Mkx��wWHCGHDEAPin_;"'Di��i9$=RKECCBBDEIIc��������������������Ź�x�����������������´��������ù�Ȥ����������������ķ������ltsaewg[]ciX:Pj�������ϵ�hz���������������{=&HiY#"&%"  %3Png:<v���������~q[/*3'*j�uY\^bghhdb\\[[Z\]XRKGDHGHHFEINXa[]]YWYYWYVTMHJEC@<;=@EA;2*&/5Hgusmjkjg]YejX5+GWdt����^Ef��������~nzygPfy�s]^XI:;;BME85:BKU]\[B%)>Qcjmjlpx����ąWUZekg��y[XPUWg�����������������efieb]WPNNOYt���������ɾ��������ǿ��c*12(#%%%$&+049=EINVQ[bhksyz~||{z||zvsppoookijiklkkkiijihhhgillmomllkjiidb^^XUOJD>=;>BDJKNQSUWZZ[\[TB85437?BDDA:6:BQ[diklot|�~qO>>;1#	!*;BCD?:834>?A>760.Cw�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������M����a7'''&'/20(",L]b]XRNKLRPC@HU\^^\XVOF>71036>JU]``acb_``c_][`b]Y[^][Za\WX\WRRTSSPLHCCCIKNSTRQRQNPSSWUUVWXXXYZYY[]\\[]]Z[^_^\\\][]_^]^_^ZZ][^X\_a^\^[Y\Z]]]^\WJ?:=?BDEFEBEHQXi��������������������uvy{����wvuttvtsurkqwzyyz{}}~����������z@%)Iig[.1q������������{������~yxd@,83*OJc����"%Qv������}O}���������������������{yz��������������Ž��R|����������������Ȝ#&n�ȸ����εO
3Wp{��tWJHNMIEGWfl[7"*\�����z(%AQGDCCCCDDJN}���鯐��������������³�o�����������������ɿ�������ǿ��±����������������¯������ovtjgf[]agkS=Nj�������д�k}���������������u8 59$""&&!"(Aw|P6B|���������|pU+),%(j�wgiglikb_ba^]]`]]\XSLHGBGGEHNQW^`ab_^\XUVZOGGIDEB=>?CA;:5*(/6Leh`VTXSKQP\cZ3,Gd����vF:Ko�������xlm{rV\��b[b\J@;5:IF848;N[_[P7!*=Thlmkms}�����~PX\dgk��qXWTW\i�����������������`iigd[SPOLN\w���������Ǿ��������Ⱦ��],13,&%&)*4FY^bhnssotttwyz{~}|}|}{xxwvsmmpmoonnonomkonopoomjihiggggfeedc_a^\RMIB<92212:ACGJNPSVYYZ[\YPC:89:AIKNPNNOT[`bdddhox���sL??>7*	'49?@=:8319?=:7/)+8t������������������������������������������������������������������������������������������������������¿���������������������������������������������������������������������������������������������������������������������������N|����g=''$&*/45.("&>Ua`]YURNQOHDFP[aa^]ZTME=3-/38BPWY`cb_]``_^\^a_Y[_^[Z^^\Z[[TNNQPQQOKHGGIMPRRRPQRQQSVURRRSTVUVXWWY[[[[\]\a`]YZ\\[]^[]__\]][][_\^_^^^^]\\_^`_^\TH>9>ABDDEEEDEHJUg~�������������������svxy����utsrttuswtsvxx}|{{{|}����������k3(#"Ykv\;������������tat������|xtX6.9- Fnp��^3_~������iR��������������������}zx���������������ø�iB����ʨ����������Ļx'��`����Ç0 :]qz��nRIGB<5:EYjgO2)]������f(FNDACCCCDEOh����⠟����������������~�����������°��������������ż��ɻ���������������˿�������nxvfZGWelqhP?To�������ͳ�t{��������������q5&KV;"&%  !)HsgOAA|��������~yoT) +.# ,p�vmljlheX]`a[\\Z[][ZXTNKLRNLOSV]bcfkgc`ZUTSJGGFGGD??@CB?<6)'/7JX^^[SRPPOM^bQ1/K{���uVNQYx������|qq~��~b_{fNU[^M:74ATF;56AT\aYK1 +>Rgjlkor���޿zTV\gjr��b]TVW^j�������������þ��_jjfa[TONKO^w���������Ƚ��������Ƽ��]-56/--,/2Fhuwvzxw|{{zyy{}}}{zyyyuvwsqonnorrtuvuuvvxuuuwspqmjigfa`\WUWRNKCB>62/*,,)+.28=?BFILOPTWYY\ZQGBCEJPTXY^\]]_bcaa_`do~���wVA?=:3)
)27:;<6228;:63.)*1I�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������N}����e?+(&'),132,&!5M]b`_YVROLLHEOZ`__^]YPKD931029CMT\b`]^bc]]]ab\X_`\]\^_]^^[SMNPQQRRPNMNMQRQQRPSTSSUTRTRTTUVTTUUVUUVXYXZ]\[ZZ\\\Y^_\\^a___]]\]^`]^\^ZZ_^]]^^]ZSB:9=AACCCAGEHKMMYi�������������������vxxz����wtsttx{zzwv{|}~{|yz|~����������V*) 8syjE'V�����������]=i�������|wkM.27&9�и���!Al�������[e���������������������zwv������������������KV���ؤgd^b{����Ĺ��S@����������@@cs~��lYPE:68EMaiaK-$K~��|�n/,JKEA@ACEFHU~����ߑ�������������������à��������ů�����¾�������û�����¿������������ʽ������|ozydTZfjrrcKBWt�������˴�p}������}��������v4,s�oB%#%%! (GpsZ;C���������~xlS&*.& *u�pjlhdc_T\`a\\Z]ZX\ZYSQROLKLORWchcfljhd^XRNJDCFHHEAACCC?<6((06GS^hhc^bdbb]XF)0Me��{qlijjz������rpy}|��lkmLGS`aD941DQF=7>HX]bYK0"+BVckmnnt~���ݼsRW_jq���mfVVY\o�������������¿��akkfaWUQNMP]v���������ǻ��������·��[DE>9?><=Kdvx{y{||}{zzyy}|z||ywvvtuusxtvvxyyy{zxxxxxvxtsrpnmjhb[PHGB<550/,*+('('''&')-68<ACFLPTXXZ[]]XUW\[Y[\`ba`bbb`^__]\cp���xdB;8741,(#*03785225673/+((2Gh����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ku����iF3(&&&(-22/(& .F]bbc^ZUPMLHCJV]b^\]ZVSL@93.-18ALY_a^`ac_```_]b`_X__`^_a_WKGHJORRTSRRPNNONNROQRRTQQUSRUTSTTUUUUTTQPSTTUUVXYY[Z\]]\`_`\__^_\\]^\_]\Y]_\\\^_]XQ;589;?ABDBEEDIHBGXr�����������������{uux|���~xutu~���~}�����~���������}G%*Jcnl3.q���������{`V��x������{rbB+3/4������b&!Nt������|Jx��������������������~xtx��������������ƽ��=s���ѯ��la������j��;E����ս����#(Ijv��hHBA@>FMRbd\E+"U��eNV(
!:NKDCABCHKRm�����τ��������������ǻ������������չ������ø�������º��ͼ���������������ȷ������wqzkQceekumaGA[v�������ʱ�r}������~��������p2"\xD=0%"'#".\trX<A���������wmR&).'.{�medaaaa[]_a`_ZYYXZWZPKMLNMNQSXed]bd___XTNLIEDGKHJD@A?A@<4').4?Mamoqtwxrj]J5)2GWPUaiqplcz�����}nswvt��rm_9F\hP@;77JM=6<HTY[^[L2"-?Xelpusu���ٶtTYcw�����}OQX`u�������������¿��ckifbYTQOMP\u���������Ķ������ý���zd\YWV[VZdpzz{||~|{zzxvvvwuwxvwuvvwwxxyy|{{zzzywwvtsvsumgca\VPJD=3-(&$%%%'%##$$$$$&'((,259@DINTWY\]^_`aa``^][[]]_^_\]\_]\ZW_kx���~e:6787772( ',.-./,244.,&#-Ec�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Qw����oP=.)(%%'.1/,(#%@[`edc_[UOLGCEN[ba``\ZWRK?62.14:GOW^_dd`^_aa\_a]]Za_][_`[THABDHMPUSRRSMJCAHIKMMOOOPQSTTRTRPRTSUTUSQRRSTSRUTUWVXVWZZXYZ[\[\[[^[]]^_a``^^]]][YK<89<=??BCCCDDFFCJQZt����������������ttvx|���zvtttpw}~}}��������������������n:))Tx�Z=����������y~���������}zp\:.2,ExWMtb!*St������cD���������������������}xw~������ż������¹�o?�����ù�{r�����pl��%!i����c~��x/Uox��~^RQRPNOS^fi[@,=���~n[%#BNFCACCEGJY��������������������Ŷ������������Ϩ����Ҿ������������ѻ����������������ɷ������sqvhkwkckul_AH^z�������ˬ|s|��������������k+&C8&%,$#'"$0_yrU;I���������wkN%,0'4z�sba`geg_X_bb_`[XTRRSJFJNRRPPSXgbUX\\\ZSOKKHGGMSUME@@@@>:5$ *,3?IWgp}���wjUB1(0FPQPRMQ[XMv�����|yrjw|�}olc[^qWA<?CEQN67G][\\[YK5"0D\fpy�{t����۵kP[o������xUUXav�������������ÿ��ajhebZSPNKMXn��������ɼ�����������xtqmmurqrvz{|{{{{zyzyyvtvvvwyyyyww||}�}{z{{xwvtvrojda]XSNB83.(%"!    #$#!"#$$$$&()+/7;CMX\][]`bbfgghgfb`\\][XZXY[Z^[YWUXdpz��|Y.1579954'
+
+"!!#%'--,)&"!)Y�������������������������������������������������������������������������������������������������������������¿���������������������������������������������������������������������������������������������������������������������Mv����nWD2)'%&%(+-,,'9R_eddb`]TMJ@>HU`bba^]WTRH<83/03;DPX]aa```_[]^\^`ab]]]_a]QB@?ADLRVTTOD6.4BKNMNOOMNNNKLORTTPTSUVTSSRSSRPTTUTSTTUUUUUVVSVWVVXZZW[\]^\[[ZZ\Y[ZTECCB@??=@@ACDDEEBFJK^}���������������ptwz����vtrruy~}yuvu|�����������������^00*9l~uO!R���������{���}�����}zumO013'MtvmOa�e.[{������X^���������������������|xv��������������Ⱦ��\R����յ�{Xr��������q"f���������_#9bpy��t[]^`i^\bdgdT<%Du����a$.;JJDBBDCDHNm�����鬐����������������������������Ɲ����η��������ƿ��ǥ����������������ë������ntvkiia^qrm`=L_}�������̩zp~�����|��������g-%Cec32!#'$ #4gnH5O���������}xlJ%+.# 7~�jccggfe\YYZZVVVRLKPOJHNPSRPOS[a]TY\\\XTPKMIJSX[XKCCBDD@;4$)-4<CL]r����yeSC5*2FORMJGD@EEv�����|js��}ywwqq_ENQMNUWL@=IW^YV\VN5 #2E\hw��|t����ڲfQ]r������oVVX_x����������������~dihf`YTQPLNUc������������vxwy{{z{zwwxuvwuvxzzy{zyxwwwz|yz}|~~~}}~�~}~}{zyurpjd^]WMIE?:4/'# "!"""##"##%')-3<K\bhgc`]_cdfffeffghc__ZZXUUTVXXWWTSYemuzvH+0224331&	
+###"#! $)Jy������������������������������������������������������������������������������������������������������������¿��������������������������������������������������������������������������������������������������������������������Ps����pYE6+'%&'&'*,,(!1O^cdca`^[UPE?ANZdcaa^ZYVPH@:0003:FRY^_`bc_]^a^__b_\^a^]XJ?<>=CKRVUQF7),<KQTSTUTSOMKHHKMRRQTRSRRQRRQMJLQSSQOPOPRUUUUWUWWWUTUUTWWYZXVWVWXVXXVSPNKHB;9<;<=@BGEGGKPQj��������������yttw|���~vtssv~��|st{����{|������������}I/:!=\qn<(g���������igkiaq�����|xriD*46#F����Õ!=d������xDt��������������������~{vr��������������ƾ��Lv���ۮcgfd}�����w��M6��������2%Cgs~��m\YZbh]]`dkhO5$B�����A%5>GPGBACCGIV������眠�������������Ž������������²���������������Ľ��������������������´������lxyhZ^bjurpY?N`��������ɪxu���������������d)*q�>.#&$  &2d�b4+O���������~vjH$ -0# 8��khegca]VSSPOOPJ==AHMLOQRXXPKO[a\XXWXYWUPJIKPUZZSIFIJJMF@4%*-39?GUl����weWG8'4ITQMHECBADl������mk���������fNWpeUSPPPHEFUb\Z\XK3"0E[m|��{����ثgS[s������jWUYfz����������������viljgb[VTSSR[k|����������~}}~|z{{{{{yxvxvwwwwxyzzz{{}~������������~�~xtpqpg]ZXM@83,%$&&&('# !!"!""!""%*.5E]fiiqnjiccegffggiihhgfd]\YVVUSQRRRPQQU\]\R6-((&'+/-' !!$&,1K�������������������������������������������������������������������������������������������������������������Ŀ�������������������������������������������������������������������������������������������������������������������Ms����mWF8-''&&&#'())&,FXbcba`a`]YLC<ET_bdaa\[XUNG=3//.3<HOV_b`_]_`a]]`a`_`a_ZPE<9;=@GOUVRG6009ELSTUVWTUUPMKMSVWVVVVPHIMOPLHHNQPKFHJMQUTVWUVVVVUUUTUTRSUUUUWWWWVVWVURQMH=769;<=@CCFIJJOXs�������������rsuy|���yrtrsz��zwz��������������������s9/3Ep�n/5{�������������~��}v��}wq^<175(���ɇ%#Im������f@���������������������}wwy��������������ú�yQ����؟nyyz��jkqeg��4@\_j�����y$
(Jjv~��gRFELLGNP`kcK1*g����X#,4INFBBCEENr������ݎ��������������Ļ�u���������������������������Ĺ��Ƕ����������������ƿ������oyvh`ilottnUBMf��������ģss}��������������a'&EA#$" %6i|]-,P���������}vkI$$/-" 9z�^YUXXWQOMIKLKIG>;>GJLOPQUVMGRZ``XWUVTWWPJJKORVXQJKLNQOD<6!(+29>DNfw��~tg^R=-6HQMJGEDAAKj�����{n���������{KEczlLBDJQQHP\\YWVSG4 .H]p���y����اbTVj�����z[VRYgz����������������wstpkgb]^_cjz��}|���������}~|yyyxwwwxvwyz{{}�����������~{{zxvnhhc[VNNA>81.)&"   !!!!"! #&+1B[kiginssponkjhhhggffgigdec^XWSRNNMMMONNPNI>4,%$-.+%!&,/9Eg���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ks����jSF>4+)*'''')(((!$>U^`bccdda_XE<=I[dfcb_\[YUNF931,.2<EPZ`^[\__Y\^_`_```^VL?967;>DOUWVPD=59>DKNPUVTWWUSRSVWUVVTK<=BFJNMMNQSNFCHJKPTTVWUUSUWVSUUUTSVUVXWVXXWXXVVUTUSOA8359;;>AABGGHMSe�������������stuy����wssrsv{{vuv������z�������������e1-)"W��\$H���������������������|wlT038+1��ë�\"*Su������UV��������������������}xt��������������ȿ��`Z����Ӡ������{phy��#T|lh^c}���1Ulw���bK?;H=AGPgo^H0+T�}{L#4MJCDAEDKZ�������׋��������������û�q�����������������ϲ��������¸�����������������������ÿ���myr`^cdmrqhK<Nh����������wu�������~��������`$%6 !%#!'6mrY,/S���������}wlH#!.,! H{xOKOOPQOMLLMLLPTJHIRLJIMOTUOJOX^aWUWZXZ[RKJKKMTSMMMKPOJDA7)-17;AO_qz~}wofW<.8MPMKHFCBBIi�yp����������x��X@EidIMMPTTOTYXUPQMG5 1G^q}}~uv����֣_SYd{xtjf_WVW^jy������������������~{vrpqsxz~��������������~~�z{yxxxwyzz{}~~������������~}{y}yrnh[XOI;4-/.,,-+(''$! !! !""! 
"&,:Zikhgimquxzvspljggfdceecdebb][VRONKHHILKGC?<5/$'-,( !&(/4Gd����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Kr����fHHJ;2-'%#$#'&(*$"0N]aab`babb^N?8BWbgc`_\]WVQMC94/-04<GSYZ\]^^\_``^`a_][QF>845:>DJPTUTRKG?=?DFJPQPUUVTSTVUWTRF9>CABEHNPRRPMHFIJKMQRRTSUTUTSUUWVUTSVWVTVVUUWVTTTVWTOD946789:=ABDFHLRYn������������qswx����wsrqru}|wv~�������������������P)0"/\x{>*^���������igsvlit����}wsfB+3:%&~Ϩ���i28Yy�����w@k��������������������{xvr��������������Ǿ��Ml���߼pT]`l��lxzx~�c"k���������m 7]ry��x^KAHHGIFSfiXB,1�����f  :QJDCBGHPo�������ȋ�����������������~����������Ʒ�����;������������̹��������v��������������qqycX\_gotofM>Uj�������ó�sw�������~��������]%.076&#$"!%5^jC-4Z���������}vjE!#.-"AqqEGKMOLMLIORUY^YTTVULLLMNRRLFNV[\S[gg`]VPLKJIIMOKJMNSPGA=.!(*279@KZhtz~|uhW=*6OONKHGDC@Hl�pz�������zu`gymSORkt^\`WVYTRXZXQNOML2!1G_kpmnms����Ӣ]VWTXR\YZXZ[_do~��������������������~||~������������~~~|{||z{zyyz{~������������~~||yxsnlg`ZRNC8//,%#" %'(*)((&#$" ! !" 
!'2Klmmkjhinqruuwupmjfeb_acacaa_^`\XWTMJFDEEB?<:4+"(*)$&2<HVe���ô������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ot����fMLND46>CFTWN>1)($*BVba`_`ceba[I;AP^gecb_]\ZXTJC;30//6?IRY]_^[\``_]^a`^[P>97459=ADMSWVXUWQKGFDGIHLOOPPOQOPQPF8>IIEB=>EKNRRRONOKGHJIJQRQMQRQUUSUTRTUVTVUVSTVURTSSRTQG935677;;>A@AGLPTa}����������rqtx}���|ttutz}|wy��������������������xB,23q�h!1z������������wcJn{���{wr[5(95=�u	6ig/"Ai������f@���������������������{xux��������������ĺ�zK����ܳ��������~x��B(x�������˽@%Ccr{��mTH?CEGFJYheS<+<|����R&>QFCCCEL\�������췓����������������������������ȯ��������������ſ��Ѿ���������n���Ǽ��������{dpjY`ltttupdJ@Tl������Ư��zy���������������Y#)Lty�c("%!! $0G?*-2Z���������~ujC "//%AopGJNPPNRPPS]^^YMPQTSPNRPNQNFFNUVXVcmlb[WPNPNJJKMKKNQROFA6+ ()179AIX`mw}}ui\=,;LQPLHFDCCOq����������xrbhkgaTcumfibW\ZGXgaZSNLHE.".K]hjjmqw����КY\YWXZ]_ghijoz���������������������������������������~}z}}}~}��������~~{}~|xwvqojha[OK@>>9;62/-.+)'""&+))&%%%#" !!!&6Tmrspmlmmlnprtutspkggec``^abca`\]]YTOIB@=:852-$()(%%!
$/Hj�����������������¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Fq����jJEPRV�������r>''%!#:Q_ba_`bdcb`VF@O]ggfeb`^Z[WPKD;4/-06<JV]^\Y\^\^_]__^ZI<76478;>BIRVWWWWTSSROMLLLMKMJHKLLMC8<GMNMHC@ADGMTSRRPNHC@?CLOOMNNLMMKMPRTSUUQPQPPRQOLKLNQOI=53656<<=??@GGCQVi����������prsx~���ztprqv~wtw����~�z����������f103#_nZ3#G��������������{v�|z}~{woR-.7,5d@'1B�s$Qr������RK��������������������{vp������������������iX����Ѭ�������������*#Y���������*(Lht~��gN;2=EFKQ\iaP<06\lq��L ,EMDBBDBHo�������ꬠ�������������ǽ���î���������������Ǳ�������ļ��Ǧ��������y������������}ncePUejtwutr^CCXr������ʴ��uw����������������T@���wN##&""&'%%+3\���������|ugB##21&HupNSSTTY\_]\f`WH?@MPPPRURPONHMQSS[biji^UMNTSRLKJIFHOSQKE<1&#)).5:AHT^gpxyvnd=+>LSMIGFD@CRw����������|vjkol_cotobVW[^YQbkb^ZPLMH,".Nbikkmsz����ʚhhabbfjqxzvu|�����������������������������}}|}����������������~{}{xwwvpmkeaYRME>540-/323332210///-'" (*(&###"#$"   ""&<Zquusrppmkkjllmpqtsonkgbb`b`a_^]]^^[UPJDA:50*$%)''&""#"!  
$%"&9f������������ɾ���������������������������������������������������������������������������������������������������������þ��������������������������������������������������������������������������������������������������������������Jr����i;:Xx������׾�c4'%"/KZbb_bccccd^QGLZehfed`_\ZWVSOE:40-05DMW]^[`_^bcc`^[TD9765768=BHNRW\YYYXUWXVTTUVTVWSUUQK?DMPSTRSONECBFKPOONMH?53<IRTRPOKGECDJPTSROMJHFFHGD?=@FKKKA724678>?@?CEFKOS\n���������lsux����usqqv�|wv|����~��������������R-4++G9&_�����������������{u}}ztd@*/2#"[~����,4^~�����t;e�������~�����������|yww��������������Ǿ��]p����Δ[ax�����u_s�wO��������ƌ0Wpv��}aQAGTWVPWaf_K87f�����E'#/DJEC@DDY��������⚯�������������Ļ�}����������ʷ�����й��������ú��ĩ��������y���ð���������|jb^rievyurpZDJ[s������ҹ��ny�������~��������Q7XuykO"%"$"##$*6`���������{uhA"'23$#W�u\[XZ`eee_`aWLC@DNPOLQQOKJNQPRRZkgegdTIRZZSNJFEIEJOPIE?90##)+.5:AHR\elu|ys];*AORKHGGEABS�����������zsxqjn{ykQ1FTY]^_jlfaRTLPH+ &9Rhnpst{����ɹ�}wtsuux{~}yz}���������������������~~~~~~��������������~}�yxwsnicdeb[SF=661.,)&%%((+-..0111011//'&""! 
"+.-*&%$#$$" !"':Yrvywvrrponljhilmosttqokhecaa^^^]_\ZYURNJG=4*$$#$%#&'''&%%%&(''&##5i���������������ż��������½������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hq����i;9t��������ͮ�E+'$ ,DXbd`_accde_WOJVbffgdba_ZXVUSLC94--1<FRY]_^[]ab]^]YL?5348779=AFLRX]ZZY\[[\ZYYYZYYY[ZZVTSTSUUUVXWXNI@;>EHKLJE:46ALTTTSRQKA94>MTTSNG@879AE?748:CJKC942469=>>>BCELLPXb�������xpquy����trposy|~{yxutu|~}�������������zF-,3TeH"3p���������hrywlz��y|�|yr]4)//"m�ж��Y Al������f;}�������~������������{vvy��������������Ż��]����޽zbes������{��_Z���������]!8^ux��]OGLPPHBTbbYF76l����kE2'5HHBCDFKt��������ڑ��ϡ��������Ĺ��������������¨���������������·��Ʒ��������}��������������}ibbi[`koqpkVCI[s������ռ��iw��������������{L 7j��[#!# !!#"$*6b���������}vi='1/"&]�wc\V_fhifb`^RG=:DNOKHKONJLONLNNbmfa_WLNVWTOKHGAFEJKKEEA<2$")+/59>HRX^fowzv_8*BNOLJGFEA@Q�����������{|yr{|cSH/$ITXfkdjlg]XQJRC' (/Dcuy|�������������}||~�������������������������������������������}}{yvsqokhbZLD@BLNF;4.*-//+)(((&'(,++-00100-.---)*(()-0.*('%#""!"  $(:[qvwywtusspmkhiiiknprrrqpnhgb_]\^^[ZXUVRQMD9/!$),,++))))(('('&"" %K���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hq����j:2}�����������U*($!
 $;Tbea_]bcdeb_WOS\gigecb_ZYWYTQNA61/16?JV\_^]^bb^^]SE8/14877:<>CHOTXZYY[\^][[[Z\\\[^\Z[XYXWXWVWWWVRMB;<?EKLNIA@?DKORQQOMM<*-=MWWVRM=106BHFC@<;=DJF=60027;;<>BEGJMPSYn�������mqpsz���{uqpnnrwzvwy~����z�������������l4&%>~�UA�����������}scm��u��|ulO.,2*O��d��qV*Ks������RJ��������|�����������yuv���������������·�rg����ذ�~�����������:'g���������D&L{w��~[KD@>>@AVdcWE6)<m|��u;.+;HFDCEEW���������Ȋ�߰l�������������t�������������������������������̿��������l���̾���������qda^]P]`ipqhTAH]x������ʸ��jw��������������zI2pT)"#  !#*6a���������~vg<%.-!&_��e^Ygjihea`]SH>?EHNIEFJLKNNFGHSdlf`ZQPQQQNJKKHFHJIIIHIC>6%%+*068=FOVYaiuxs[3-ERNLHFEDACS������{uw}���~�}eSP@16U]cniefkgZVKHE9("'08Da~����������������������������������������������������������������||xvurolheb^[WO:*$(4@IME:6..11..,*)'&())++/001/.....,-.,+-.-,*)'%#! "!!!!!(>]msuxuvvuusrpllljkkmpqsuvtspjeaa]Z[ZXVVSROJ=.!&*-.-,++)*(())*)& !##:l������������������������������¿������������������������������������������������������������������������������������������ý����������������������������������������������������������������������������������������������������������Iq����p=2r{y~������̦d-(%"3M]dba``bcbbbZQOZdjhfeb`_\ZXVTPK@9413=FUY][```]_`[M72,.26899;?BEMUZZZ]]\]\[[[Y\\]\]\ZZ[\\ZZ[WYWVTPNHDAACGKNMJIECFHLNPOIA71:EMSUWVQE5/3?FHFDC<;DIF@81--6<;<@@CIIMORXau������mnpr{���uuqqnqz{zvs~����y������������_('$*Z{j@%Y�����������~~~���}��|scC(-0%f�>3q~~P2\{�����s:c�������������������|wvv��������������ƿ��iy����Сeu������ŷ��� #4����������/0l�~��yVQKKMHEF_ccVF@F�����_()+?HDDDDJv������������۠ay�������_b���l��������������������������¿���˻���¿���T_�����������~hXbgckhgfnprlOANbz������˹��jv���������������vG<lgM*#$   !
%*9i��������|ud:$/+ *e��i][fijgb_]YRF==FKLG>@HNQOHDGJUfgec[WTPPNJIHGHJMNLKHJKE@:,"(,379=DKTY^eovnQ-0FQOLHEDC??P~�}���voos~���yjZROD;E\^dk_^kpj\OFC?504=JV[p�����������������������������������������������������������}zzzxxvrolhgd^]^]YUPNNG1!%,1<DFIC6230100,*'%'')*,-0/2//..---,,+-//,,+++'%#%" !"  '=Yhpsuuvxvvturrqpomlkkmnpsuttrmifb_[ZXXVVTNL=($')++-..--++,++)++(&!#! 3X��������������������������������ž����������������������������������������������������������������������������������������ļ����������������������������������������������������������������������������������������������������������Km����o9&dpt|}�����Ԫh.(&" /L^dfeda_`bbc^UOWbiigdca`^[YWUQPKB:328BTX\\_`]]]_XF5.,-16:<:<CGIOUY[]]^^^]]^][\\[Y\WXW[^^]\ZZ[ZVTQOLKHGFGMPLJKJFACGKNOMKH=?CHMPUXXO?3039BEA:5?EHGD;1+-036:?ABFFHOQWX]|����xllpt}���rrsoq}�}}xr���zu~�����}�����zJ%-!.\rsD3t���������O?d{�������}yp];)21!Td?f���!Al������e;v��������������������|wtx��������������Ļ��f�����ʕnuzu��������v%("LR\s�����W!D��{���oUTKGIFJUag`UMQy����W(#$*BKBDFIX�������������޷��������p`l��������������ô��������������ü���²�������~Yo���wlic[[WUSP[q|{}yxvtspeNARc������ø��ht��������������zE?ScS*&& "#"!$*=l���������|u`7%-) +j�}deggilga^\WTKJHKLMJADJNOHEGJLPbaa_\ZVTTOGJHIORRONONMMKC:+%*,368?DNUX\cmtkN+0IQNLIFFCAASs|}��}stnv����ymRFKOIANc^_`UYwui^RLGDMS_got}�������������������������������������������������������~|zyvvupookhfeb^\]\[YVYTQPNLA,!"&*,07BHI@<86432-+)&&'(*,,-/1010/0/.,.,.--,+,++++('$##"!!    %5N`kpsuuwuvwwvwttrqollljjnqrswtqmkjc`\YVUUPI:%(20/,-.--./.,./.,,+)(&#".q��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Hk����w<Eelun[b���Эt8,*% ! 
.K`fjgfb_`abdbZPT\gmjgdca^][VUSPOJ@967>NVY_^[\]baO?4.*,27;==BGJNRVX[]]__\\\]]\[^[VQKLTY\\XVVYYYVTOJGHKNLJOOHDILIFDDFINPQTLFBDFJORURG7--6@FC>;@EJIHA2(%(+26;<?ECEKMSW[j����nljnt����roporttvxx����~{|�����}~�����q7&.&^�s,F���������tY����������{unR0-51%jr9���5&Os������OH��������������������}zvy������������������zc����۴mSHMm����;k��P/="-4 @lrx���eQ>8BFGPY`i_WVRp��e`T(""*CJBBFQz���������單����ȿ����������������������í��������������û��ǩ��������jw����lfgdaf`[^^eq}~~|ywusofJFSe�����������gt�������{�������xB;VyU.#%#  %,<k���������|s\5#*% 0p�k]lhfflga\[VQNNJQPQPMGMNMKGHIIS_a_^][XUYVROLNSVSPPQRQOND8'&,.567=CNVXY\gkcH)3JQNLIGFC??Nn����{|}����{t]:6HTPM_[TVLC[imea]XSbly}~}�����������������������������������������������}zywuspnmjjgfeba^_\^[Y[ZZYWUSQPOH9'"*...-1;ABGA=;730+)''('(),-.-/120120-0/-,----++,,,)&%%"!  !#$#"#"  *>O]jrusvvwxvvxwvrrrpnlkjjlloortssqmid_[ZXSI9( "09986530../0//-,..,++**$"! V�������������������������������������ÿ�����������������������������������������������������������������������������½����������������������������������������������������������������������������������������������������������������Hl����yB%DZniK*\��̱�H3-'#"#-K`jkkhfdcbaba^UR[ckmigeb`_\ZXVSPNHB99;HU\b_[^_`[J>30./29=?BCGLPRUZZ[]]^ZXWXYY[\[THDEPYXOLHNRTSTSNGDHLLIHMLHGJJGHGHKMPRSRRSMKGFJJMQL<-.4?FJIGHIIHGA8+&#&.26;>BCDGKOUV`t��xfijmv���}qoprruwtsx{vsy{xw{vpw~}~�����~`-,* Gm�` "_���������������������yteH).6)!U�ZO���=3]{�����y=X��������������������|xrw������������������hj����׶{b`i��~F\���5"ic @dqx���YKBGRNGCSacZRPFn����q#!&2FFCEF\����������؊�����������Ŀ���������������Ϸ������������������¿���������q������~{zz{yzwnlu}yvutncFFTh������޿��au}������}�������xAIcmT*"$"   " !  
"&,3j���������}s^4$," 2n�lgkjfdeed`]XSOONQSXZXRSUSNKJFF[ghcabb`bbZVTUVXYUTUTRPPME:#'+/666;DMVZY\cf\A'3HQONKHEC@?Hm������������tm_A?JWTUXFHN?:Udfccffoy{��������������������������������������������������~|ywvrnmjigc_ba_^`^^^^\[ZZ[Z[[ZWTTRPPE5&#+1642//0<JHD=;:6+)(('(((***.0123332100-.-,,---,,+)(''%##!#! %(((&#   (9K\dnquvxywxxvuuvvtqnklllkkmmnppttqpiga\VM<0).:A@==;:8431/.,,,+--+,++-*""$&=��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Il����yB(PstQ!)_����^81+&!!$)Ccjlmjigdb_``_XTW`imlgeeb`\[YVVUROKB<=ES_a]^bb^WMC:6537=@BEDFINRUYZZ\^[UNEDLSRQQKAADLNRIDBIPNIGKLJIMLGABHEDDDDEGIIKPVVVRVWUUPMMLPRMC;99AEIIFGGA;9=?4("#).3<?DFGGJNPSWaosiijjnu���wspos~�wssx{�z{xussx||}~�����|Q%,%HyK/s��������������m�����woZ9(.,*`|��ˏx=@f������f2n�������������������~zury��������������Ž��Vp����ԚJZsq���l����~]�X#"'Hjqx��|VKHEC>>FZc`UJ=7t���y=#:QFCGS{����������ȃ��������������ǻ������������ʨ��������������Ŀ����������������þ����������xox~}|ywtrm`DFUi������ܿ��}`r{������~�������v="IhnP $%#&(+/4/ "  #'0;n���������|s^3$* 2u�rkjjhfcfhbbZURQNRVZZYXXXSOKGCE[oplllifgg_ZZZ[ZXVVTSPKQLE8 (*.677<DQY\\\cgX8%5KROMIHECDDSt�������y~~{xji_LLTXWUJGNNDD]ijhltxy~~����������������������������������������������~zxvrpljgfb`_[^]]^^Y[]][Z[\ZXY\Y[XZYUSPOMD4""*5:973/-0:>>>>A9,(&')'(+**,.024424430.-/.,,/.,+++**+)(&%""!!" 
"#'+*&%""!&4DT`fqtuuutuuttstuspnnmnnkjjlkkpsrvrpkd_UE<:>BECBCB@<;9642.-,,--.-,..+'!#/<U��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Fl����~D&^��\&*]���W33.*(!"'$%9Ubknmkihfd`]^YUR]imlhhgeb^\\YUUSPPKB@DP[^]_aa_YPH@;:9;>@CFFGJLQTWZZ]^WQEFHLKB@>??BEFDFCAFOYOD@JPQPOJDAFJGBCCAAA@DHPZYYZ[ZYYVXVUWVTPQNHHGEDBAA529DH?/$!$*29>FGIKLOOPSW`gikjlp|���utpotz~}yw}��|zvvvvwv{}~����r<*6l�x8 @�������������t}�����}vmS.*0)&r�ȒCmo"!Lt������P6��������������������{xtt~��������������ļ��W�����ьq�����{���{�au�8 2<;57=<1.Tlsx��qZQI?;8DTbc`SF0)f��UP?$9OFEIf�������������������������Ƚ����������������������������þ���ƻ�������������¯���������sy|}|zxxsok^CHXl������ܼ��zdrz������|�������s:;t}jC$&),5QbkiW6!!&0<q���������|x]1$)6{�wjjllgecdba`[PNQW[Z[\[YZSOKEBD_wwqppnkfd_^\ZZYWXVTSSTTOD7)+/888>GSZ]`dkfR7,>PTSPMKJKRQj�}���zhhnjngchc[[^aa`[\abcjnqswuy|}~����������������������������������������|yytqqkhgedcca```^\][\[\\Y[\[ZXYZZ[[[Z[YXUSOLMIC1!+58::961147@>B4*'&()')++,,./247456510/1/-.0.++++,,,*+)('&%%$"!$)++*'%#"#*8CQdmtttuttuvuuutsrqprpollkklnnmtqtsohbVMJHGFFEEDAAA=:;8410.--+,,---*%#9Z��������������������������������������������������������������������������������������������������������������������ƿ�����������������������������������������������������������������������������������������������������������������Cl�����I'b��h5&OiW=330/+$!&&#(6BZipsoklifba^\WQ[fkmkihfb]_^[VTSQPNIFGMX]]`ba_[OGA@@>>?ACEFEGKQSWZ\]_ZTMNNNFCFHF@EIHDB@@GOWODBOSTRLGCHOJFCHNLFCCEHOTVZ[[[[[\[[]\[ZXYVPNOOPNKJIJLIHD5*""(1;ADFHGMPOOQTZ`fhjlq���trqqty�xvv~}zvyyxw|~}�����b.$6Ey�Y+V���������xc��������~zvkG,-2(
/v�G.u�7/\|�����x6K�������������������~{wrv������������������gl������wgsr��}ry]az�El�'3FLJJGMK06]lqw��jQSJA>=DL`d]PC-T��|{jO+@OFHS�����������잘��������������ƹ����������������������������½���ƾ������������������������rz|~{{xwsnm\:IYo������۹��{ht~������z�������p7+d{vaF#)4Ms����P!!"  &-Ap���������|w\1$)6u�vijfihda^]^_[TVWZ[\ZZZVVVOLJIMdxxtrqokfbb][[YWWVVWVWTQMF8 +.2789BKQ[djppgTA=M\^[XXX\agr�|y|xnlfd]khimniinppoopruuvxy{|}~��������������������������������~}{zyvtrmoifde^[_a`a`aa__]\\[]\ZYZ][[\YYZZZZXZWXXYWSPMKMHA,'.8=>=94126:7+(''''()*+,-0204877886420/.-/.-,,*,-*++,**+*((%%$"&',/+)&$!!",;Uhvxvttuuuvvwsrtqqsqqnmlkjkkknmqruqme`YRPKJFFEDDB?===87542/,-,,.-,& B��������������������������������������������������������������������������������������Ŀ������������������������������������������������������������������������������������������������������������������������������������������������Gh�����LR}�xC1?@6533-'""'+)!!*2Oemoqollkgdc`ZRV`jmnkhfedb^]XUUUTPMLILX\^bca[SID?BC@?>AFGGCEKOPVY]_`^[YYYY[Z]YTSVVSNHGJKOOIBBIJLOLGFNSIBDOXUJDACDGPUZ^^]]\^__^]]\\[WTRTTRRRQQONLKH@5&"%,8?BEIJMQPQSTY^egimt���{qonpwz{ywy�����������|����P)21'_r_A+j����������|���������}wr`9'//$)i�E\��w2=h������f/e�������������������}zusw��������������ž��f�����ܯmpvs���������,.�z(:=@?EMF$"@bnt|�~gQNC=88CVddZN?*]}�{�z>!.HNFKd�����������䍭��Ѳ��������ÿ���|��������������������������¼���������½������������������o{�}|zxsplR@K\p������ڷ��xky�������x�������o3*Nkf\O*"5`�������M ""!"'0@r���������~tW.
$("!8w�zmkiijgbVRY\[WTYZ\\YYZYWWUSRPUg{xwssolfac`^_\[Y\[YWWURNF9()1-27:<FOYclsyvphdjqxwpqsvx}�����{|xtuspqsrvvvuwz|{{~~~~�����������������������������������~{yvtpmmlhdeda`c]^`_`^^]^^_^__^]\[[]\ZZ\]\[ZYYYY[[WWVVWZUSPNNLF9&%.6=CB>;61/,)('(&'()*-./1203478::9860,)*-,.-,,-/-+,+++,+)('))&#"$&+.0.+'$#"#,Jk{�~yusssttvtrsrvurqnoomllmhijmoppqsmfd^WSMJFEFGD@A@><<:8620/00-,+%"*{���������������������������������������������������������������������������������������ľ����������������������������������������������������������������������������������������������������������������������������������������������Jj�����Q 7o��S,><74242.'!!(.,##:Nakqroqokhfb_WR[goqmhffe`_]ZWVVSPPOKMU[`_]\\LEA?@DA@?BGFEDGKOTWX]ab``a`aab```_^_]\YXYVUURJDHHGDHHBCIKFBBKPQG?=>@CJVZ_^]^][_^\]^^\ZXVTSUUUWUVUSPOLK=/%")3>BEILNRQRRUY_dgjow���uspnpwzzxup}�����������~|���~vC * ;ae^98����������|wz~{w����|wjQ/(/+0l��Ӿ�u*"Im������M5��������������������{wtt��������������ü��m�����ׯ������������D�I';FEEHMRA'Hcow��}_NA:97?KXgbWM=(b�����'"5OIDP~�����������Չ��㘂��������������������������������������������ǥ����������������ž�������r|~~zzxvspiM@N^w������շ��sn}������y�������q0"=WVcY9*T�������a,!! 
'0>v���������|pV*$)";�xojhfjh_IGU\ULT[\\[ZYYXWWWXVTXjzzwssqlgded`^^\Z[\ZWYVONFA965;?9?CIMV\gouz�~�������������������~}}{wyxzyz|{}��������������������������������������~zxwrpkfb_`b`_[\`_]^]^^]]\\[^]_]]^^]\YZZZYYYXZZYXWVYYXWWUUXWURNLLIC6%"&0?EJF>92+*(&&')+),,-101/446789;:91-)''(**+,..,.--,+,+-+**+*'""#%*,000/,''(1Lq����{tsoopqstutspnnrppolkkijjkkkkkorqmkfbYSOIGGFEFDA@A?>;883320/11.)'?u����������������������������������������������������������������������������������������½��������������������������������������������������������������������������������������������������������������������������������������������Ik�����W"#T{�a&'9;853321/' !*10$!9PcnrsqnljigdYRXcorokihea`^[YWWSPPNMNSX]ZVYUHD?>>CCABBFFHJIJNSVX\`ddcbbdedeecbdcbcbbda_\YWVVRKDDEA?BDBCAEIGA?>@BCJSV[][]_^]]^_^][YUSLMPSUWZ\YVSROMD6+#'0?DFIJMRPPRW\afimqz���pporry~zrt������upuz�~}}����}j1"/C_nV$O���������y_jnlYz����{vgH+.2)/���w9ua-Xv�����z:I�������������������zusw���������������º�|r�����˛�m^l����t��_EY$*>HJNSNM40Rgqz��xXGCD@=CNZe`WO>5~����D#$9OEHg������������Ʌ��ϊ��������jf�����ğ�������̸������������������ż�����������������ú������wr|~~zyvuqmcI<N^x������ӷ��xq~������~{�������j,$;]rj*-i����r[C8# "! !)1Bw���������{mO&%+"?~�wlfbeiePBKRVNX[\Z\ZYXVWZWWXY^co{ytusqlifcd`a_]]]]\YZVTQPWY[V\a_fglppuy|����������������������������}zyz{zz}~���������������������������������}}{wtpoife`\]Z\][]]]^\]\]^\][\\\^[]\[]]\\\\[WWWXYWWVYYWYXYZWWVVSTVUUPLKJJ@4"$/;HFA>80,(&'()**++,0122557679;<;71-'$""#&(+---.../.-//---,*&%#!
"%(*,./1222/,.4Rr�����{tgiosuuuurppoqqrqonnlljkjhfgimlmronh`\TLHIJIGEEFFDBA@:8887;ELJFSi������������������������������������������������������������������������������������������ÿ������������������������������������������������������������������������������������������������������������������������������������������Mk�����] 9n}h3.C=876430/*% ('!":ThotqnmllihbXW`lpplhhfdb_]ZYXTROOLLQSOLBCC@A?;=ADAB@ACHJJIMQUV[`cdcbeffggffffddcdccdbaa``\UGCEHGHDGE@?CA==AABBAFJRWYYZ\\\[]`]\ZVK@67<BKS[_][XVRPIB2'%-=EIJMPONRSY^aglpt��yqooqrw{wsvzqw�~vy����~y|}��~}[#/3"Ood:)e������������wu|�����zsc>&/3&@��{Lft-9b|�����j$e��������}����������~yupz������������������q������őgIMz��������?''/DKLMHHH05Xmr{��pYVTQPUW[ab_VNB?v���kUa.#AKCU���������������ȍ��������bg��������������Ȫ��������������þ��Ƽ����������������ʿ�������xv}}~yxvtpj`F;P`w������Ϲ��ws������{�������j.%JZaN)FYVE?A?@3!"##'/Dx��������~vhF'+"!?{�ohcckicYSPTXZ\\XXZXWUTVXWXX[hktwsrqoligcdb_```````ac`lr{w}��������������������������������������y{zyz{z}�����~}~�~~}~~}}}}{|zxwvutsqnjhgdca\\V[ZZ[ZYYYZZYZ[YZ\^\[[YZ[\\U][[Z\Z[Z[ZWVXVWVUWXVVWXXXZ[XTTTUUTNLJHE=1#+5=CB=4-('(()++,,.024557779;<;;:87.'" #'*,--021.012/-..,,*)''%$)+-,.0./024453:Zv������p_behlqrrpqqnoprpopnnllmligebfegnopomgc_VSMKIHFHHGGECA@@@BI\z��������������������������������������������������������������������������������������������������ý���������������������ÿ���������������������������������������������������������������������������������������������������������������Gm�����]%,aykG' );E>:9864010+$#$=UlswtommmldZZairsllkhfdb`][XUTQPMKKLHD@>>>@@?>CHEDA>AEHJIMOSSZ_ddcdfefgghhhhgfgffecdcddd^WIGLNLKKGEABD?=A??A@ACHLPONORVVWX]]\XP>,#$'-7GX``[[XURPL</'+7GIKNQPRXY[`eimrw���vrpnqx{uqqsttusx�����zy{��{uG .$!bi27|��������������������}vnT1)43!1������xA\&$Fq������X.{�������|�����������|zvt���������������Ľ��o�����ܶzYRp����Ž��~+5ONMMPOF&
<\mt|��i\]^\\`bcde\TN?N������}-*AMLg��������������������������������������˿���������������ý���ø������������������������tw}}}ywutoj^><Qb{������Ͳ��xv~�������|�������f-!?]k6&9IJEBA:5*! "$&-E{������ysmfW>$ *,$"B~�nfhllimfb\\][[[WXXXYWVWZ[^]h~vz~xtpqmjegfgfceiiijqy}����������������������������������������������|y{}~~���������}������~|}~{xxwsrnifc`_^Z\WTXXWXXWWXYYXWXZYYYZZ[YW[\[ZZZZZYYZ[YZY[ZYZXZXVVWVUTUUTTVWVWVVTRRRPSQLKHHD=-#(-8;82+&'*))+,.,001456669:;:9;;;<81,#$'),-000131001/11/-+)'('(',,*-004577Ba������o_]]`eknqopppomooopooqoqokieddedelmnqplid_YUOKKJKKLHIFHEFHT}�����������������������������������������������������������������������������������������������������þ������������������������������������������������������������������������������������������������������������������������������������Nj�����a'DodI/)5CD?<:997322/)$#;Ylwuqonmlie``fosomihgeba^[YVURPNLIJGFA<;>???=@DFDB?@EKKJLMQVX]beeefgfghhhgghhfgfcdbddcb`_YVUSOIJGECDDB@DDDDCCBFKKIEGHKJKSZ\[RI>1& )<S^`^]YTTUSF9-,7HKNNOSVVZ`dgilqx���upompw{yssvuxwwwz{tvx|{{~���{k6+<yqK N���������vNssqal�����zrlH*,6-8������v�_,Vy�����;B�������|z����������zwtu������������������xv�����њhkjp��������o%>NGIKKKA!$Ddnu�e]`_^_^``edYRJ4V������=.GJV�������������鏠�����Ż�����������ï�������ȵ��������������������ɽ���������������ɾ�������nw~~|yvrqnk\>@Pd|������˫��nt~������~�������c'"Fmd5$8EF>;977*!###$$+Ft����yrheb^WA&!12&&N��wrpolijge^^`]]][ZZZZ\[\^_cjy~v|{vusqmmlkllorvy~������������������������������������������������������������������������{uuwqpmkca]ZXUXUOSTVVUVWXWVUVXYXWWWXXXYYZ[YYYWVWXWZ[XYYYZZYYWWWVXWXXWUTTUSQRUSTSTSPRROPQMMIDDB:("&*,./,&%()*+-..012356889;;=<<;<@>=8.&#'),/0202242331-($" 
#%&*..0359F_}������tlf_]_dfjmllnomppoqrqpoonplhgfedcgijklknmlfa[WUPNLMMMIKIIJRm���������������������������������������������������������������������������������������������������������������������¿��������������������������������������������������������������������������������������������������������������������Eg�����`)9hcH10FIB?=<=<;5531/*!(C_qvrnpolnib_dmqpnihgfeda]\XVTOOJJLLJEA?>>>><<BEDB?BIMMKKKOVZ]`ddefgfhijiihhihigca`aba``^^^]ZVUSQOLHEDCGGGDCCEJOQPLLMNLLQXVK=95*#(7M^b^]ZWXTSOE5/5DKNNPQVW[^cgjnt|���uroorx|xspnw~��|y|����}{|��y_)#,!WpRB#a������������scs����{sa:'.2&?vv`_gheW;d}�����n0]�������~}���������}xtsy��������������ſ��t������Ͱ���������sq�\*>GFIIGE5*Nipv��y_\__^]]abfbVOF$+1--,4.""1GIh�������������׆���������������²��������������������������������ӿ����������������ƺ�������qz~|{xvtpmiW9CRh|������Ͷ��lu�������~{�������b'"Ke_5$2CKKA=;2#!##"#%)Cr���ysojhc^VA&%IJ7'%%(2Y��usrpljhgecca_`_^`_^_a[ahkq}�xty|{{}{xxz|~������������������������������������������������������������������������}~ztspmhda^\ZYUSVUSTTSVRRRSTTVVUTUUVVWVWWVWWXYYXXXXXWVWVVYYWVTWXXWVVXVWVUWWTTTTRRRSTSTRRSRPPNMNMKHDCB5"!#&)+)&')**+.01233779:;;;=>==>=???=;5,"!')-.0452540-($!  !! 
!$(+,.139Hf������}wsqijiihhjilmlppoppnmpppqnlkjhgeefdfghjlpmkeb_YUOMMMKLMKJMR[h|����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Bg�����c,#:TWMAEKIDCD?>=<854200)%0Oipssqnmmic_bkqplfcceec_]]YVTSQMKKMKIEB>>?@=<@DEAADLPPLIKPUY\adffhgegikjkihjiigd`_^^^`^^__\\^^]^[YTTMQVQLHDBENUZ\[YZ]URWWJ:#"*8EQVWTTUYVUSN=35BIJORRVY[_cgkqu���{sqppsz{vrr}����xx����~{|}~�~tL$".GIZB.w��������������z�����}xnR.(00!*�������( Hk������V0q�������}{��������{wtt���������������ü��m������ͽ������iiY_}�>%&2.0.'05'1Ujqx��vXZ[\^^aadgaUPA ! #&(($$7NV���������������~�����������������{���������������������Ǿ���ÿ���Ǩ����������������ŷ�������nz|}|xvsnkgT6CSg}������˷��lw�������|y�������\$#Kmj.#4D>;0,(%"#" !$(Mv���{tqla`\]Q4>foR>6216\��urpolkiihgecbb`^fbghopu|������������������������������������������~~~~}�����������������������������}~�~}}zyvsnjffa_[ZYUVUTRRRQTSPSRRSSQQSTSSSUUUSTTUUTUUVVTXUVWUUVVUUWUUUTSUTVWVWVWVUVUUTRTSUTQRSQSURQSOQPPOLLLIGEB=0 "$&'''())*+-024557789:;:::=<??>>@AA>:2( "(,.002410/-++*)'%$%&(),029Gh��������|z{tsrqjklkkklmmnoonnnonoonmkljiffdeddegiknomhcc[QPMLLMKIILMLQf������������������������������������������������������������������������������������������������������ſ�������������������������������������������������������������������������������������������������������������������������������Gg�����f($09FZWXVNIEBA?><;=964450,*% 3Kgrtrnkligaaipqkb]]bdca_][WTURNLLLKJGB>?A@:;@EFDFJOSQNJKOVY_cdghhggghijkjghikkgcbbbaabaa_[]^X^`a`_aa\]\XQNKLQVYY[^`a`]`]N9#$-57>@?@DIQRRSOC87=GLMORUX\_cflry���uqopqu|{rrz��yspps|�}wvz|���~{p9).Nkb("E����������}���{�����zwkF)*0++m�����k*Po�����?A��������}{���������}yvsu������������������q������Ƚ������swsl�� %$&%""+1*
7^msy��p\[Z[_^aacf`QO8%.)%%?Sn����������������������������Ƽ�z��������������������ƣ����¾��ĸ�������������������������|m|}}|xuromgL6DSj�������ʸ��nx�������|{�������Z $Ow]5#*-&"!!! !"!!$'.P{����{wtpqtt`@V��YG?821_��vttspnklmkkkmnoux}~���������������������������}||}��������~}}|}~����������������������������{wvwsrrnokigc^]YWTSQQQRSPRRPNQQPSPQRPRQQRQTUTSSSQQRSSTSSURSUTUSUTRTVTSPTSTSSTTTWVUVWVTSSTTTSRQSSRRQQQSRPRNPRPNLJJHFC?9+!!"$%(&''',,-023366899:;;;<=<<==?@AA??<7.%"!'-/.130101/*(&&'(*+.2:Hm�������~||{xywyrtsqnllklmnonnmnmmmnnnnnmlhgeedddeghgoomifb[WTLLMJKJJLNNc��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Oh�����g-%-47<QhdVLFCCDA>>??;:652120*$1QhrtoiiiiebekrmbWUW]^_^][WUWSNLLLMIEC@?>=99>DGEHLNQPMMNQVX^`cikihjkihjmkijiijihhhhigghfbba^\\`^]^__^_^]\YUUVUTWZ`bcddcaT<( !#"!'38661+,0<FIMPMJ?:=HNOPOSWZ\`dkrz���sonnpvytpqtusuvwuustuuwz{}~|]*!+0dsD%`���������xmruip���~��yr^8&)-# >K*7g{Q5Yw�����n(W��������z}��������{ytqw��������������ž��r�������������_p�y|�m),$+.5:@@9!$Eamtz��fOW\^]\_`ad]VT. &($$(E[��������������ꢑ������������������ļ�������þ�����Ƶ��������º��������������������ʿ�������{t|}{xvusol_H:FWl�������ɻ��kz������xw������Z!IiR5$('"  !##! 
%*3W��������|xut]7I^SF@=802d���|{{wwvxz{z|~�������������������������������}zx{|{~{{{{|~~���}����������������������������|xxtpojefc`][ZWTUPSQNRQRONPOOOOOPORPPPMNNPQPQQRRQRRPRPOPPRSQSURUSTSRQNOPSTPPQRSRQRSTTRSSTUSRSUSRQQQQRRQOOQPPNOOOPNLJJFFDA=8( !#&&'&'),,./13357788;<;==<=>>>?@?>?@>=93,""$'*+/-00.,+,,,.126?Mn�������}{zzz{{|{{xspopllllmmpnmmnmompnopmkihhhfcd`ccfiknnlhb_VTROONNMPQT]fr���������������������������������������������������������������������������������������������������Ǿ������������������������������������������������������������������������������������������������������������������������������Jc�����h5!)378@IYXMICA@CBA?@@>=;85111.)# 6[mxumkigfa]fppeVNKPUY[[\[YYUQOKJKFCA=:8778=AFJJLMQRPOOPQU[^dillmmjiijjkkkjjjiijkjkkijihfcc`_cb_^_]\\[]\\[^\[Y[[`cedddc[E.&,3/(&,48971,(,6?DGEFID;BHNSTTUYY[_eks~��}rqqmnnpppquux~�~tustuvv{}|~~zN%))A]I*-x�����������wqr��u�}xpS1(,-%\��{l��F!Ad~�����U(p�������}y|��������~ywsr{��������������Ľ��n������Ӽ������������G-".BKNSUK2)Mdos}�~bY\^_`]`bdfTUR## "/Pv��������������䗢��ĉ��������������Ϸ�������̽����ҳ��������������µ���������������ɾ�������wvzxvtsqrol_D6GXn��������Ƹ�hy������{ttz������R)JZf<$(%#!"" ##! 
%*3Z��������{tpqQ(/03;>?<><k���������������������������������������������~~}~�������������������������������������}{zxtromigb[^[XXWQQTRRSOOPQPPRQPNOONLNOOPQOPOPONOOOSQQPQRPORRQRPQPPQRRRRPQSQQPPPPOQOQRQPRRRQPRRQRSQSSSPPNOOQQQNNMLOMMNNNMKIGFCA?;4$"##%%&()*+-//1456678;>=;<==@?@@@><<=<<<:6/& $&'(-//1355468@Qo�������|{yvyz|}{{zxwtrqpnmnnnmmomonnmoponlkjhgfcdbacegilnmmhd_[VSQQPRSTUW[m���������������������������������������������������������������������������������������������������ȿ�����������������������������������������������������������������������������������������������������������������������������Mc�����c-+5;;?ENPJEAC@BDDBBA>=<;5232/+( #Abx{xpjhf^Xblqi[OFEJPUY[[YXUSNKJJGDB:97555:>EIJLNRRRQPPNSXafjnpookjkljkklkkkkjjllkjjjkligffggeddeca`]Z]]]]][[[\`cdfedd]O=4<FI9*/8:<<40/8CFFF@<AC>CJRUUTSVVZafms���xonomjmooorr����wtststwx{}|~�q>!+#A����������xijx������~{uiD(),(!\������{(Qp�����|@/�������{}���������~yusr���������������ü��p������ͺ������m~ry��0!+DNNJB:4(1Wipv��y`X[__`_^b`_U[S !$!$:]���������������ߏ���`k������jg�����ͮ�������˰���ѱ���������������ľ������������������������urrjkhdimmj[=3HYr��������̸�jy������xnmnz����~M&U{p>$'$"!!! "" ")1Z�������{tkhX;4B?:FNQNYc}��������������������������������������������~������������������������������������~|xvqnlfdba^^XWYSVTTQQPTQQSQPQQRROOONNNOMMOMMROMOONMLNNOQPPNQPOSRPQPNNNNPQRRPNPQPRMOOOOOPQRRQQPOOPRQOQRPQQRNMMNPPPPLMLJNMMKKMLJGCCA@=:1" !"!##%&))*-/0/1357687;<;:;=?==>??>?<<=<=;;82+" #&'),,-.13=Oq�������~~|{zxy{{{{ywz|zwtsqonnmmnmonnnoqpqommkjhfedfdcdehikmmoie`\YWTVTTWWZ`}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������K_�����b&
(3:=@COSE@@BBCEDEEB??=;99632.*%")Kkx}xqldZX_krrdQG>@DJRV[ZWTRPNKJGC?<;94668=BGKLOQRTSNLLS[ahmqsrqnmnmjljmmmkomkklkjjjikjihiikhhghhhfcdebdba^^^_`behgfedXJ<=FF1*2:<=<853@QPPG1"1AKGQUTSRORT[bfjp���xqmmnllooow��ztuussrttvy|~~yi-'2#W���������~fxx{������zqb9%).'B���qq|�i/\x�����l1O��������}��������}{vrsv������������������x|������Ȼ�����~�w`r�p,CFGHFEA) ;[kpy��t_Y^][]]^`a\WZM$+/++;w���������������ˆ��ۓcw������^]�����ͣ�������Ģ���Ƨ���������¿���ν�������������������������neKUdZV^jlgU;7JZp��������η�hw������vidcm|����N&^zc9%*'# "##"$%+2U�����llhfhd^`cgdhlsy��������������������������������������������}���������������������������������~~zysrpolh`]_]WY]YXZVXWWVSSUSRTTSRQPQPPPMMNOMNNMOQMNONNNNNMMMOQNMOOQOOSQOOONONJMNOQNOPPOQMQPPNNNQPNPPOOQQQPLPPOPOOLMNNPMNNNMJLMLKJKLJGEC???<8. !%%$%')),-.0132476:7:<==>??==>?@?@==?=<;9974/( ""$'4Lq���������}}|z|{zzyw{|}{{{vtsopmnonmpppqrqoonlijjhfefdfeeeggjlmmkke`[ZVUWVX]p���������������������������������������������������������������������������������������������������¼����������������������������������������������������������������������������������������������������������������������������J\�����c*-8=AFNKBBAB@BDBEECBA>>:99540.)&!0Tjy~zsjaYYhqvjXN?<>@GQWZWSRONLGEB<<886566<AFLMPPQSSPMMV^ciosuvusnmmommommnomnollmmllkikiijmiijijjiijhgebeec_acefihged_P?51*&-7;?C>:96:BD>-#.GPUV[\ZQQRVY\`eiu���tpnlklmlmnqyyy{wsqrstuxz|}�}s_&+*,o��������������������~ypW,%-.(9�����j|�TCk|�����U#f����������������{xtqrx��������������ƾ��l�������¹�����������^!8LIGMJIB##C`kqy��o]ZZZWZ\_`a]Y`E'=N<1/P������������������۱�������whn�������������������ɵ��������������Ĭ����¾������������¹�����Q8HmjZPYcgdV8:H]q��������̵�jx����|n]QO[o���}M*Keg3(,(#  ! 
$#&&),29]w���jmrxx{|���������������������������������}����~���������~~����������������������������~~|}zxvrpmhefbc_^]]ZYYYZXXWWVUSRSSQROOOPMNOPNNLMMOOPONMNLMNMQMKMNPPNOOOMMNONNNONOPNOONONLMMNNMOPOOMMLNMOOMOONLOPNLNNOONOLLLMMMMLKMLHIIIHGIGHFDA??<96*!#$&&'),--.223489889:;<=>>@B?>>?>?=<><;<:99983,$(Go�����������}|z|}{yywyy{|}}zyytrppomnnonrrqopomjiighhgffdfdcbgigiopkiea^ZYWVZl��������������������������������������������������������������������������������������������������������¿�����������������������������������������������������������������������������������������������������������������������Ib�����g0(4;HMHDDBAC@ABCEFEDCB?>=;7623-*%6Vo|zyrh]VaouobQA<<=CLRVUSROKKIED=;6663458=DJKORTTUSPSZbgjpsuyytooponoooopoopommnoomlkjlmllmkkkkkiijiiifgkiggegiihhfb`UB-%%'/:>@@@<:522.('5QXVW[cgh]VVZ[_adky��zspmkklllns}����ztrsuttyz{}��{lJ$">���������������}�����~xkG&'/,'YtJ�ǹ���`'On������>7~�����������������~zxutq���������������ÿ��m������ҽ����������}�C#18>@<6=3'Lelrz�jWYYZYZ[_c`[\e= #*HXX<.8s��������������������ʼ����������������������������ƭ������������ô�������������������¸����x3LjjbQLQ\Y^Q68H[s��������̶�oz����~eCABKc}���J*U^B((&$!!
%.168;DU`qy���������������������������������������~}����~~�����~���������������������������~|{zxxwurolniffdabaa_\\Z[[XWZXYZWXWURRQQPQOPQOLNNNMLKLLOPONMMKKMMONNNMKMLNNNLKMONNMKMMMONMOOONNNLMMMONMLLLLNPMMMMNMMMMKLMMNOOLKKJJKIJJKKJHHGGGFGFGDB><<=84*!"##%(()*+,./23367989:<;<=>=@@=>@@=><;<;9:;988785-)"%Iq�����������}~}}~}}|zxvz~~~}|}|yuuqnnmmmopoooonljihhhghhegfdfceeehlnmmjea`^[[i����������������������������������������������������������������������������������������������������������¼���������������������������������������������������������������������������������������������������������������������G[�����o-!/=IKECCBAA@CCBBDDEFA?B@=:87770+&0MkwzxrbXZgqshZI>>>AGMOPOOMJHHHF?:76665569@GJMPQSWTRU\chjmsz~ysqqponoqrqpoonmmnonlmljllkkkklkiklkkijjjikiijgjhhif^[YN9(&*44:=@?><;72.)%,@OUZY\gnmh]Z]\]^dm}��uoolkkkkmoqxz{�{qsrrrttw||���{d2&%S�����������zkd������}xb7#)2,(\ho��wov|\2^w�����m.K������������������}xtrqr������������������xv������ͼ���������r|~( 7GFGDE@&/Qgow~z`UYYZZ\]_d`Zal;%$+5C>5'L����������������铠�������������»�������������������̧���������������������������������·����o?iYNLC;@HQXK05G`r��������ʳ{pz�����sH.@HM\}���J))'&" !##%(8EMU]ejw�����������������������������������������~���������������������������������{|~~�~{{{zxyttvrqplmommjhifffcbcab^\ZYZZXVXUVYWTUSSRQRQPRQPLMMMNMLLMMMNMMLKJJLKOOMMIILMNLMLLMOOLMKKLNMMLKKKKLLMLKLMNLKKLLKKMKKKJLKLKLNMMLKMLLJHIIIHEGHJHFHGGDEFDB@>;:962'#$&&*,,./244446699;==>?>=<>>>?@?AA==;<;;9:=;7999862,$%Kv�����������|}~}|{z{{zy{}}|}{zywrqonmmmmmonnoljjjigfikihgdfedccfghhkmllifdbm��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}�����������������������Ja�����k1*>IEABDCBBCCBDECDEEBBA?@><9762.*& 
/Kdu||p\Xdozp^OC@<?DJJHFFHIIHGHB<86874336>FKKNPUWUTX]cgkpx~���xtqoonrqppprqoonnoplmmkmnlllklkjknmkkklnmnkjhfecaa\RPPC-'2CI><=@>ADD@90)%'4EPYZ\esyum`_^_bhq���uommlkkmonmlkjloqpprrtvw}~~~~z_*&*,m�����������pdd~������~vP%#+/%(d���wj��R?f|�����W%f����������������|wsqlt��������������ƿ��s�������ɺ�����tppkcyj!0GJJLE?6'7]nnw��x]WXX[Y]_ab]Vls2 !%'&#"$m����������������ߊ��������������Ź�������������������϶��Ǿ���þ������������������������������kXZLEF<4>EX]D/9K`w��������ȯ{ly����e:BS\S]���|D !(.*%#$&-0<Olry���������������������������������������������������������������������������~���{|zz{|ywwvqmf`ekmmklnmkjfiheffceb`^^^\[Z[ZYVWUVXTTUSRSQNOQQNNPMMLMPNMMKLMMLJJJKMMLLKJIKNMKLLKLNNLJKKLLKNLKLKKLMKJKOLLJKLMOMJKKKKJKKMLMMLLLLLKKIHIGFGGFEHGFFFEEFDDB?>::770#"%)**04668>=@BCEEDFHHFFIGDA@CABC@ABA><>>=;:<:87778852/)"(R|�������~}||~���~}|}}~~yx{{yyzxvtrommklmmmpqomkiijghigehheheadfeefhgiklnmju������������������������������������������������������������������������������������������������������������¾��������������������������������������������������������������������������������������|{{y|�����������������������Ka�����m2&9DCACBAABB@BBEFGFFCBA>@>=9755/,'$)Gctxuh[^hwwhWC?>?AFJC@=>ABDFFB<76777457<DILQRTUUXZ_chmrx���}rqqqqrrqrprppppnnmjlnnllmmnlkjijklklmmnklnmicXUTTQLD6)!*BVS@;<=>ALPP?4,'#'8KV\[bp��yjb`bchr���oonjjjkmmmnpppprsrrussuw{}}}|wI%$8�����������~yy��������f6%--P��I2���&%Kp����A5y�����������������}ytqps��������������ý��m�������ķ���������x�Y"# 3>BD=2:5#>ampx~�qYUW[Z[^^ba\Xtc!!8�����������������˃��������������¸���Ѣ�������ȱ���������������������������������������Ľ�����\OSXc`PENX__D2=Maw��������ȩ|iw��zY;FSVGc���z?!
 '.257;0*,/4CTh���������������������������������������������������������������������������}xroosy���}ytturpsqlcJIV_jkklmnmjgifffeddda^`]]\Y[YWWYVUWUTTTUTQPQRPNNNMMKNOMLMKKKLKKMLJMKJJKJLJLLJJKLMLMLLMLLJJLLKKKKLKIMLLKJKLJLLJIIIKJIHKKMMLJHGHKKJIHIGFFFEDDEEDDEDCBC?>=<963,!!&**,38;?AEKQOSW\\]]^ZZ\YVUPPJIKGDEB@>=>=<<;:9865665431-&&X~�������}}}~||z|}���~||{z|yxwuppokihhijmqpmmklmjiiihikhiiebgdbcdcdeilov��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zyzzz}~������������������������JY�����p3(??>AB@B?BBABCEFFDDCBBBA??<:7402-)$)Abuyoa\gswo`N@??>BFE>868=CEEB?968887669@HMQRRUY[^`bimsx�����utrsprrrrqpnppnnmllmmkkklllmlklljllmnmlklnlje``_\XN@) #2ZcU?9:<>CS^cQ>0($&4MY]]ak��zjabdlv��{pljhijllmooopqrqqrqtrsux{}~~{m7&"Q���������~mvqnly�����|W1 (/)/Nc;-Yu�����n/C�����������������|xsopr���������������»�yp������׾�������ļ���?
"7LPM?68/&Ignrz��hTWZZ[\^\b_ZYsU!L��������������������������������ü����ˠ�������ƣ�������������Ŀ����κ������������������ĺ�����MLWdroQN`jbW;1>Mb|��������ɩuhx���yZA<?CIz����?""'/HW\[YWHGQ\lv��������������������������������������������������������������������~{|z||wfZRIOas����|vsrqqonjV,-;LXbknmnmjkiihfdfdc``_^][YZZVWWTUVUVUSTTSOOPNNNMOOLMKJKMKKMKKKMKLMMJIKJJJJJJIKNNKJJLLMJGHJJIIJJJJKLKIIIJJJIIKJIJHIHIJIJHIIHHILKKIFGGGFDDDDEEDBCCBA@>>::633* &'"#%)+028=@EPVW]agkpquuvspokic`\VQQNIHFAB@>=<;;;977655322.*#(W�������|~}|{{{{{~�~����~}}}{ywuqonnjhc][ejmmmklomlkkhghkiibeddeffdabbbhp��������������������������������������������������������������������������������������������������������������Ŀ��������������������������������������������������������������������������������}||�����������������������������EU�����l42@?@DBAB?ABCCDDDCEDCCBBCDB><:9551,(% %Ibtqf\epxug[HA@<=BE>6017<>DC@;767:8757<CJOQTXZ[]_bhnu{������}{{{y{xxwwpourpomkkmmmlkjllmlnmlmmmmllklnmklmkjgfY4" 3XkcM868<>BPdiZG2(%&3O[^^\eu����odfoz��rnljhhjjklnooooqoqtrrqquy{{|{wY*!((j�������������nr������~qI+$*.&;cx�����a)c�����������������}zwrosy��������������ƿ��jz������з�������¼���"*5FKKF@9),Rjpv{��hZX\[Z[]\__VbsG!&Z���������������������綥�����Ǿ�������×������ƻ��������������¿���Ż�������������������µ�����_Z[_dV6UlheV31=Mb}��������ȥtny���xaI56Cf����y= !"&*4D^ptxxztpx����������������������������~��������������������������������������xvtnc\[YbklcM9&%2Hat����{vsrrpiV2#%2BQ_eillijijgfgfb__`^]][[[[YWWTUSUUSSSSRNNPMLMJLKJLJJMMJKMLKLJKKJIHGJJIIJJIIIILKJJLLJJJIHHJJKJKIKLKKKIIIHHHKIJJGHJKIJGGKIHJJIFGGEEFDEDDBCEC?>@AB@=<::752/'#//&!"'*.137<AFIOW^_egost}z|zwspmlgeb\VTRNJHFDB@?<::977845642.(!
##-\�������}|z~~}zxz{|~~�������{xvsppmke_^bhjlmmmnpnnkkjjhjigeehhhhggddc_ds���ɿ���������������������������������������������������������������������������������������������������������¾�������������������������������������������������������������������������������������������������������������������FQ}����r4$?A@@AA@@@@@ACDEBBCDDBA@ADBA=<?:73/,** -G_if_aluxm_MFA?<?D?7/+028>BC>75588633;AGMOUWY[[]fkov}��������������������zxwsonpnmlnnmmnmklmnmmlklnnnomlkic@$"8bkhX=567;>BWorfV8.&(4P^``bek�����qru���olljkhjlmllonnooqstqqruuy{z}ypN!$8~�������������x������l?+%+* Gn�����F(t�����������������}zvpon���������������ü��h�������ʵ�������¿��t+;!'?EEGDF9(3Xmru}�{aVZ\[W[^_a_Tkx="#! /x����������������瞡���|��������������Ư�����������������������¿�������������i�����������������gg`caPXnlfdZ;5>Lf���������ħrpw}���}rZKQc�����y?(&!%&&(*,-6@MVfw��������������������������������������������������������������������������|yshTIC9*,+1>Q_]YM:,)-BYq�����}xuqliS:#$5H[agijiiieegfba`a]\]ZYY[[XVXVUTSSQQQPOPNLKKJJKJKLLLJKMKIGJGJJIHIGJKJIIHHGIHIILJJJIKIJIHIJJJKKKJLLKKGFGHFFHJHFHIHGIHIHHHGHIFGFFCECBAAABCA>>>=>==<96531+%%0/(%!#$&).148=EHKQWXZ_dilqswvtrposljgc`^YUQMMLJGDAAA@>:87666552.*! %%1`���������|z|zzwxxz{{{~�~�����}}~}yuusppoprsropnlmnmllmmljkmjgcihhhhhgfedgr�����ɷ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������GS~����r31@?AA?ABD?@CFACDDEDCDD@CBDCF@>>=;731/-'-FXa[]gtxtjWDA===BA81.,04;BC>74369742:@HNQSWZ\^ahmpz����������������������������|{vuropomnnlnnnmmmllllmlk^?*2RjoibS>048:<J`qul[F7*-9T[aaaegu�����zy��ymllijilmmlmopooortssrtwwxxy{vi;&J����������szzpbv������vbC+&*&*Rs�����n5B������������������|xsols������������������{p�������ĵ�������¿��N>7 *7AEDDF># 
8cosu|�r[UVVYX[[]a^Vwv3#$8�����������������ߋ���w������iYf��z��������������������ü�������������������c�����������������mrnmllolifaP65>Lc���������Ģrmw~����xjaao�����xA181*)+*))(./05<@FNOTTZ\js|������������������������������~}�������������������������������������}}}yyzwureL.+,*$##&-9N[e_WK6*-8Pe{�����|pd[L=( .APWbgiiffhgfdaa^][[[[YYYXWXVURSSPSSPQPNLJLNLLLKKJKJJKIHJIHIHHHHJJJKIHJJIJIIHJGJIIMHGHGHHJIHJJJJIGJKHHFEGGEHGGFGGGGHHJHHHFFECEC@A@?@BBB?>==<<<;;96540-#%))(*%'(& ##%(*.06:>BHKNSVY^cgijklomiikidb`[[XVUSPMMKHHGEDB><<;8764451.'! 1`~����������|yxvxwz}||}~{|���|���~}{z{yzzyy|{zvvtomlkjkniiihlhgghjkkkiiihghr���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}������������������������LY{����v8!7GAA>=?BAA@@CBCDEDCDHCDCEEECB@@<:8641-+$  .@SW\dsz{qcPE?=:>@<4/,,08?A=843377427;CLQQVZ\^binr{���������������������������������~{wutroonnnnmlklmmi[;CZhmnkjeXC55;CDThtxuhR>1/5FT]bcefk|��������tnmliihhkklmmnnpqorrsrsvxxyzyte-!'g����������vqk`n������|rX>+)*"8e~�����b+Y�����������������|vqnjy��������������ƿ��h�������׻������������7 B<!(9DGEDA5!Ddntw|zm]XXYZXZ\^^ZU}s(! $>�����������������ψ��㯄������efq��r�������������������������������Ž��������zv���������������ssyzvtpnkhfaP98?Mc~��������Ğrqw~����}thkz�����xZOQPMLPUOOUVZ\cfgmvw|}~����������������������������~�������������������������������������{||{xzwvuywyzusrfM1*(&&""(2BTegdWD1(.@Zv�����t[N@5*!"6CU]fheffed``a^\[Z\\[XUTVUTTTVTRQQQOMMNKKKKJIKLKKKIIHHJGHHHGFGGHIKGGHIFGHKIJIHHIKIFIHJIHHGIHIKGGLKGGFHHHGHGGHFDGGEFIFFEEFEDEDCCCA@A??=>>==<<:98332.+#&%*.17-*"!   
"$%'+.049<BFILRWZ_bdeggggefdda^]YVTWTROLLIIHHEEC?===;9:778995.& #'8c��������������{wwzz|}|{w|~~����}}}~{~||{wvrnljeejhkkkmjjjikkjmmllkkklw���������Ƚ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}{{z{������������������������DT~����v;%9FB@=;?@?@@@ACCDCDC@CEDCEFKCBA>>;=97520+'!);LV[iqxrfWLA=<?@?81,*-3;>?;634775559@HORWZ^`djot}���������������������������������������|{vssqpolmlmiUKRgilmnnlh]M=:CPT`pvxwm]J:2,5FVcgjjms�������}pmjhjihhgjlllnnoppqqqrtuwwxxysV!1y��������������������~zmQ5)+'.)Fm������A#r����������������~}zwpmn���������������ÿ��S�������Զ������������%F1!5@AA@EF<*Rjnq{}{k\XYYXZ\_`_XZ|e#!#%"*S����������������������������������}�������������������������������������������y����ó��������{dsxtsomkjhd^OA?CM`~��������mrx~�����xjk�����}heglpvyxqmttvvssyz��������������������������~�������������������������������������|{z{zvxyxvvwvxyxyzvttjS8)%$''! !&-7MdpneS=,)3Qn����~jQ:2*$'9HW_dfeecbb`]\[[][YXVTVSRQRTSQPNQNMNJJHJJIIMKJJJJIIIHJKHIGEGFGJGFGGDFHIJJGIHHHGIHIKJIIHHHHHGGGFGFGEFDFFFGGEEEGGGGFDCEFFFEB@@AA?@=<=>>=><:<;8530.-'$%#*132-+(*'# !!"%'*.048<@BIOPVZ\aaedabaa`^][WUTSROLKIHHIGECEB?>?=99975669941&#$)?g���������������~zyyyz|~{|}}}����~~~~xyx{|||{{xwqlihcggjljjkiihhjkmmppnmollmtx����������Ŵ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~y||~}|z������������������������HP~����t=&8?>?==>=??@BA@ACHIZaVIECBCEDBAAA@?==;570,%!
'8DNXchjf^VJ@??@B<4-+*/9=><734457768>FOTWY]aejotz��������������������������������������������~zvrqnmh^filmnmnonmgZH;DS`ksvusof[TJ@BDYhllnmnw������xnkifhihihhilloonnooorrruwyyw{tF#!C��������������������~}tfM6*+%AQ;*$*Tt�����q2A�����������������~|ysnnr������������������q[�������̴�����������e$2!(?CFFGJK52[mss{zhVX]ZZ[^_a`Zg�]&*..)% ")`�������������������������¼���������е�����������������������ž����ȷ���¾����������������~{�ynsrhd_bdhgb_NGGJR]y��������qwz������|pp������ydhryy|~|{xwwy{|��������������������}������������������������������������������|}}yyyz|{yzxwxwuwuwyxzzvvutp[?*$$&((%" #)7K_ttlaJ4+,=^v����qW?.)# 0BQ[bgdcba_]]Z^[ZZYXTUSRRRRSRPNOMMMJIKKKIIJHKJIHJIIHLIFGHHGEHHJJHHGGHHHGHIFHIGGHGIHHFHHIIGFFHHGGHDEFFFEFDEFFEFGHEEBDDCCCBB@BA=>=<=>=;;;::97431-+&&''.2.&',14.'!&(*,/147<?FKNRUW[]]]]^\ZX[YXVRROLKIHGEIGEECBA@>>;898784557662*"!+Dk�����������������~yz|zz{}}||~~�~~��}{{}}}}|ytoppnqrqsqpnliihiiilmnnoopopqpkg]l���������̽�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}}}{{~������������������������HP{����yB)3;9<<===@ABBDDEM_���qXFAABGEGBCD@>=>;9832,$ %2@HQUY_\WVJA>@FB:0-,,49<;744338989=CLQTZ]`ejnou������������������������������������������������zxvqqoooooklnnneV=1?Sfoqssrpmhb^ZW[honopopu�����|slhfeffghihhkmnnnoonoqrsqtxxyti3"#\��������������������zoeI/)) -^VSE>7421-)),8^{�����\*R�����������������~{xrnoy��������������ƿ��Qh�������ų�����������O%/&6879AJD*;`npoz�yeT[_[[\\^`^Uo�M-154-&!$2z�����������������������������������Ǩ�����������������������Ľ���µ��������|x���ž�����������orjWSPYdife_SMRTW]j����������z{}�������xs������zkq{~��||z{~����������������{~~�������������������������������������~~|yz}~{}|{zz|{xxwvvvvuuvxywxxwxuur`B,%#"$()(%""'0@[oxunZ</,3Sn����|gM6*#!
$8FS]`ccaa_^\^\^ZYZTURQTSRSQOONLLLKIGIIJIHHJIIIHIGHJFGEFGHGHFGFFGGHHIIFFGGEGGEFFGGGGGFHHFGEFHEEEEGFEEEFDEGEFDDEECBCBA@AAA?>?<<=<;;:9::8874301-*%&',/+'! '28/)"!$&(+,05:=BHKMQSUTUVXXXYVVUSPMKKIIHDEDFBBB@?@>=<;9986632124673/( )Dl���������~���������~|||yz|||~��}||{}~�~||}}{zxvsqpoljhhjjkjmoopponiaRMRScw���������Ŷ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}}}������������������������GPv����wB,6<<=<<=@CCDDGL_�����mSADBAFEFCAC@A@>:9740+' #3@EJMNUVTOHAADF=6/-+05:<:63327996:?HNRW\`gklnt���������������������������������������������������~yvvsrroooni^J>;;Xhnpqrttrpkd^]ainpoprrsz���tvmiecdefffigijlolmnonopqsuuxxxp[""/w��������������������}tl`G+,).UWJDGFV[RKLW3Ci�����H(j����������������|xtomp��������������Ļ��=m������׿������������-/, 6KIEGJN>"$Fhnru{�ybVZZWY[\^`]Sx�=++--)%&(8�����������������搳����������Ⱦ�������������������������������ü�������������Bd����̹����������ptkddefiiiid[[^]_\Zm������̾����������{w������|rv|�~}~}{~��������������~}~���������������������������������}}{}zz}{|{y|||||}z|~{xywyxvtuuxyxvyzzyvvsdK/%$!$'(++'!!&-8Oj|{vfT;/8Gax����qY>0)% *:KS]__a_\]\\[YXYUUSSUSQRQONMLJJKHGGHIGGHIIHHFFGEDFHHGFGGGGGJFFGGFGHGEGFEEGFFEFFFFGFFFDEFFCEEEFEEDGFDCCCEDDDDDDDDBAA@@@?>><=<;=:8:997642212,)$!%)'&%%140+!! 
"$%(+/157:AEHLOOPPRRSSTTRPQLKHGGHDCCCB@BA@>===::97774542023313-($$Cl��������������~}�������zyxx{}|}~~�}|{{}}}~~����~|zvusrrmimlkjkmmnoqrme]VSZ\\]_l������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������EMu����yC$069<=>>@BBEGO[�������nTABCCEFCABAC?A=98542+% !0:CGHJPPNJGDCD@<2-+,29;8512338:87<CKQV]bfkijs������������������������������������������������������yxtspnnnife`_ekmprtuxusoga]`begkorrqu|}vrpdca_bccdfiijklnllkmnnpqtuuwvwkG"B��������������������zriY?+*$+;1/:=?LSRQTC*Qt�����w3:}����������������~|yrmlr������������������m?q������й�����������}*%6;8==<<+,Lhosx}�u]V\[YZ\\^`WY��."%('&''J�����������������׍�����������ú�������������������������������¼������������z?Y����̾���������xjvsqppnkjhghfilsoe]]l�����Ƹ�����������z������zrvz||{y|������������~~~��������������������������������~~{}z{|y{}}}||{|zz|~~}{{{z}zvxzwwvututxwxxzzyxxxrhP4&$  #%),+(" $+6Ibt{}s`O3,<Um����ydL9,&#"1ALTZ^]\]\\YYXVUUUSQQRQPQOPKKJJHIHGGFFGGHGGEFFEFHHEEGHFFECEDGFFDDFFFHFFFGGFEFEEGFFDEEEDECEEFDCEEEEA@CCFBDDCCDBBA@AA?>>=;;<;:=<:;98742210/-(" $! 
*153+!&$!!"#&*-/248=@EGGIKKLOOLLMMLIGGEDCCAA@??@=<>;<::<=;;<;;99213001.,*& Eo�������~~�����||~������~z{zzyz}}|}{|zzz|~~}~~����{yvuvttsronlijjkoolqtqfZY[a`deeeacz���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������FOy����zB)2:<;=>BBCFJZ���������nRIEFGGCCBCFBAB==963/,%!/:CFGGIJJKGBFFA5.,-/597310/17897>@FNU[afgbdr���������������������������������������������������������}{upjknnqpqpqqpquxyxxulgab`bafnpqrrvtsqlgaZZ^`b``cehjlmkilnploortxzvvc<# T��������������������}xphV8*)*TF#.:0..2;A: 4a~�����]'O���������������|yvqlnv��������������ž��QJ������˶�����������^$3::88;7(0Xkosy~�o[Y[ZZ\\]_aYb�x&(%$.Z�����������������ʋ���Ķ��������������������������������������������½�������}idk�������������}mszwvsopolkouwy{vj]XV`������������������~������vlrtz|~������������~~~~��������������������������������~~||}}|}|y{z{}|{|{|}||~||}{{|zvzwwxwwvuvwuvyyyyvyxtmU9)# "$)--)% #)0@Vp��oW3'1Ji�����oU>/)# %7COW\[ZYZZYWUTSTRPQQOMMLMIKIGIJJFFHGFHGGGGFEDEEFFEFGEEDDEFFDGFDGFFFGEEGFGGGFFGFGEGGEDFFECBBCDCFECCDECABBCBB@>?BA?>=;:9:99:::9974540211-,("$(/451' (2,% $((*+-168<??BBEGIHHHIHHGEFCCAA@@@>=><9::;=?BDBBIQPMI@<900/-./+&#$Hs��������������|{|}}������|{yxz|}~}|yywzz|}|}~~���~zxuwusttywusollmnoptvqfV\`fgiijgffc`p����������Ǿ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������EKv����{E#.8;>==ABCNZ����������~iRDEGEGDCBCCA@@?:9:52+% !.8BHC@EIJKIHHD:2,,-27865/.14799;>DKS[_cfhkv������������������������������������������������������������ypjg`afikjjkprstvuvvsklkhhnnoqppruqhZ728@GMOQV`fc`deehikjmnnruttsZ+4)+n�������������������{toeS2)(!Pg<?cVB>9* 
Aj}����C*j����������������~zwsokiw��������������ú��>`�������ű�����������C(?IJHMPA&7Zlptx�jX[[XXZZ^`\Wq�e!%%%.l���������������������͐����������|x��������������������������ž���������Ŀ���~NWz�������~yxttrpx|yvtstrprx~}}~~uj[RRMNcnc{�������������|������uhpu~������������~}|~������������������������~}~���������|~}z{}|z|y}|z|}{{||{}z{{{|zxyyvwvvxvuwvvxyzxwxwxum^C.#"%,.-,&! $+19He��|mV4%)@]x����|bG4.)" +;HQUZ[Z[[WUUUQOPQONLKKKIJJGGIHEEGHFEFGFFGGFFFGEEFEEEFFDFDDHHFGGHGHGFFGHGFFFFGEEFFFEFFEEDBCDCDDDCCCC@CAA@B@?>A@><<;:9977878665231///1.*' &(*/.($#(27+% ##$')*-03489:=>@ABCDAACBCCB?>?=<;<;<89:<CKSYa`dw}vtmaUK>1(,-+*,'%,Pu���������������}~}}{}������~{|z|{{|||wyywyxyzzz{{{{yuuttrrtwz{zyvrnmnnotvreP\bgikikiifecb]es����������ż����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������FMq����zH(4==<=@CKY~������������dMGGEDCBBCCACB@><:730*$ ,9BB@>AHJKJMJB71--/59740./259::<BIRY_dgil}����������������������������������������������������������������|ulicYYXY\]]bejmnqoqqruqsqposuup\.,+"$*/K^VGENVWU[]aihjpsutlP";X+@�������cj{rumnsyvruvywtldK)&#+PDOb^V]VJA>?7"(Op����p/9x���������������~zwqmll��������������ǿ��g9m������׽������������)1CGC@A@;#"@blswz�zcWWXW[X[^`YT�S!!'3{�������������������z�������q]Yu���é�����������������������ļ��������������j>Tn����zwtpmjgjlqz{yvwwxwvvy{}~}xwsi[RNJFABz��������������������smu~�����������~}���������������������~~����~�����~�}{}zz}~}{yz|yz}|y{z{||z{{{zxxwwvwuuxvwvtxxtxwyyuwvndK0%  #(+,-+(%#"%+4AYnzqZB!'<Us����~hP?2*'#!4@JRVYYXVUUVSQRSNMLLNKJIIHIHHGGGGECGGFFFEFEDEEFFFFDFEGHEDEFGGHGGFEDFJHGFGGGGBFEEFEGDEFECCDDCCBCBACBCCABA@???>;:::;;:754445642200..--*'!
"'+-*'*/1-( #'"!!#%%'(,.01324699::=;=????<<<;:<:;88679>L]t��x��������whWF40+*,,.5Uy������������������~||xwz{����}{z}|}{|zyywxxxvvvuuspnmnnsvx{|~}xqnrsqrurd[^dgjlmmjigfdcaccal����������;��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ENs����H$07<?ACDXy�������������}eQHFEBBEACABDAA>=>842,% )3>B?>EGILONJ=3/.-17840//24:<=?AHNW`dfgm��������������������������������������������������������������������}wpjha]ZUPVWUXZ_`iimmnnmlqvtqP!#<T+";cbVSQPOA223<AIU_isth<,aJ*#"!!! !T�������{��}_cmidcbcfyvpjdA(("!,2>?BHEOSVXX?2^z�����V'O����������������~ysomkq��������������ź��HDx������Ϸ�����������k"3>??@BI> &Llosx~�u^WZZXYW]__XZ��A !(@�����������������呱�޳��������nde�������������ɹ��������������¹�������������|GLao���ue`Y[Z_]`iqtwwuvx{zyxy|~~|{zwndYMLJKM~��������������������uw}����������}zz}������������}��~��~~~~�������~��|{}{||}~{z|zy|{zzzyz{|{z{|yxxvvxvvvtwwvuwwwxvzywwuqiQ5'"!%))+-/+& !#*/:Jfv}s^,&4Uu�����t^E63,'#'5BLRVWVSTTRQPNNNLKMLJHHHFGGHGFDBEFDFFBFFDDDEGGHGCFEGFGGDEFFGEFEDEFGGFFFEEEEFEDCDEDCEFFDCCDDCBCCCBCBA@@@>><=;::::887524334100...-+*(% !%,-%!$).0*%**"!!#$'++,+-0136546579998889889876666:DXx���������������tYA3/(*/;]z���������������~���{vyvyx{~����}||yxz|||{{|{~~zxusqngjlmrvx{|~��~wrtvwsqnb\aggmmnljkjiheedda_]`o����������Ƽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������FMo����I%0:@?FPu���������������zeP?EEBCBEDACBCA>;8574,%'5?@@?BHJNSPE6/,-057500./06;>@BHOX`eio�������������������������������������������������������������������������|uojhb^YY[WYY]``bbSOR^fln@6lW)29=R_edpocW1-:L_os\2.B71161..,)*&"!
-k���������z_p�xnaph`lwvni_8))#(#&*22:E=&>h�����;/c����������������|wpmkjx��������������÷��@X��­���ū�����������O $6?GIFJJ7-Sinqx}}q]X[[ZZZ\^^X`��0#&J�����������������؈���ȼ��������~�������������ɩ�������������ý��{�����������j_nw����rdc_bejlnruttuyxy|{|{{}�~�}{xpcWRNKJ���������������������{{��������~{z{}�����~~��~���������~��������~|{}}z~~|{zy{{yyz{{zy{{y{{{{zxyvvuuuxutuvwwxxxxyvtskV>*$  "')+-/.,&"!!$.8DZsy]. )<]|�����}fO=3/)'$ -<EMSQRSQRSOOPNLJJJIJHIGFFFEDFFFEEFHEEEFDCEFEGFDEGGEHDEGEEFGFCCFEGGEEECEFEDDCEDDDCDFEDDDDEDDACA@C@@???>=;=<<:88977754221/.00---*(&!! "'.0+.8-#)(!!#&%'+/-0111247555565664336546:Cf������������������v`@*+.>^~���������������~~|����{ywuvy|�����}{yyyzz|}}~�{yrppmkjmnsvwy}~���zyz|{uqha]ceimmmkjkjhhfgfcaadeb]cs���������ƿ���������������������������������������������������������������������������ý���������������������������������������������������������������������������������������������������������������EOm����L&3?GRq�����������������y`NIBCEBEDCDFBA@<:9862,& )7@A>>CFMUWM</,,.15422/..38=?CGNX`hoy����������������������������������������������������������������������������~xrokhea_abccdbPIFDO^R,EfV_odfdXC8]TRS93&0do^T\qmS)2<4/Da[LGB?<61/-*+--@����������t}��l_uwiTwwtlmX.*2I\>80-'!  # %Nq�����l26v����������������~zupmkl�������Ȼ�����ɿ��d4f���md����o^v�������j* ,7>CBIE(5^kopv|~nX\]ZXZZ[^]Wj�� "&-W�����������������ǂ������ż��ǿ����������������ś������������ü���Uiv���������|~�����~wttux|wwxzyxy||}~}|{}����~~xlaUPQU���������������������}{�����~~|}}|~~��~�~��������������~~����~~~~~}~}|}{z{zzyxxzzzyyzzyyzz{zz|{xxxtuwuwxuuuuwvwvvwvssrl^H1($!$'*,,,-*&#$*2=LbK('5@Ww������qX@92.*&!%/=GOQQRTTRQNKKLLKJKGHGFGGFGFFFDDEDDEFFDEFFGHDDDEFEECEHEFCEEBDEHHFFDCDDEDDEDDCEEDEDCCCCA@ABAB@AB@@?>><;<;::9878755420220/0/++,)'% !
%+,-54**5*  $'(*,--11224433532235438Fo������~�������������qT/+9`����������������������|ytvvxy}������}yxwvxyyzyrsrpooonoprsuw{~���~�xrlhfggjmllkjhgghgifdjgecbaa`]iu�����������������������������������������������������������������������������������Ļ�����������������������������������������������������������������������������������{yx�������������������������FMo����}M*=On�������������������xaNFDFCBBDBBBAAA><<730,%)7@B><AJU[UG8/--04551.,.16=@BGOXclw���������������������������������������������������������������������������������|xspmljkjihdYL>@P?!$OhotdjvoZOahatlj`//v��mlkngG&4;6N��rmlfd]XQLFEA99W��������������{rny}wrooO*/NpYINCFQH<92*($!.^|�����[(C�����������������{yrplkq���з���������ȼ��LEr�ĸ7%x�y("!;ECRim_K5';IFFGD9#=apqsyzm][[XZY[^_ZSv�y (,5h�����������������������������ï�������������˽�����������������|oz����������������}{x|{~|z{|{|~��������������~xlaZW]���������������������}}��~|}�}�������������������������~~����~~}~}zyzx{|zxvvz{{{zxzzxzzy|zz|zxwvvvwvwwvvutuuxwvvtruqlbP:0(  "&),-,-+)(#!'/4<5+&$+49=Pj������zbL<50-)$(4CHLOPQQOLKKMMKJHGGCEFEDEDDDDDDEEFGEDDEFEFDCDDFGEEGFFECEEDDBFGFGEEDDECCFEADEEEEBBCBB?AA@BCB@A?>@=;<;=:88877763331020/..-,*('&% #!
!),-2.+02*#!#$%(*+--021335522104An���������r�����������Y>8a������������~}~������}�����|xyurx{}�����~}zxxxvxwsvsppuqolprsuu{|}������|wwtrolkllkiiijjigjiihgfhebcccd_ep��������˿���������������������������������������������������������������������������������������������������������������������������������������������������������{xwspq�������������������������FIp�����M#&Ae���������������������z]LBBD@BBA?@BACA?=842/+#)7BA>@FS_[L?41.03684.-..2:@CHPYeny������������������������������������������������������������������������������������zwtqppqpiVCAI7"?aoc>5]ga]u{p�tj|]+YrLe�rnp^4&69<i�TR\iqitupkbdY93j��������������������|uqpg:,5YP"*1T~SVk[E;93/$ :f����xF,[����������������~{wrmkky���ʨ���������ķ��AX��˥'5��T"%-6<=6#"" 3?DGGEA7 $Eempp{�{kUWYWXWZ_`XS��_#*,1x����������������쟑�����������˾����������������sl���������������������������������}|~|{~}~~������������������zof_f�������������������������������������������������~�������~�����~~|~�|zzyz}|yvrw{{z{yvz{z{yz{{|{yyxvxwuvutwwtuuvwtvvtrsqogWC5+# $%')+,+))(&! %)./.+)-2686Gax������mT?851+'!.7AKMPPNMLJLLJHHGGEECEFDEECDDCEEFFCDDEEDFFDEFGFDEEDDCBEEEFFFFFEGGCEDCDEEBCCCFEEDCDCBA?ACCB@AA?@=<<;;:8677565210/0//-,,**'&%#"&'(.0/20-+"$!#%(*,-//220/0/2;`���������jv������������fEe��������~~���������~~���~{wxvx{}����~}~}}{yxtttrgfglortwz||������~|�}yqklkiijlkmjgijhjggfhhfeedbb]ai����������������������������������������������������������������������������������������������������������������������������������������������������������������|wvtvsrt�������������������������EFm����}O!(M�����������������������uaOGEDC@@@ABCDC><=9320)!
*8CABHXdeYH93//25671.++.5>DJPZeo~����������������������������������������������������������������������������������������~{|}yv`H?D?B\jaL?ToWWp}`da4T|I 4#?�anqR),7:Foj.0B_XTkYLJfkM.:���������������������zspra,.9WD>�}J��W=6?=<'&Gm�����k3=p����������������ytqnkk�����������������yUt��΄8��4")5527)#"2BJMKE@1-Qioqv~�{gX[[XXY\_b\b��K !()*@�����������������ꐡ�����������Ƹ���my����������}aSlw{{�����~zy�����������������~�������������������������������������yuro��������������������������������������������������~}�~��������~�}}}}�~|{yx{|yywsxz{{zxyzzzyy{z{|ywxvuvtuvttvutttvvvvutsqppk\I7-&$&''),++)()'$  &,--/01467;>On������v_H;741)%!#1>FLMMNLKLJJGGGFFGFHFDDCCDDDDEHFEEDBDDEFFDFGDCCBDEFEDFFFFFCEDEEBCBEFDDCCCEFDEBBDBB@ABBDC@A?@??>=;99755555310/...-,+++&$$# $-2665<>:.!"# #$'(+00000.04Is��������~��������������mr�������~{{|||~���|~������������}xwwyxz|~~����~|xuxqiedkjimpuz}~����������{unjfffhijmnlllljgfgjjkihfeefe`\k~����������������������������������������������������������������������������������������������������������������������������������������������������������|zyuuvwvwy�������������������������FGj����}R$(U������������������������s_KIEA@?BB?BBA??@=845/'#+;CGJ\ekcN?71/27673-++-2;AFP\gp����������������������������������������������������������������������������������������������{nYIB?HTC?IZm^7JgLAZUUyw5*[��IYslG"199MxZ,7]jXlc;27bi<.M��������������������~xrquW(3DQ5_�_l�n/!88!-Vv�����W/N}����������������xtpkjn���ҷ���������ǽ��jf����bL�� *=C07<)!"9KOOOI?,7Yklox��xaY[[XX[]`dir��= &)'/J�����������������܃�����������ѽ���i7B_r��������y|~~������������������������������������������������������������������xy�����������������������������������������������������~����~����|~�~~|}~{{zwxyzyxwwxz{{xwwxzzxzzzzzzxvvvttvtssutttsutwtrrrqqpn`L7/'" "$'')++**))('%"$(,123479:;?Mf{������iQ>:91,*%!
%2=CJKJLJHHFFGEFFEFDDDDCCDFFDEEECBBCDDDDDEFCCADDFGEFEEGFDBDFCBCCCEDDBCBCDCEDCBBDCABA@AA@?>???>=;:75655651221..-,*(*($%%# &4<BDMV_N7%*0-' !$(+--..-.29S{��������������������������������}}|z{|��������������~}|{xuvwvwz|}}~�~xyyxmfa_a_^ejpuz|����������}yuohdfgihjmmlnlnlifgihhijljihhd_bbj|�����������������������������������������������������������������������´������������������������������������������������������������������������������~}zzzxyz{{|��������������������������GBj�����U(4_������������������������r[IFB@?@BB@AB@B@A>9<6.*#/>IPYbieWH;4/14554/,+*/6=DQ]gw�����������������������������������������������������������������������������������������������yfR9)$$#4>>. 97%:ZjwwQ 1r��cUloe9%694bv\JUibamS<3Gh]/3f��������������������|tqsqC)5NQC419sgG�D(!71:e|����J/]�����������������|vrnkku���ʳ���������ĺ��Yv��۵;+o�s&;SOKcZ."!.ELLOLHA)=^mps|��zYXYZYZ[\`kv|��%  #)&$2\������������������q�����������Ǵ���fNdpw����������������������������������������������������������������������������������{�����������������������������������������~�����}~�����~~��~~��|}}~}}~|z|yyyxyzyxwwzzxvvvvxyyywxxxyyuxwuuusrrtttssstvrqpqrqpncM<2)#! 
#%&')+++*)(((($ "(-024689;;;CXw������uZI?;63/+%"+8DEHJIJIGFFDFEEEDCBCDCDEECBBCCDBCDCCCDEDCECDEFDDEEFFEFCDEDBCBECDCBCDDCBBBCCACC@AA@?>?=@>>=;=:987766542320.-+))*)'(%%" $'(-2;FLNU^hmYA(*35/"$(,+..-,./7X{������������������������������}|||x|~~����������~}}zwsrquxxz||y{|sif`^[W\\`gnsx~���������~{vqkikmmmnnmmmlmlkjgihhfhmmlkjigebd^eu���������������������������������������������������������������������ƾ�����������������������������������������������������������������������������~}~}{{|}|~��������������������������E?g�����Y'$;c�����~������������������s[KEAA@CDBB@CEABA::53.(##9GMOVbd^RE921366430+*.2:DP\i}������������������������������������������������������������������������������������������������u\3!,5;47CIB$(`�}pdjiV..:97r}nnonjkj[OLbqL,8|�������������������zsrvl0.6WlhYUTYBNgV:$,=- Hp�����v@9m�����������������xuokin~��������������µ�gDz��؜&G��N?aniy�c # "0.FKNHHJ: "Ccmpr��z[XZ[YY]`gw~x�q #$%&&$(:b����������������ϏJq���������˸����wy������������������������������������������������������������������������������������������������������������������������������������������~~�}}|}�~}~{}��~{{}||{z{ywyzwxvvyywuuvvwz{wvwuwwvuxvvvttsstsrqrrtsrrpqropmcSA4(#!
#$$$'*++)(&(+.-'""&+-146899;;;Rj������}gOE=::61/("%0<CEIJHFFGFEEFEDCFDDDBBDBBACEFCCBDBDEDDDCCCDEEDCDFCEFDCCCCAACCBDDEGEBBBBBA@AB???>>>=>?>=<;<888875432330..,+)))(((&%# $)17=>ELPSNPRYejdK0'/5, !$'*+-.,++.07Tu�����������~������������������{}~||{{~}}�~~|������~}}~���|ywtsrqtutw{zpggdcde_^_`fit|����������}{xsrppuutrrronkigllikihfhlklmmlljhfccbhp����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������AHi�����X*&Al��|w~������������������qYEBCAFCBA@CECA@>;7540*$,:BEHWZ_ZQA93359753-*,06?K[p��������������������������������������������������������������������������������������������������`-%2;;LcgV'",0AKCSkfG(4=65FOS^bjqpvonq~j:4K�������������}������~vpq{W%.9exdbg_`]]YQC>76FD (Ss�����h5G�����������������wspljq��������˼��������>6r���}0a��2 Ebqu�r7""<B!-612474%*Mhmrr���fRYYWX]`j�|t}^20142,)1?Jkurvy������������ygnu|���������������������������������������������������������������������������������������������~}��}��������������������������������������������������������~~}}}{~~|~z|~}yy|||zyzzwxyyvwvwxwvusuww{{vvutwuuwvttvtvtttqrqqstsrrpqpoqmeZF6)#   !"%)*)('$)0450(!#'+02577:<:>Da|������pXD>?>972.' (4<CDFGFHGEFBBCBDDDEBCCBCCDDDCDBCBEECEBBAACDFD@DCCEDCB@BAA@BCDCCDFDDBAA@A@@C?@@?==>>>>>?<988776533430-,-,,)()('%# ! 
#)5:>ABFIKHFGHJNSXYN6,./ !$%),..-,,,,,,5Hi������������~}����������������~~~}}||{z{|~}~}|~�������~|}~�~zwurrsw}yoefjklf^YYZ\bqy����������|zyvwuuwwxvvusqomkkkllllijklnmonnlmljfcddai{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ADf�����\,*>^qww{��������������������pWIDAEEC@AACDEA@>::88/*$*9@CIP^a`MB96578650+,04=H[p��������������������������������������������������������������������������������������������������_'#)-6:R^]e`OE,.[pc9'792,3=;?<<?@HSVWWB77b�������������}�����}ukw{D(0GkU@]b_``_``be\[]64ay�����U/[�����������������{wsnkkv��������������ƻ�p$T���F;o�� 9dvsdD$(Aok2 !/Tiotu����gSY[Z[^gx�uciO>ADDB:<DGC>DHOZbdlqx|���������������������������������������������������������������������������������������������ywwvqrz~�~~�����������������������������������������������������~~�����|}~~}||~}}|z|~|yy{|yyw{xvwwvwuuwyxwvtvvvyxuwttusuvutuuttrtsprpqtttspoonnnmh\D/&"'('''&*4:=8,"! !&,.0478::;=GZt������|_JDB><;84.' ,5;BEFFDDECBBECBEECCCBBCCACBDDCCDEDDBCCCDBBAADDDGEDCBBBBACCCDCBCAFBBCBA?AA?A@@>=??><>>;99977533432/--.-+)'''%$#  #',277889;<CCACDEIIILF;41% "#"$&(*-.../-++*++,1?Wr������������������������������~}}z}{z{z||zz~~~������~}~~~�~|yxy��vhagkjh\TQRTV]ju~���������~{zywwwwvwxyxwvsqonlmmmmiikjnmnnnponoljfecd`dr���������������������������������������������������������������������¿������������������������������������������������������������������������������|{z�������������������������DDd�����`2&?`lu��}|������������������o\JBCDB@@CA@DC@?<<=93/'!
,:CFQ`hmbLB<679971-,-0;EWk��������������������������������������������������������������������������������������������������R.8BT\WUOXV<# (% BigV-*460BbUQOKGGC<<<6855<{������������uv�����}xrl|r0)6T]ARk^WZXUXWXYXUH&Fo�����F7p����������������|{xroko~��������������õ�[$S�S5x�|>qyl\>'&2Fq��J(")(  3M_lrt����\QWXX]bp��\0=>;@FIIHSXXST^cjnuz}����������������������������������������������������������������������������������������|zyywrpnljfhkkmz�����������������������������������������������������~~�����}|}}}{{}|z|~zz}}zzz{{yzxywvvtuvtuxxwwutvvxwuvvuutuvuvtttsqqtrrqptsqspponoonli\D.% %&(('%(09?@7*$#" !"%',/48:9<=?DNi�������gQFB?@@;96.%%.8<ADCCBBBCB@AECBCBBCEDBCDDCBCCBDBCCDCDEBADEDEEEDDDCAABDCCCBCCCBBBBCA@@@>??@<<>>=;=;:98975423531//0.+))''&&$"! "&+32345;?CDCDCFHGDDB?82"#$'))+*-./..---+*))*++,3B[u���������������������������~~z}||}z{yyz{|~}~����������}|~������~tjeeeaXMLOMOSVco{����������{|zwwxwwwxyxxyxxuroolmmmllkmolmoqpoonnmkhffeedm~�����������������������������������������������������������������������������������������������������������������������������������������������~|yxwvz�������������������������DH`�����a3 *Dbu}|xuz������������������jOBBEB@@CABADAB??>;52-(!,<FTdrvqbTC988:82/**+4?N]q�������������������������������������������������������������������������������������������������E4DF>3*)?[Z>1'%,NR0&%$OldI)+447V^N\bZYXWWPRKI90G������������zmt|z���zsor�g(,;_[Hgnihih_[SO@:E?
'Wr�����t<E����������������~{zwrngl��������������ɿ��;%6H"&M��^,Xopnc6!%!=\��T@:BPC661)&%#!)4AZgq~����`TWW[^j���8!(/5:=IT\fhmqwz�����������������������������������������������������������������������������������������{xvqiigdbcb^b`cdcfhjo{�������������������������������������������������}���~~}�~~|}{{|z{|||}{{{{{zyy{{xxwwwvtvxusuwvussuvwwvuvsstuvvtsrrsrssqpqoptqoqppnmpnnlgZG3% "$'(&$&+1;A=3,'%##" !%*.4:;<>?@DHc}������p\KEEEAA@83(#!)18>@AABA@ACCDDBBBAACCBBBBACBCBCCCDDDDEDCCEDDCCCFDDBBCCCBBABBCCB@A?????>=?@=<>;=<=;986774313420//-,*)(&%&&$ "19767<@BDEDBADDCA?=;:5,$!#&)*+-/.//0/0-,,+++*(())((--/>Xi}������������vv���������~||~~~}}|{{zxwvyzz}���~}������~~������ufcd`ZOJMRTPPQ^gt�����������}y|{xyyxxxyyywyvtsqnnonmmmmnopqpqqpqomlkjiffdaafo��������������������������������������������������������������������������������������������������������������������������������������������~zyxvsqmw�������������������������CF^�����^6!,Qouyttyz�����������������uO@BA?AAA@CBCCDAB?;843.(".DXjsxxndSC;9:850+**09EKXfv����������������������������������������������������������������������������������������������7&'!GW]ONNOcziYO:.;B?@aoc>*173?RLM_XXX]\XPMRT:.\������������vpwz���wpnw~N%,AbP9HMU^eknhZG27E63]s�����X4Z����������������}zyupjeq��������������ƻ�p'&@M@(<w��:/ITURF#%&$/M]SMINb\PSOIEEB>A?GJACSdp����dWZ[^dv��N0126;AOenwy~����������������������������������������������������������������������������������������{xpib_^YXTXVUZZ]__``acefir{������������������������������������������������}���~~~~~~}{|{|||zy{{||||yvxzyxyzwywwwssvuttuutsttvvxwwvrrrqtusvsqstrsqprnoppopnnnmmolmlh^L7(# #%)'$&'+4=><70*&&%"  $*/5:=?AACDDSu�������hQGGFFEB?82*"#-5:=ABACAABCDCDDBCBACABABEDEBCCCCCCBBABABDCBCDC@BCCCBCBCBBCCBA??@@?AA@@?>?;<;;=<:977765331110/.-+*)&&%%%#)67998:=ABD?>??>=<96983/+$
#((,../00..//-.-,*++**('()''&()*+2CUiv���oVslMRr~���������~~|}}}~~~{zxxvwzyy{~~}���������}~}||ukhf_THKRXYWSQX_k��������|~}~|z{zxzyxxxwvutuuqsqoonmnlmsqqroqpmlnmkjjgdggfgghx�����������������������������������������������������������¾��������������������������������������������������������������������������~}zvrpmd^Vj�������������������������FD`�����]4$<bpstwxx}����������������qG?@@@@?@@?@@BEBA><9;83-' 2PelszvqeVC=<<83.*+-2=@?@Oez��������������������������������������������������������������������������������������������q("GW\_nmnvsywc`pxlcXbkZ4(245HVIVbafkfbWC;RJ57v������������wp�����|tmgpY-)/N^>7689=BZlcXJ>;?)=cv����xD7l���������������|zwslgev��������������÷�Z!!(9B:Bb���#,**4.%80*+059?BIMNSUY\[\d\bdacaVTU]i����tdZY_bdw|`EHIHFHPd}������������������������������������������������������������������������������������{yytnjff_ZSTOPSORSTSUYZ^^_abaeeejt}����������������������������������������������~��~}��}~~{z{~~{{zyyz{{yxyzz{{yyyzxwxxxxsuvstuuuutssqtttuuwxurrrstvutqqqqqpoqtoppoponomlkmlmkhbR?+$!%))$%%'.7=CB:3.)%$$!!$*05:?BCCEFJSh�������xcNJHIHGF>80'(/5;?@BAA@BCBABBCA@AAAAABBDBDCDBCCCBABCBCBBDDAACCCBAAABDDDCCAAA@?>@A??>>?;;;<;:9767754421/./..-+*)''%&$ 
+
+
+
+
+	&-25668<?ED?>>==<9787544.) #'*-.-.//,+--+*+,*++)''(((('%('')'(+18=GHM>-7,1Vy��������}����}|{}�|zyxwxxywxzz{~��~�������~|}~}}wrqng[LJVZ^^ZVTWcu���������}~}|}|{zzyyyxvutuusvtqqolplnlqporqommnmmkjjiihfgijjkq���������������������������������������������������������ž������������������������������������������������������������������������~}xqojaXSMHGk�������������������������DE]�����^.(E`lwwtxx|���������������V=>?@B?@@?@A>A@?B?=;>974/)"7PipuxyqgWIB@=73-*+/8ACDA=Sgy������������������������������������������������������������������������������������������^5O_Uaolj_noVczxmmi\fiM-)436MP;<AEJWdeYEDQ?.C�������������z}����~xqeTC%(3Z_259889DgjhjfB;?""Lm}����r=H}���������������}zzvpiaYt�������������ʿ��D***/9AS{��g.,+-08=4&.FL;:@GLORSVYY]^bggjmjkihfc`__\]jodd]`Zbmuv|zof\WUSORcy��������������������������������������������������������������������������������}xohe]W[WWVTRQRQPQRQRQRRUX\]]_abaccfjs~���������������������������������������������}�~}~~�~|~~}}|z{}||}yxyx{zyyxyyzxx{zwwxxwwvtustuttusrtstuuttsuwttqsstrsrpqrrqoopnmonppnpmikklkkhfcXH1' $&&$$%).4<CGF<1(#"%%&'(*/5:?CCDDHFJa|�������nUKLKJIEA>70(&/6=AA???@?@ACCBAABBCB@BDCDBBA@AABACDBBBDCCECABCBBBBBCCDBA@BBB?>>?>??<?=<=>;:9766753311//0/-+*)''&&'# 
+
+
+&/3566<CHEBA@BB?:;954541.,(#!%*+,..,*+++)))++)('&'%%'')(''&$$&'''(&&&(&&+5Xu�������~~��{||}~~~|z~y{xzxxwyz|{~}~�������||{yvsodYRX\__`]UNVg{�������|~~}}}}}}zz{z|zwssrtvvttqqqmnmnoprsqopollkkmmjjijnlmmkin}�������������������������������������������������������������������������������������������������������������������������������zxul_XPKHEDAEs�������������������������A@`}����^.-Oiqsvwtux�������������cDB@@@A@???@A?>??B?>?B>961.)# 0Qmosxzsk\JF@961,),4CQVRLLAI[u|���������������������������������������������������������������������������������������G!9GB8DGKGUnZJlxglzdYib@'-44;VE0000:VfjhPMP:,S������������}~�����|um\@ )<dT8ACABFThkjcH4B9/Zw�����k<V���������������~}|xsleS?_�������������Ȼ�y:69;9>BMn�tTJDFIJPSIL]k_UWW\cdeeghikknnnoppmkkjjjjikihheiqx�������}l^RQOI?Rftty}�������������������������������������������������������������|ywsrsttmmiec_]ZY[XXVVTSPPQQNNQQRQRV[]]_``_^cfhr������������������������������������������~�~���}|}|}|zy{|{}|yxxy|zyzwxzzwxyxuxywwuvvupststtstssuuttssuuvursttrrorrrtqnppmmlmqonoljlmligfgd\N2(!!"!"#',3<BIM@- &+.10/255;?ACCHHJMYu�������|dRLJKLLIE>7-% -4;?>@BAADCCABCBAAA@ABCBCAC@<=>@CAABBCBBACCAC@?@AA@@BBA@AAA@@??>?=<>=><<;:;:865744221000-,+)('&#%#!
+
+
+
+
+$.167<BHKNKJGIJF?>83457893,%  $%(+,+)***()((())''(%$%$$&'&'($#%$#####%&%&:[w��������~|{}~��~~~|}|{~}zz{{xwxyyx|~}~~{|~�������|zxxvlc]]_]__]WJE]r������{~~}~~}|{{|{z{|z{zwrstuusssrqnnoooqqqqrpmjlnonmmlnpqononnnmv��������������������������������������������������ý�����������������������������������������������������������������������{riaXRMJDBDCDFPt�������������������������BC^����_/ 2J\nuvtqs{�����������pDBD?@@???@@@?@>?AB?@EJB;964/) !2Mfmt{yuo^RMC:4-*)0=Oblfb[UNSP]n}�����������������������������������������������������������������������������������|;4/+*+&(//%&)*,5CB/?aq{�xK]kX5(245CV>678:F_hi\>NN7.i�����������y}����{qgQ.!,F]QLMMKLLPNFKD79E+@ez�����W3f���������������|yvohaB'?Y|�������������fBGJLKKNMPRRSRTWZZ_cfiqqkjihjmknpqqqrpomrpqorsurvz~~}{}�����������{eYJIJC>I]knruw{}~}|��������������������������������������������������zsqkgfbcWYchjeffbba\[\[\YXUVVSOOONNPQRSRSXZ]]_aab`bdfq����������������������������������������~~~}}~��~}�}{|||{|{z{{{{yxyyyywwxuxzxxywvxywuutvvrprttuttsrqtutrqstuvtqrrrtsprqrrpmppnnmnpommjjkkkjhggf`P4'  !"#',3:BNM;$")1:=AGF@=<=>@CGHHKNTk��������lXQMOPOOLE?7,#&07:@A@?@ABABB@@B@A@?@AB@@<6227=?BCCCDB@AABCA?>@B@@ABBA@@@@@??>??<==<<;:;;975564421010/,,*))(%##! !"
+
+
+	
+!*26:@CIMNRRTUTLG@9:98:==91)"! #(*++*++)(')('('&'%%%$$&&%%''%##$!"""#$%(=ax����������||~~~~�~zz{|}||{}||yzzyyxz{~~}}~}�����~zyytpgdc\^]\UJBOgw������~~~{|||}{{||}~||{{zyxutursooqrqronppononkknpqopqpqqrtssqrromml}�����������������������������������������������ý�������������������������������������������������������������������yqhc[TNNJJGFEFEEFUw�������������������������AF[�����_2,Kdmtrstx}���������}IAAA=AB@??@??>@@AB@BFEGE>;941,'"5Rkmsx{zndZPA:3-+.6CP[iolpifd[Y\_o|��������������������������������������������������������������������������������f@DDEHGLIGBDCE@=@9956Xo}{VMfeO-*335HRBCBDDGGIC56RH0?�����������vyywtz~�~wnaK$",JZWVNNLMNMKJH<7CG!&Nly����n;5k��������������}{wsnfW4';LXn��������~_\\Z]]]\\]\Z^__cfhlmoomoonnpsrrssttvwvvu{|~����������������������{n\ROKHAG[hnuz~����������������������������������������������������|yplcTJJSPWXZYSVdggdedcdb`^^[[\YWWUOQPNNQPPRRTUYZ\^^^a]^bchs�������������������������������������������~|~}}}}~{{}{{{{||zz{zwwwwvvvwxxxwwxwttwvuwuuvtrpqsutrsrqqstsrqsrvvsqqqsspprpppnnonmmmnmlmkikklkihgff`P8' !##%)-6COO<"*1=GSelfYLB9;?CIHHLNTc{�������v\RORORQOMGA4,$")4;>??@ABDBAA@AA@A?@@@>>8.%&+2:@BBCDABABBBBA@AABBABBBAAA@@??>>?==>><;<;:99655443100/--,**(&$$%"!" 
+	
+
+	
+%09>BILPVVVXZXQJFBAEAAB??=1)"!%''(((&''((())'&$&%$%&'&%$&%#$$"! !""#&:`x������~}~~}|}���}||||}|}~|||zywvwxz|}|}|������~zyvpnie_^WNBFTp~������}{yzz{{y}||||{|~~}}yxvrsnrqrsrrqoooooliknqronooqstsuvuutsqpkilw����������������������������������Ľ���������ſ�����������������������������������������������������������������|riaTQROJIJHIJLIIJJK[y�������������������������ADV�����d20K`jnrtwy}��������T=D@?>A@???@@?@BDDA@AEEGFA>:751-&!4Remrw|xsi_RE72145;BIMNVbntyoojffelrt}����������������������������������������������������������������������������LBFGGEINMPPSU\\\`Z^][enpd[jj`F&-96<QQHIJFGHBFB<IS;+P����������}|ztrty}�wmg]@#%##)4>LQVSPKLMIHT=/Ukz����Z$/f������������}zvtpj`L- "%*./25;>HQ_nx|tomlijhjiklllmmknpqrqronqrssswxwyzx|~�������������������������������~uf\XSLFJ[nv{����������������������������������������������}zyvunj]^WNLI5-=JOTWWVUVcfffecbec^c`_^^YYZUSRNNOQOOQQPRVXZ]^^```defs���������������������������������������~~|}�~}~}|}|{{zxzzz|zy||ywvtuvvvvwvvuwuuvutsuttturropsrssrqpprsrrrpsusqpqqrpnnpnpnkmmlmlkljkkhjiikihhfefaV>)! "&+1BNO6(07CYn�yk[KD>;FHHKMPQXo�������}fYUTVUUVUQIA8+$ !(-4:=?@ABBA@A@@@@??@?=?8+!$*6=@BCBCBDBBA@ABBBBAA@AABBA??@??=<<==;;<:8995543431//.-,*)))%#$%"  
+	
+
+	
+ 0;CHMSUVZYZXSROOMMFAA@CHB7,$!"#%''''&&'(('&$&'&%%&%&%##$"%$##!!#(<dz������y{{z|~~}{}~}~~�}}|y|}~}}|yyyxwvwx{{|~~���������~|zxsqkf`VGFPjx������~|zyyyxyyx|{}|~���}|{{yxvuvvvtrsrpqqqnkjmppplmqrtttuutvwvurnnigch|������������������������������ź��������������������������������������������������������������������������{pdaZSOMKLLHJIJJJLLLLMK[�������������������������AD[|����e05R_irvy{��������ZADDB@@DB@A@AA@ABEBABBDGHGFC=9730-$"5Shprv{|zwhVG@<969>DEELV``fmpquxurmkfgw}������������������������������������������������������������������������m9CBDEFGJNQQRSX[ZZ[^afikkillgZ<)4<68IHHMPJLPIHEHYO-/l����������|ywturstnjc_U3 #!&+3=GHMNC8Xgx���yI#<\t���}}���~}{xuqpmf[D4246>BIKQTZ]chiqy}{xtstqrtutvwvvvutuvusuuvxz{}����������������������������������������~wlcXNJO`t{~������������������������������������������|xsmkkmig`[VQLJHB51=JOSWXXOV_bddc_ab`abaec`\[VTTRQPOLMOPPPTVZ]\]`]^_`cjr���������������������������������������~}~}|}~~~}||}{x{zzzxzzyz{ywvuuttturtuuvurtwutsttutqopoorstsqppprrrsqnssqpppqpnmmnmolllklkljhjligjhijhgeeedbX@) #&.?LH0!*,0Af|���{iZH=>EIHKMOQSc��������l[VWWYZYYWQHC8+$##)/6<?@@ABABA@A?>?AA?>8-# +39>@ABBCBB>???@AAA??@@@@A>?@?>=;;<;;;:99977642221/---*)(('%###!  
+
+
+	
++;HNVUX][WURRSVRNGHFFIJHB:0' !$&'(''%%('&%''&''&$$%$#%%%$"$"  ""*?ey������xvwxz{{||}|{zz{}|~~{}}||}~}|{zzzwuvwvy{|}�����~�������}{wsnf[OXft������}{wvvvuvxzy{{~����}|~|}||yy{zxvtssoqqpljjnoonmnqrrutxwswwwwusspnljedp|����������������������������������������������������������������������������������������������������{pe\YPNMNLKJKKLLINMMNNNNNPb��������������������������CCSz����e2"=Zfquy}�������dAADD@AA@????BA>=?CABDEEEDDEE@=752/*&!$<Xinqw~��{kZLEB>?CHJNQROUY\bhnv|{}|zvunjhs��������������������������������������������������������������������Q>BBCFIGJMPSSTVXZ[[^`cgiihjibU0(66./.113356;>AAAD1+>���������������ytnlicc^L) ""! ;R^l{�|g5!'@PW]bfmpuxtiifegklbR>BFMRYZ`ckotz�������~{yxxzwyy{|zyywz{|�������������������������������������������������zo`UR[r}��������������������������������������}~{vqssrolgfdgif^WSQMHH@21>KPRUVWOU\_bba`__``_ada__YUUUPPLONPOOQSTUZ[[\^]]^`civ���������������������������������~���~~}{|{{}|{||{zz{yyzy{zz{yyzxvvusssrtssvuvwssuusrtsturqrqoprsqpponoppqqpnopqpoppnmnmmnllmllmkkijlkhgiiihgfeeccd\H,!(1<FB-%''*Lp�����s`LA@CFHLOQPM_u�������q]\WZZZ\^^^UJ@5-%!$,29>AAB???@@?@@A@??:3'&/58>ACCCAAC@>?>@@=?@@@>A>>?><==<:;:;9888886432010,++*(((&%$" 
+	
+		):DNPV[^ZVSTXWQLJJJIHGCA>;1) !#%'''%&&''%(&%'&'&'%%'%%##"##!#*?dz�����}sqruvxyzz|||zzy{{}}~}|z{}~~|||zzxxwwyxvy{~����~}}���������yofchw������}z{zz{zxvwy}|z|}���~~~~~~}~~�~|{utsprqoomkmmlmlnoqstsvwuuuwwuwtsrqqnhe`fiy���������������������������������������������������������������������������������������������~si_WSRQSNOMNOMLMONMMNNOPPPORTe��������������������������ABSz����g1)D_kt{�������oBDAAA?AA?@@A@BB@?@AAADGFEDGECA?9770+*$ !%8Thlq{���}l]TPNKILQTTRRSVWY^cjnsxy��~zxxtqmq{����������������������������������������������������������������n;ADCDEHKMOQRUWWWZ\]`ccfifgigdH*)52,+,.++*(*+.-*.-.+O�����������������}xtllZ>""! .:GZhpnV-!%()/889AHNX_XPCCPQ[gh]QT\`ehnpuz}������������}~~~}������������������������������������������������������������|pkjn{�����������������������������������~}ytuusoprqpnjgefgif`YSOLIH@31?JLQVXUPSZ]^]`a^]^\]ac__`[VWTOQMMOPRQQQQVX\[\]^]_`cjw��������������������������������~��~}|{{y{}zz{y{zzz{wxyxyxxwxywvvtrrqrssuuuuupqstqrttrsqpqoprqsqpppoppprqnlmppopqnlonmnkjkklmmlkiijjiehgfdefffccd]I0 !)1:C?0')*%&2Pp�����ycO@>@EJPQORRXg�������|h[\Z\]``ba]VOA4,%$*/6=?A?>>?@?>>?>??=7,"#)18=@?>?@AA@A@B@?AA@@>??>=>=<<;::;;89978554100/.-+*)(('%$" 
+			
+"/;KU\_^[WXTSPHJJHC?;:<>=75+% $%$$%$%%'%$&&%%&&%%%%%" $#$! !$*Ahy�����|qrrrstuwvxyz{{zzxy{}~}{|||}}zyyxzyyyz{|~~���~|���������{ww}������}xwyz|{~�}}~|{{~|}}~~|~~~~~~zxtstrqqplmmlnmmprrsssustvxyyyxxwwtrookhdb`q�����������������������������ż���������������������������������������������������������~ribYRQRPRUTRRRRNNPQMNPRRQRTSSSWYk��������������������������BDRz����k7/Odq�������zLABAAA?B?>>>AABA@?AA@ABCEEEDECB>;85/.+$"  ':Tflpy���{mc]ZXVZ[[[XWVVVXZ_`eekqvz|���{xxrkoqx������������������������������������������������������������K;DFHIHINPQRTWWXXY[]]_dfiihhg[:%-51++++)**(((('*,,-0c�������������������xrkY6 $$.>RM>3.3424568<>?@CCB?CMNWdfdfjlnosw~������������������������������������������������������������������������������������zx|�����������������������������������~}zwturrrqrrroiddfiigbZSQMLF@51CNORSWTLOVZ[Z]\\Z^]_``^a\WWSPOONNPRRPPPSUY]]\[\[^`bhw��������������������������������{z��~}||zx||{{zx|zyyzwxyvwwvwyvuvusrrprqrsqstrqppqrsqrrrponmopqrpqpnnoooqnmmnpoopollmkmnllmmmlkjhggghgdheeeccdccfe^N8&!'09?>2 '-//--/:Tn�����ydLFDDHMOQQQTSd~�������g_Z^^_bcdc`^XOB7.((,49;>?@>AB@A?=<<<8.% %.5:=>@A?@?AAA@@?A@>=>>==><<:;::<<9:96545432001/-,)))&$"" 
+					)AKT\^\WXOMHCGDB=8657>@>>71+$! "#$&'&$%&$$&$%&&&%"! ###""$/Giy�����}spqpqqqrquuvz{|||xy{z~���}}|{z~|}{zzyy{||zyyz{z|~~}|}�����������������}wttvwy|~���||}~~||{{~}~���~}zzzvvusqnomikmnmqpqqsutwxvxzzwwxwvtsrrnlhgfghs��������������������������������������¾������������������������������������������{sid^[ZWTRRQSTUSPQSRQQSROTSRSTTUUTU[m��������������������������CEKy����g/ <\q�������VABAAB??B@@>?BBDDCAB@B@@ACDFFEDEE?;84/,)'" %;Rfns{����tneedfd_]ZXVXXY[\^`^bgjosuy{}~���~|wssrw~�������������������������������������������������������q:JPSRSQPUSWXWXZY\\]_bceijjhggW1#/4/*))***(()&%'())-;w������������������xrjR0#&!#+5=DFB>><@?>@CHJKNNPY\]_ehnqsstuuxz����������������������������������������������������������������������������������������������������������~wwrrv}������������~{xvsvsrttsphhdaggfc\UUPLF>30BOPPUUTPRVYXXYYZ\[\``]^`\XWURQPNPPQSRRRUVZ\ZZZZ[]`ci{��������������������������������}~�|{{zwxzz||zz|zyyyxwwvvuuywrsusrsrrporrpqsrqmoprrqqqppmlnmppppqpnoononlllmoononmnlllkjiikjhiihggfgfggcdecccacebaUA." %.3:<1#'.257=<:<Yp�����u\PNNOORSRSTTV\o�������na`aadddggeebYPF7+*/358=?@ABBCA@>?>91'!  )06;>@>AAA@@BA??>;=>>;<<=<;<:99;988643333321/.,+())'&#!  		
+	
+
+$5BOW\]ZUSF?CEB><88:ACCDA<92,$ #$%'$#%%%$%$%$$$&""!#"!! #$.Hk}�����|trpqqonnoqnrvxy|{|}z{{}}}}z{|{{{|{{|}{yyyxxxy|~����~�}~�������������xpppprtxz~~���|{{z|~�����~|{}}|zxvspoljkmnmooopqruwwwyyyxxxvwuttrronmnjhehr�����������������������������������������������������������������������������~skjfb_\Y]YVWURUUXRRRRUXWVWUXVUUTTTSRRZu��������������������������BHVu����g4(Jm������]DEB>@?@CDECBBCBBCC@B=?@@DEFCBDCBFD=;710*$ " '=Peqt|����zoihklgc_^\YYW[\]^]bcdhhkmoswy|������~zwux�����������������������������������������������������IC]]_\]^[adbbdcaac_cigfjjjkliaJ*(44-+,,)(((()''(&'()A�������������������~woeJ(#(-'(/27<EPTSRPOOSUTSTW[]_`dfhkmpuxz{zzyy|����������������������������������������������������������������������������������������~�����������|zvrljgdjfdagnw�����������~|{wxvqsvupnhhdfhggc\UTPJE=21COQRUUXQRVUTXVUWXW\]\^__\ZVTSONNOPPQQNRVW]ZW[[Y[_`ak}��������������������������������}~�~{zzyxyyy|yv{xx{xvwutuvtvwxvttrqqqrqnpqoppqqnoqrrsqqpplmmlmoonnnnonnnmlmmnnnlkkmkilkiifghhhhhifeffeecbcdcccbbb`]VG4% $*-281#'-28FUKE;@Yk}���xi]]ab`ZWTVUWYYNf�������udcdeefghgljhf_TI=52/07=BACEEDC@>>:0(#$,3:??BB@>?@@??=;?>;<<=><9::99:987533333101/-,,(''&%"! #''$"			
+-:IW_[VQNHGDCEC@<>@EGHFGGC<1*$!#$&%%%&&$&$&&$%%#!""#" #&-Jl�����}uropolmmmoomptvxyz{|}}}y{}}�~~{{xyzz{|||{zxzzyxxxy}}�����|}~}}~�}�|xx��ysomlmoposrvz|��|zz{z{z{}}}}}~~}{y|{xurnkkjklolnomptvvwxyxyxwvutuussssronlhhdbl�����������������������������¼�����������������������������������������xrikggdbaabaa][[ZXXYZXWWTY[]XXXXTQSPRRSSUXw��������������������������CGQv����f30Zv����eDDA>?>=@ABAAABDEECAA@>>@@DFDDDDEECA><:52+$  (6Qhrv|����{ojjknpkfea^\ZXZ[_bcefegfjllopvx|�������|zxw}�����������������������������������������������m;QY[``^abdgjlojkphjtupqwtpqng^?",>>.,,,,*)(*)(()(''$2^u�����������������|wrfD((0:=93+)$(7AEKRZ_ca_`_]_`caccijjnnoruvx{~~����������������������������������������������������������������������������������������yvx�����|yvtrlbXNPRS[\a`ba^fnv���������~��|y{xvxzvrojhefghhd_XSNJF=26FOQRTSVPNOQQRSRVVVZZ[^_[\XSRSOPPQORQRQSTW[[Z[[[[]^`j}��������������������������������}|~�~{yxwz{xyzwxywzyuvvqtuutuuvwtsspnpoqqqqppoommnopstqmonmnknqonnnnnmmpmlllklmmnkjjjijjihgiheihiiefhffedccbbaabbb`^YN9&#&*-21) #/66<MWYWF<BSet|~wkcbjorrlf^[^^YVRXq������}kgfeghijkmopnhb[N=0+1:ABEEEDCBA=<7-&!"(28<>AA?==>>=<=@?>>=<=>;9::9:998643420/0/..-,*&&%$! &-/-)$		
+
+(;FS_^XUSROOOJHCA@CINQNOOKE;1' !$%&$%&&&&$$&%&$# "#" "%.Tt������~vsooppllnmmmpprvuxxy|{}}}y{~||~}|{zxx|~}}|zyyzzxwyyz{}|��{~}z}}|{��{wtronnppqqqqswz|~||}|{}|}}{~}}�{z||}{wtqnkjmnlkmkmoqrttwwwvxwxvwvvvvwysrqnoiieim���������������������������»�������������������������������������zqsoihegfgecbddca^aa`\\^^^\[XZZ]WYWVVQROPQOPQXz��������������������������BDLt����o8&?`|��rI?A?=?A@ACB@@AABCDBCBAA?@ACDCDECHFFCBA=:51*  *<Qiqv{����o_]emnpqoige^\ZY]^`bbegjjjihljquw{}��������}xxxw}������������������������������������������?8IMQSZZ_```fniemoipxz{|�|yzmgX0 EjN)+011-)')*+*(''%%<Jaw}��������~~|{yuna@;=BEIJHGC:4+*& !$4@OV]efhkmjhlhkklnplopsxxxy|�����������������������������������������������������������������������������������������������|rjru~���~sifif^SSMFDKLSX\^`a`acju~�����������}}z{{zytojgghjihd]YSMJG?47EQPPTTRKKMNONQRQRVVX[]ZZZWSRQPMNNMOOPRSTVY[\[\][\_ai|��������������������������������xy~~|{xwxxwvywyzyxyvsssrsrstrrstutrnnooooppoponlmpooonmnmjjiilommnmmlkmmkkjjijmmkkkhjiighedggcfghfaegde``ba^`_``ab`_]P;' %)-142,.:?>=MYfZM@?FQ`gkojc_aoy��xqkc^UUWRb�������offhgjllnrrrsrpjW@-+2=CGEEDCCCC@:5/)"'/49>??>?>=<<=?>@<;;>?;8999;98664221000/.-,**&&%#" &/352+&!
+		
+-BO[\WW[[XOQLIGEHJNPONNKLI>6-# !##&&&%%#$'$$"##$#"##""%3Xq������}yxrppmlnonmkilmpptv{z{}~~|||{{}~~}||zz|}||}{{z{yzzzzyvvvw{~~��������������{wvqsstutqrrpruxxz|~|y}~{}{{{��~|}~�}}}}xtpppnlkijjlmprqppruwxxwvuuuuuuvutrrqmnkhfhz����������������������ľ�����������������������������������}lidhijgghggegeghggedfhifefdca^[YXYXSTTSSNOOOOKOMZz��������������������������BCRo����g;3Ni|tMCA><>>>AAABBBC@@CDAD@?@?@CDCECDFHHGFB@?<561%!"$"!1@Tovx{�����jW9D]jlrpsrmfc`_]]^_`abejjklmoooprtz}}�������yxrwx�������������������������������������X-<CDGKKNQRUVWZ^`cbfmqswzzy{tlbL%K��P/35510.++))*)&%##)(/EUcrleekkighoknlaSONOMQRSQRPLLGA@A:64256=O]adhjnqrrqoqquvwxxxz|~����������������������������������������������������������������������������������������������������wpu}������xpkgca]XSNMHGLQTY^_``behqu������������|{~~{vqolgefhgke][SNMI?54GQPQUURKJKPNLMNOOTSWWY[[YUURQMKMNPOPPQRSTV[[Z[ZZ\_ah}��������������������������������wx}yzxwwwxwwxwxxvyxvutqrttuuqrutsqommoonmoonookkmmjkonllkjiikkjklmnliijkkihkjhkmijjhiikkifdedeeebcdddda_``__`___``^_\Q?- $(.9;;:BHFFOX\\QJ>@KZ`dfgb\VZp������xn_`]SR`y������odiknqqrtuvz|{voK'#*7BHDDFGFDHFA?:3,&##+49<>=>><;<>?>><<:;=::9:78:7443313320.-+*)('&$#"   )37896,'!
+
+		
+	+8ET]aa_]WOLSNOGMHIHA;<<C@@:1& ""$%$%%#%$#%"%&$$""""'3[u������}{yvtqnnllnmlmmlnmosuutw|||}~|z{z|}}~~~}||}{{|||xxwwz{tuvwywy{}�������������|z{ywvxywwxxxvttrwz}��}yzy{}{}~}}|}}|||zxvvroliijkllnlkmorstwursuttuvvruuutqqpnkihgn��������������������ÿ��������������������������������upgcbcaddeehiijggkkjijmmmolhfeda][WTTQSTTTUUQOPMKIMK\~��������������������������DHLt����m;#7LYK9>A==>>?DCCDCAB@>@FB@@???@BDDCCEFHIIHCAA>973+""!%4AThsw{�����pR?.5Gbhpswuromidb^\X[\adcgkjnmnpoopsvy|}��~~~~|zyu|����������������������������������*,;BACJIMONOPSTVXWY]abdgnqrwoh[C?���U>7435650/*((('&" &*-266;69=?C@GHQU\dllg`Z\\[[[Z[[Z[[\\]^YZY[Y[]dfjopsuwyzywwz|~�����������������������������������������������������������������������������������������������~}���������������������{wnged^^XUQQLIPUXY^bdcbfktz�����������}}{toljeehiijf`ZRNIF?38ITPSWVRKIILLLLLNOORVWWXXSUSPNKMLLNPRRQQUUWXYV[WX[[[i}��������������������������������vwzyzwuwvwyxuuvuvxvvtssssttqrstrrpnlmnnnmmlmnmlkkkllnlkmhhfgjjjlkjjhghhhihgigfiieeghhghgdecabbdfccdedcb_^^^_`^^]___`\R?. !)7EIKONJKNVXWUS@<GaonfedYM?Ys�������sia\YTWp�����jklnpruwyy{{yt_2"3@EEDGHEFHFDED@93-& !)289=AA>=>=<=<=<<:<8:;:896325553430.-,*))'#%$"   (1348<81)%
+		
+3EOWXXVRHFB@941)&$#0:@?4*$!#$&%$##%$"""""##  !&5^w������~{wuvstunmnklmmnmmknorsuvvyz|~}|yyx{|}~{xz|}{xywwzy|zyyyywyz{|}���������}xxyz{zy|{y{{{zyxsutwz|~���}}|||}|||}|}z{}z|~|{{yurnnmkigjiikkmorvvtuutstutuvvwvvttrqnlihghu�����������������������������������������������~njjfffecc`bffeghjllmnopopromjjifd`\\ZWUUPRSSSTROLLJGIILZ���������������������������BGLs����p>)9:=<>?>???@BAACCBABBBB=@@AA@ABCBBBBDFHFECC?<<;72& !#"(7GWjtx~�����w`=)(&2GYeszswsqmkfb`]^]]_acghlmnnnnnptvuvsuwxz~��~|{}~~�����������������������������F2>@CFEJLKKLMPTSSUWYY\^_bcghedT??sybOB:8:@><B<0-++*)'#&-7:?DFHJNIROVV]elimpqmjieeebadggghhjiiiklrsmklmmoouxy|~������������������������������������������������������������������������������������������������������������������������������xojfbb_\XSQKKMRTX^aa`bgcoz�����������~tpkheddiigd^YRNKH?38LSQOSVRKJKKKMKMMKMORTSUTRRTQRONPPPPRPQRUQTYXYZ\[^[Xl}��������������������������������xxxyyxwvuvxutvututsutrsqrssqrrqrrnnmlmmmomkjjlmjklnmlkjiegehjikkjigijihhgihigghigefgeefedfcbdeeccdbaa``__^^__]\^]\\[XTF3#'2JRUYQFDJOVXZTJ@Qr��yppcH8@_{�������{oc_][Y_�����rkklorvxyyzxu\0"/>EFGGGEGHHGGIHD@<1)"#,58=>><=>=<;<==;;98;977644555211/..,++*)'&$"   %/4388;:3,'"			09B?97.(%&6=>5.'! #&$!"#"""!$$#!" 
"(9`w������}xsuttstrrqpmkmnplmmnorrrtwxz}}|||y{|�~|zzzzz|}{z{|zz{{zwxywz}|��������zruuy|{z{}}�~~}~}{yvvyy{}{~~|zyz{~{xz|~~~}|z|yxwvsrljjhfhgilmqprqssrtrtttwwxutyzyvpkjfceq������������������������������������������|kcdhgfddeddcccfgmijlnprrrusqomlijgdb]\]]YYSQRUWTVPNNLJGHFI^���������������������������@BKp����x@ /7;=9:<@@@?AC@BBB@?DCBCBA@@ACCBBABBCDFEEBB?>>;84*"!#$"*7BRitz�����|cC*#"#$(:K]lprstupliea`_][]_`aeehhkmqqqrqqrtquxxyzyy~~~�|vy������������������������y"29=<?AFDFHGIOKNMPTUWZ[]`aadbeS/!"/@@==??BFQWB;77517<=?JNTVVWX\baacgjnnooqronmljmmlnnoqrqqrrqrsttpruwzz|~����������������������������������������������������������������������������������������������������������������������������������������vmjhfb^YTTOMMNOUZ[`cedhov}����������~tojgaddhkjc^YRLJG>59KPPQUXQLIKJJJIIMKKMMPROQRSQQPLKOPQQQNSTTUZXWYXYY[\]j|�������������������������������suwxxvvtsttsvusuttssqqqptsrrpomqommmmkjlnmiijllhjknljkjheeehigjiihijighfefhigfgfdcfedefddddcdcdacd_````^^]\^^\Z\][[ZYVG6%$.BSZVJ68@HKMSSKG`�����}tT70Hi���������vjbaVSey��{fhlnpruwyxwrV+!):DEHHHGHHGHHIIJHDB80(%!   !(07<?=?><<<<=;99999776655532111//.+**))&%#"" &/2368:9980+(!	
+
+
+
+
+
+
+!1::91'!!$%&&""!"" !  !"(:dw������zussttutvtpomnnpomnmllnnnprvw{{}~~||}{||}��~{zzyz}|{|~~zxyyz{z{{{{x}}~}�zwsruuwxwz}}��������zywttxy{|~���}{{{{}{{}}}{}~|{|��}}{vspliggddegiloorqrsqsrsuuuvy~�~{xutrkhf\e~�����������������������������������rmjjhhhgdfgebeeefgkjkkmomprqpoonjgigeeghbca^[XUTTUVTQNNJIEDADF\���������������������������@>Tm����s<(17776;?>>>BB@ABBBA@A@B@?>@BCABDBBACFJIHFDA><:960'!!""$"-9DVkw|������iQ5$ )5FZotvxvvtokie_^\][[^_bfhjmptsrstsutvtttvwx{}~��~��������������������A ((*/35887;>BIFGOROQVWY]`_``\F'#+.6:<?@DFJSMIIHGIT[^VX\]aeedehlmnqqrusttsstpopopquwsuvxwzz{{|���������������������������������������������������������������������������������������������������������������������������������������������������ytnjhca^ZUOJJLNSV\`cgfhlsy{���������~qljedcehfhd]XRKJLA5<MSSSVWQNHGJJJJIJJKLNOQMNRPNPOLLLNOMPRROSWXXYXWZ[Z[]j}�������������������������������~uvyyxxvsuursusrtqsvsqroprpppmmnonnmlnkjlljhijlliillihiihheedhhighgiigdhfeedfdfggdeddddddcaaa`bda`_\a`_^Z[ZZ\][YXY[ZYZUK9&&9LVRF:7>DGHJJJMc�������lH47Jw���������}rmicdhonXXdkooqrvytlM+#5BIJJIIKHIKMKKLLMIFB95/)%$#"!&/8>?A?>=;;9;:::998768433110/..-+))('(%$#!  &/00269<<=<61*&			
+
+
+
+		
+
+'1:<;1($  "%$"$##"    " !%9gz������{tsssrtuturrsrpsrollkkmlkoprtvwz||~~��}}||~}|{zz{{}~�~{{xx|{{{}{|~���~~~zxxvuuux}����������|}xvrurx{����~z}|zyz|�}||~}{|}{ywoonhfc`^bdfkmoqprtsrsrqsx����}yxupmif^dr������������ĺ�����������������tqjijkkiijijkekgkljllmkknljkmnmnmljiggfb`cdddb^YZUSSTSQPPMHFEBBBIe���������������������������A@Jk����r?-8<758:=?=AB>@AB@AB@AB??@ACCAB@ACBCDFHIECBA@<977.%""#'" ->JXlz��������x\;)2@Wgtttyyrtpigd_[Z[[\_bfijmpuvxwsututsttvwz{{{}~��������������������o#))*.359@CFJKSTWY[ZR>28;??DFHKLNOQPWZ\]ejhc``cdhlkmopstttvvuuuvuutrprqquwyz|}{����������������������������������������������������������������������������������������������������������������������������������������������������������vpjfhca^XUPLIJMS\`gefhiosy~�������zmggedbcgfgc^WSMKO@4;NUUVWVRKHJJHIJHGGIILLLMNNNLKNMNNNOONNOPUXVWWTXXXZ[_h}�������������������������������}tuxvvvtttsusuttrpqrprpoponookkmllljlljjjhgggjjhhjjgggffeededfgfghghfdeecddcdbccbaccababdca`_`aa``^Z\\]][[\\[\[XWWXW[ZRL@1%!!.>JNHHGNOHEDCFMe�������xZ?29f�����������|qf[U72L^hmnopqpeE&  2@IKJKIKIKLKLMMNPLMLHB;40.+'& "'.:=@BCC@=:;:8::97557433320/..,**)''&&#" %,-/47998;?=93.)#	
+
+
+
+
+
+)8FNF:0'"!$#'%""!"! '>cz������{vutqrtusrttuusspqpnlklnmnmkmorwx{{~����~}{x{~}~}|{}~~~||}|{z{|{{~��~�����|wtuvuvxz}�����������}ywvuswy{|~}|||yz|}~~||}~}{}|{||xupnkd_WU[dehjknqsssspotx}����~|ytrliii^c{������������������������}tqqnmjlkmlkkillljnloimmllkkiihhhjlmjijjkhecbcfefb_[\VVUQNMNKIGDA@@BHh���������������������������@>Gj����r@$3@9669=?>?@BBBCB@@BCD?<BBCDBEBBA?AEFGGFC>AB?<894,! !"##! 0@O^n}���������eJ.$+;Rgsuz~{wvtpkgea__\^\_bgkppsstvywuuvwwwvxxxwy|~�����������������@!!%'(*-/465BFJOWXRLIKNMNRUWXZ]``edgfkhgefggjlnrprtuwxxwwvwvvvttstvvx~��������������������������������������������������������������������������������������������������������������������������������������������������������������������ytmjecd`\TNIGJOT\dbeeggkqv{������zpigfedfhhgd^[QONMB0:RXVVXXUMIKIFIKGGJFGKMMMNMMMKKMNLMOQQPQRVWVXVUWUUW[^h�������������������������������|twvuuusurturprtrqqpproppooomjlkjljjjljhhgihhhghihgghebcbccbdgedgfghdcccbabcbaaaaa`abbbba_`__`_^^\[Z]\[\\XYZXYYXWYWWZXSNE7:4&& &2>HKTURPLHB>AIj��������fL93Y��������������tK-)F[jlmkmpfC&!.>JKLLLKKLKMNMOPPPRQNMKF?;83-'""$&',7<@EGFA==;9:866568523220/-..,*('((&$"!$*/034568;?@?>71/*
+
+
+
+
+
+
+			
+*<GKHC7*#  %%%&#   !)>d{������yusrqrttsosttssssqqnnonnmnnllqrsvuvy{~����~|{|~~}~~~~}}}~|~��~~~{{{}~~�zssvwusw{|�������������}{zwuvx{|}~��~}}{|}|{z~~}}|{{wsmjf_]\]`dgknrrtrqosvvw{|�~���zutphfe]fq��������Ž���������~tnlnonomlnmnnnnnoonnqponnmkjhgfdfgfgkklljkkjjfdeghfdb_\VTSPOOMMIJGCCCDKk���������������������������@CGh����r@-;=;77<<=@@@?@@@@@@@BDBBC?BBABC><BDCFEDDCCB=:9:83&!" 5OXau|���������sT0%4G^imvxwwxyunlfc`\_]`adehlmqsvxy{}|yxyxxxwwxwy{|������������S'$ %&(+-1467<?BKRTU]^[[]ZZ\\^_cffgiknjihjfghijknqqsuwxy{|zyzzxwxurv|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������}vpifffe]TMGEJOV\^`ceffglsw���ypihhghhghhd]ZONML>2AOVWXYYSJHNKHHIGHHDEHIIKJLMMMLNMMOONOPOPQRVWVYVTTUWZj��������������������������������|vwuwvsrtprsrppqqqqoromonnlkjjjijkjkihhfghgghefigfggfccb`ba`becfedfebba```ab`_aa`a___^__^\\^__^^\YZZZYXYZVVXWVWWUUSTTURQNWklN+).&"!#-6@LW\YQIFD@>@Pp�������lXF7K{�������������Y%>XiklieX7$ );GILLMKNMMNPMOQRRSRQRRQMJE?;1)'&%$',3>EHJGA<::866764741223...---)((''$#"! "(*.10388;>?@@@A80( 
+	
+
+
+			
+-<IOPK<.*# %$$$$$#!!)Cf{������xtpqqqrstqtttqqqutvqqqoomlnnoppnqrsswy~����~~}|z}}|}~��~|z|~���~|{~~}|{|}}|wvuxywrtw|��������������~}|zwuuw{}~����~~{z}|~|{{}~~}|}}}zyvunhea^\^cfimnqqqopttuswyz~����~|zwrkggcZg}��������������xtqqnmmmnpooonloqqppollnoollljgcddbdfdfhijlklklighhghggc`_\URROOLMMMHDDEFLo���������������������������?CGi����uJ$8GA916;=@@@@A@@CB@@??@BBBDBABB@@B@BECDDDBA@=::881$!#@R[cnz���������x_7!0BPais{}{zsojhdcb``acdfhjnquy|}}}}|zyzyxx{{}}�������~mJB<0$ !!"%%),-018:<?DEHNRXY]ciigfcdcdfdefijkllllnkjjkllmmprsvz{{~�������zsu|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������yqngefb[SMHJKMSW\_adgfgjnw��wplhgghkjjgd]VPLKI=3>MVTWYZTLKMJJJIGJHEHGHHIHKKKLLKMOOPOMNNPSUYUUWVUVYW]h��������������������������������{sstvsrsqopppqpopnnonllmklmjihiiijkifgfefefeffghggfebbca```bdbbcbcd`^ba__``_a`^`_]]]_]^\^][]]\[]]WYXXYYXYWUWVTSSTRSQPSRS\v��|Z3-,'$ #"&04;K[d]SIABA@>>c�������eMDFs�������������U"5Rcgc^M4$"6GMLLMMNONORSQSQRVVVUVWWUSNKB<41+()+.4=FIGH@>;875455633431-//,,*()(%$$#   %'(+-/5878;?ABEID;1-'		
+	
+		
+
+
+
++ALRTOE8-%   "%'%&$"!",Di}�����vsqqqqrtsrrssrqssquusstqqnonnpppqprruuy|{|���}||z|~�|~}~~~���~}|}|}}{{{}|ywuwxy�����������������~|{wvxy{|~���~}}{{|}{}|||~{|}|{xysrnieaabdgjklnrsstutvuuw}�����|wtplgccdt����������|wvsrrqpnmnoqsrqrqrnolkkijjjkkkhfifdfeefddhillmomjhigfiedc^^]ZUQOROPPLHDDCACt���������������������������AB@k����rI,=E<618;BA@@A@@BB@AB@?AACCB@AA@AAACDCEFGAA?==:984) !'FT\dm}����������gE% +>Xm{��}|{yvtokjfcaaeddcfjmmsvw{|}}|||}|{{|}~�����|ttq[F2)()-/048;=ADFGLQRY[_acfjiikiifegjjjlmlkmnnnmomopppuvy|~�������������|twuu~����������������������������������������������������������������������������������}�~����������������������������������������������������������������������������������������{wllhcc\SJHJJMQRU\_cggfiqxz{{uqligffijhie_VOKHG<1>MTRTUVQMMLKKIJHGGGHGIIHIIGJIILLMMMOLLOPSUURSTTVWVX]l����}���������������������������xqpqtrqqonooonmmmlmnlkjjjjjgedghghjhddcdfdeecbcdcddbcd`a_^]a`b```b`a``__`^_]^_\]][\^]^\[][[][Y[\_`_ZWVVUSTTUTSQRRRRRPQQVd�����nB )-.*%#%'-28H[gd[OE?<;::J~�������sVEDn�������������a% 1L``XD, !/DMMLMMPQPMQTTSSSTXXWXXZ\\Z\YPJC:521026=DFGEC<775566523120..--*())'###    %'),+.2556:ADCEHID=6/' 		
+
+
+
+
+
+
+
+					-=PSTRG?7,$  "$%$%%"#,Ei{�����srppprqrqqqsrrssssuttvvssoqqonpqrrqpqrswwx{|}����|y{|}~~~��}~|{{||�����~�~|}{zyxxxwy}��������������������}{ywvvy|}����~|{{{z|}}}||}~{z{z~{wvrnjfccd^cehlnqsutsuutvxy|~�����{uplhebgksuq��|yzyvutsqqpmprrsrsrqnljehhggjjiigff`feedeedfhlllmmljkihgedc`^\\UQQPLLJGD@==>Jt���������������������������@@Hi����qH!3G@:24>BB>>@???@@A?AA@BFBB@@A@@A@ACEFFFBCBA=<;:62)#/LX\_n����������lO1!(7I[grz~�~}}|yvspmkieedeeefilosuw{{~~~~~���������������r>2489<=BFJLMQVWZ]^`dfhjjjllkjiglkmnnomporqttxy|�}��������������������~xx{�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xtoib^VPMKJLLMRUY]dfeflqtwwtqkfehfhkljfbSNKJG;2=LRPSVRPNMNMLIIIEGHGGJJJJJHHJIKKLLNNMOQNQSSSTTUXUUV\n��������������������������������vnqqrqqnllnmoonnklmlklihjihgddgfegheccbbccedabdcccc`ab^^^]]a_b_]bc___^_^]\^\\]]^]\\[[]\[ZXYZZ[Z^fmj`[URTSRSSQPQQMOOPPQS[h������~X)#0993-*')+.7J[llc[L<756:H���������_=Di�������������n5+DXT:%%** +?LNMLMRSRQNRSUUVXYZ\[[^_^`bb``]QFB?989:>BFGC@<967853320100,,,,)&'(#$#    "%(*+,.2459=@CDFJKKG;2+"
+	
+	
+
+
+					)AJY]SLG;.'!##&%%'0Jo|�����}tpmnnoqqrpsssrrqrrtstutuutrqoorrqprqopqnrvwyz{����~~~}~��~}{{{}~~��|{||{zvuvvxxvx|����������������������~yxuuwy{~��~~~}|zzyw{{z||}||{{|yvutqlmjegdgjlmosqqtttstvwz|�����}yuokifea^_hoqsvuvvuuvtsropnpopqqpnljhghddehigghiefhhedfebbfjjklmmkkhffefea^^[WPOLHEB?><86;Q|���������������������������@ABf����vK' !,;D>1/7?A?>>??AA@A>?AABCFAAB@@A??BBCECEBABB=?=:671!2MWZ`p����������v[>&*8HZhrvz�~{zxvrpikfeffefkjinqswxz|~�������������ɷ�@;<>BEGILOTUV[\^beeegiijmmnonoimpsvuwy}{�����������������������������w~�����������������������������������������~����������������������������������������������������������������������������������������������������������������������������������������~xof`]VRMKJKMLNRX]adjlmmqmqqlfeedhknlf_SMJKF;3?MRRSVTQNOROONMIFIHFGHJIJIGHJILKLKMKKPOQRRRQSSSSTTU\n��������������������������������umppqqpnmkkmlmomkkljiihilhhdceedeifbaabcdcda`ddca_^_`a^]\\]__`\^a`^\\]_\\\]]\Y\\\\YWZZXYXWWWVXW^u~~nbXSVUQNOMNPROQQRTVXao��������c=!!%*5CLHA73,'),6E_mnjg\H:836@o��������mHHd�������������w?$7=0")8ID+&:HNNNORRPRPRSVUXZY[\]\_aabdhijjga]TNLD>??@CCB?=<77543210//+,-+*('&#"!  "#&(++/2468;>BDEHJLMKA6/'
+
+
+
+						
+/AMWVUQQC4& $&&(3Sn{�����|oplmlnonorrrsrrrrsqpqtvvvvtttsroqqqopoopqrvvvxz||~��~�}~|z}}}|{|zz{{|{yxyywtljprvvtvy~������������������������~zutuwz{|��}{{{{zz||{{||~|}}{z|zvtsrpmjiijkkmnqsssstttvxxx|����~zvtqnkhefg_hmnnnonpqpppmjklljlnmkkifegc`ceghffgehhhiffa\]bikkjlmljhggdda`[YWNKGC@>;676439Q����������������������������?ACh����yM$" $0?=-)/6=BA>>@C@@@?@@AEDCCBD@?@AABACEFGDBDE@=<:;;6,;MUW`m����������fJ*"'0<Qbrzy����~}zxuqpnkhjgfgghknpsx}�������{������v;?BGHLOQSVX___`cfhggkmmqqsvxyz|����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yrjd^YTSMKLJKJPSZ_ejlkklsqlihhilnqog^UMKJF;0>NSSRVWQNOTSTRNKKLIIJHGIGHGGIIJLLIKJMKLPPPRSRRSRSUUYn��������������������������������uoqppponlkllilmkkkkjjheihiidbebacebbaadbbc`__aa`^_aa_^_\[]^]^_[]_`][Z\\[]]]^YW\YZZX[ZXUVUVVVVVYd����td\WSPMMNOPPRTVX\aht����������wTGBCFMSUTPG>5-)*1DalqnkcUKD=86`���������cYe��������������P ++"!3VpT,#5FQPQRQRRSSTTVYXYZ[\\]`bcegjllorsrnge^TGEA@=?@@=:86310100-+*+**)'%#!"#&*+,/2567:;?CGJKMMMIC61/$	
+
+
+					
+ 4JRWXUVRB2"#&*3Vo~�����~trnmmonkmporqpprqrrrrruutvtuwvutsspnooooporuuuwxx{}|~���~{}||~~}~�}{zyy{{zxvuvsqsuwuvuwz��������{���������������|{xwvvy|~~��|{{zyzyz{}}}}}}{yzzzyxwustomllljjmnprtututuwwvy~~���}|xtrmllkkmljjhjkikkihhfffgfhkjhjhfbbcaacfffehghgikgd_Z_bkmkkklnkhfdca\ZUQHFB?<:89767559W����������������������������>ADf����xM#   !#&*+#"(29>A@@AAB>>>>?B??DDBBA@@@AC?BHDBCBDEA>;;9963(%<Q[^]l~�����������tV7$ %*/9EVcoy�������|{yvtolkiigkjkkqy~���wkj|���ϫIBCGJLPOUV[\beffgmmlnuy{}}�����������������������������������������������}y}~���������������~����������������������������������������������������������������������������������������������������������������������������������������������������������{tnjcYVTNKHJILMRX]aceejtrljiikjpple]VOJHC8/?OTUOTVQORVVVSMKJIHIJJGHFHHIILKKLLJLMLLOOPPQQRSQRUVXn��������������������������������vpomonkkkllkjlljjihgfffgfgecddabcbac`_`a`_^^`ba_^__`]]]Z\^\]]]]]\[[YZZY]^\\[XWYWXWWXVVUTUVTUUTXf�����zm]QOONPQRTW\`ejt|�������������wbZSUXYYWTMB6/,,A]lsokg`YUI@>]���������zjk��������������`%(.-#"BjsL/!1CNRRPPSTUUVWVXZY\[]^_bdghimnosvx{��}vmfXLE@>@>?=::5210..,+++)*)(&# !"%(*+-.047:<@DHIMKJLLNH941'!	
+	
+
+								!7LUWWXVO:!#)2Yo~�����~wsponnmkllkmmmppqtsqrruuqtttuuututropooooorqsrusstvyz|~��}|z|z{|||||{{||{xuxwuututsuuvx|��������������������������~}zuwwwz|~��~|{{|}yxz{}{wyz}}{{zzyywvssrpnljjmnnnrrrssvtrtuvy|~}~|xususqppnkkkhhghghhgd_cdeffegihea_]abbeefghfgfhifa`]bijjlkijjifc`XUUTNIC@><:97774886;\����������������������������>?Cc����uR(  !$##$%'))%%(-4;?AA?BAB??@@CBCECDCABB>?@B@AACCCED??>>:887/#+JYa``l������������z`@'$"! !&(*4=NZjzz���������}|ywuqqooquwx~ypmj����ǃ?IJMQTVX`degmqsuv|z}�����������������������������������������������������{y}��}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~xsjc`[TPLJIKKMQUXZ[`frqkhjjkkonkd]WNIHD82@SSSUXVSRVXZYSPQOKKNJJIGFHIJJLGJIILKNPPNMQOOPQOPRSVZo��������������������������������vlmmnlkjklkjjjhghffgfedfffecbabdbaba``^^`^]__`a_^^^_]\\Z][Z[ZZZ[YXYXYZZZYY[XWTTTUUUVTTTSUQRRQSXb�������rbXUSRTW[afkrw~����������������g]ZXZZ\ZVQF<35<SholmjgccXNL[y���������yl��������������p<,5?KF'$Ca_J2')<GNQSUVTUVXXXZZ[^^```ddhjkoquvz}������zqeXMC@==?@@9410.--++*))((%!   '-+! "&()++-/347<BFGJJMNMLNNJB:1-#
+	
+
+
+
+
+	
+
+
+	
+#>OYX]WVA#$4Zt~�����}xusqpmllljjlllmpssrrrrstruurutuvwwusqqqqqppqpprpqtutuwyz{~}~�}zwyyyxxxy{{}yutxxwwusoormorw}�����������������������������~zwsqvxz}~�}}|}}{yzz{}}||}~|y{zzzxwvtqnkjjkloqrqrstsqsotwz{{zyxxwyxvtrmmoliihhfedbbaba`afgihca^\_^_`aceefgghhhb\^eikkihedb_ZWPLKMNJF?>=<<9:963577<_����������������������������?=D\|���vR( !!"""##$%')(+,+)++,18;@??@CB=>BBAAACDECAA@=@?BB@ACGDCEB@>?:9:93-3Q_b]Yj�������������iO5)*&   #$'*,-/4AK^kv|�������������~~|~��}y����вVDPJWZ`hmqwy{����������������������������������������������������������}}���}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xsnhdYRMKKMJLOQRRV]fnpljjijjlkjd]VMJJC84@VWRTTUQSY\\YUVTROONJJGEGGGHIIIHHIKJJMMKLPONQQPQPRUWp��������������������������������tkmmkjjjihfhjhffdeggffccfcbca`cb`aa`^^\_^\\\\^\[]][[[ZVWXYXXWXXXXWXXXZ\XUXWTTRSTSTTRQRSRRORPONTay�������|j_YYZ]bfjpw{~�����������������rd^ZZ\]^\TNB<;FUcgiljhg`ZU]n���������kx�������������ySR[p�L  /JUB6+!/9DMRSVTTXXXYZ[]^_`aceghkmqtxy}���������ukaVPB@DGE<321/,,+**)&'&%!  "6EH@#!"%'(**-0137;@EHIJLMOOMNNLF>5-#
+
+
+
+
+
+	-EYZ\XWJ3"3Xt�����{xwttronliikkkllnmpnpprrsqtututvvvvvvusssqpooooqporsqrpswxz{}~~~�~|wuwvtututqonsssuqminphkosu������������������������������}yvsvvwx{|}~~~~||{yw{|{zzy{}z{z{|zyyyxwspnmjkmoprrrpqpotwxyyzwx|{z{zyxvqqhljjgebcc`_`__adeiifaYW\Z\^]^`abeffhd][`chkidb]XUPKIGFFJKIF@@?<;:;853667=d����������������������������@?D^y���yU- !    !!$%%"%&&()**,-/.//10109;>@B?CB???@@@@BCCBABCBA=@??BBDDEID>>>=;<:64+:Xb^]Zj�������������qYC71)" !#"##%')*+-335>JMagqz������������������������ƑU[ejrtx~�������������������������������������������������������������������}zy|~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~ysj`WROMMJMORTRTZdmnjjiimlllhc^UNJIE94@TTSUURQS]`_]XYWTQPLLKGHHGFGHIIJIJKILNLLNOMNROPPPRVZp��������������������������������rkljjiihighkfeedceffdcbcba``_``__`]]][[\[[\[[ZZZXXWVWXUTVXUTWWVTUVVTTXXUUTSSRQQNMPRQQQQQQPNOOOR[t��|~�����xh`Z]aeinrw|������������������~la^]`ba_\UMB?HP[bhihed`_cht���������qx��������������y��v9%)ES>>/",7CNSTSVXYZ\[]`^_`bfhjjmqswz}����������|okifXMKRP?41.,++*)*)'&%""   0DKJ@ !"#&(*+.058;?DGJKJLLONMOSQJA52*!
+	
+4OY][WXS8$0Ys~�����xtvuttsqnllmljjkmonoqqrrsstvvuuvuvwvvwtutrqqppqplnrqqqtuwxz{z||~|�~~{zwrqonmkigeggkkllntxwustz~��������������������������������~{xxvquwwz}��~}{{{ywyzzzzy}}||zzzyyywxtppolnpppppqqrtuwxyzxxz{||}||yusqolmifbba]]]\^acfhgc_ZWYYZ[\__a_bfea[WZ__a_[WUOKEEDBDDFIJJG@@?<<<;764478Ag����������������������������EBE]w���yU0" !    #"##$&&(())+,,,./221566796:@?BBDBCC?<?AB?ADCBAA@?@?A?ECBDDGIE?@?@<:8770"$AT[_Y]k�������������tbK=3* """"#$%&%&()*+-/22578::7=HW[hqx}�����������������ʵznqsvy{|������������������������������������������������������������������{}yyz}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|ujb\UPMJNMPWYZ\bkljjhklikkid\TLJIC72ERTTVUSQT^d`_^\VVRPOPLIIHFIHIIIJIJKJKLLLLMNNNNOONQSWq��������������������������������sjjiigfhhffhcedbdeeda_aca``]]^]^^][\[ZZZYYZ[[XYZXXYXWUTTTTTRSSUOLNOQRUSQTSQQONHCGKOOONNMMNMOOPRXitugj}�����k`__bhloty}�������������������qf``accc`]TG@BLS[_`_caaddky��������sr�����������������n`M7#.KXVS5&4BNTUVWX\]]\`aaccfikmqsuy}����������tWgv~|n`Za`@40.++***)('&%#! 4DD:/!"%'')+/0269=BEGILKMONNPQPQNE<71'	
+
+
+
+":P[_ZZ]N;+0\v������{vttrsqprqnmkklllmnooqprruruuutwvwwtuuvwuttsppspnoqppprsvvxzzyx{y}{y||vqlhec_]\YYW[`cjt|}}zwwz|}�������������������������������~{vuvuvy|���|}{{zzyyzz{{{{{|zxxyzyzyvurnnnmnnmklqstxwyywvw{~~~||{xvqnllheba^Z[[YY_bfeda_XYZYX_^_``_```UPSYZWUTOLHDECCAABDIMLJGB??;:::77;99;Dj����������������������������HAC]w���yX-$! !!"""!!#%&$$'('+++,,--.23459<>=@A@?BBCEFCDE?>?@AABCAABBBCBEBB@A@DFFGDBA?A?<9897- %DZgl^[f�������������vfVE7*"   !!"##$&'(**)+,-.24466878:::<9<>FNRdqt~����������������twwx{|�����������������������������������������������������������������}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}vof]UPJMNTY^_^clljjgjkiljfc[RLGID63GPRSUUVQWdhcdb^ZZWRSPMLJJKKHIIHIKJJJIIILKJMKMNMLMPQXr�������������������������������rjhhgdfiffgfdcdbdbadb_``_`^\\[]]\\[Y[[YXXWVZYXXWXWXXUSRRRSSSUSTI@AGNQSQOPQPPML>4;AILJLLLMPOQQRTZdklikq�������pd_`fgkpux{�������������������yhcddehgfdVJCCEKRXZ[]Z_clv�|u�����tm���������������oRdqrH$1Samg?%$#6ISTVWZ^^^^bcdeejjnrvvy~����������pJFi���kkq^B50-,))(**(%$$#!  .880! #'))*/0156:?BEFIKKNMLNOORRRQKA863'
+	
+
+
+
+
+*DWX]WcbVE2#+]v������zwusrqorqqpnlmnmmlkmlpoppsqstuvxwwvsstvxwututtrsrrpppqoprruzyxzyxuwtturld`\[YWUPKNSX^hpz|{wusrux|��������������}~����������������{yxwwxy{~���}{zyyy{zz{|}{|~|{z{y{yzytstrnoolklnqsvwwxxv{~}~~|{ysqnjhfa`]YWVUX_aeedb`[[ZZY]_`a^][XSIPSVUPJIJLHFDBBBBBFJPPMHEC?>===98:99<Jr����������������������������I@>[x���|W1%"!""$$##%&(&&')+,,.011214457:>@BCEGGDFHKMFBB@?@CAB?@>>ABAABCDAC??AADFECC@>B@=;:764*/Vki[X[h}������������ti]J7+%"#$%$%%'(*+-,.13226:>:>@@CCFGGPOWW]bahmpnotz�����������}y{|{|}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{pg^VPLLQV[^^fmlijhiijljgeYNJCFB64DQUTRSTRZdffgd__][VTQPOMLMIHJIHIHHHIJJJJJJJIMMMNPPPXu�������������������������������~mhfeeehhhgeba`abb__``_`___\]]Y[[YYZXYYWWWVVWUVVTUUVVTTSSRQQSQQND68>FMOOPONQMLI6&*6>EIMLLMOPUWY]bgnqqptz�������teddeilosw{��������������������ujghjijlg^UE?AEJQRPMSbs}�yjn���zoi���������������op��o>&+OiwiD52&)<FMSW\`a``ccehikortw{}����������rMQq����xnqlYF6/.-**)))(%$#! +373 !$&()*-.1346;?EGHJMLLMMOOQSUUVUJ?;3"
+	
+
+
+0N[S\cgkbSD3)4`w������xurrssrqpppoppopomiiklmmmpqrrsvvxwuttsuwvsuwwvututppopqptqswwxzxyvtolg]YYYWXVVTOJHIPXahqvnmromoux|~������������~}�����������������}yvsvx{}~~~{zyz{z{|~~}}|{|}{zyyyxwtstrrnmmooqqstuvuxzz}}~}{zwtrokig`]ZVTSTY^bdecb``^[[YY\\[ZYWSOQUTROKFFJJGFGDACCDEJONKFCB>=<<=;9889;Lx����������������������������L@BYw���{X3%%$$#%'''')+*,+*-00245468::=??BDFIKILMRTWSJDDB@BCAA?@@BAACA@ABA@BAABCCEEEC@A?><:8550'0BHPTSW_}�����������ulXG8*%'())*+.0113499=B@FHNPT^[]ceggoqsqqsurtsswwwyxz~�������~}{~}~��}�������������������������������������������������������������������~}~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yrh^WOJOUY]^fknhjjhjjkhedVJGBFB54ESTQNTUV\fgiifdd`][VSTPLLKIHIFEHFEGFIKKKKKJJNLMNOQR\t�������������������������������~ngfeefgefeaaa`_`______]]][[\[XXVVXWXWUVUTVVTTUSRRSTRSRRRQOOSRTOG;86<CJMNOMPNKA.%2=EKMPQSY^adjmrvwxyz~��������{ndcehjnqvz��������������������{okjjknnjdZJA??EMGCL`x���ibqywnfg{�����������������hBE4#FkmQAE<*&5BLU\_`bbedfhkmqsvy����������wJEm����ysrsi\H7//-*,((('&%$"" 1=9 $%'(*+-/236;@EGHHJLLLLLOPOTVZ[ZTE=4' 
+
+
+
+
+
+!4RQ\dltxti[JMau������wsqsstspprsqrsqqqnkkkilklmnpqrttwutsuttrtsuuvvvvvuttsqqqrrsttuzvvusl^YWWZY[ZZZWQKFBILV_gnklqpnoswx{������������~~~����������������������{yvvvxy|~}|{{||||zz|y{~|||{yxwvusstsqromopqqrqtuyzzzz|}yutrplif`\ZXUROY`abddb``^[[YXXWWWVUSSWXVROJJNNJHGGDBECEFLOOJFCB@><<<<::::=T|����������������������������I=DWu���wZ3&&%$%''(((,/021/1357877:=?@DFEFIKKOPSW[]ZVNGHIFEDB>?AA@B@BAA@@ACB@@BEBDHEDB???;99796/" !! !)8CJONS[t�����������x`SMA3,-/14579=?EINU[\bdchjmnqps{x|x}{z{{z{yzxzzxxzzz{z{~~~�~}}|~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zuldWOQVUV[ckkgijkikihe^NDDBEA33EOPPNQTV[fkmifgdaa\WWWSQMLKIHFIGEGHFGKJJJJILNMMNLNRXt�������������������������������~mdfddedbba^``^_^]^\[\YX[[Z\ZVVSOOTRSSSUUUUSRRQSSQPQRRRRROPQTWZYWQH;9<DKMMIKLG=*"1=EMSX]bgikqtvwy{~|�����������lfeffjmqvz~��������������������vnjlnonnh_OB;=>;>JYw���wc_fjc_[r����������������bWkh:&C]O2;G9$!.;JT[`_bedgikprwz}�����������b0X|���vttsog\L;1.-+*&$'&%$#!"+9: !%&%'(-..238;@EGHGIJIJLKNPQRTVX[]WLA8/) 
+
+	&?T\iw����yYNe{�����uqqppprqopqpqrrqrppqonkhijmoprtttststuuuutssvtvwwywutqqrprqqqtvutqmdVTY\^^]`daXRMH@CGOX\bhorsqpsuvw}��������������|~�������������������������zwurttx{~�~}}|}{zxyz{}~|y{zyxvvvwtssvutqqpoomloquwxyyyywvtsqokea]\YTQRU\_aa`\Z[TWUXWXXZYVTVVYXWSOMOQLIHIGCCCEFGLPOLDCC@=>=<;<;<99U~����������������������������@>@Pt���{]4*'''))+,+,.01354579:<<=ABDBFHJJLPRRUY\`c_]WOFDFEEB?@BB@A@?@BB@?BC@ABFGEEECACB==98::;7- $)*)(%!/;CEGD9Cm�����uo��|kKIKC;9:?@GLPVW^glrtxzvuzx{}�~}}}}~}|{z{{||zzz{{zy||z{|}}~{}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|vpd\VTSPXajjhikjkmlie\LDCFFA02CORPOPSQWflljfgedb]Y\ZUTOKJIHFIGFGHFHHIIJKLLLMKKLLO\{�������������������������������~ldeddca``_^\]^^^]]\[[YYYYZXUTSJCELNPQQRSSSQQQPSSMQTVUSUTTVX\aeefa\PA9=BFIHIJE<+#/<GR\_cfknqtvy|}��������������vkffijmrty~��������������������~qpnoprplcWG<:67DXq����h\agdZPh���������������w���M:'%FYG,22.!!&9GR\^cdfilnstx~�����������{O5g��}ywtrqnk_TA3-,+('%'%$""!! '41""%'&'(,,-147:?CFHGIIIMKKNOPRTW[\^^XQF>6%
+
+
+.Tkt�����vCAg|�����}tnmlnorsrprooprrstttpqnjklklloqrqqstuuwvsuusssuwyyyxvtsssttrrtttsndYVW[_a_abfb[UPKFCDJPZadotsroprsv{��������������~��������������������������}xtrtvz}��|yxxxxz{||||zz{yxxvvuuuvvvppnnkmmnpssuvuttsssojhf_][VSSVY[]^[YWXWYW[YYY[ZXUWX[XVUTRSROLKIGEFFEGIMTOIECE?=>>=<=<;:>Z�����������������������������A=@Rt���y]7+(+*+---/012247789:;@BAFFGHIIMORUWV[^_^ffb\UKGFFFC@A@@@????DD>@@??@ACEDDFDBCB>?:9;<;84*#*031/,%+47;=8.,Ep����xahzzlXKJNLQW`dhmmtuxz|z}~|�~~~}~~}~~}{zz||{|{||{||{||}~~~������������~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}xog`USOW^jkkllkkmmie[IFEHF?24DPRQRRQOZhmkijhgfa]_]ZVSNLKHHFGEGHGIKJHIIJKHIJKJLNR]x�������������������������������~ndcdd```\]^ZY\\]]Z[ZXWXWWXTRROB6:BHKOOOQPQPPPQRQPTWXXXYWZ^adgijihe_SC>;?DFIFC=) -:GNW^dikoswz~����������������{mhhhjorw{���������������������vqoqsqonh\M@43:Jb����|qptq_P\�����������������jBBF,$H^W8,30'/."
%2BOYbfhknpsu{�����������yVKh{vuvvqqoljbXF6-,)(()('%$"" $..$ #$$&)*+,/26:?DFGGIHIJJKNRPPSV[^_^^[UI@5) +Nn������O#?ky�����|tmkijmoppopppqqsstvvsspmmnlkkmpqpqstttttuuussttvwwvwvwvtuuuttutrqkYSWY]`bbeegfa[[VOGACNU\cirqmkjmnry���������������~}��������������������������}{uusyx{|��}|{wwxxyz{|zzxwz{z{wvxutwwusqokkhhjnnpronooomkigd^\XSONSVW[[ZZ[\\Z\ZY[^[Z\YXXY[[XWSTSOLKJGEFFDHJLNNJFCA@>=>>==<;<:]�����������������������������E<>Tq���zb?.),-./21226678;9;=@@BCCFGILOPQUWUZ\_acdhfea[RMIFFDDA@@?BB@BCCAA==A@BABDDEDDD@>>;:<=<;:5')499;72+!'/8>:6046Sx����yu{}ukeejnlmqsrsuwyy|}}~}~�}~|}~}|}}}}z|}}|}}}}}}~}~�~�~}~{~�����~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~ytk`YST_illonljlkjdZLJJKF>23FSPPRRPQ\hmkjhhie`_b^ZXWSRNIHFEFGHFGIHHJJJLIJKLKKNR_~�������������������������������}k`ba_\]^]^\ZXZYYZWWWUWUUVVRPPL:.-5?EKLNONNOPQRTUVXZ\\[\]_bcgjkjijkhbWH<:?CEBB<'(9FOX^dlotw|�������������������tjijlqux}����������������������}vstrqrplaT@506Qy��������rTXz���������������P(9QO5 Dc_XNI**:D6-$-=KX`fjlosx|������������d^jsvwvsopnljdYL90,)(*)'%%#"!  
(,/-!!$$$&*+*+125:>BEFHIHKKLNQSRRSWZ]_^^``^SC82'"!En�����p&Cp{�����|snlkkjlmnnnprrpnpqsttvssnpoooooopqqrssstuuvvttuuvvuvxxvuwvuvvttrodURVY]aabhhifcc`\WOGDJRX`gknkifgkotz�����������������~���������������������������}}zwvwx{}��|{zxzxyy{|zz{}}zyxwvvvwvtsojfefiloomjghiggfda[[XTQSVVZ[_^]__^\Z[^^_\]]]YX[^]ZXWWRMKJKHDEFDHKNPMIEDB?=@?=;><:<=b�����������������������������F>8So���cA0-..043356779:><=@DCFFFIKRRSWWXYZ\acdeheeggaZQKEDDCAAA?CDCBC@?@??@?AABFFECECA@<=><<;<:7/"2;BFDC>5)&/@GHB@CFM^l�����������tqpqpqstvuwxx{{}~|�}~~}{}||}~~||}}|||}}|||||~}}~~~|}~}y}�~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~zsmcYTbkmklmkjkjhcZNLKHE=24FPNOOOQS\fjjkghjebc`]]ZWTTMIGGEHFHHGIIGIIJIJKKKKIKP]{�������������������������������yh`_^]\[\]\ZYYXWYYUVTSVUUSRRQPI9+&+2<DHJLMMMOQTWZXZ\\\\[\_abdgjkjknnkfXI:7;>>?8%(7DOYaimrw{�������������������znllptw|~�����������������������zvsrrssmdU?41Bd~��������pj{��������������mNRTqg82Yeh]6*;HHA3+!!)5GT`gkntx~�������������ndouwwuroooolg^O=1-)('''%$$$  "443- "#"%(()-1267=BEFIFHJKNOPPQQTXYZ_]`_a`]UIA<1)#
+			)Nq����w8!Fpz�����zrnllkjiiilmnoqpnpqrrssrurqrpqnlnmnmprqpqqstsuvvvuuuuttuwyvvwwutqj\UUVY\`dehiihfhfc^RKFFMS\fiiihgijnqw��������������~}|||����������������������������}{xvvx{z~��~{xxuxxvyzz{z{{||zwtwutstpkhggkoqrmeb]_a`]^ZYZYTWYZ[]]_^`ac_^[\^^\\^[\XZ\[YXYVTQNLKKJGDEBGKONNKECA?>@>=<>;;<@f�����������������������������H?>Rl{��}gD5222456579:9;=??CDFHMKPRRVVXZ]]^_`bdgghhjehfaZPGDECABB@CFDCB??AA?>@AB?EHFEDCAB?>>>===::4) 0=GLOUSKFGT[ZRPRWahjmw����������wsurstuuxwyyz{|}���~~�}~}}~}}~}{{{{z|}||}|}}}|}}|~~~|}{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}|zxmfdkonkkjjijhgbYLHGED=12DLMOPPON\gklkiihfdd```[YWSMLHEEFEDKHFHHHHHHIKKJHIMP^|�������������������������������vd^]\[\YYZYYVVUWVTTVSSSUSQQQOMH:'!"'/9AGKKLNOPSUWYZ[Z[Z[]^`bdfhjloorpoh]QD;:;:2$&4CNWahnsx|�������������������~tqqsvz}������������������ü����yutttsog\K=;Eg|��������y���������������~x���N;$"8QO0%;JPNH?2,#$1BQ^hpsw~������������|svyvuurpqpponi^QD3,((('%%$##!!3:5/   #$&'(*-/48<>ADFGHGJKKKMNPRSWZ\_^``abbb[NEB>74
+
+	2Zv���|X Is}�����|qpmkijkihhilloqpttrpqrpqqqssspqolmmonopprqrqsvwvvvvutuvwxwwvyyuti\UUXZ`ehjlkkjklkidYPGDJQ[accijikmnrtw~��������������}||~�������������������������������~|{xwvxy{}}}zyywzyyy|{}{||{{zustupljjjoruvufca`_^\^_]^]Y[\\]^[]_`ab`]Z]]^]^_^ZU[][VXWUSONLJJFEEEFIMMMOKFBA@?>=>>>=><@k�����������������������������G<>Kj~��eB7245679:<<<=?ADEHJJPRQRTWXZ^^__abbefggijmlkkiaXNIGDB?A@BCDACA>@@BBACAACECHDDDC@A@=>>;:<62.-16=DLRW]`]Z\b`a][aipnklow���������snttuvxzzy{}{|~~}}~|{|||}||{{{yzz{zzz{{||{||~~~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}|||{yvspqpnkhhjihihaVJCBBC;04FMLMNONP[glkjjhfeddab_YYUQMMFEEDCEHGFDDGFGJIJKIGKLJ\��������������������������������td^][Z[YYWWZUUUTTSRTTQORRPNQNLF6%! %-5>EIKMOPPQTUUVWZY[^^``dehiknnrrsqkbUF>981""1AMXaiouz~���������������������xtstx{}�������������������þ����}wuuwwvqbVG:H\t��������������������������gBG>#"6ITURMF?4( %.>N]fnu~�����������zvuyyvtsrqqoonnjaTH7.)()'$$#    '13/  #%$%'*.0259:?DDGGIJJJLMLMQQTYY\^^`bbcb`]WTR=%
>k����{N!Mt~�����~vromjmllkhiikklnqprqoqrpoosttssrqooonmmmoopqrtutuvusutvvvywxxxxugYRVY[`chmoomlnoqoh^TLHIPV\bhmkkkonrtv{���������������������������������������������������}zrpsuxz~}|zxxuxwwx{}|{{{|wvwvsnkkkntxwribbaa_`_`]^`_[]^__]_`aa`____]^\]_\\\]\\YWWURLMMKJGFFFGGKNOMJE@@@?<>=>=>>;Go�����������������������������G<?Mm����gH83479:===?BEEEGILLMTVVVXY[]_`aaddehggiklmmmmkhcXNKFBA@@@@AACBAAAAAACAADDEIGFEFEB@A>===:89:?CGMQUZ[^`adeeeefjstpjlimsww{�����wrpstwwyzyzz{{||~{{x{}}{}}}z|z{|zz{~~}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~}~|z{{xuqlighjjigf_SFABBC:-2DMMLLNMO\gjiijhgecaaa^\ZUQNKGDCFEFEFFEFFFHHHEIHGKLM`��������������������������������ue\Z[[ZZZUWYWTVQRSRTTQNQOOMKKJD3$ ")3<CHKNNNPQSTVVXY[\^a`egghklmrtstspeUI>9/ /@MXahouy}���������������������|wvvx{��������������������ƽ����}{|{}~zpbQ@=Tm}�����������������������S2;KF10ETWWVXSK>3()2<O]gox�����������zwvvuvtusurqppokdYL=.(&('&%$!  %+)  $##('),.047:@CCGIJIIJJLMLOTTX[]_``acdcbdaaR5
+!Ku����~:%Pu������yusooromkjjijiklnopqpqqprrsqrttssrpnpnlmmlnpqrsstvuvvtvuvwvwxwysi[VWX\_eiossqprssqi_XQIHNT\cjmmpooqruw{}~��������������������������������������������������{wvtuvv{|~�~xyywyzyz{{{yz|}xvoklkovywngdccdba^^_^`_^_a``a`acba_`_^_Z\_`__^][ZZZVTOMOLIIIGFGHIMNNKIE@A@>=?9=?@><Mr�����������������������������@9>Io����hM779;==>@AAEJJKKNORUWXZZ[Z_`acdeffhkjjllllnooomkbWOH@@?AA@@?BA?>C@AAEDDCDEDDEFGGA???=<989;@ELVVX]__acfefijjjmplmmpnppssuxv{yqsssrtvwwwxyxyz}{|{zz||~|}{||������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||}{}}zz}{vsnjhgfhiifc\RHBCDC8-3DNMKKMKL\ggghhfeccdb^]\YTPOIHDBEEFEEBCEDFJGFHIHHHINd��������������������������������wbXWXZ[__ZWUTTSPRONROONLMMKJLI?."!$,:BGLPPOPRTVVVZ^]]^cfffijlqsuuutvsg]MA1$-:IWahpuy|���������������������xuvx|����������������������������~~���zk_E8K^n���������������������sOIVkb.$*'*AQX[]\\[UKB807@N[env�������~{xtutstuuvurronnkf^OA3*''((&""# ""$'&(*.115<?BDFGIGHJJLONOSTW[^^^`acbaefc]F0
+'Y~����x(,Wy������~{wtsrqqnmkkiiijjklmmnmonoqrqrsututssrqmnklmnmoqqsuuuwuvruwwwwvxqhZTW]\^fkostttuwvrhbYPIIKR^cjnpqqrsttwx|~������������������~��������������������������������~|yutruw{{|~}|zxyxywx{yz{zzuplmmnuzyniccdeab___^`^_aabefdddcea^^_a^_`a__^]]ZUSQQMNOJGGHHFFFINPNLGCAA@=>?=<<>=<Nt�����������������������������99=Pq����mN99;:<>ACGGHMQRRTTWY\Z]^\aacfhghjjkmjlmloqpqssqqlcYOEA@AA@@AACCACAABCBBCCEEEFGGFCCC@?;988;>EMWY\acdefhiijjjkmllmnrpqpqqrrsurstsutsuuwwyvx{{{z}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������~}~|{~|z{{xvtpjhfghehea\SJDFFB8.3EKJIKJHLZffghffecbcb_^\[VPMJGDDDDCEECDFEFFEGHGGIFHKa��������������������������������s]VVVYajkaZURQPPPNNOLMMKLKILKG@0 $-7@GMOQRRRVXY[]^^`bdhghilprsstvwvso`RA2$ +<JWajnsy|����������������������{xx|�����������������������Ƹ�����������ueS>;Ic���������������������{��y< !(:>3$%<NY^^`bdcb\RI?9BIQ\fq{�����}{xwuuttttttssrqomle_VB5*)('&'$"# "!  !"#&'(+-003<@CDFGHIGIKMPOQQSU[[\]abccbcecZ?+
2a�����a,Yy������zxtvutsronlijljihjkkokloopoqqrpsuuuussonlllkmnpprsttuvvvvwswxxvshZTX\[^gnptxvuvxvqjc[WMGHRZ\gnrssrqruuvyz������������������������~�������������������������������}yvttvvxz}~~}}{yyyxyyzzzvolnnowywqiecedcb``a`````acfjhedbc_]`aaaaaa`^]]\ZTONLMONJKIJJGGFKPTNNHDACB><><=;><=Q|�����������������������������9:?In����oI;<=?@BFFJLNSVVWWXZ]_^_aaedfiljllklnnnqqrttuvywwvpfXJCACC?A@?BBAAACDADC@EJJGFDCA@?BD@<;99;<CKSZ_ddgggkkjkiijllnnmqqrqqqtvttsuvtwvwxyzz{|}~~�����������~~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|}�����������|||z|~{y{zxwtnifefhghe^[QKEDEC9+0CJJHIJHKWeggfcdc`bdb`][YVOLKGDECCCDDCFDEFEEFGFGHHIOa��������������������������������q\VTU]m}rbYSNNNKJLLKNJIJHGIIE=.!*5@HMPRRPSUYY\^^_adfeghjmprsvwwxxvpdXB5,&"
+'7IT`gmtx|����������������������||�������������������������ô����������|mY>9Fm����������������������c,""#7SF."!5LX]_cehjmnng_PG?DFR_m{����}zwvuuuuutsrrsrpoomhcYI;,)''%%###" #&!  "!#%'**-/35<>@DBHJKHLJMLLQQTWYZ\]`badccecV9$Ai�����E.Yy������zvttstrsqomlkkjifgiglmmlmopomqtsssuututroonlklmoqqrrsuuvwvuwwyvri\WX\\ahnpuwxx{zwrlc[WMGHKTXemtuurprrrrty~������������������������}��������������������������������zxywqrw{}~}{{zyyywxxtnlmmrz{wogddccaba`aa`]^_bdfjffda`^`_``^`aba_]^]YQNMKMPOLJIJJJGHKQTOMGGCBB?=><==<;?U������������������������������66>Hm����mP?>?CFIJLPPSVXX\\\^`aceefghkjinmqrstutxyvw|~�~~}scPGCDC@@>??B?@BCC@@CEFHHHFEFCBEE@>>::<=<@DQY_ehkiklklkmlkklnpstrtttwxxwyxz{{zyy{|}~~~}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}~�����������}{}{{||yyxxxwslgdeghggd^YNHFED@3(2CJHGFIGJXfffdeeabc`\]ZYXTPLKHCDDBDCDDEDGFBEEEGGFHIKc��������������������������������lVVVYb{���xhZPNKJKJGIJHHIGEFFD<* '4>EKOPRTVWX[\_^`deggikloqtvvx{|{ytm\M>5,&"5ET^hntx}�����������������������~������������������������Ȼ�����������weP8Ih�������������������zD0+/'&E8!$# .HW]acfjlqv{~}qfPC;?Lat����|ywvvvuvuvrrrpprppnjd[O?/((&%$#$$# !(+1+  "%'**-.27<;?DEGHIIHJLLMPRSWXY__aaaccdecS9"	Fq����|4
3`|������|zurrqostusqpmmjjhhifgjikmnnoropqsqrsrtvtppoljlmlnpqrrsvwwvxvwwvrl_WYX[ciorx{|||zxtnd^WNFFNUZensstuppommosz��������������������������~����������������������������������}vtqswz{}~yxzwyuomllpsw{wmffgecb`abaa`abccefhfeccaab_a__ab_]`_^]YNFHIMOOONLLJIFGJNQNIGEB@@@A>>>=;;?Y������������������������������7;<Mh����mSABDJNQSXYV[^^^adcddfhllnortssw{{|}|~�����~�~}~~}tl_QDB@@A@@?@AECAAA?BDCDEGGEFDGHDCA@;;=CBB@MYbhjooonopnlmkmopqsuvwxyzyyyxzxyzzzz|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��~|{|}}�~~��~}||y{{zwwvuxwrjedcdefge_[NGDFE@2(1CGEDEFFHVcfedeb__`[[[YXVTOLJGDBBBDDDDEEEEDCCEFEFHHHg��������������������������������jZUU[dz����yg[RKLKKHGFFGGFEDDB9'#0;CKOSWWVVZ]_`addehikmnpptvwz}~~}ypbS@6.'  .AQ]fmry~������������������������~��������������������������������������|mWDBTck{�����������������rRIM5*3# $!%BU_`dhmouz�����jS;5=Ro����{vwxvvvtssrpqrpsqoole\QC5+(&%%$#""  ,49)!"%'))-.158=@EGGGHHHJMLLPPRVY\^___^baba^O6#				$Oz����n$
5dz������|wojmkiqrtstsqomkjiihhhhgilmlmpsqrqqstqrqrrpnnnmlnqoopruuuwxuwxwslaXZY\ahosx|~z|{{uld_ZRGEMT]enspssngfeeinv|�����������������������~����������������������������������|vtwtvw|~�~{xwslllmotw{wjefeecb___aba^`acgkggffecbaa`a__b_Y^\^]ULHIKMNMONKJEEGJMPROKHCB@B>B>>?<;;>b������������������������������:::Lh����mUGIMRWY\^`bbdgiknpqqsuxwxz{~}�}�����~~~|{||{|||yti]SJBBCC@AAABCBCCCCCCCGFFHFADFEC?:<EILGD@JYemorrqpqqqmnqrprsvtvxwxwwyz{{{|}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~}|}||}�~{|~}{{zyzywvuttwwqjdcadcfhb^WNHCDD?1(0AEDDECAGVbddedbb`\[\ZYXUPOIGGEBDDEEFDCEEEEEDFFFHHIIe��������������������������������kXSW^hs�����~k[OLIFFEEEFDDDDB@7&$/;EMSVWXXX[]aabdfijnnooqsuvx|��veUF;2+&*>O`gmsy~���������������������������������������������������ź�����������teK9@Ia�����������������xdvn3%& #&!=R`ddjorx�������n@03Gf{���}xwwuuustsrqqsprrppngaVF7,)&%%%#"  0<5 "#%&)*,.037>AEBEHIHGJMJJMPSVYZ^^^^^bbab^O9&		
+
+*Y�����^:e|�����zunjkiilmpptttromjkljiigfhklmmouusqqqttrpqrrsrnmnnmnnopqtutwuuvwwne\Z[Z_jptx|}|{zzrld_\SHFMU^eknqrtsqmkjglsz����}��������������������������������������������������������}zuqrtw{}��}yxqllmntwzuidddfda^^]^accc`bedfcdgfbcbb_`^\_`\^]\\WMGFKMMLNMLHIGGIMOOMIGCCB@>@>;=;;=Dg������������������������������88>Kh{��zeSPTZ\aflnsrsuzyyy{|yz~}|�����}}}}~~}|{{}{yz{{{|{xvqdYMD=@>???A@@BDCDGDDDFDEFEDEDB=?KX^]UQKGTdmswwvtoqrqoousqstwuvy{}~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~~|}}~{}~z{|}||{xwxvsststsssmhcaabadf`]VNGDDC<0(0<CABDAAGY`adbaba_]]\Y[WQNNHFECBADGGHGDDEECDDCEGGEGHe�������������������������������~h[Z^dkov������nZKGEDCEDDCCAB@;3$#-8EMRVYX[\\]_acehjnmnoppstx{}����~vjYE95/%
+);O]enwz����������������������������������������������������´����������}lX><Br������������µ�����X2 "%#"&$ 1I^eelpw}�������T0(-?^r��~zvvvtttsqqrrqorsqolhaYM<0('&&%"!" #43   ! #&)*,-147<?BDGHHGHLMJIKNRUYZ\^_]_abdc^L:&
+6^�����G=f{�����|wttplfceghjpsttppnmlkijihjjjilnrnpspqqqrqqsrttrooooponopqrptsuuvunf_[Z]ckpsw|}{zxwrleaZQGDOX`eijnqvvvvtommry|���~}~~}����������������~~������������������������������������|wropsvy{~�piloorvxvfdededbc`_`a_`a[bdbedeebb`__`_]_a`_^]^\XMGGMLMMJJJGFFGHJNPMJGFAA?==<<>;9<Ei������������������������������>@DUguywpmnqrvwxyyz~z{y{}}||~}}~}~~{z}~}|}}}{{zzy|y{zxxz{xxuunbWJCB;?@?@@A@CBDECCEDBCDCBFBKUhuvpha]Yamtv{}{zyuvvuwwyyz{}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~~�~}}�~}{{z|}{{}|z||{z|{xywrqtrrtrttogd^^aceda\UMCAA?;1(0;@CAA@@FY`ab`___]]]ZX[VQNLGGEBBACHHHFDEEECCCBCFEEHMh��������������������������������j^^afhlpt|�����n\OGDBECAA@@?>:1 (5CLRVX[\\]abbdhjlmopqpqtxy{~�����zm[L?6.$&5M[gotz�����������������������������������������������������ɺ�����������u^I7Qv������������´��sbT>(!(--*#/G^dhmsy������|Q-"$+:Wo}��~yvutqtstsrqqposrqpljcYO@1((('%""#"!"!,7- !  
$''))+047;?@CFGFHIJLMLMPPUVZ\\]^_]_ccZH9#		*7<<71.Cc�����<Akz�����xtwvqnkfbccbjmopqqpormmjjkkjjijlnnnmprqqrqrrstuuspprrpopqooqpsttwysjb\Z_doruy}|{{zwrjb_ZPFBPZ`cejnrwvwzyvtrsv{}�~}~}~~~�����������������~~{�������~����������������������~|xoqrtvyzrihkmnrwyvhgddde`aaabc`cdddgeffdfe`]]^^`a_`[^\[^]SHFINOLLIHIGHGGILPRMIFFC@>=?==<<;<Cn������������������������������TZ_kvxxyw||{yzz{|}}|{}yz{|{z|}z{y|{{yz~}}|~~~}}|zyx{ywvtvvxwxywrkdTHCA@@@A@BDBADDFCECABFDJQXlzxxvrkooqwx{~��~||~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������~~}}|{~|{yyxz{{xy{yy|yyyxvvwtsrsqpqqttmf`Z\_aec`[SJBA@<90(/=??@@@?CS_`_^a_\\\ZZXWTROKJGEDA?<=@BCFFDCBDEDCDDDJPj��������������������������������j[Y^cfhkotz�����r_LEBBBB?>?==:0 &4ALRW[[]^_`afiikjmpqpstwy|}�����zn`Q@8.'"7KXgnt{������������������������������������������������������������������~oTIPp�����������Ⱦ�wc]H?. -:6/$'@Ueintz�����l?*'!!(4Lfy��zwtsqtroqooonmrqpolje\QE4*($%$#%$#""! 
"27+ !&('))*.15;?@CFDFGGHJKLMORTX[\[\_^_`abYC2#		)E^jokg\W]l~���n7Bl|����}wtxtsokigc`bbchjloppqommhkkjjjiljjjlopoqpoqrtstpptsttqsrsnonorrrturoc\[_cintx{xxyvunhb_YKEGMVadekoqsvvwxxtvvu{{}||}{}~~~������������������~~~������}���������~�������������~zwrrsookhjjlnsuvqhfdfeb^``aacdcccciheiaffb`^`__`^a_^][Z^UKGHQPKMJGFGGEEHLOPLIEDB?@?>><;>==Hs������������������������������ntxy}{wxz{{wy{zzx{{ywxxyywx{y{|||{{{|}~|}}��|zxvvuvxxwwwxwvvuurpl_RHBA?@B@?@BBBDCCCBDFNSenw|zwtsruzxxz|~�����������������~�}}~~~~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~}~~|}~|{}~zxwxx{|zwxyxxyxyyxxvtsuqppqorsqkg^XX]bcc`[RIA>==90'.;=<>??@DT]_^_`^\[YXYWUTQMGIFC??>9779?DHFEBCCDEEDGMWs��������������������������������cRTX]bdgjnpv�����u_NF@A@><<<;9/!"'3@KRXZ^`_acgghkjknpruuvy|}������}qdQB5/(  2DZdnv{������������������������������������������������������ȹ�����������uaJAZx����������ë�mO@L=-876/ "9Rciov~���zS0))# &1E_u�|yvuuuspponkgmmopoolh^TH6.)%%$##$$#"!!
-60'"$&')++.14;?<EFGGHHFHJNMNQTVY\\]^]^`a`WB4&	$Cn����~xurqwwws_=2-%.Hn}����{zywuplkkjfcZ]`eghjnooolmmlhjkikjkgllllnnpqqrnssprtsutssrrqnppoqusroh_[_cinsxzyxwurme]^TJGJOW^aejmqrssvwvvyxyzyxyxxz~��{������������������}}~}��������������������������������|xtrpnljkjnswvrjedcdcbcbbbcaabbfihghefdb`^_```^`_^_^_\VLIOQNNMJIJEDEEIKPNNGDEA>@A=?<;=><Kv������������������������������uvwzz{zyzzyxzzwvwxyzwxzzyzzyxxy||z|z}||}|{|{ywxzxvuvvvssuttutuvtssh_UID?>CA@>?AABCBAFPXiqvtuwurqsy~{xwyz}~}~||}|{{|z{{z{zvz{y{|}|~��|}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�~|}~~~~|}~|{|xxzywvywvzxvuwxyxxvwwwwuqqspnonospmhc[USY^_`]WND>:9<;2(-9><;<<=ET_`^_][ZXVWVTTTKJKGCB>>=:7427>EHFGDCEEGJLQ^v�������������������������������zbPPTX[_bgijnt|����wcPFA><=<<<8.'' $/:FOY\`_a`dgfiklmoruuuxz|~������shVB92+# 0CUcntz������������������������������������������������������˿������������oYIUn���������ɳ�sYgp? +4=D-#3O_hov|��e<&('&#! $+?Zp}�{yxvutsppokfhllqspmif`UH:0)''%'$"$$!
,52-!"##&)*+/25:>?DFEEEIIIJMMNRTUXZZ]]`cab_P>5		=o��������~yxttpg^VMJJZp}�����||zzwspoonigc__ab`dhimqpnoolkkjkjjgjijkmnlnpooprsoqqsrsrqtqqoonoqsrold^^bhkpuwwwvrnie\XPFEKQW[_dhknprruttuwvxyxwuuuwz���|}������������������~}|�~|}}��������������}|������������~~|wnhhimsxwphbefda`ba``abcbdgihggfddc]^ba`a_]\]^^^[TMLPOOLIHHHEEEDDKQNJFCCAB>@==:9;<<Hz������������������������������wwwwyyxwxxwy{yzz||{z{yzz{zxxz{}}{z{z{{}{{{z{ywxvtvvuwywvuvvxuuyxz|woaRHB@@??@?@BBFFMYeotsrqpptruxxyxtrtxzzyvvvxvxvuvwvvxyyyz}{x|}|||}�~|�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������}}�}~~||~~~{{{zz{zxvywxxvuvxustvwxuuutrttrpppnmmkorkigaXPNU\[^YWLC<87;=4),7;:;:9:CT[\^]YYXWWXUSRRKJKGBA??@>;5..7AFIJKHFFHIMQ]s����~�������������������������x[OOQSVY_cdflorz����v_MB><<:984+.20*!!-7CNX\___adfikmmpsuvy{z}~~�����}yl\I<6.%*ARblt|�������������������������������������������������������Ŵ�����������x`IDl��������Ѹ����o7'!3F=0/"'@Q`houjS1##$%%#! "*9Uky�~|zwtttronghnlkppllkgaYM?1(''%&$"$$!!!
#252'!!""$(*,-259>AACEIHHIJILKJPOQXZX]]]_```K7("M|�������~}{zuwsrvmojajt|�����{|~zywwtqqnmlhgb_]^`dilnprqnpqlmmkjjhiikmlknnnqooppqqrrquuqqopmnopppmgb_^chjoqrrqmkde[VOGEKQY]afiloprlpttutuuvuvtssty�����~~~�����~����������~|}|z|}}~��������������}��������������xnhhknswuofccfdbababcdbbddgilifefc``bb___a_\[]^_[UPOONOLHGGHHFGGHNQPKFBAAB<???=<:;=T{������������������������������svwuxyyzyyzz}y{yyyxxzxzz{zxwyz|{yz{{}z{zxzyxwxwxvtxwvxzyyx{zz{{zyxvpi]RFA@??@?DIM^lv|ztuvusqprptvvsojefqrttsrruututtusswyyz}{zw|�}{�~|~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|~��~}~�~}~~|~{yz}||||{x{|yyyutwxwwvrsusqtttuuqrsqoppprqnnmkjmmhif`TLLS[][WUJC;86;@3%,6:;:889COXYYYVXVVVSRRQLJLIFB?>=>==9/,/6BHMMKIHIKMM[s���~uy�������������������������u^MLNPSW[]`afikpx����s_M=<:8852+"3>@90&!&5CPW]^`abfijklqtvwxyz||}������wlYL<4-)'<N_lu{�������������������������������������������������������ǹ������������qMHQs�������®��qcN,!018>7#"*?PTSM@.&#"!!$$! "(5Jfv~�}zwtstspnkmnjkmpqmjgcZP@4+((&$&&#!  ! )63/%"#&'*,0368;=@CDEGHHMJJKHOSTVXY\\[\^`\K3&
+2`~�������|zwtomfmqvywqsx~����{{{y{zzwuutrokiebba]bdfjnpppqpmnolihgghjkkkkkmqpopooprpqqsutrronmononjfc_cehmnppnkhcYXRLHLRW[]chjmoorppttrtsrqsstrrsvz����~~}}~����~~������~}~{{{zzxz{}��������������~}~�����������xjgklortrldddfeb__``accbceghjfdc^^_``_^]\___]]]^]TNNNMNKGGIIFDBFJONMHDCAA??A?=;9::=X�������������������������������uwwwwxwyywwwyuxy|{|wyyxxuuyzwz||yz{{zwyxvxxxwvwvvxyyyyz}||~zwyyywwxvrj_PEA@ACHMZjwzwtwwvttqonkmnonnkhiorrsutrruussvurquvxwz{}{|~|z|~}x||}~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||~{|~}|}}~~�}||||}|zzz{w{{xwz{wxxusvwvusppsrqqrqrtrqrppoommqolmihkjhihe]TLNTZZZVPG@954:=.#*5999899BNWYXVUWTSRQRROLKIEDCA=<<=:82/,08CLOPNKJHLL[r���~st�������������������������w^KIJKNUXW[^cehkor~��|p`NB<9964*&9EIHB5)'5AMX]aabffiinqtwxxz{|}�~������~zm^NA73*#%9Palv~�������������������������������������������������������Ǽ������������v_BBe������ƭl^{pP!'6B:-31(263/*'%  "$"!'3D^t��}zywssrrojjijlopnlkid]TG9+()(%%%&$#"#" '0.*" !#%')).057=@CBEGGIIGHILMOSUWXZ[\\\]_ZI2#			@it|���~xwuuqjZSjnnqxyvw~�����|~{z{|||y{xvsnlljge^Zb_cgjlqprqnnnjigiiiijhjlmnnnpqnloopopqusqpomomoplg_`abeilkkgea_ZVNIHNUY\_ejmnoppopsssturqrrssrruw}���}~}{|~}��}������~}}~}||{yxvw}~~���}}}������~~~�~��������xohklosusjdbccdca^_a``bccdfggfb``ab`b`^\__\\Z^VaVSPQPNLJJIFGGECEJOONJCA@?@@A><;9;9Ba�������������������������������
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md b/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md
new file mode 100644
index 000000000..5d33d8d1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md
@@ -0,0 +1,42 @@
+  -- High Performance Computing Linpack Benchmark (HPL)                
+     Modifications Copyright (C) 2023 Intel Corporation
+                                                                       
+  -- Copyright notice and Licensing terms:                             
+                                                                       
+  Redistribution  and  use in  source and binary forms, with or without
+  modification, are  permitted provided  that the following  conditions
+  are met:                                                             
+                                                                       
+  1. Redistributions  of  source  code  must retain the above copyright
+  notice, this list of conditions and the following disclaimer.        
+                                                                       
+  2. Redistributions in binary form must reproduce  the above copyright
+  notice, this list of conditions,  and the following disclaimer in the
+  documentation and/or other materials provided with the distribution. 
+                                                                       
+  3. All  advertising  materials  mentioning  features  or  use of this
+  software must display the following acknowledgement:                 
+  This  product  includes  software  developed  at  the  University  of
+  Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                       
+  4. The name of the  University,  the name of the  Laboratory,  or the
+  names  of  its  contributors  may  not  be used to endorse or promote
+  products  derived   from   this  software  without  specific  written
+  permission.                                                          
+                                                                       
+  -- Disclaimer:                                                       
+                                                                      
+  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+  ---------------------------------------------------------------------
+ 
+ SPDX-License-Identifier: BSD-4-Clause
diff --git a/third-party-programs/Velocity-Bench/hplinpack/README.md b/third-party-programs/Velocity-Bench/hplinpack/README.md
new file mode 100644
index 000000000..80da74c85
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/README.md
@@ -0,0 +1,89 @@
+This is a workload for high performance linpack. <br />
+
+## CUDA <br />
+Source the oneAPI <br />
+cd cuda/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ && cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/cuda/:$LD_LIBRARY_PATH <br />
+
+## HIP <br />
+Source the oneAPI <br />
+cd hip/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ && cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/cuda/:$LD_LIBRARY_PATH <br />
+
+## Open Source oneAPI DPC++ compiler for AMD backend <br/>
+export USE_AMD_BACKEND=ON <br />
+   
+Source the oneAPI MPI and oneMKL environment variables. <br />
+source /opt/intel/oneapi/mkl/latest/env/vars.sh <br />
+source /opt/intel/oneapi/mpi/latest/env/vars.sh <br />
+
+Source the open source oneAPI DPC++ compiler. <br />
+
+cd dpcpp/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ <br />
+cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH <br />
+./xhpl <br />
+
+## Open Source oneAPI DPC++ compiler for Nvidia backend <br/>
+export USE_NVIDIA_BACKEND=ON <br />
+   
+Source the oneAPI MPI and oneMKL environment variables. <br />
+source /opt/intel/oneapi/mkl/latest/env/vars.sh <br />
+source /opt/intel/oneapi/mpi/latest/env/vars.sh <br />
+
+Source the open source oneAPI DPC++ compiler. <br />
+source ~/sycl_workspace/llvm/env.sh <br />
+
+cd dpcpp/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ <br />
+cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH <br />
+./xhpl <br />
+
+## DPC++ MPI version. <br />
+source oneAPI <br />
+cd dpcpp/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ <br />
+cp ../../../../datafiles/HPL_small_gpu_2_tile.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH <br />
+export I_MPI_DEBUG=5 <br />
+export I_MPI_FABRICS=shm <br />
+export I_MPI_OFFLOAD_TOPOLIB=level_zero <br />
+export I_MPI_OFFLOAD_CELL_LIST=0,1 <br />
+mpirun -bootstrap ssh -n 2 ./xhpl <br />
+
+## For CPU. <br />
+source oneAPI <br />
+export ONEAPI_DEVICE_SELECTOR=opencl:cpu <br />
+cd dpcpp/hpl-2.3/ <br />
+make clean && make <br />
+cd bin/intel64/ <br />
+cp ../../../../datafiles/HPL_small_cpu.dat HPL.dat <br />
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH <br />
+OMP_NUM_THREADS=32 OMP_PLACES=numa_domains OMP_PROC_BIND=close ./xhpl <br />
+
+## view output <br />
+### look for the GFlops measurement in the output log<br />
+================================================================================ <br />
+T/V                N    NB     P     Q               Time                 Gflops <br />
+-------------------------------------------------------------------------------- <br />
+WR10L2L2        4096   768     1     1               0.33              1.387e+02 <br />
+-------------------------------------------------------------------------------- <br />
+||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)=        0.0056536 ...... PASSED <br />
+================================================================================ <br />
+
+Finished      1 tests with the following results: <br />
+              1 tests completed and passed residual checks, <br />
+              0 tests completed and failed residual checks, <br />
+              0 tests skipped because of illegal input values. <br />
+-------------------------------------------------------------------------------- <br />
+
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS
new file mode 100644
index 000000000..b08e25180
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS
@@ -0,0 +1,6 @@
+Antoine Petitet
+Clint Whaley rcwhaley@lsu.edu
+Jack Dongarra dongarra@icl.utk.edu
+Andy Cleary
+Piotr Luszczek luszczek@icl.utk.edu
+Julien Langou Julien.Langou@ucdenver.edu
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS
new file mode 100644
index 000000000..08d694014
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS
@@ -0,0 +1,9 @@
+==============================================================
+ List of the known problems with the HPL software
+
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+
+==============================================================
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now a  run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 15/12/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15;  Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 not to use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost surely
+ 1/2, we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL
new file mode 100644
index 000000000..fec266c49
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL
@@ -0,0 +1,81 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ 1) Retrieve the tar file, then
+
+    gunzip hpl.tgz; tar -xvf hpl.tar
+
+ this  will create an  hpl  directory,  that we call below the
+ top-level directory.
+
+ 2) Create a file Make.<arch> in the  top-level directory. For
+ this purpose,  you  may  want  to re-use one contained in the
+ setup directory. This file essentially contains the compilers
+ and libraries with their paths to be used.
+
+ 3) Type "make arch=<arch>". This  should create an executable
+ in the bin/<arch> directory called xhpl.
+
+ For example, on our Linux PII cluster, I create a file called
+ Make.Linux_PII in the top-level directory. Then, I type
+    "make arch=Linux_PII" 
+ This creates the executable file bin/Linux_PII/xhpl.
+
+ 4) Quick check: run a few tests:
+
+    cd bin/<arch>
+    mpirun -np 4 xhpl
+
+ 5) Tuning: Most of the performance  parameters can be tuned,
+ by modifying the input file bin/HPL.dat. See the file TUNING
+ in the top-level directory.
+
+==============================================================
+
+ Compile time options:  At the end of the "model" Make.<arch>,
+ ---------------------  the  user  is given the opportunity to
+ compile the software with some specific compile options.  The
+ list of these options and their meanings is:
+
+    -DHPL_COPY_L
+       force the copy of the panel L before bcast;
+
+    -DHPL_CALL_CBLAS
+       call the cblas interface;
+
+    -DHPL_CALL_VSIPL
+       call the vsip  library;
+
+    -DHPL_DETAILED_TIMING
+       enables detail timers;
+
+ The  user  must  choose  between  either  the BLAS Fortran 77
+ interface,  or the  BLAS  C  interface,  or the VSIPL library
+ depending on which computational kernels are available on his
+ system. Only one of these options should be selected.  If you
+ choose the BLAS Fortran 77 interface, it is necessary to fill
+ out the machine-specific C to Fortran 77 interface section of
+ the  Make.<arch>  file.  To  do this,  please  refer  to  the 
+ Make.<arch> examples contained in the setup directory.
+
+ By default HPL will:
+    *) not copy L before broadcast,
+    *) call the BLAS Fortran 77 interface,
+    *) not display detailed timing information.
+
+ As an example,  suppose  one wants  HPL  to copy the panel of
+ columns  into  a  contiguous buffer  before broadcasting.  In
+ theory,  it  would be more efficient to let  HPL  create  the
+ appropriate  MPI  user-defined data type since this may avoid 
+ the data copy. So, it is a strange idea, but one insists.  To
+ achieve this one would add -DHPL_COPY_L  to the definition of
+ HPL_OPTS  at the end of the file  Make.<arch>.  Issue  then a
+ "make clean arch=<arch>; make build arch=<arch>" and the xhpl
+ executable will be rebuilt with that feature in.
+==============================================================
+ 
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
new file mode 100644
index 000000000..492ed42ca
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
@@ -0,0 +1,236 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = intel64 
+export  ARCH = intel64
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+# Set TOPdir to the location of where this is being built
+TOPdir       = $(CURDIR)
+INCdir       = $(TOPdir)/include
+BINdir        =$(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+OneAPIdir    = $(ONEAPI_ROOT)
+MPdir        = $(OneAPIdir)/mpi/latest/
+MPinc        = -I$(MPdir)/include/
+MPlib        = -lmpi #$(MPdir)/lib/release/libmpi.so
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+# NOTE(review): LAdir is defined here but neither LAinc nor LAlib below
+# refers to it; the MKL libraries are named with -l only, so the linker
+# must find them through its own search path -- confirm.
+LAdir        = $(OneAPIdir)/mkl/latest/lib/intel64/
+LAinc        = -I$(OneAPIdir)/mkl/latest/include/intel64/
+# Link line: libdgemm from $(TOPdir)/src/cuda first, then MKL, the threading
+# runtime, libm and the C++ runtime.  The trailing -I adds src/cuda as an
+# include directory wherever LAlib is used.
+LAlib 	     = -L$(TOPdir)/src/cuda/ -ldgemm  -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++ -I$(TOPdir)/src/cuda/
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+# One option from each of the three categories listed above.
+# NOTE(review): the name-space option chosen here is -DAdd__ (described
+# above as the f2c convention), not the -DAdd_ marked [default] -- confirm
+# this is intended for the BLAS library being linked.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+# Header search path: the common include directory and its per-platform
+# subdirectory.  $(LAinc) and $(MPinc) are commented out, so they are not
+# part of the value.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) #$(LAinc) $(MPinc)
+# Libraries in link order: HPL itself, then BLAS, then MPI.
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+# -DASYOUGO              enable timing information as you go (nonintrusive)
+# -DASYOUGO2             slightly intrusive timing information
+# -DASYOUGO2_DISPLAY     display detailed DGEMM information
+# -DENDEARLY             end the problem early  
+# -DFASTSWAP             insert to use DLASWP instead of HPL code
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+# Selects the CBLAS interface instead of the Fortran 77 BLAS interface that
+# the list above describes as the default.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+#
+# Everything passed to the compiler ahead of the optimisation flags:
+# Fortran/C interface macros, compile-time options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+# C compiler: the MPI compiler wrapper.
+CC           = mpicc
+# Definitions only, no optimisation flags; presumably for the sources that
+# must be built unoptimised -- confirm against the per-directory makefiles.
+CCNOOPT      = $(HPL_DEFS)
+# Definitions plus optimisation, warning and OpenMP flags.
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+# The C compiler doubles as the linker and is given the same flags.
+LINKER       = $(CC) 
+LINKFLAGS    = $(CCFLAGS) 
+#
+# Static library tools.  RANLIB is set to echo, so wherever $(RANLIB) is
+# invoked it only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
+# Command used for every recursive make.  It is derived from the make that
+# is currently running instead of the literal word "make", so a build
+# started as e.g. "gmake" keeps using that program in the sub-directories.
+# VERBOSE, arch and TOPdir are handed down on the command line; TOPdir in
+# particular must be passed explicitly because its default, $(CURDIR),
+# would otherwise change in every sub-directory.  Simple assignment (:=)
+# is required for the self-reference.
+MAKE := $(MAKE) VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top
new file mode 100644
index 000000000..48967633b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top
@@ -0,0 +1,195 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# Platform name.  The placeholder value is meant to be overridden with
+# arch=<name> on the command line, as the top-level Makefile does.
+arch             = UNKNOWN
+#
+# Pull in the platform settings (tools, paths, compiler flags, MAKE).  The
+# include fails if no Make.$(arch) file exists.
+include Make.$(arch)
+#
+## build ###############################################################
+#
+# Build each HPL source component in its per-arch directory, then the code
+# in src/cuda (which has no per-arch subdirectory).
+# '&&' makes the sub-make conditional on the directory change: with ';' a
+# failed cd would start make in the top-level directory instead, which
+# re-enters the top-level Makefile.
+.PHONY: build_src
+build_src        :
+	( $(CD) src/auxil/$(arch)  && $(MAKE) )
+	( $(CD) src/blas/$(arch)   && $(MAKE) )
+	( $(CD) src/comm/$(arch)   && $(MAKE) )
+	( $(CD) src/grid/$(arch)   && $(MAKE) )
+	( $(CD) src/panel/$(arch)  && $(MAKE) )
+	( $(CD) src/pauxil/$(arch) && $(MAKE) )
+	( $(CD) src/pfact/$(arch)  && $(MAKE) )
+	( $(CD) src/pgesv/$(arch)  && $(MAKE) )
+	( $(CD) src/cuda/          && $(MAKE) )
+#
+# Build each testing component in its per-arch directory.
+# '&&' makes the sub-make conditional on the directory change: with ';' a
+# failed cd would start make in the top-level directory instead.
+.PHONY: build_tst
+build_tst        :
+	( $(CD) testing/matgen/$(arch)  && $(MAKE) )
+	( $(CD) testing/timer/$(arch)   && $(MAKE) )
+	( $(CD) testing/pmatgen/$(arch) && $(MAKE) )
+	( $(CD) testing/ptimer/$(arch)  && $(MAKE) )
+	( $(CD) testing/ptest/$(arch)   && $(MAKE) )
+#( $(CD) testing/test/$(arch);      $(MAKE) )
+#
+## startup #############################################################
+#
+# Create the per-arch header, library and executable directories.  The
+# operands are given parent-first in one call; failures are ignored.
+startup_dir      :
+	- $(MKDIR) include/$(arch) lib lib/$(arch) bin bin/$(arch)
+#
+# Run the 'leaf' target once per source component to set up its per-arch
+# directory.  Failures are ignored.
+startup_src      :
+	- for d in auxil blas comm grid panel pauxil pfact pgesv; do \
+	    $(MAKE) -f Make.top leaf le=src/$$d arch=$(arch); \
+	  done
+#
+# Run the 'leaf' target once per testing component to set up its per-arch
+# directory.  Failures are ignored.
+startup_tst      :
+	- for d in matgen timer pmatgen ptimer ptest; do \
+	    $(MAKE) -f Make.top leaf le=testing/$$d arch=$(arch); \
+	  done
+#- $(MAKE) -f Make.top leaf le=testing/test    arch=$(arch)
+#
+## refresh #############################################################
+#
+# Install makes/Make.<component> as the Makefile of each source component's
+# per-arch directory.  Failures are ignored.
+refresh_src      :
+	- for m in auxil blas comm grid panel pauxil pfact pgesv; do \
+	    $(CP) makes/Make.$$m src/$$m/$(arch)/Makefile; \
+	  done
+#
+# Install makes/Make.<component> as the Makefile of each testing component's
+# per-arch directory.  Failures are ignored.
+refresh_tst      :
+	- for m in matgen timer pmatgen ptimer ptest; do \
+	    $(CP) makes/Make.$$m testing/$$m/$(arch)/Makefile; \
+	  done
+#- $(CP) makes/Make.test     testing/test/$(arch)/Makefile
+#
+## clean ###############################################################
+#
+# Run 'clean' in every source component's per-arch directory and in
+# src/cuda.  Errors are ignored so one missing directory does not stop the
+# rest.  '&&' is essential: with ';' a failed cd would run '$(MAKE) clean'
+# in the top-level directory, whose 'clean' target calls back into this
+# one and recurses without end.
+.PHONY: clean_src
+clean_src        :
+	- ( $(CD) src/auxil/$(arch)  && $(MAKE) clean )
+	- ( $(CD) src/blas/$(arch)   && $(MAKE) clean )
+	- ( $(CD) src/comm/$(arch)   && $(MAKE) clean )
+	- ( $(CD) src/grid/$(arch)   && $(MAKE) clean )
+	- ( $(CD) src/panel/$(arch)  && $(MAKE) clean )
+	- ( $(CD) src/pauxil/$(arch) && $(MAKE) clean )
+	- ( $(CD) src/pfact/$(arch)  && $(MAKE) clean )
+	- ( $(CD) src/pgesv/$(arch)  && $(MAKE) clean )
+	- ( $(CD) src/cuda/          && $(MAKE) clean )
+#
+# Run 'clean' in every testing component's per-arch directory.  Errors are
+# ignored.  '&&' keeps '$(MAKE) clean' from running in the top-level
+# directory (and recursing into this target again) when a cd fails.
+.PHONY: clean_tst
+clean_tst        :
+	- ( $(CD) testing/matgen/$(arch)  && $(MAKE) clean )
+	- ( $(CD) testing/timer/$(arch)   && $(MAKE) clean )
+	- ( $(CD) testing/pmatgen/$(arch) && $(MAKE) clean )
+	- ( $(CD) testing/ptimer/$(arch)  && $(MAKE) clean )
+	- ( $(CD) testing/ptest/$(arch)   && $(MAKE) clean )
+#- ( $(CD) testing/test/$(arch);     $(MAKE) clean )
+#
+## clean_arch ##########################################################
+#
+# Remove every source component's per-arch directory, then run 'clean' in
+# src/cuda (which has no per-arch directory to remove).  Errors are
+# ignored.  The src/cuda step uses '&&' so that '$(MAKE) clean' never runs
+# in the top-level directory when the cd fails.
+.PHONY: clean_arch_src
+clean_arch_src   :
+	- $(RM) -r src/auxil/$(arch)
+	- $(RM) -r src/blas/$(arch)
+	- $(RM) -r src/comm/$(arch)
+	- $(RM) -r src/grid/$(arch)
+	- $(RM) -r src/panel/$(arch)
+	- $(RM) -r src/pauxil/$(arch)
+	- $(RM) -r src/pfact/$(arch)
+	- $(RM) -r src/pgesv/$(arch)
+	- ( $(CD) src/cuda && $(MAKE) clean )
+#
+# Remove every testing component's per-arch directory in one call.
+# Failures are ignored.
+clean_arch_tst   :
+	- $(RM) -r testing/matgen/$(arch) testing/timer/$(arch) \
+	           testing/pmatgen/$(arch) testing/ptimer/$(arch) \
+	           testing/ptest/$(arch)
+#- $(RM) -r testing/test/$(arch)
+#
+## clean_arch_all ######################################################
+#
+# Remove everything that exists only for $(arch): first the per-arch source
+# and testing directories (through the two targets above), then the
+# per-arch bin, include and lib directories.  Errors are ignored.
+clean_arch_all   :
+	- $(MAKE) -f Make.top clean_arch_src arch=$(arch)
+	- $(MAKE) -f Make.top clean_arch_tst arch=$(arch)
+	- $(RM) -r bin/$(arch) include/$(arch) lib/$(arch)
+#
+## clean_guard #########################################################
+#
+# Delete the *.grd files in every source component's per-arch directory.
+# Errors are ignored.  '&&' ensures the delete runs only inside the
+# intended directory; with ';' a failed cd would delete *.grd from the
+# directory make was started in.
+.PHONY: clean_guard_src
+clean_guard_src  :
+	- ( $(CD) src/auxil/$(arch)  && $(RM) *.grd )
+	- ( $(CD) src/blas/$(arch)   && $(RM) *.grd )
+	- ( $(CD) src/comm/$(arch)   && $(RM) *.grd )
+	- ( $(CD) src/grid/$(arch)   && $(RM) *.grd )
+	- ( $(CD) src/panel/$(arch)  && $(RM) *.grd )
+	- ( $(CD) src/pauxil/$(arch) && $(RM) *.grd )
+	- ( $(CD) src/pfact/$(arch)  && $(RM) *.grd )
+	- ( $(CD) src/pgesv/$(arch)  && $(RM) *.grd )
+#
+# Delete the *.grd files in every testing component's per-arch directory.
+# Errors are ignored.  '&&' ensures the delete runs only inside the
+# intended directory, never in the starting directory after a failed cd.
+.PHONY: clean_guard_tst
+clean_guard_tst  :
+	- ( $(CD) testing/matgen/$(arch)  && $(RM) *.grd )
+	- ( $(CD) testing/timer/$(arch)   && $(RM) *.grd )
+	- ( $(CD) testing/pmatgen/$(arch) && $(RM) *.grd )
+	- ( $(CD) testing/ptimer/$(arch)  && $(RM) *.grd )
+	- ( $(CD) testing/ptest/$(arch)   && $(RM) *.grd )
+#- ( $(CD) testing/test/$(arch);    $(RM) *.grd )
+#
+## misc ################################################################
+#
+# Set up one component directory, named by 'le': create its per-arch
+# subdirectory and link the platform file into it as Make.inc.
+# Errors are ignored.  '&&' keeps both steps inside the intended directory;
+# with ';' a failed cd would create the directory or the link in the
+# directory make was started in.
+.PHONY: leaf
+leaf             :
+	- ( $(CD) $(le) && $(MKDIR) $(arch) )
+	- ( $(CD) $(le)/$(arch) && \
+            $(LN_S) $(TOPdir)/Make.$(arch) Make.inc )
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile
new file mode 100644
index 000000000..40b5585ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile
@@ -0,0 +1,135 @@
+
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+ 
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+#
+# Shell that make uses to run every recipe line.
+SHELL            = /bin/sh
+#
+# Platform name handed to Make.top as arch=... by every rule below.
+arch             = intel64
+# NOTE(review): this lowercase variable is not referenced by the rules
+# visible in this file, which all use $(MAKE) -- confirm it is still needed.
+make             = 'make VERBOSE=1'
+#
+## Targets #############################################################
+#
+# None of the targets below names a file that its recipe creates.
+# Declaring them phony keeps a same-named file (for example an INSTALL text
+# file on a case-insensitive file system) from making a target look up to
+# date and silently skipping it.
+.PHONY: all install startup refresh build
+.PHONY: clean clean_arch clean_arch_all clean_guard
+#
+# 'install' depends on its prerequisites running in the order written
+# (startup, refresh, build).  Serialise this makefile so that -j cannot
+# start them together; recursively invoked makes may still run in parallel.
+.NOTPARALLEL:
+#
+# Default target.
+all              : install
+#
+# ######################################################################
+#
+# Full build: set up the per-arch tree, install its makefiles, compile.
+install          : startup refresh build
+#
+# Create the per-arch directories and links, then install the makefiles.
+startup          :
+	$(MAKE) -f Make.top startup_dir     arch=$(arch)
+	$(MAKE) -f Make.top startup_src     arch=$(arch)
+	$(MAKE) -f Make.top startup_tst     arch=$(arch)
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+#
+# Re-install the per-arch makefiles from makes/.
+refresh          :
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+#
+# Compile the sources, then the testing programs.
+build            :
+	$(MAKE) -f Make.top build_src       arch=$(arch)
+	$(MAKE) -f Make.top build_tst       arch=$(arch)
+#
+# Run 'clean' inside every per-arch directory.
+clean            :
+	$(MAKE) -f Make.top clean_src       arch=$(arch)
+	$(MAKE) -f Make.top clean_tst       arch=$(arch)
+#
+# Remove the per-arch source and testing directories.
+clean_arch       :
+	$(MAKE) -f Make.top clean_arch_src  arch=$(arch)
+	$(MAKE) -f Make.top clean_arch_tst  arch=$(arch)
+#
+# As clean_arch, plus the per-arch bin, include and lib directories.
+clean_arch_all   :
+	$(MAKE) -f Make.top clean_arch_all  arch=$(arch)
+#
+# Delete the *.grd files in every per-arch directory.
+clean_guard      :
+	$(MAKE) -f Make.top clean_guard_src arch=$(arch)
+	$(MAKE) -f Make.top clean_guard_tst arch=$(arch)
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am
new file mode 100644
index 000000000..1ad8c1b17
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am
@@ -0,0 +1,3 @@
+# Subdirectories that the recursive targets descend into.
+SUBDIRS = src testing
+
+# Add the top-level include directory to the preprocessor search path.
+AM_CPPFLAGS = -I$(top_srcdir)/include
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in
new file mode 100644
index 000000000..76f0e2dd6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in
@@ -0,0 +1,772 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(am__DIST_COMMON)
+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+ configure.lineno config.status.lineno
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	cscope distdir distdir-am dist dist-all distcheck
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+CSCOPE = cscope
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+	$(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \
+	INSTALL NEWS README THANKS TODO compile config.guess \
+	config.sub depcomp install-sh missing
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+am__remove_distdir = \
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
+am__post_remove_distdir = $(am__remove_distdir)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+DIST_ARCHIVES = $(distdir).tar.gz
+GZIP_ENV = --best
+DIST_TARGETS = dist-gzip
+distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
+distcleancheck_listfiles = find . -type f -print
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = src testing
+AM_CPPFLAGS = -I$(top_srcdir)/include
+all: all-recursive
+
+.SUFFIXES:
+am--refresh: Makefile
+	@:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \
+	      $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    echo ' $(SHELL) ./config.status'; \
+	    $(SHELL) ./config.status;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	$(SHELL) ./config.status --recheck
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	$(am__cd) $(srcdir) && $(AUTOCONF)
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+$(am__aclocal_m4_deps):
+# If include/hplconfig.h is missing, delete the stamp file and re-make the stamp.
+include/hplconfig.h: include/stamp-h1
+	@test -f $@ || rm -f include/stamp-h1
+	@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1
+# When hplconfig.h.in or config.status is newer: drop the stamp and have config.status produce include/hplconfig.h.
+include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status
+	@rm -f include/stamp-h1
+	cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h
+$(top_srcdir)/include/hplconfig.h.in:  $(am__configure_deps) 
+	($(am__cd) $(top_srcdir) && $(AUTOHEADER))
+	rm -f include/stamp-h1
+	touch $@
+# Delete include/hplconfig.h and its stamp file; the leading '-' makes make ignore an rm failure.
+distclean-hdr:
+	-rm -f include/hplconfig.h include/stamp-h1
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscope: cscope.files
+	test ! -s cscope.files \
+	  || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
+clean-cscope:
+	-rm -f cscope.files
+cscope.files: clean-cscope cscopelist
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+# Delete tag/ID database files and cscope outputs from the current directory; rm failures are ignored.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+	-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
+# Bring $(BUILT_SOURCES) up to date first, then delegate to the distdir-am target in a sub-make.
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	$(am__remove_distdir)
+	test -d "$(distdir)" || mkdir "$(distdir)"
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+	-test -n "$(am__skip_mode_fix)" \
+	|| find "$(distdir)" -type d ! -perm -755 \
+		-exec chmod u+rwx,go+rx {} \; -o \
+	  ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+	|| chmod -R a+r "$(distdir)"
+dist-gzip: distdir
+	tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
+	$(am__post_remove_distdir)
+# Pipe $(am__tar) output for $(distdir) through bzip2 into $(distdir).tar.bz2; BZIP2 falls back to -9 when unset.
+dist-bzip2: distdir
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__post_remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
+	$(am__post_remove_distdir)
+# Pipe $(am__tar) output for $(distdir) through xz into $(distdir).tar.xz; XZ_OPT falls back to -e when unset.
+dist-xz: distdir
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
+	$(am__post_remove_distdir)
+
+dist-tarZ: distdir
+	@echo WARNING: "Support for distribution archives compressed with" \
+		       "legacy program 'compress' is deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
+	$(am__post_remove_distdir)
+
+dist-shar: distdir
+	@echo WARNING: "Support for shar distribution archives is" \
+	               "deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
+	$(am__post_remove_distdir)
+# Remove any existing $(distdir).zip, then archive $(distdir) with zip -rq.
+dist-zip: distdir
+	-rm -f $(distdir).zip
+	zip -rq $(distdir).zip $(distdir)
+	$(am__post_remove_distdir)
+# Build all $(DIST_TARGETS) in one sub-make with am__post_remove_distdir overridden to '@:', then run it once here.
+dist dist-all:
+	$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
+	$(am__post_remove_distdir)
+
+# This target untars the dist file and tries a VPATH configuration.  Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+	case '$(DIST_ARCHIVES)' in \
+	*.tar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
+	*.tar.bz2*) \
+	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
+	*.tar.xz*) \
+	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
+	*.tar.Z*) \
+	  uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
+	*.shar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
+	*.zip*) \
+	  unzip $(distdir).zip ;;\
+	esac
+	chmod -R a-w $(distdir)
+	chmod u+w $(distdir)
+	mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
+	chmod a-w $(distdir)
+	test -d $(distdir)/_build || exit 0; \
+	dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
+	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
+	  && am__cwd=`pwd` \
+	  && $(am__cd) $(distdir)/_build/sub \
+	  && ../../configure \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
+	    $(DISTCHECK_CONFIGURE_FLAGS) \
+	    --srcdir=../.. --prefix="$$dc_install_base" \
+	  && $(MAKE) $(AM_MAKEFLAGS) \
+	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
+	  && $(MAKE) $(AM_MAKEFLAGS) check \
+	  && $(MAKE) $(AM_MAKEFLAGS) install \
+	  && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+	  && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+	  && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
+	        distuninstallcheck \
+	  && chmod -R a-w "$$dc_install_base" \
+	  && ({ \
+	       (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
+	            distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
+	      } || { rm -rf "$$dc_destdir"; exit 1; }) \
+	  && rm -rf "$$dc_destdir" \
+	  && $(MAKE) $(AM_MAKEFLAGS) dist \
+	  && rm -rf $(DIST_ARCHIVES) \
+	  && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
+	  && cd "$$am__cwd" \
+	  || exit 1
+	$(am__post_remove_distdir)
+	@(echo "$(distdir) archives ready for distribution: "; \
+	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
+	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
+distuninstallcheck:
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
+	   || { echo "ERROR: files left after uninstall:" ; \
+	        if test -n "$(DESTDIR)"; then \
+	          echo "  (check DESTDIR support)"; \
+	        fi ; \
+	        $(distuninstallcheck_listfiles) ; \
+	        exit 1; } >&2
+distcleancheck: distclean
+	@if test '$(srcdir)' = . ; then \
+	  echo "ERROR: distcleancheck can only run from a VPATH build" ; \
+	  exit 1 ; \
+	fi
+	@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
+	  || { echo "ERROR: files left in build directory after distclean:" ; \
+	       $(distcleancheck_listfiles) ; \
+	       exit 1; } >&2
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+# Remove $(CONFIG_CLEAN_FILES) if non-empty; remove $(CONFIG_CLEAN_VPATH_FILES) only if non-empty and srcdir is not '.'.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+# Only prints a notice that this target is meant for maintainers; this rule deletes nothing itself.
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic mostlyclean-am
+# After distclean-recursive, remove $(am__CONFIG_DISTCLEAN_FILES) and the Makefile in this directory.
+distclean: distclean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+# After maintainer-clean-recursive, remove $(am__CONFIG_DISTCLEAN_FILES), autom4te.cache and the Makefile.
+maintainer-clean: maintainer-clean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -rf $(top_srcdir)/autom4te.cache
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
+	am--refresh check check-am clean clean-cscope clean-generic \
+	cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \
+	dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \
+	distcheck distclean distclean-generic distclean-hdr \
+	distclean-tags distcleancheck distdir distuninstallcheck dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now a  run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 15/12/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15;  Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 to not use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost surely
+ 1/2; we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README
new file mode 100644
index 000000000..c3f79a877
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README
@@ -0,0 +1,32 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ HPL is a software package that solves a (random) dense linear
+ system  in   double  precision  (64   bits)   arithmetic   on 
+ distributed-memory  computers.   It can thus be regarded as a
+ portable as well as  freely  available implementation  of the
+ High Performance Computing Linpack Benchmark.
+
+ The  HPL  software  package requires the availability on your
+ system of an implementation of the  Message Passing Interface
+ MPI  (1.1 compliant).  An  implementation of either the Basic
+ Linear Algebra Subprograms  BLAS  or the  Vector Signal Image
+ Processing Library VSIPL is also needed.  Machine-specific as
+ well as generic implementations of MPI, the  BLAS  and  VSIPL
+ are available for a large variety of systems.
+
+ Install See the file INSTALL in this directory.
+ -------
+
+ Tuning  See the file TUNING in this directory.
+ ------
+
+ Bugs  Known  problems and bugs with this release are documen-
+ ----  ted in the file hpl/BUGS.
+
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS
new file mode 100644
index 000000000..1c5641ce4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS
@@ -0,0 +1 @@
+This software was improved with contribution of external developers.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING
new file mode 100644
index 000000000..24707f1fc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING
@@ -0,0 +1,419 @@
+==============================================================
+ Performance Tuning and setting up the input data file HPL.dat
+ 
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+
+ After  having  built  the executable hpl/bin/<arch>/xhpl, one
+ may want to modify the input  data  file  HPL.dat.  This file
+ should  reside  in  the  same  directory  as  the  executable 
+ hpl/bin/<arch>/xhpl.  An example  HPL.dat file is provided by
+ default.  This  file  contains  information about the problem
+ sizes,  machine configuration,  and  algorithm features to be
+ used by the executable. It is 30 lines long. All the selected
+ parameters  will  be  printed  in the output generated by the
+ executable.
+
+ At the end of this file,  there are a couple of experimental
+ guidelines that you may find useful.
+
+==============================================================
+ File HPL.dat (description):
+
+ Line 1: (unused) Typically  one  would  use this line for its 
+ own good. For example, it could be used to summarize the con-
+ tent of the input file. By default this line reads:
+ 
+ HPL Linpack benchmark input file
+ 
+ Line 2: (unused) same as line 1. By default this line reads:
+ 
+ Innovative Computing Laboratory, University of Tennessee
+ 
+ Line 3: the  user  can  choose where the output should be re-
+ directed to.  In the case of a file, a name is necessary, and
+ this  is  the  line  where one wants to specify it.  Only the
+ first name on this line is significant.  By default, the li-
+ ne reads:
+ 
+ HPL.out  output file name (if any)
+ 
+ This  means  that if  one chooses to redirect the output to a
+ file, the file will be called "HPL.out". The rest of the line
+ is unused, and this space can be used to put some informative
+ comment on the meaning of this line.
+ 
+ Line 4: This line specifies  where the  output should go. The
+ line is formatted, it must be a positive integer, the rest is
+ insignificant. 3 choices are possible for the positive inte-
+ ger,  6 means that the output will go to the standard output,
+ 7 means that the output will go to the standard error. Any o-
+ ther  integer  means  that  the  output  should be redirected
+ to a file,  whose name has been specified in the line above.
+ This line by default reads:
+ 
+ 6        device out (6=stdout,7=stderr,file)
+ 
+ which  means  that  the  output generated  by  the executable
+ should be redirected to the standard output.
+ 
+ Line 5: This line specifies the number of problem sizes to be
+ executed. This number should be less than or equal to 20. The
+ first  integer  is  significant,  the rest is ignored. If the 
+ line reads:
+ 
+ 3        # of problems sizes (N)
+ 
+ this  means  that  the user is willing to run 3 problem sizes
+ that will be specified in the next line.
+ 
+ Line 6:  This  line  specifies the problem sizes one wants to 
+ run.  Assuming  the  line  above  started with 3, the 3 first
+ positive  integers  are significant, the rest is ignored. For
+ example:
+ 
+ 3000 6000 10000    Ns
+ 
+ means that one wants xhpl to run 3 (specified in line 5) pro-
+ blem sizes, namely 3000, 6000 and 10000.
+ 
+ Line 7: This line  specifies  the number of block sizes to be
+ run.  This number  should  be  less  than  or equal to  20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+ 
+ 5        # of NBs
+ 
+ this means that the user is willing to use 5 block sizes that
+ will be specified in the next line.
+ 
+ Line 8: This line specifies the block sizes one wants to run.
+ Assuming  the line above started with 5, the 5 first positive
+ integers are significant, the rest is ignored. For example:
+ 
+ 80 100 120 140 160 NBs
+ 
+ means  that  one  wants  xhpl  to use 5 (specified in line 7)
+ block sizes, namely 80, 100, 120, 140 and 160.
+
+ Line 9 specifies how the  MPI processes should be mapped onto
+ the nodes of your platform.  There are currently two possible
+ mappings, namely row- and column-major. This feature is main-
+ ly  useful  when these nodes  are  themselves multi-processor
+ computers. A row-major mapping is recommended.
+ 
+ Line 10: This line specifies  the  number of process grids to
+ be run.  This  number  should be less than or equal to 20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+
+ 2        # of process grids (P x Q)
+ 
+ this  means  that you are willing to try 2 process grid sizes 
+ that will be specified in the next line.
+ 
+ Line 11-12: These  two  lines specify  the  number of process
+ rows  and  columns of each grid you want to run on.  Assuming
+ the line above (10) started with 2,  the 2 first positive in-
+ tegers of those two lines are significant,  the rest is igno-
+ red. For example:
+ 
+ 1 2          Ps
+ 6 8          Qs
+ 
+ means  that one wants to run  xhpl  on  2 process grids (line
+ 10), namely 1 by 6 and 2 by 8.  Note:  In this example, it is
+ required then to start xhpl on at least 16 nodes  (max of P_i
+ xQ_i). The runs on the two grids will be consecutive.  If one
+ was starting xhpl on more than 16 nodes, say 52, only 6 would
+ be used for the first grid  (1x6) and then 16  (2x8) would be
+ used for the second grid.  The fact  that you started the MPI 
+ job on 52 nodes,  will not make HPL use all of them.  In this 
+ example, only 16 would be used. If one wants to run xhpl with
+ 52 processes one needs to specify a grid of 52 processes, for
+ example the following lines would do the job:
+ 
+ 4  2         Ps
+ 13 8         Qs
+ 
+ Line 13: This  line  specifies  the  threshold  the residuals
+ should be compared to.  The  residuals  should be of order 1,
+ but are in practice slightly less than this, typically 0.001.
+ This  line  is  made of a real number, the rest is insignifi-
+ cant. For example:
+ 
+ 16.0         threshold
+
+ In practice,  a value of 16.0 will cover most cases.  For va-
+ rious reasons,  it is possible that some of the residuals be-
+ come slightly larger, say for example 35.6.  xhpl  will  flag
+ those runs as failed,  however they can be considered as cor-
+ rect.  A run can be considered as failed if the residual is a
+ few orders of magnitude bigger than 1, for example 10^6 or mo-
+ re. Note: if one was to specify a threshold of 0.0, all tests
+ would be flagged  as failed, even though the answer is likely
+ to be correct.  It is allowed to specify a negative value for
+ this threshold,  in  which case the checks will be by-passed,
+ no matter what the value is, as soon as it is negative.  This
+ feature  allows to save time when performing a lot of experi-
+ ments, say for instance during the tuning phase. Example:
+ 
+ -16.0        threshold
+ 
+ The remaining lines allow one to specify algorithmic features.
+ xhpl  will  run  all  possible combinations of those for each
+ problem  size,  block size, process grid combination. This is
+ handy  when one looks for an "optimal" set of parameters.  To
+ understand  a little bit better, let us first say a few words
+ about  the algorithm implemented in HPL. Basically this is  a
+ right-looking  version  with  row-partial pivoting. The panel
+ factorization is matrix-matrix operation based and recursive,
+ dividing the panel into  NDIV  subpanels  at each step.  This
+ part  of  the  panel   factorization   is  denoted  below  by
+ "recursive panel fact. (RFACT)". The recursion stops when the
+ current panel is made of less than or equal to NBMIN columns.
+ At  that  point,  xhpl  uses  a matrix-vector operation based
+ factorization denoted below by  "PFACTs".  Classic  recursion
+ would then use  NDIV=2,  NBMIN=1.  There  are  essentially  3
+ numerically  equivalent  LU  factorization algorithm variants
+ (left-looking, Crout  and  right-looking).  In  HPL,  one can 
+ choose  every one  of those  for the  RFACT,  as well as  the
+ PFACT. The following lines of HPL.dat allow  you to set those
+ parameters.
+ 
+ Lines 14-21: (Example 1)
+ 3       # of panel fact
+ 0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+ 4       # of recursive stopping criterium
+ 1 2 4 8 NBMINs (>= 1)
+ 3       # of panels in recursion
+ 2 3 4   NDIVs
+ 3       # of recursive panel fact.
+ 0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This  example  would  try all variants of PFACT, 4 values for
+ NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+ and 4, and all variants for RFACT.  Lines 14-21: (Example 2)
+
+ 2       # of panel fact
+ 2 0     PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This example would try  2 variants of PFACT namely right loo-
+ king and left looking, 2 values for NBMIN, namely 4 and 8,  1
+ value for NDIV namely 2, and one variant for RFACT.
+ 
+ In the  main loop of the algorithm,  the current panel of co-
+ lumn is broadcast in process rows  using  a virtual  ring to-
+ pology. HPL offers various choices, and one most  likely want
+ to use the increasing ring modified encoded as 1.  4  is also
+ a good choice. Lines 22-23: (Example 1):
+
+ 1       # of broadcast
+ 1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause HPL  to broadcast the current panel using the
+ increasing ring modified topology. Lines 22-23: (Example 2):
+ 
+ 2       # of broadcast
+ 0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause  HPL to broadcast the current panel using the
+ increasing ring virtual topology and the long message algori-
+ thm.
+ 
+ Lines 24-25  allow  to  specify  the look-ahead depth used by
+ HPL. A depth of 0 means that the next panel is factorized af-
+ ter the update by the current panel is completely finished. A
+ depth of 1 means that the next panel is factorized immediate-
+ ly after being updated.  The  update by the current  panel is
+ then finished.  A depth of k means that the k next panels are
+ factorized immediately after being updated. The update by the
+ current  panel is then finished. It turns out that a depth of
+ 1  seems  to give the best results, but may need a large pro-
+ blem size  before one can see the performance gain. So use 1,
+ if you do not know better,  otherwise  you may want to try 0.
+ Look-ahead of depths 2  and larger will probably not give you
+ better results.  Lines 24-25: (Example 1):
+ 
+ 1       # of lookahead depth
+ 1       DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depth 1.
+ Lines 24-25: (Example 2):
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depths 0 and 1.
+
+ Lines 26-27  allow to specify  the swapping algorithm used by
+ HPL for all tests.  There  are  currently  two swapping algo-
+ rithms  available,  one  based  on "binary exchange"  and the
+ other one based on a  "spread-roll"  procedure  (also  called 
+ "long" below). For large problem sizes, this last one is like-
+ ly to be more efficient. The user can also choose to mix both
+ variants, that is "binary-exchange"  for  a number of columns
+ less  than a threshold value, and then the  "spread-roll" al-
+ gorithm.  This threshold  value is then specified on Line 27.
+ Lines 26-27: (Example 1):
+
+ 1       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping algorithm.  Note  that a threshold  is specified in that
+ example but not used by HPL. Lines 26-27: (Example 2):
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping algorithm  as  soon as there are more than 60 columns in
+ the row panel.  Otherwise,  the  "binary-exchange"  algorithm
+ will be used instead.
+
+ Line 28  allows  to specify whether the upper triangle of the
+ panel  of  columns  should  be  stored  in  no-transposed  or
+ transposed form. Example:
+
+ 0            L1 in (0=transposed,1=no-transposed) form
+
+ Line 29 allows to specify whether the panel of rows  U should
+ be stored in no-transposed or transposed form. Example:
+ 
+ 0            U  in (0=transposed,1=no-transposed) form
+
+ Line 30 enables/disables the equilibration phase. This option
+ will not be used unless you selected 1 or 2 in Line 26. Ex:
+
+ 1            Equilibration (0=no,1=yes)
+
+
+ Line 31  allows  to  specify  the alignment in memory for the
+ memory space allocated by HPL. On modern machines, one proba-
+ bly wants to use 4, 8 or 16. This may result in a tiny amount
+ of memory wasted. Example:
+ 
+ 4       memory alignment in double (> 0)
+
+==============================================================
+ Guidelines:
+
+ 1) Figure  out  a  good  block  size  for  the  matrix-matrix 
+ multiply routine. The best method is to try a few out. If you
+ happen  to know  the block size  used  by  the  matrix-matrix 
+ multiply routine, a small multiple of that block size will do
+ fine.
+
+ HPL  uses the block size NB for the data distribution as well
+ as  for   the  computational   granularity.   From   a   data 
+ distribution point of view,  the smaller  NB,  the better the
+ load balance.  You  definitely  want  to stay away  from very
+ large values of NB.  From a computation point of view,  a too
+ small value of  NB may limit the computational performance by
+ a large factor because almost no data reuse will occur in the
+ highest level of the memory hierarchy. The number of messages
+ will also increase.  Efficient  matrix-multiply  routines are 
+ often internally blocked.  Small multiples  of  this blocking
+ factor are likely to be good block sizes for HPL.  The bottom
+ line  is  that  "good"  block sizes  are almost always in the
+ [32..256] interval. The best values depend on the computation
+ / communication performance ratio of your system.  To  a much
+ lesser extent, the problem size  matters  as  well.  Say  for
+ example,  you empirically found that 44 was a good block size
+ with respect to performance.  88 or 132  are likely  to  give
+ slightly better  results for large problem sizes because of a
+ slightly higher flop rate.
+
+ 2)  The process mapping  should  not matter  if  the nodes of
+ your platform are single processor computers.  If these nodes
+ are multi-processors, a row-major mapping is recommended.
+
+ 3) HPL likes "square" or slightly flat process grids.  Unless
+ you  are using  a very small process grid, stay away from the 
+ 1-by-Q and P-by-1 process grids.
+
+ 4) Panel factorization parameters:  a good start  is the fol-
+ lowing for the lines 14-21:
+
+ 1       # of panel fact
+ 1       PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+
+ 5) Broadcast parameters: at this time, it is far from obvious
+ to me what the best setting is,  so I would probably try them
+ all. If I had to guess I would probably start with the follo-
+ wing for the lines 22-23:
+ 
+ 2       # of broadcast
+ 1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+ The best broadcast depends  on your problem size and hardware
+ performance. My take is that 4 or 5  may be  competitive  for
+ machines  featuring  very  fast nodes  comparatively  to  the 
+ network.
+
+ 6) Look-ahead depth: as mentioned above  0 or 1 are likely to 
+ be the best choices.  This also  depends  on the problem size
+ and machine configuration, so I would try "no look-ahead (0)"
+ and "look-ahead of depth 1 (1)". That is for lines 24-25:
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+
+ 7) Swapping:  one  can select only one of the three algorithms
+ in the input file. Theoretically, mix (2) should win, however
+ long (1) might just be good enough. The  difference should be
+ small between those two assuming  a swapping threshold of the 
+ order of the block size (NB) selected. If  this  threshold is
+ very large, HPL will use bin_exch (0) most of the time and if
+ it  is  very  small  (< NB) long (1)  will always be used. In 
+ short  and  assuming  the  block size (NB)  used is say 60, I 
+ would choose for the lines 26-27:
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold 
+
+ I would also try the long variant.  For  a very  small number 
+ of processes  in every column of the process grid  (say < 4),
+ very little performance difference should be observable.
+
+ 8) Local storage: I do not think Line 28 matters. Pick 0 if in
+ doubt.  Line 29 is more important.  It controls how the panel
+ of rows should be stored. No doubt 0 is better. The caveat is
+ that in that case the matrix-multiply function is called with
+ ( Notrans, Trans, ... ), that is C := C - A B^T.  Unless  the
+ computational  kernel  you  are  using  has a very poor (with
+ respect to performance)  implementation  of that case, and is
+ much more efficient with  ( Notrans, Notrans, ... ) just pick
+ 0 as well. So, my choice: 
+
+ 0       L1 in (0=transposed,1=no-transposed) form
+ 0       U  in (0=transposed,1=no-transposed) form
+
+ 9) Equilibration:  It  is hard to tell  whether equilibration
+ should always be performed or not. Not knowing much about the
+ random matrix generated and because the overhead is so small
+ compared to the possible gain, I turn it on all the time.
+
+ 1       Equilibration (0=no,1=yes)
+
+ 10) For  alignment, 4 should be plenty,  but just to be safe,
+ one may want to pick 8 instead.
+
+ 8       memory alignment in double (> 0)
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4
new file mode 100644
index 000000000..4072a950f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4
@@ -0,0 +1,90 @@
+
+AC_DEFUN([HPL_BLAS], [
+dnl HPL_BLAS: find a BLAS library by link-testing a fixed candidate list.
+AC_PREREQ(2.69)
+dnl Arg 1 runs on success (default: define HAVE_BLAS); arg 2 runs on failure.
+hpl_blas_ok=no
+
+dnl FIXME: add --with-blas="<library spec>"
+dnl Remember the incoming LIBS so every probe can restore it afterwards.
+current_LIBS="$LIBS"
+dnl Candidate table, one nameN/routN/libsN triple per library, in probe order.
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+for hpl_i in 1 2 3 4 5 6 7 8 9 10;
+do
+if test  x$hpl_blas_ok = xno; then
+  name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`"
+  rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`"
+  libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`"
+  AC_MSG_CHECKING([for [$]rout in [$]name])
+dnl Link the routine with LIBS set to just the flags of this candidate.
+  LIBS="[$]libs"
+  AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"])
+  LIBS="$current_LIBS"
+dnl Once one candidate links, the test above skips all remaining iterations.
+  AC_MSG_RESULT($hpl_blas_ok)
+fi
+done
+rm hplvars.txt
+
+if test  x$hpl_blas_ok = xno; then
+dnl Fallback when no table entry linked: probe -lopenblas via AC_CHECK_LIB.
+AC_MSG_CHECKING([for dgemm_ in OpenBLAS])
+AC_CHECK_LIB(openblas, dgemm_, [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"])
+AC_MSG_RESULT($hpl_blas_ok)
+dnl NOTE(review): AC_CHECK_LIB prints its own checking line; output may nest.
+fi
+
+AC_SUBST(BLAS_LIBS)
+
+# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$hpl_blas_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])
+        :
+else
+        hpl_blas_ok=no
+        $2
+fi
+
+])dnl HPL_BLAS
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4
new file mode 100644
index 000000000..56c6bd753
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4
@@ -0,0 +1,1308 @@
+# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
+You have another version of autoconf.  It may work, but is not guaranteed to.
+If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically 'autoreconf'.])])
+
+# ===========================================================================
+#      https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]])
+#
+# DESCRIPTION
+#
+#   This macro tries to find out how to compile C programs that use MPI
+#   (Message Passing Interface), a standard API for parallel process
+#   communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to
+#   be used instead of the standard macro AC_PROG_CC and will replace the
+#   standard variable CC with the found compiler.
+#
+#   MPI-WANTED-TEST is used to test whether MPI is actually wanted by the
+#   user. If MPI-WANTED_TEST is omitted or if it succeeds, the macro will
+#   try to find out how to use MPI, if it fails, the macro will call
+#   AC_PROG_CC to find a standard C compiler instead.
+#
+#   When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found
+#   (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If
+#   ACTION-IF-FOUND is not set, the macro will define HAVE_MPI.
+#
+#   The following example demonstrates usage of the macro:
+#
+#     # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found.
+#     # If --with-mpi=yes is used, try to find MPI and fail if it isn't found.
+#     # If --with-mpi=no is used, use a standard C compiler instead.
+#     AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi],
+#         [compile with MPI (parallelization) support. If none is found,
+#         MPI is not used. Default: auto])
+#     ],,[with_mpi=auto])
+#     #
+#     AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[
+#       use_mpi=no
+#       if test x"$with_mpi" = xyes; then
+#         AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.])
+#       else
+#         AC_MSG_WARN([No MPI compiler found, won't use MPI.])
+#       fi
+#     ])
+#
+# LICENSE
+#
+#   Copyright (c) 2010,2011 Olaf Lenz <olenz@icp.uni-stuttgart.de>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_PROG_CC_MPI], [
+AC_PREREQ(2.50)
+
+# Check for compiler
+# Needs to be split off into an extra macro to ensure right expansion
+# order.
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])])
+
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno],
+  [ _ax_prog_cc_mpi_mpi_found=no ],
+  [
+    AC_LANG_PUSH([C])
+    # test whether MPI_Init is available
+    # We do not use AC_SEARCH_LIBS here, as it caches its outcome and
+    # thus disallows corresponding calls in the other AX_PROG_*_MPI
+    # macros.
+    for lib in NONE mpi mpich; do
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        AC_MSG_CHECKING([for function MPI_Init])
+      else
+        AC_MSG_CHECKING([for function MPI_Init in -l$lib])
+        LIBS="-l$lib $LIBS"
+      fi
+      AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])],
+        [ _ax_prog_cc_mpi_mpi_found=yes ],
+        [ _ax_prog_cc_mpi_mpi_found=no ])
+      AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found)
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break;
+      fi
+      LIBS=$save_LIBS
+    done
+
+    # Check for header
+    AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+      AC_MSG_CHECKING([for mpi.h])
+      AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <mpi.h>])],
+        [ AC_MSG_RESULT(yes)],
+        [ AC_MSG_RESULT(no)
+         _ax_prog_cc_mpi_mpi_found=no
+      ])
+    ])
+    AC_LANG_POP([C])
+])
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+        ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2])
+        :
+],[
+        $3
+        :
+])
+
+])dnl AX_PROG_CC_MPI
+
+dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI.
+dnl To ensure the right expansion order, the main function AX_PROG_CC_MPI
+dnl has to be split into two parts.
+dnl
+dnl Known MPI C compilers:
+dnl  mpicc
+dnl  mpixlc_r
+dnl  mpixlc
+dnl  hcc
+dnl  mpxlc_r
+dnl  mpxlc
+dnl  sxmpicc  NEC SX
+dnl  mpifcc   Fujitsu
+dnl  mpgcc
+dnl  mpcc
+dnl  cmpicc
+dnl  cc
+dnl
+AC_DEFUN([_AX_PROG_CC_MPI], [
+  AC_ARG_VAR(MPICC,[MPI C compiler command])
+  ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[
+    AC_MSG_CHECKING([whether to compile using MPI])
+    if $1; then
+      _ax_prog_cc_mpi_mpi_wanted=yes
+    else
+      _ax_prog_cc_mpi_mpi_wanted=no
+    fi
+    AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted)
+  ])
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    if test -z "$CC" && test -n "$MPICC"; then
+      CC="$MPICC"
+    else
+      AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc])
+    fi
+  fi
+  AC_PROG_CC
+])dnl _AX_PROG_CC_MPI
+
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_AUTOMAKE_VERSION(VERSION)
+# ----------------------------
+# Automake X.Y traces this macro to ensure aclocal.m4 has been
+# generated from the m4 files accompanying Automake X.Y.
+# (This private macro should not be called outside this file.)
+AC_DEFUN([AM_AUTOMAKE_VERSION],
+[am__api_version='1.16'
+dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
+dnl require some minimum version.  Point them to the right macro.
+m4_if([$1], [1.16.1], [],
+      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
+])
+
+# _AM_AUTOCONF_VERSION(VERSION)
+# -----------------------------
+# aclocal traces this macro to find the Autoconf version.
+# This is a private macro too.  Using m4_define simplifies
+# the logic in aclocal, which can simply ignore this definition.
+m4_define([_AM_AUTOCONF_VERSION], [])
+
+# AM_SET_CURRENT_AUTOMAKE_VERSION
+# -------------------------------
+# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
+# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
+[AM_AUTOMAKE_VERSION([1.16.1])dnl
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
+
+# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
+# $ac_aux_dir to '$srcdir/foo'.  In other projects, it is set to
+# '$srcdir', '$srcdir/..', or '$srcdir/../..'.
+#
+# Of course, Automake must honor this variable whenever it calls a
+# tool from the auxiliary directory.  The problem is that $srcdir (and
+# therefore $ac_aux_dir as well) can be either absolute or relative,
+# depending on how configure is run.  This is pretty annoying, since
+# it makes $ac_aux_dir quite unusable in subdirectories: in the top
+# source directory, any form will work fine, but in subdirectories a
+# relative path needs to be adjusted first.
+#
+# $ac_aux_dir/missing
+#    fails when called from a subdirectory if $ac_aux_dir is relative
+# $top_srcdir/$ac_aux_dir/missing
+#    fails if $ac_aux_dir is absolute,
+#    fails when called from a subdirectory in a VPATH build with
+#          a relative $ac_aux_dir
+#
+# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
+# are both prefixed by $srcdir.  In an in-source build this is usually
+# harmless because $srcdir is '.', but things will break when you
+# start a VPATH build or use an absolute $srcdir.
+#
+# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
+# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
+#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
+# and then we would define $MISSING as
+#   MISSING="\${SHELL} $am_aux_dir/missing"
+# This will work as long as MISSING is not called from configure, because
+# unfortunately $(top_srcdir) has no meaning in configure.
+# However there are other variables, like CC, which are often used in
+# configure, and could therefore not use this "fixed" $ac_aux_dir.
+#
+# Another solution, used here, is to always expand $ac_aux_dir to an
+# absolute PATH.  The drawback is that using absolute paths prevent a
+# configured tree to be moved without reconfiguration.
+
+AC_DEFUN([AM_AUX_DIR_EXPAND],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+])
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ([2.52])dnl
+ m4_if([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+       [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+
+# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be
+# written in clear, in which case automake, when reading aclocal.m4,
+# will think it sees a *use*, and therefore will trigger all its
+# C support machinery.  Also note that it means that autoscan, seeing
+# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
+
+
+# _AM_DEPENDENCIES(NAME)
+# ----------------------
+# See how the compiler implements dependency checking.
+# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ".
+# We try a few techniques and use that to set a single cache variable.
+#
+# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
+# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
+# dependency, and given that the user is not expected to run this macro,
+# just rely on AC_PROG_CC.
+AC_DEFUN([_AM_DEPENDENCIES],
+[AC_REQUIRE([AM_SET_DEPDIR])dnl
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
+AC_REQUIRE([AM_MAKE_INCLUDE])dnl
+AC_REQUIRE([AM_DEP_TRACK])dnl
+
+m4_if([$1], [CC],   [depcc="$CC"   am_compiler_list=],
+      [$1], [CXX],  [depcc="$CXX"  am_compiler_list=],
+      [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
+      [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'],
+      [$1], [UPC],  [depcc="$UPC"  am_compiler_list=],
+      [$1], [GCJ],  [depcc="$GCJ"  am_compiler_list='gcc3 gcc'],
+                    [depcc="$$1"   am_compiler_list=])
+
+AC_CACHE_CHECK([dependency style of $depcc],
+               [am_cv_$1_dependencies_compiler_type],
+[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_$1_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
+  fi
+  am__universal=false
+  m4_case([$1], [CC],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac],
+    [CXX],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac])
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_$1_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_$1_dependencies_compiler_type=none
+fi
+])
+AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
+AM_CONDITIONAL([am__fastdep$1], [
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
+])
+
+
+# AM_SET_DEPDIR
+# -------------
+# Choose a directory name for dependency files.
+# This macro is AC_REQUIREd in _AM_DEPENDENCIES.
+AC_DEFUN([AM_SET_DEPDIR],
+[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
+])
+
+
+# AM_DEP_TRACK
+# ------------
+AC_DEFUN([AM_DEP_TRACK],
+[AC_ARG_ENABLE([dependency-tracking], [dnl
+AS_HELP_STRING(
+  [--enable-dependency-tracking],
+  [do not reject slow dependency extractors])
+AS_HELP_STRING(
+  [--disable-dependency-tracking],
+  [speeds up one-time build])])
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
+AC_SUBST([AMDEPBACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
+])
+
+# Generate code to set up dependency tracking.              -*- Autoconf -*-
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_OUTPUT_DEPENDENCY_COMMANDS
+# ------------------------------
+AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
+[{
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  AS_CASE([$CONFIG_FILES],
+          [*\'*], [eval set x "$CONFIG_FILES"],
+          [*], [set x $CONFIG_FILES])
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know of understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`AS_DIRNAME(["$am_mf"])`
+    am_filepart=`AS_BASENAME(["$am_mf"])`
+    AM_RUN_LOG([cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles]) || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).])
+  fi
+  AS_UNSET([am_dirpart])
+  AS_UNSET([am_filepart])
+  AS_UNSET([am_mf])
+  AS_UNSET([am_rc])
+  rm -f conftest-deps.mk
+}
+])# _AM_OUTPUT_DEPENDENCY_COMMANDS
+
+
+# AM_OUTPUT_DEPENDENCY_COMMANDS
+# -----------------------------
+# This macro should only be invoked once -- use via AC_REQUIRE.
+#
+# Register the 'depfiles' config command.  It is only needed when automatic
+# dependency tracking is enabled, so it runs _AM_OUTPUT_DEPENDENCY_COMMANDS
+# (creating the '.Po' and '.Plo' makefile fragments) only if $AMDEP_TRUE is empty.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
+[AC_CONFIG_COMMANDS([depfiles],
+     [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
+     [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
+
+# Do all the work for Automake.                             -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This macro actually does too much.  Some checks are only needed if
+# your package does certain things.  But this isn't really a big deal.
+
+dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O.
+m4_define([AC_PROG_CC],
+m4_defn([AC_PROG_CC])
+[_AM_PROG_CC_C_O
+])
+
+# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
+# AM_INIT_AUTOMAKE([OPTIONS])
+# -----------------------------------------------
+# The call with PACKAGE and VERSION arguments is the old style
+# call (pre autoconf-2.50), which is being phased out.  PACKAGE
+# and VERSION should now be passed to AC_INIT and removed from
+# the call to AM_INIT_AUTOMAKE.
+# We support both call styles for the transition.  After
+# the next Automake release, Autoconf can make the AC_INIT
+# arguments mandatory, and then we can depend on a new Autoconf
+# release and drop the old call support.
+AC_DEFUN([AM_INIT_AUTOMAKE],
+[AC_PREREQ([2.65])dnl
+dnl Autoconf wants to disallow AM_ names.  We explicitly allow
+dnl the ones we care about.
+m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
+AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
+AC_REQUIRE([AC_PROG_INSTALL])dnl
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+AC_SUBST([CYGPATH_W])
+
+# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls.
+m4_ifval([$2],
+[AC_DIAGNOSE([obsolete],
+             [$0: two- and three-arguments forms are deprecated.])
+m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
+ AC_SUBST([PACKAGE], [$1])dnl
+ AC_SUBST([VERSION], [$2])],
+[_AM_SET_OPTIONS([$1])dnl
+dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
+m4_if(
+  m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]),
+  [ok:ok],,
+  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
+ AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
+ AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
+
+_AM_IF_OPTION([no-define],,
+[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package])
+ AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl
+
+# Some tools Automake needs.
+AC_REQUIRE([AM_SANITY_CHECK])dnl
+AC_REQUIRE([AC_ARG_PROGRAM])dnl
+AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}])
+AM_MISSING_PROG([AUTOCONF], [autoconf])
+AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}])
+AM_MISSING_PROG([AUTOHEADER], [autoheader])
+AM_MISSING_PROG([MAKEINFO], [makeinfo])
+AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([AC_PROG_MAKE_SET])dnl
+AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+	      [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+			     [_AM_PROG_TAR([v7])])])
+_AM_IF_OPTION([no-dependencies],,
+[AC_PROVIDE_IFELSE([AC_PROG_CC],
+		  [_AM_DEPENDENCIES([CC])],
+		  [m4_define([AC_PROG_CC],
+			     m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_CXX],
+		  [_AM_DEPENDENCIES([CXX])],
+		  [m4_define([AC_PROG_CXX],
+			     m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJC],
+		  [_AM_DEPENDENCIES([OBJC])],
+		  [m4_define([AC_PROG_OBJC],
+			     m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
+		  [_AM_DEPENDENCIES([OBJCXX])],
+		  [m4_define([AC_PROG_OBJCXX],
+			     m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
+])
+AC_REQUIRE([AM_SILENT_RULES])dnl
+dnl The testsuite driver may need to know about EXEEXT, so add the
+dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen.  This
+dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
+AC_CONFIG_COMMANDS_PRE(dnl
+[m4_provide_if([_AM_COMPILER_EXEEXT],
+  [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
+  fi
+fi
+dnl The trailing newline in this macro's definition is deliberate, for
+dnl backward compatibility and to allow trailing 'dnl'-style comments
+dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
+])
+
+dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
+dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
+dnl mangled by Autoconf and run in a shell conditional statement.
+m4_define([_AC_COMPILER_EXEEXT],
+m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
+
+# When config.status generates a header, we must update the stamp-h file.
+# This file resides in the same directory as the config header
+# that is generated.  The stamp files are numbered to have different names.
+
+# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
+# loop where config.status creates the headers, so we can generate
+# our stamp files there.
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Set $install_sh (unless it is already set) and substitute it.
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+if test x"${install_sh+set}" != xset; then
+  install_sh="\${SHELL} $am_aux_dir/install-sh"
+  case $am_aux_dir in
+  *\ * | *\	*)
+    # Quote the script path when the directory has a space or a tab.
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  esac
+fi
+AC_SUBST([install_sh])])
+
+# Copyright (C) 2003-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# Probe whether the underlying file-system accepts file names that
+# begin with a dot (MS-DOS, for instance, doesn't) and record the prefix.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+# Fall back to an underscore unless the dot directory really exists.
+am__leading_dot=_
+if test -d .tst; then
+  am__leading_dot=.
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Check to see how 'make' treats includes.	            -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAKE_INCLUDE()
+# -----------------
+# Check whether make has an 'include' directive usable by our dependency
+# tracking code; AC_SUBST the result as am__include and am__quote.
+AC_DEFUN([AM_MAKE_INCLUDE],
+[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
+  AS_CASE([$?:`cat confinc.out 2>/dev/null`],
+      ['0:this is the am__doit target'],
+      [AS_CASE([$s],
+          [BSD], [am__include='.include' am__quote='"'],
+          [am__include='include' am__quote=''])])
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+AC_MSG_RESULT([${_am_result}])
+AC_SUBST([am__include])
+AC_SUBST([am__quote])])
+
+# Fake the existence of programs that GNU maintainers use.  -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MISSING_PROG(NAME, PROGRAM)
+# ------------------------------
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far and test if it is modern enough.
+# If it is, set am_missing_run to use it, otherwise, to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling.                     -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME.  Presently that only means defining a flag for this option.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a space-separated list of Automake options.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC
+# to automatically call this.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND, save the exit status in ac_status, and log it.
+# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane.    -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK
+# ---------------
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+	&& test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Enable less verbose build rules; with the default set to DEFAULT
+# ("yes" being less verbose, "no" or empty being verbose).
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in "make install-strip", and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to override this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of 'v7', 'ustar', or 'pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([acinclude.m4])
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat
new file mode 100644
index 000000000..19a956783
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat
@@ -0,0 +1,32 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+2            # of problems sizes (N)
+24576 24576 12288 Ns
+1             # of NBs
+2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256  960 512 768 1152         NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+1            # of process grids (P x Q)
+1            Ps
+1            Qs
+16.0         threshold
+1            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+1            # of recursive stopping criterium
+2 8          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+1            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0 2          BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+1 0          DEPTHs (>=0)
+1            SWAP (0=bin-exch,1=long,2=mix)
+192          swapping threshold
+1            L1 in (0=transposed,1=no-transposed) form
+1            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl
new file mode 100755
index 000000000..f192f33f4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile
new file mode 100755
index 000000000..99e50524b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile
@@ -0,0 +1,348 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+
+file_conv=  # path-conversion style; chosen lazily inside func_file_conv
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+  file=$1  # default result: the path exactly as given
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+	# lazily determine how to convert abs files
+	case `uname -s` in
+	  MINGW*)
+	    file_conv=mingw
+	    ;;
+	  CYGWIN*)
+	    file_conv=cygwin
+	    ;;
+	  *)
+	    file_conv=wine  # any other uname output selects wine
+	    ;;
+	esac
+      fi
+      case $file_conv/,$2, in  # e.g. mingw/,mingw, when LAZY is mingw
+	*,$file_conv,*)  # type is listed in LAZY: keep $file unconverted
+	  ;;
+	mingw/*)
+	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+	  ;;
+	cygwin/*)
+	  file=`cygpath -m "$file" || echo "$file"`
+	  ;;
+	wine/*)
+	  file=`winepath -w "$file" || echo "$file"`
+	  ;;
+      esac
+      ;;
+  esac  # relative and //-prefixed paths match nothing above and stay as is
+}
+
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+  func_file_conv "$1"  # converted directory comes back in $file
+  if test -z "$lib_path"; then
+    lib_path=$file
+  else
+    lib_path="$lib_path;$file"  # semicolon is the separator func_cl_dashl splits on
+  fi
+  linker_opts="$linker_opts -LIBPATH:$file"  # emitted after -link by func_cl_wrapper
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+  lib=$1  # on return, holds the file name to hand to cl
+  found=no
+  save_IFS=$IFS
+  IFS=';'  # split the search path on semicolons
+  for dir in $lib_path $LIB  # -L directories first, then $LIB, which this script never assigns
+  do
+    IFS=$save_IFS  # restore normal splitting for the loop body
+    if $shared && test -f "$dir/$lib.dll.lib"; then  # tried only while shared is the : command
+      found=yes
+      lib=$dir/$lib.dll.lib
+      break
+    fi
+    if test -f "$dir/$lib.lib"; then
+      found=yes
+      lib=$dir/$lib.lib
+      break
+    fi
+    if test -f "$dir/lib$lib.a"; then
+      found=yes
+      lib=$dir/lib$lib.a
+      break
+    fi
+  done
+  IFS=$save_IFS
+
+  if test "$found" != yes; then
+    lib=$lib.lib  # nothing matched: fall back to the bare NAME.lib
+  fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=  # semicolon-separated -L directories, filled by func_cl_dashL
+  shared=:  # run as a command by func_cl_dashl; -static changes it to false
+  linker_opts=  # options collected for the linker, emitted after -link
+  for arg  # one pass per original argument; the body works on $1 and $2
+  do
+    if test -n "$eat"; then
+      eat=  # this argument was the previous option's value; only the final shift applies
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1  # $2 is consumed here, so it is skipped on the next pass
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"  # append the cl form at the end of the list
+	      shift  # discard the x placeholder
+	      ;;
+	    *)
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  eat=1
+	  func_cl_dashL "$2"  # nothing is appended; the directory goes into linker_opts
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  shared=false  # stops func_cl_dashl from trying NAME.dll.lib
+	  ;;
+	-Wl,*)
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  set x "$@" "$1"  # any other option is re-appended unchanged
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    shift  # drop the original argument just examined
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"  # linker_opts already begins with a space
+  fi
+  exec "$@" $linker_opts  # unquoted, so it splits into one word per option
+  exit 1  # only reachable if exec returns
+}
+
+eat=  # non-empty means the next argument is an option value to skip
+
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
+  icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac  # any other program name continues with the generic handling below
+
+ofile=  # object file named by -o, when it ends in .o or .obj
+cfile=  # last *.c argument seen on the command line
+
+for arg  # one pass per original argument; the body works on $1 and $2
+do
+  if test -n "$eat"; then
+    eat=  # this argument was the value after -o; only the final shift applies
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2  # remembered, and not re-appended to the command line
+	    ;;
+	  *)
+	    set x "$@" -o "$2"  # not an object: keep -o and its value in the command
+	    shift  # discard the x placeholder
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  shift  # drop the original argument just examined
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file.  Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then  # success means this process now holds the lock
+    break
+  fi
+  sleep 1  # lock directory already exists; retry
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?  # keep the compiler's status for the final exit
+
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"  # rename unless the name already matches
+elif test -f "${cofile}bj"; then  # output was written as NAME.obj rather than NAME.o
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"  # release the lock
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json
new file mode 100644
index 000000000..c59b36a79
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json
@@ -0,0 +1,724 @@
+[
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o HPL_warn.o HPL_abort.o HPL_dlaprnt.o HPL_dlange.o HPL_dlamch.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_dlacpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlacpy.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlatcpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlatcpy.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c"
+    },
+    {
+        "command": "cc -c -o HPL_fprintf.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_fprintf.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c"
+    },
+    {
+        "command": "cc -c -o HPL_warn.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_warn.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c"
+    },
+    {
+        "command": "cc -c -o HPL_abort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_abort.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaprnt.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaprnt.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlange.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlange.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlamch.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlamch.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlamch.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dcopy.o HPL_daxpy.o HPL_dscal.o HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o HPL_dger.o HPL_dgemm.o HPL_dtrsm.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_dcopy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dcopy.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c"
+    },
+    {
+        "command": "cc -c -o HPL_daxpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_daxpy.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c"
+    },
+    {
+        "command": "cc -c -o HPL_dscal.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dscal.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c"
+    },
+    {
+        "command": "cc -c -o HPL_idamax.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_idamax.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c"
+    },
+    {
+        "command": "cc -c -o HPL_dgemv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dgemv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c"
+    },
+    {
+        "command": "cc -c -o HPL_dtrsv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dtrsv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c"
+    },
+    {
+        "command": "cc -c -o HPL_dger.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dger.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c"
+    },
+    {
+        "command": "cc -c -o HPL_dgemm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dgemm.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c"
+    },
+    {
+        "command": "cc -c -o HPL_dtrsm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dtrsm.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_1ring.o HPL_1rinM.o HPL_2ring.o HPL_2rinM.o HPL_blong.o HPL_blonM.o HPL_packL.o HPL_copyL.o HPL_binit.o HPL_bcast.o HPL_bwait.o HPL_send.o HPL_recv.o HPL_sdrv.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_1ring.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_1ring.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c"
+    },
+    {
+        "command": "cc -c -o HPL_1rinM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_1rinM.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c"
+    },
+    {
+        "command": "cc -c -o HPL_2ring.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_2ring.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c"
+    },
+    {
+        "command": "cc -c -o HPL_2rinM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_2rinM.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c"
+    },
+    {
+        "command": "cc -c -o HPL_blong.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_blong.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c"
+    },
+    {
+        "command": "cc -c -o HPL_blonM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_blonM.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c"
+    },
+    {
+        "command": "cc -c -o HPL_packL.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_packL.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c"
+    },
+    {
+        "command": "cc -c -o HPL_copyL.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_copyL.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c"
+    },
+    {
+        "command": "cc -c -o HPL_binit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_binit.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c"
+    },
+    {
+        "command": "cc -c -o HPL_bcast.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_bcast.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c"
+    },
+    {
+        "command": "cc -c -o HPL_bwait.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_bwait.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c"
+    },
+    {
+        "command": "cc -c -o HPL_send.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_send.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c"
+    },
+    {
+        "command": "cc -c -o HPL_recv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_recv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c"
+    },
+    {
+        "command": "cc -c -o HPL_sdrv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_sdrv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_grid_init.o HPL_pnum.o HPL_grid_info.o HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o HPL_all_reduce.o HPL_barrier.o HPL_min.o HPL_max.o HPL_sum.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_grid_init.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_init.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c"
+    },
+    {
+        "command": "cc -c -o HPL_pnum.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pnum.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c"
+    },
+    {
+        "command": "cc -c -o HPL_grid_info.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_info.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c"
+    },
+    {
+        "command": "cc -c -o HPL_grid_exit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_exit.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c"
+    },
+    {
+        "command": "cc -c -o HPL_broadcast.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_broadcast.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c"
+    },
+    {
+        "command": "cc -c -o HPL_reduce.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_reduce.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c"
+    },
+    {
+        "command": "cc -c -o HPL_all_reduce.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_all_reduce.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c"
+    },
+    {
+        "command": "cc -c -o HPL_barrier.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_barrier.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c"
+    },
+    {
+        "command": "cc -c -o HPL_min.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_min.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c"
+    },
+    {
+        "command": "cc -c -o HPL_max.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_max.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c"
+    },
+    {
+        "command": "cc -c -o HPL_sum.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_sum.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o HPL_pdpanel_free.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanel_new.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_new.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanel_init.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_init.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanel_disp.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_disp.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanel_free.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_free.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o HPL_pdlange.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_indxg2l.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2l.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c"
+    },
+    {
+        "command": "cc -c -o HPL_indxg2lp.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2lp.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c"
+    },
+    {
+        "command": "cc -c -o HPL_indxg2p.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2p.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c"
+    },
+    {
+        "command": "cc -c -o HPL_indxl2g.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxl2g.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c"
+    },
+    {
+        "command": "cc -c -o HPL_infog2l.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_infog2l.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c"
+    },
+    {
+        "command": "cc -c -o HPL_numroc.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_numroc.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c"
+    },
+    {
+        "command": "cc -c -o HPL_numrocI.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_numrocI.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp00N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp00N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp10N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp10N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp01N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp01N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp01T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp01T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp02N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp02N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp03N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp03N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp03T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp03T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp04N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp04N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp04T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp04T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp05N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp05N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp05T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp05T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp06N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp06N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlaswp06T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp06T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c"
+    },
+    {
+        "command": "cc -c -o HPL_pwarn.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pwarn.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c"
+    },
+    {
+        "command": "cc -c -o HPL_pabort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pabort.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlaprnt.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaprnt.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlamch.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlamch.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlange.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlange.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o HPL_pdrpanrlT.o HPL_pdfact.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_dlocmax.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlocmax.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlocswpN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlocswpN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c"
+    },
+    {
+        "command": "cc -c -o HPL_dlocswpT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlocswpT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdmxswp.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdmxswp.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpancrN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpancrN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpancrT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpancrT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanllN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanllN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanllT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanllT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanrlN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanrlN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdpanrlT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanrlT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpanllN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanllN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpanllT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanllT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpancrN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpancrN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpancrT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpancrT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpanrlN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanrlN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdrpanrlT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanrlT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdfact.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdfact.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o HPL_spreadT.o HPL_rollN.o HPL_rollT.o HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_pipid.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pipid.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c"
+    },
+    {
+        "command": "cc -c -o HPL_plindx0.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx0.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlaswp00N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp00N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlaswp00T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp00T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c"
+    },
+    {
+        "command": "cc -c -o HPL_perm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_perm.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c"
+    },
+    {
+        "command": "cc -c -o HPL_logsort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_logsort.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c"
+    },
+    {
+        "command": "cc -c -o HPL_plindx10.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx10.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c"
+    },
+    {
+        "command": "cc -c -o HPL_plindx1.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx1.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c"
+    },
+    {
+        "command": "cc -c -o HPL_spreadN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_spreadN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c"
+    },
+    {
+        "command": "cc -c -o HPL_spreadT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_spreadT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c"
+    },
+    {
+        "command": "cc -c -o HPL_rollN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rollN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c"
+    },
+    {
+        "command": "cc -c -o HPL_rollT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rollT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c"
+    },
+    {
+        "command": "cc -c -o HPL_equil.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_equil.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlaswp01N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp01N.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdlaswp01T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp01T.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdupdateNN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateNN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdupdateNT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateNT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdupdateTN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateTN.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdupdateTT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateTT.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdtrsv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdtrsv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdgesv0.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesv0.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdgesvK1.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesvK1.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdgesvK2.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesvK2.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdgesv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesv.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv.c"
+    },
+    {
+        "command": "cc -c -O0 -fPIC -DMPI -o cuda_dgemm.o -I/usr/local/cuda/include -I/opt/intel/oneapi/mpi/2021.10.0/include cuda_dgemm.cpp",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp"
+    },
+    {
+        "command": "c++ -O3 -shared -o libdgemm.so.1.0.1 cuda_dgemm.o -I/opt/intel/oneapi/mpi/2021.10.0/include -lmpi",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda"
+    },
+    {
+        "command": "c++ -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccevCa71.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -shared -o libdgemm.so.1.0.1 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -soname libdgemm.so.1 cuda_dgemm.o --enable-new-dtags -rpath /opt/intel/oneapi/mpi/2021.10.0/lib/release -rpath /opt/intel/oneapi/mpi/2021.10.0/lib -lmpi --enable-new-dtags --push-state --as-needed --pop-state --push-state --as-needed --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dmatgen.o HPL_ladd.o HPL_lmul.o HPL_xjumpm.o HPL_jumpit.o HPL_rand.o HPL_setran.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_dmatgen.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dmatgen.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c"
+    },
+    {
+        "command": "cc -c -o HPL_ladd.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ladd.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c"
+    },
+    {
+        "command": "cc -c -o HPL_lmul.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_lmul.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c"
+    },
+    {
+        "command": "cc -c -o HPL_xjumpm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_xjumpm.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c"
+    },
+    {
+        "command": "cc -c -o HPL_jumpit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_jumpit.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c"
+    },
+    {
+        "command": "cc -c -o HPL_rand.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rand.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c"
+    },
+    {
+        "command": "cc -c -o HPL_setran.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_setran.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_timer.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c"
+    },
+    {
+        "command": "cc -c -o HPL_timer_cputime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer_cputime.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c"
+    },
+    {
+        "command": "cc -c -o HPL_timer_walltime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer_walltime.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pdmatgen.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_pdmatgen.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdmatgen.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c"
+    },
+    {
+        "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_ptimer.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c"
+    },
+    {
+        "command": "cc -c -o HPL_ptimer_cputime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer_cputime.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c"
+    },
+    {
+        "command": "cc -c -o HPL_ptimer_walltime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer_walltime.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c"
+    },
+    {
+        "command": "c++ -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64"
+    },
+    {
+        "command": "cc -c -o HPL_pddriver.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pddriver.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdinfo.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdinfo.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c"
+    },
+    {
+        "command": "cc -c -o HPL_pdtest.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdtest.c",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64",
+        "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c"
+    },
+    {
+        "command": "c++ -cc=gcc -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64"
+    },
+    {
+        "command": "c++ -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi -I/opt/intel/oneapi/mpi/2021.10.0/include -lmpi",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64"
+    },
+    {
+        "command": "c++ -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccANGnqv.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lpthread -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o /usr/lib/gcc/x86_64-linux-gnu/11/crtoffloadbegin.o HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -lmpi --enable-new-dtags -rpath /opt/intel/oneapi/mpi/2021.10.0/lib/release -rpath /opt/intel/oneapi/mpi/2021.10.0/lib -lmpi --enable-new-dtags --push-state --as-needed --pop-state --push-state --as-needed --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o /usr/lib/gcc/x86_64-linux-gnu/11/crtoffloadend.o",
+        "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64"
+    }
+]
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess
new file mode 100755
index 000000000..256083a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess
@@ -0,0 +1,1476 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+#
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`	# $0 with everything up to the last slash removed
+# Text echoed for --help / -h.
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+# Text echoed for --version / -v; embeds $timestamp.
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+# Hint appended to error messages; it begins with a newline so it lands on its own line.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while [ "$#" -gt 0 ]; do
+  case $1 in
+    -t | --time-stamp | --time* )   # report the script's timestamp
+       echo "$timestamp"; exit ;;
+    -v | --version )                # report the version banner
+       echo "$version"; exit ;;
+    -h | --help | --h* )            # report the usage text
+       echo "$usage"; exit ;;
+    -- )                            # explicit end of options: discard it
+       shift; break ;;
+    - )                             # lone dash: stop parsing, leave it in place
+       break ;;
+    -* )                            # any other dash-prefixed word is rejected
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )                             # first non-option word ends parsing
+       break ;;
+  esac
+done
+# No operands are accepted: anything still left in the argument list is an error.
+if [ "$#" -ne 0 ]; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+# Leave with status 1 when signal 1, 2 or 15 arrives.
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > "$dummy.c" ;
+	for c in cc gcc c89 c99 ; do
+	  if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+	     CC_FOR_BUILD="$c"; break ;
+	  fi ;
+	done ;
+	if test x"$CC_FOR_BUILD" = x ; then
+	  CC_FOR_BUILD=no_compiler_found ;
+	fi
+	;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+	PATH=$PATH:/.attbin ; export PATH
+fi
+# Query uname once per field; a field is set to "unknown" when its uname call fails.
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+case "$UNAME_SYSTEM" in
+Linux|GNU|GNU/*)
+	# Default to glibc; this value stands if the compiler probe below
+	# yields nothing (for example when the system lacks a compiler).
+	LIBC=gnu
+	# Preprocess a probe file and eval whichever LIBC= line it leaves behind.
+	eval "$set_cc_for_build"
+	cat <<-EOF > "$dummy.c"
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`"
+
+	# If ldd exists and its --version output has a line starting with "musl", LIBC becomes musl.
+	if command -v ldd >/dev/null && \
+		ldd --version 2>&1 | grep -q ^musl
+	then
+	    LIBC=musl
+	fi
+	;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format.  This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name.  We always set it to "unknown".
+	sysctl="sysctl -n hw.machine_arch"
+	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
+	    "/sbin/$sysctl" 2>/dev/null || \
+	    "/usr/sbin/$sysctl" 2>/dev/null || \
+	    echo unknown)`
+	case "$UNAME_MACHINE_ARCH" in
+	    armeb) machine=armeb-unknown ;;
+	    arm*) machine=arm-unknown ;;
+	    sh3el) machine=shl-unknown ;;
+	    sh3eb) machine=sh-unknown ;;
+	    sh5el) machine=sh5le-unknown ;;
+	    earmv*)
+		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
+		machine="${arch}${endian}"-unknown
+		;;
+	    *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
+	esac
+	# The Operating System including object format, if it has switched
+	# to ELF recently (or will in the future) and ABI.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		os=netbsdelf
+		;;
+	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+		eval "$set_cc_for_build"
+		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+			| grep -q __ELF__
+		then
+		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+		    # Return netbsd for either.  FIX?
+		    os=netbsd
+		else
+		    os=netbsdelf
+		fi
+		;;
+	    *)
+		os=netbsd
+		;;
+	esac
+	# Determine ABI tags.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
+		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
+		;;
+	esac
+	# The OS release
+	# Debian GNU/NetBSD machines have a different userland, and
+	# thus, need a distinct triplet. However, they do not need
+	# kernel version information, so it can be replaced with a
+	# suitable tag, in the style of linux-gnu.
+	case "$UNAME_VERSION" in
+	    Debian*)
+		release='-gnu'
+		;;
+	    *)
+		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
+		;;
+	esac
+	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+	# contains redundant information, the shorter form:
+	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+	echo "$machine-${os}${release}${abi}"
+	exit ;;
+    *:Bitrig:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
+	exit ;;
+    *:OpenBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
+	exit ;;
+    *:LibertyBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
+	exit ;;
+    *:MidnightBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
+	exit ;;
+    *:ekkoBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
+	exit ;;
+    *:SolidBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
+	exit ;;
+    macppc:MirBSD:*:*)
+	echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:MirBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:Sortix:*:*)
+	echo "$UNAME_MACHINE"-unknown-sortix
+	exit ;;
+    *:Redox:*:*)
+	echo "$UNAME_MACHINE"-unknown-redox
+	exit ;;
+    mips:OSF1:*.*)
+        echo mips-dec-osf1
+        exit ;;
+    alpha:OSF1:*:*)
+	case $UNAME_RELEASE in
+	*4.0)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+		;;
+	*5.*)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		;;
+	esac
+	# According to Compaq, /usr/sbin/psrinfo has been available on
+	# OSF/1 and Tru64 systems produced since 1995.  I hope that
+	# covers most systems running today.  This code pipes the CPU
+	# types through head -n 1, so we only detect the type of CPU 0.
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case "$ALPHA_CPU_TYPE" in
+	    "EV4 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "EV4.5 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "LCA4 (21066/21068)")
+		UNAME_MACHINE=alpha ;;
+	    "EV5 (21164)")
+		UNAME_MACHINE=alphaev5 ;;
+	    "EV5.6 (21164A)")
+		UNAME_MACHINE=alphaev56 ;;
+	    "EV5.6 (21164PC)")
+		UNAME_MACHINE=alphapca56 ;;
+	    "EV5.7 (21164PC)")
+		UNAME_MACHINE=alphapca57 ;;
+	    "EV6 (21264)")
+		UNAME_MACHINE=alphaev6 ;;
+	    "EV6.7 (21264A)")
+		UNAME_MACHINE=alphaev67 ;;
+	    "EV6.8CB (21264C)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8AL (21264B)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8CX (21264D)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.9A (21264/EV69A)")
+		UNAME_MACHINE=alphaev69 ;;
+	    "EV7 (21364)")
+		UNAME_MACHINE=alphaev7 ;;
+	    "EV7.9 (21364A)")
+		UNAME_MACHINE=alphaev79 ;;
+	esac
+	# A Pn.n version is a patched version.
+	# A Vn.n version is a released version.
+	# A Tn.n version is a released field test version.
+	# A Xn.n version is an unreleased experimental baselevel.
+	# 1.2 uses "1.2" for uname -r.
+	echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`"
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
+    Amiga*:UNIX_System_V:4.0:*)
+	echo m68k-unknown-sysv4
+	exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-amigaos
+	exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-morphos
+	exit ;;
+    *:OS/390:*:*)
+	echo i370-ibm-openedition
+	exit ;;
+    *:z/VM:*:*)
+	echo s390-ibm-zvmoe
+	exit ;;
+    *:OS400:*:*)
+	echo powerpc-ibm-os400
+	exit ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+	echo arm-acorn-riscix"$UNAME_RELEASE"
+	exit ;;
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
+	echo arm-unknown-riscos
+	exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+	echo hppa1.1-hitachi-hiuxmpp
+	exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+	if test "`(/bin/universe) 2>/dev/null`" = att ; then
+		echo pyramid-pyramid-sysv3
+	else
+		echo pyramid-pyramid-bsd
+	fi
+	exit ;;
+    NILE*:*:*:dcosx)
+	echo pyramid-pyramid-svr4
+	exit ;;
+    DRS?6000:unix:4.0:6*)
+	echo sparc-icl-nx6
+	exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+	case `/usr/bin/uname -p` in
+	    sparc) echo sparc-icl-nx7; exit ;;
+	esac ;;
+    s390x:SunOS:*:*)
+	echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4H:SunOS:5.*:*)
+	echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+	echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux"$UNAME_RELEASE"
+	exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+	eval "$set_cc_for_build"
+	SUN_ARCH=i386
+	# If there is a compiler, see if it is configured for 64-bit objects.
+	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+	# This test works for both compilers.
+	if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		SUN_ARCH=x86_64
+	    fi
+	fi
+	echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:6*:*)
+	# According to config.sub, this is the proper way to canonicalize
+	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+	# it's likely to be more like Solaris than SunOS4.
+	echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:*:*)
+	case "`/usr/bin/arch -k`" in
+	    Series*|S4*)
+		UNAME_RELEASE=`uname -v`
+		;;
+	esac
+	# Japanese Language versions have a version number like `4.1.3-JL'.
+	echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`"
+	exit ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos"$UNAME_RELEASE"
+	exit ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos"$UNAME_RELEASE"
+		;;
+	    sun4)
+		echo sparc-sun-sunos"$UNAME_RELEASE"
+		;;
+	esac
+	exit ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos"$UNAME_RELEASE"
+	exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+	echo m68k-milan-mint"$UNAME_RELEASE"
+	exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+	echo m68k-hades-mint"$UNAME_RELEASE"
+	exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+	echo m68k-unknown-mint"$UNAME_RELEASE"
+	exit ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix"$UNAME_RELEASE"
+	exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
+	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
+	    { echo "$SYSTEM_NAME"; exit; }
+	echo mips-mips-riscos"$UNAME_RELEASE"
+	exit ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit ;;
+    AViiON:dgux:*:*)
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+	then
+	    if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \
+	       [ "$TARGET_BINARY_INTERFACE"x = x ]
+	    then
+		echo m88k-dg-dgux"$UNAME_RELEASE"
+	    else
+		echo m88k-dg-dguxbcs"$UNAME_RELEASE"
+	    fi
+	else
+	    echo i586-dg-dgux"$UNAME_RELEASE"
+	fi
+	exit ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`"
+	exit ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval "$set_cc_for_build"
+		sed 's/^		//' << EOF > "$dummy.c"
+		#include <sys/systemcfg.h>
+
+		main()
+			{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+			}
+EOF
+		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit ;;
+    *:AIX:*:[4567])
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	if [ -x /usr/bin/lslpp ] ; then
+		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
+			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
+	echo romp-ibm-bsd4.4
+	exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+	echo romp-ibm-bsd"$UNAME_RELEASE"   # 4.3 with uname added to
+	exit ;;                             # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	case "$UNAME_MACHINE" in
+	    9000/31?)            HP_ARCH=m68000 ;;
+	    9000/[34]??)         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "$sc_cpu_version" in
+		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "$sc_kernel_bits" in
+			  32) HP_ARCH=hppa2.0n ;;
+			  64) HP_ARCH=hppa2.0w ;;
+			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
+			esac ;;
+		    esac
+		fi
+		if [ "$HP_ARCH" = "" ]; then
+		    eval "$set_cc_for_build"
+		    sed 's/^		//' << EOF > "$dummy.c"
+
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
+
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
+
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
+EOF
+		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	if [ "$HP_ARCH" = hppa2.0w ]
+	then
+	    eval "$set_cc_for_build"
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
+	    then
+		HP_ARCH=hppa2.0w
+	    else
+		HP_ARCH=hppa64
+	    fi
+	fi
+	echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
+	exit ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux"$HPUX_REV"
+	exit ;;
+    3050*:HI-UX:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+	     results, however.  */
+	  if (CPU_IS_PA_RISC (cpu))
+	    {
+	      switch (cpu)
+		{
+		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+		  default: puts ("hppa-hitachi-hiuxwe2"); break;
+		}
+	    }
+	  else if (CPU_IS_HP_MC68K (cpu))
+	    puts ("m68k-hitachi-hiuxwe2");
+	  else puts ("unknown-hitachi-hiuxwe2");
+	  exit (0);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
+		{ echo "$SYSTEM_NAME"; exit; }
+	echo unknown-hitachi-hiuxwe2
+	exit ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
+	echo hppa1.1-hp-bsd
+	exit ;;
+    9000/8??:4.3bsd:*:*)
+	echo hppa1.0-hp-bsd
+	exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+	echo hppa1.0-hp-mpeix
+	exit ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
+	echo hppa1.1-hp-osf
+	exit ;;
+    hp8??:OSF1:*:*)
+	echo hppa1.0-hp-osf
+	exit ;;
+    i*86:OSF1:*:*)
+	if [ -x /usr/sbin/sysversion ] ; then
+	    echo "$UNAME_MACHINE"-unknown-osf1mk
+	else
+	    echo "$UNAME_MACHINE"-unknown-osf1
+	fi
+	exit ;;
+    parisc*:Lites*:*:*)
+	echo hppa1.1-hp-lites
+	exit ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+	echo c1-convex-bsd
+	exit ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+	echo c34-convex-bsd
+	exit ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+	echo c38-convex-bsd
+	exit ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+	echo c4-convex-bsd
+	exit ;;
+    CRAY*Y-MP:*:*:*)
+	echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*[A-Z]90:*:*:*)
+	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
+	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+	      -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*TS:*:*:*)
+	echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*T3E:*:*:*)
+	echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*SV1:*:*:*)
+	echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    *:UNICOS/mp:*:*)
+	echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    5000:UNIX_System_V:4.*:*)
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+	echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
+	exit ;;
+    sparc*:BSD/OS:*:*)
+	echo sparc-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:BSD/OS:*:*)
+	echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:FreeBSD:*:*)
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case "$UNAME_PROCESSOR" in
+	    amd64)
+		UNAME_PROCESSOR=x86_64 ;;
+	    i386)
+		UNAME_PROCESSOR=i586 ;;
+	esac
+	echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    i*:CYGWIN*:*)
+	echo "$UNAME_MACHINE"-pc-cygwin
+	exit ;;
+    *:MINGW64*:*)
+	echo "$UNAME_MACHINE"-pc-mingw64
+	exit ;;
+    *:MINGW*:*)
+	echo "$UNAME_MACHINE"-pc-mingw32
+	exit ;;
+    *:MSYS*:*)
+	echo "$UNAME_MACHINE"-pc-msys
+	exit ;;
+    i*:PW*:*)
+	echo "$UNAME_MACHINE"-pc-pw32
+	exit ;;
+    *:Interix*:*)
+	case "$UNAME_MACHINE" in
+	    x86)
+		echo i586-pc-interix"$UNAME_RELEASE"
+		exit ;;
+	    authenticamd | genuineintel | EM64T)
+		echo x86_64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	    IA64)
+		echo ia64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	esac ;;
+    i*:UWIN*:*)
+	echo "$UNAME_MACHINE"-pc-uwin
+	exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+	echo x86_64-unknown-cygwin
+	exit ;;
+    prep*:SunOS:5.*:*)
+	echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    *:GNU:*:*)
+	# the GNU system
+	echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`"
+	exit ;;
+    *:GNU/*:*:*)
+	# other systems with GNU libc and userland
+	echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
+	exit ;;
+    i*86:Minix:*:*)
+	echo "$UNAME_MACHINE"-pc-minix
+	exit ;;
+    aarch64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    alpha:Linux:*:*)
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac
+	objdump --private-headers /bin/sh | grep -q ld.so.1
+	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arm*:Linux:*:*)
+	eval "$set_cc_for_build"
+	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+	    | grep -q __ARM_EABI__
+	then
+	    echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	else
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
+	    else
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
+	    fi
+	fi
+	exit ;;
+    avr32*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    cris:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    crisv32:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    e2k:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    frv:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    hexagon:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    ia64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    k1om:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m32r*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m68*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#undef CPU
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=${UNAME_MACHINE}el
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=${UNAME_MACHINE}
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
+	test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
+	;;
+    mips64el:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    openrisc*:Linux:*:*)
+	echo or1k-unknown-linux-"$LIBC"
+	exit ;;
+    or32:Linux:*:* | or1k*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    padre:Linux:*:*)
+	echo sparc-unknown-linux-"$LIBC"
+	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-"$LIBC"
+	exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+	# Look for CPU level
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
+	  PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
+	  *)    echo hppa-unknown-linux-"$LIBC" ;;
+	esac
+	exit ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-"$LIBC"
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-"$LIBC"
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-"$LIBC"
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-"$LIBC"
+	exit ;;
+    riscv32:Linux:*:* | riscv64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+	echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
+	exit ;;
+    sh64*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sh*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    tile*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    vax:Linux:*:*)
+	echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
+	exit ;;
+    x86_64:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    xtensa*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:DYNIX/ptx:4*:*)
+	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+	# earlier versions are messed up and put the nodename in both
+	# sysname and nodename.
+	echo i386-sequent-sysv4
+	exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
+	# I just have to hope.  -- rms.
+	# Use sysv4.2uw... so that sysv4* matches it.
+	echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
+	exit ;;
+    i*86:OS/2:*:*)
+	# If we were able to find `uname', then EMX Unix compatibility
+	# is probably installed.
+	echo "$UNAME_MACHINE"-pc-os2-emx
+	exit ;;
+    i*86:XTS-300:*:STOP)
+	echo "$UNAME_MACHINE"-unknown-stop
+	exit ;;
+    i*86:atheos:*:*)
+	echo "$UNAME_MACHINE"-unknown-atheos
+	exit ;;
+    i*86:syllable:*:*)
+	echo "$UNAME_MACHINE"-pc-syllable
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+	echo i386-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    i*86:*DOS:*:*)
+	echo "$UNAME_MACHINE"-pc-msdosdjgpp
+	exit ;;
+    i*86:*:4.*:*)
+	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
+	fi
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6: refine the CPU from `uname -X`.
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac
+	echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
+	exit ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv32
+	fi
+	exit ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configure will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+	  echo i860-unknown-sysv"$UNAME_RELEASE"  # Unknown i860-SVR4
+	fi
+	exit ;;
+    mini*:CTIX:SYS*5:*)
+	# "miniframe"
+	echo m68010-convergent-sysv
+	exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+	echo m68k-convergent-sysv
+	exit ;;
+    M680?0:D-NIX:5.3:*)
+	echo m68k-diab-dnix
+	exit ;;
+    M68*:*:R3V[5678]*:*)
+	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+	OS_REL=''
+	test -r /etc/.relid \
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+	OS_REL='.3'
+	test -r /etc/.relid \
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+	echo m68k-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    mc68030:UNIX_System_V:4.*:*)
+	echo m68k-atari-sysv4
+	exit ;;
+    TSUNAMI:LynxOS:2.*:*)
+	echo sparc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    rs6000:LynxOS:2.*:*)
+	echo rs6000-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+	echo powerpc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    SM[BE]S:UNIX_SV:*:*)
+	echo mips-dde-sysv"$UNAME_RELEASE"
+	exit ;;
+    RM*:ReliantUNIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    RM*:SINIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    *:SINIX-*:*:*)
+	if uname -p 2>/dev/null >/dev/null ; then
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		echo "$UNAME_MACHINE"-sni-sysv4
+	else
+		echo ns32k-sni-sysv
+	fi
+	exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
+    *:UNIX_System_V:4*:FTX*)
+	# From Gerald Hewes <hewes@openmarket.com>.
+	# How about differentiating between stratus architectures? -djm
+	echo hppa1.1-stratus-sysv4
+	exit ;;
+    *:*:*:FTX*)
+	# From seanf@swdc.stratus.com.
+	echo i860-stratus-sysv4
+	exit ;;
+    i*86:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo "$UNAME_MACHINE"-stratus-vos
+	exit ;;
+    *:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo hppa1.1-stratus-vos
+	exit ;;
+    mc68*:A/UX:*:*)
+	echo m68k-apple-aux"$UNAME_RELEASE"
+	exit ;;
+    news*:NEWS-OS:6*:*)
+	echo mips-sony-newsos6
+	exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+	if [ -d /usr/nec ]; then
+		echo mips-nec-sysv"$UNAME_RELEASE"
+	else
+		echo mips-unknown-sysv"$UNAME_RELEASE"
+	fi
+	exit ;;
+    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
+	echo powerpc-be-beos
+	exit ;;
+    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
+	echo powerpc-apple-beos
+	exit ;;
+    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
+	echo i586-pc-beos
+	exit ;;
+    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
+	echo i586-pc-haiku
+	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-ACE:SUPER-UX:*:*)
+	echo sxace-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Rhapsody:*:*)
+	echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Darwin:*:*)
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	eval "$set_cc_for_build"
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
+	    if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_64BIT_ARCH >/dev/null
+		then
+		    case $UNAME_PROCESSOR in
+			i386) UNAME_PROCESSOR=x86_64 ;;
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		    esac
+		fi
+		# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
+		if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_PPC >/dev/null
+		then
+		    UNAME_PROCESSOR=powerpc
+		fi
+	    fi
+	elif test "$UNAME_PROCESSOR" = i386 ; then
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
+	    # that puts up a graphical alert prompting to install
+	    # developer tools.  Any system running Mac OS X 10.7 or
+	    # later (Darwin 11 and later) is required to have a 64-bit
+	    # processor. This is not true of the ARM version of Darwin
+	    # that Apple uses in portable devices.
+	    UNAME_PROCESSOR=x86_64
+	fi
+	echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
+	exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = x86; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
+	exit ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit ;;
+    NEO-*:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSE-*:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSR-*:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSV-*:NONSTOP_KERNEL:*:*)
+	echo nsv-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSX-*:NONSTOP_KERNEL:*:*)
+	echo nsx-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit ;;
+    DS/*:UNIX_System_V:*:*)
+	echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
+	exit ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = 386; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo "$UNAME_MACHINE"-unknown-plan9
+	exit ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit ;;
+    SEI:*:*:SEIUX)
+	echo mips-sei-seiux"$UNAME_RELEASE"
+	exit ;;
+    *:DragonFly:*:*)
+	echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "$UNAME_MACHINE" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`"
+	exit ;;
+    i*86:rdos:*:*)
+	echo "$UNAME_MACHINE"-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo "$UNAME_MACHINE"-pc-aros
+	exit ;;
+    x86_64:VMkernel:*:*)
+	echo "$UNAME_MACHINE"-unknown-esx
+	exit ;;
+    amd64:Isilon\ OneFS:*:*)
+	echo x86_64-unknown-onefs
+	exit ;;
+esac
+
+echo "$0: unable to guess system type" >&2	# reached when the case above fell through without exiting
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM" in
+    mips:Linux | mips64:Linux)
+	# The MIPS Linux arm above exits only if its compiler probe set CPU; explain that.
+	cat >&2 <<EOF
+
+NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
+the system type. Please install a C compiler and try again.
+EOF
+	;;
+esac
+
+cat >&2 <<EOF
+
+This script (version $timestamp), has failed to recognize the
+operating system you are using. If your script is old, overwrite *all*
+copies of config.guess and config.sub with the latest versions from:
+
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+and
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+If $0 has already been updated, send the following data and any
+information you think might be pertinent to config-patches@gnu.org to
+provide the necessary information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = "$UNAME_MACHINE"
+UNAME_RELEASE = "$UNAME_RELEASE"
+UNAME_SYSTEM  = "$UNAME_SYSTEM"
+UNAME_VERSION = "$UNAME_VERSION"
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub
new file mode 100755
index 000000000..9ccf09a7a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub
@@ -0,0 +1,1801 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+
+Canonicalize a configuration name.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line: info options print to stdout; bad options fail on stderr.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )     # Unknown option: stdout is reserved for the canonical name.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo "$1"
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`  # last two '-' fields, or $1 unchanged if no match
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
+  kopensolaris*-gnu* | cloudabi*-eabi* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os  # keep both fields as the OS part
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`  # everything before those two fields
+    ;;
+  android-linux)  # accepted spelling that is rewritten to linux-android
+    os=-linux-android
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown  # prefix plus a literal -unknown
+    ;;
+  *)  # anything else: the OS, if present, is only the last '-' field
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+    if [ "$basic_machine" != "$1" ]  # something was stripped, so $1 contained a dash
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi  # no dash: the whole argument is the machine and the OS is empty
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis | -knuth | -cray | -microblaze*)
+		os=
+		basic_machine=$1
+		;;
+	-bluegene*)
+		os=-cnk
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=
+		basic_machine=$1
+		;;
+	-scout)
+		;;
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos
+		basic_machine=$1
+		;;
+	-chorusrdb)
+		os=-chorusrdb
+		basic_machine=$1
+		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)	# rewrite sco3.2.N as sco3.2vN
+		os=`echo "$os" | sed -e 's/sco3\.2\./sco3.2v/'`
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5v6*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)
+		os=-isc2.2
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
+	-lynx*)
+		os=-lynxos
+		;;
+	-ptx*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'`
+		;;
+	-psos*)
+		os=-psos
+		;;
+	-mint | -mint[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| aarch64 | aarch64_be \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arceb \
+	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+	| avr | avr32 \
+	| ba \
+	| be32 | be64 \
+	| bfin \
+	| c4x | c8051 | clipper \
+	| d10v | d30v | dlx | dsp16xx \
+	| e2k | epiphany \
+	| fido | fr30 | frv | ft32 \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
+	| i370 | i860 | i960 | ia16 | ia64 \
+	| ip2k | iq2000 \
+	| k1om \
+	| le32 | le64 \
+	| lm32 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64octeon | mips64octeonel \
+	| mips64orion | mips64orionel \
+	| mips64r5900 | mips64r5900el \
+	| mips64vr | mips64vrel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa32r6 | mipsisa32r6el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64r6 | mipsisa64r6el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipsr5900 | mipsr5900el \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| moxie \
+	| mt \
+	| msp430 \
+	| nds32 | nds32le | nds32be \
+	| nios | nios2 | nios2eb | nios2el \
+	| ns16k | ns32k \
+	| open8 | or1k | or1knd | or32 \
+	| pdp10 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
+	| pru \
+	| pyramid \
+	| riscv32 | riscv64 \
+	| rl78 | rx \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+	| ubicom32 \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+	| visium \
+	| wasm32 \
+	| x86 | xc16x | xstormy16 | xtensa \
+	| z8k | z80)
+		basic_machine=$basic_machine-unknown
+		;;
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	leon|leon[3-9])
+		basic_machine=sparc-$basic_machine
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65)
+		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| aarch64-* | aarch64_be-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* | avr32-* \
+	| ba-* \
+	| be32-* | be64-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
+	| c8051-* | clipper-* | craynv-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| e2k-* | elxsi-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
+	| i*86-* | i860-* | i960-* | ia16-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| k1om-* \
+	| le32-* | le64-* \
+	| lm32-* \
+	| m32c-* | m32r-* | m32rle-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+	| microblaze-* | microblazeel-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64octeon-* | mips64octeonel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64r5900-* | mips64r5900el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa32r6-* | mipsisa32r6el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64r6-* | mipsisa64r6el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipsr5900-* | mipsr5900el-* \
+	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
+	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
+	| nios-* | nios2-* | nios2eb-* | nios2el-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
+	| or1k*-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+	| pru-* \
+	| pyramid-* \
+	| riscv32-* | riscv64-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
+	| tron-* \
+	| ubicom32-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
+	| visium-* \
+	| wasm32-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
+	| xstormy16-* | xtensa*-* \
+	| ymp-* \
+	| z8k-* | z80-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-pc
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aros)
+		basic_machine=i386-pc
+		os=-aros
+		;;
+	asmjs)
+		basic_machine=asmjs-unknown
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	bluegene*)
+		basic_machine=powerpc-ibm
+		os=-cnk
+		;;
+	c54x-*)
+		basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	cegcc)
+		basic_machine=arm-unknown
+		os=-cegcc
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16 | cr16-*)
+		basic_machine=cr16-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	dicos)
+		basic_machine=i686-pc
+		os=-dicos
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2*)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	e500v[12])
+		basic_machine=powerpc-unknown
+		os=$os"spe"
+		;;
+	e500v[12]-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=$os"spe"
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+	i*86v32)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	leon-*|leon[3-9]-*)
+		basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'`
+		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	microblaze*)
+		basic_machine=microblaze-xilinx
+		;;
+	mingw64)
+		basic_machine=x86_64-pc
+		os=-mingw64
+		;;
+	mingw32)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mips3*-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	moxiebox)
+		basic_machine=moxie-unknown
+		os=-moxiebox
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	ms1-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'`
+		;;
+	msys)
+		basic_machine=i686-pc
+		os=-msys
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next)
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	nsv-tandem)
+		basic_machine=nsv-tandem
+		;;
+	nsx-tandem)
+		basic_machine=nsx-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	openrisc | openrisc-*)
+		basic_machine=or32-unknown
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc | ppcbe)	basic_machine=powerpc-unknown
+		;;
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rdos | rdos64)
+		basic_machine=x86_64-pc
+		os=-rdos
+		;;
+	rdos32)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
+	simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tile*)
+		basic_machine=$basic_machine-unknown
+		os=-linux-gnu
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	x64)
+		basic_machine=x86_64-pc
+		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'`
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+
+# Here we handle the default manufacturer of certain CPU types.  In some
+# cases it is the only manufacturer; in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers: everything from "digital" onward becomes "dec", and from "commodore" onward becomes "cbm".
+case $basic_machine in
+	*-digital*)
+		basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)
+		basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'`
+		;;
+	*)
+		;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems (done only when $os is non-empty).
+
+if [ x"$os" != x"" ]
+then
+case $os in
+	# First match some system type aliases that might get confused
+	# with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-auroraux)
+		os=-auroraux
+		;;
+	-solaris1 | -solaris1.*)
+		os=`echo $os | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# es1800 is here to avoid being matched by es* (a different OS)
+	-es1800*)
+		os=-ose
+		;;
+	# Now accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST end in a * to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+	      | -sym* | -kopensolaris* | -plan9* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* | -aros* | -cloudabi* | -sortix* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \
+	      | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
+	      | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \
+	      | -midnightbsd*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo $os | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -xray | -os68k* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)
+		os=`echo "$os" | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)
+		os=`echo $os | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+	-os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2)
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo $os | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+	-tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-dicos*)
+		os=-dicos
+		;;
+	-pikeos*)
+		# Until real need of OS specific support for
+		# particular features comes up, bare metal
+		# configurations are quite functional.
+		case $basic_machine in
+		    arm*)
+			os=-eabi
+			;;
+		    *)
+			os=-elf
+			;;
+		esac
+		;;
+	-nacl*)
+		;;
+	-ios)
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo $os | sed 's/[^-]*-//'`
+		echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular OS provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+	score-*)
+		os=-elf
+		;;
+	spu-*)
+		os=-elf
+		;;
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	c8051-*)
+		os=-elf
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
+		;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+	mep-*)
+		os=-elf
+		;;
+	mips*-cisco)
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	pru-*)
+		os=-elf
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-ibm)
+		os=-aix
+		;;
+	*-knuth)
+		os=-mmixware
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next)
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)
+		os=-none
+		;;
+esac
+fi
+
+# Here we handle the case where we know the OS and the CPU type, but not the
+# manufacturer: for a *-unknown machine, "unknown" is replaced by the vendor implied by $os (if any).
+vendor=unknown
+case $basic_machine in
+	*-unknown)
+		case $os in
+			-riscix*)
+				vendor=acorn
+				;;
+			-sunos*)
+				vendor=sun
+				;;
+			-cnk*|-aix*)
+				vendor=ibm
+				;;
+			-beos*)
+				vendor=be
+				;;
+			-hpux*)
+				vendor=hp
+				;;
+			-mpeix*)
+				vendor=hp
+				;;
+			-hiux*)
+				vendor=hitachi
+				;;
+			-unos*)
+				vendor=crds
+				;;
+			-dgux*)
+				vendor=dg
+				;;
+			-luna*)
+				vendor=omron
+				;;
+			-genix*)
+				vendor=ns
+				;;
+			-mvs* | -opened*)
+				vendor=ibm
+				;;
+			-os400*)
+				vendor=ibm
+				;;
+			-ptx*)
+				vendor=sequent
+				;;
+			-tpf*)
+				vendor=ibm
+				;;
+			-vxsim* | -vxworks* | -windiss*)
+				vendor=wrs
+				;;
+			-aux*)
+				vendor=apple
+				;;
+			-hms*)
+				vendor=hitachi
+				;;
+			-mpw* | -macos*)
+				vendor=apple
+				;;
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+				vendor=atari
+				;;
+			-vos*)
+				vendor=stratus
+				;;
+		esac
+		basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"`
+		;;
+esac
+
+echo "$basic_machine$os"
+exit
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure
new file mode 100755
index 000000000..ed0b4faa0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure
@@ -0,0 +1,6161 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69 for hpl 2.3.
+#
+# Report bugs to <hpl@icl.utk.edu>.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible: put zsh into sh emulation, otherwise enable posix mode when `set -o' lists it.
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Choose $as_echo and $as_echo_n.  Prefer a ksh shell builtin (print) over an external printf program on Solaris,
+# but without wasting forks for bash or zsh; otherwise try printf, then /usr/ucb/echo, then an expr-based fallback.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right: keep a preset PATH_SEPARATOR; otherwise default to ':' and switch to ';' only if the probes below indicate it.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are and store it in as_myself.  Look in the path if $0 contains no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset BASH_ENV, ENV, MAIL and MAILPATH: we do not need them and they cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances: force LC_ALL and LANGUAGE to C.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH: unset it, provided a trial unset in a subshell succeeds.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# Re-execute under $CONFIG_SHELL when it is set.  Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.  Plain `exit' is used because as_fn_exit is not defined yet at this point.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then # CONFIG_SHELL is empty: vet the current shell, else search for a usable one
+  as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+  as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+  if (eval "$as_required") 2>/dev/null; then :
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  as_found=:
+  case $as_dir in #(
+	 /*)
+	   for as_base in sh bash ksh sh5; do
+	     # Try only shells that exist, to save several forks.
+	     as_shell=$as_dir/$as_base
+	     if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		    { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+		   if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  break 2
+fi
+fi
+	   done;;
+       esac
+  as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+	      { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+      if test "x$CONFIG_SHELL" != x; then :
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x (read from $-) for the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+    if test x$as_have_required = xno; then :
+  $as_echo "$0: This script requires a shell more modern than all"
+  $as_echo "$0: the shells that I found on your system."
+  if test x${ZSH_VERSION+set} = xset ; then
+    $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu
+$0: about your system, including any error possibly output
+$0: before this message. Then install a modern shell, or
+$0: manually run the script under such a shell if you do
+$0: have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR: assign it an empty value first, then unset it.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking, by returning STATUS from this function.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context (errexit is turned off first).
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary; calls as_fn_error if it still does not exist.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;; # prefix ./ so the name no longer starts with '-'
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file (both `test -f' and `test -x' must succeed).
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations: `+=' is used when a probe shows the shell supports it, else VAR=$VAR$VALUE.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+# The $(( )) probe runs in a subshell with stderr discarded; the variant that
+# uses it is defined through eval.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  # Fallback via expr.  An exit status of exactly 1 from expr is accepted as
+  # success; any other non-zero status makes the assignment fail.
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  # A zero STATUS is promoted to 1, so this function never exits with success.
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  # With a LOG_FD (4th argument), also write the message to that descriptor,
+  # tagged with LINENO (3rd argument) unless $as_lineno is already set.
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+# Select helper commands.  Each as_* variable holds the real tool when the
+# probe succeeds and "false" otherwise, so that callers can chain fallbacks
+# with "||".
+
+# expr: must support \(...\) capture and print the captured text (the second
+# probe expects the last three characters of 00001).
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+# basename: must accept "--" and map "/" to "/".
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+# dirname: must accept "--" and map "/" to "/".
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+# Probe how echo handles "-n" and "\c" and record the result:
+#   ECHO_N='-n' when "echo -n x" does not print the "-n" itself;
+#   ECHO_C='\c' when instead "echo 'xy\c'" prints exactly "xy";
+#   ECHO_T=<tab> in the remaining cases.
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+# Choose the command stored in as_ln_s: 'ln -s' when symbolic links work,
+# plain 'ln' when only hard links work, and 'cp -pR' otherwise.  The probe
+# uses scratch names derived from the shell's PID ($$) in the current
+# directory and removes them again afterwards.
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+# Without a writable current directory nothing can be probed: use the copy.
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+# as_mkdir_p is the command string that as_fn_mkdir_p passes to eval; it is
+# "false" when "mkdir -p" does not work, which sends that function to its
+# manual fallback.
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  # Remove a directory literally named "-p", presumably left behind by a
+  # mkdir that took "-p" for an operand.
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+# ("*" becomes "P", lower case becomes upper case, and every other character
+# that is not alphanumeric or "_" becomes "_".)
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+# ("*" and "+" become "p"; every other character that is not alphanumeric or
+# "_" becomes "_".)
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# Unless DJDIR is set, keep a copy of the original stdin on descriptor 7 and
+# read stdin from /dev/null from here on.  Descriptor 6 becomes a copy of
+# stdout; it is redirected to /dev/null later when --silent is given.
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+# Only the first line of the output is kept.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='hpl'
+PACKAGE_TARNAME='hpl'
+PACKAGE_VERSION='2.3'
+PACKAGE_STRING='hpl 2.3'
+PACKAGE_BUGREPORT='hpl@icl.utk.edu'
+PACKAGE_URL=''
+
+ac_unique_file="include/hpl.h"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+EGREP
+GREP
+CPP
+BLAS_LIBS
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__include
+DEPDIR
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+RANLIB
+OBJEXT
+EXEEXT
+CPPFLAGS
+LDFLAGS
+CFLAGS
+ac_ct_CC
+CC
+MPICC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL
+am__quote'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_dependency_tracking
+enable_silent_rules
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+MPICC
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+# ac_init_version holds a command name (false, or ":" once --version is
+# seen) that is executed as the condition of the version report.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+# NONE marks a value the user has not supplied.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+# The single quotes keep the ${...} references literal at this point.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+    ;;
+
+  esac
+done
+
+# An option that still waits for its argument at the end of the command line
+# is an error; the option name is rebuilt from the pending variable name.
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+# Report unknown --enable/--with options as selected by
+# --enable-option-checking: silent for "no", an error for "fatal", and a
+# warning in every other case.
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  # Accepted: values starting with "/", "\" or "$", drive-letter paths, and
+  # NONE or empty for the two *prefix variables only.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+# NOTE(review): $as_myself is set outside this part of the script.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_myself" : 'X\(//\)[^/]' \| \
+	 X"$as_myself" : 'X\(//\)$' \| \
+	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+# Whichever way srcdir was chosen, the marker file must be readable there.
+# For a defaulted srcdir the message names both places that were tried.
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+# Resolve srcdir to an absolute name inside a subshell, so that the cd does
+# not change the directory of the script itself.
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+# Record, for every precious variable, whether it is set and what its value
+# is at this point, in both the ac_env_* and the ac_cv_env_* variables.
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+# Long form only.  The first here-document has an unquoted delimiter, so $0
+# and $ac_default_prefix are expanded and literal backquotes and dollars are
+# backslash-escaped.  The second one uses a quoted delimiter and is printed
+# verbatim.
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures hpl 2.3 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking ...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/hpl]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+_ACEOF
+fi
+
+# Package-specific help, printed for every help mode.  The "Configuration
+# of ..." heading is only printed for the short and recursive modes.  The
+# text is printed verbatim (quoted here-document delimiter), and the exit
+# status of cat is kept in ac_status.
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of hpl 2.3:";;
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-dependency-tracking
+                          do not reject slow dependency extractors
+  --disable-dependency-tracking
+                          speeds up one-time build
+  --enable-silent-rules   less verbose build output (undo: "make V=1")
+  --disable-silent-rules  verbose build output (undo: "make V=0")
+
+Some influential environment variables:
+  MPICC       MPI C compiler command
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  CPP         C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <hpl@icl.utk.edu>.
+_ACEOF
+ac_status=$?
+fi
+
+# Recursive help: visit every directory listed in $ac_subdirs_all and run
+# its configure.gnu or configure with --help=recursive.  Failures are
+# remembered in ac_status.
+# NOTE(review): $ac_subdirs_all is not assigned in this part of the script.
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    # Look for the directory in the build tree first, then in the source
+    # tree; in the second case the script moves there for good.
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+# Derive, from $ac_dir, its suffix below the top build directory and the
+# relative way back up to it.
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+# Compute the matching source directories for the three possible forms of
+# $srcdir.
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Check for guested configure.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?
+    # Return to the starting directory before handling the next entry.
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
+
+# Any kind of help request ends here, with the status collected above.
+test -n "$ac_init_help" && exit $ac_status
+# $ac_init_version is executed as a command (presumably it holds ":" or
+# "false" -- set earlier in the script, outside this section).  When it
+# succeeds, print the version/copyright banner verbatim (quoted here-document,
+# no expansion) and exit.
+if $ac_init_version; then
+  cat <<\_ACEOF
+hpl configure 2.3
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno (used in log messages) unless
+#       as_lineno is already set.
+# Result (passed to as_fn_set_status):
+#   0 when the command in $ac_compile exits 0, conftest.$ac_objext is
+#   non-empty and -- only if $ac_c_werror_flag is non-empty -- conftest.err
+#   is empty; 1 otherwise.
+# Side effects:
+#   Logs the command, its filtered stderr, its exit status and, on failure,
+#   the test program to file descriptor 5.  conftest.err is not removed here.
+ac_fn_c_try_compile ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext
+  # Build the text to log: the command is expanded for the log only when it
+  # contains no double quote, backquote or backslash.
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err
+  ac_status=$?
+  # Strip lines beginning with optional blanks and "+" from the captured
+  # stderr (presumably shell trace output) before logging and judging it.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  # Record the failing test program in the log, each line prefixed with "| ".
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno (used in log messages) unless
+#       as_lineno is already set.
+# Result (passed to as_fn_set_status):
+#   0 when the command in $ac_link exits 0, conftest$ac_exeext is non-empty,
+#   it is executable unless $cross_compiling is "yes", and -- only if
+#   $ac_c_werror_flag is non-empty -- conftest.err is empty; 1 otherwise.
+# Side effects:
+#   Logs the command, its filtered stderr, its exit status and, on failure,
+#   the test program to file descriptor 5.  Removes conftest.dSYM and
+#   conftest_ipa8_conftest.oo; conftest.err is not removed here.
+ac_fn_c_try_link ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
+  # Build the text to log: the command is expanded for the log only when it
+  # contains no double quote, backquote or backslash.
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  # Strip lines beginning with optional blanks and "+" from the captured
+  # stderr (presumably shell trace output) before logging and judging it.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  # Record the failing test program in the log, each line prefixed with "| ".
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno unless already set.
+#   $2  FUNC, the function name to look for.
+#   $3  VAR, the name of the shell variable receiving "yes" or "no".
+# Behavior:
+#   When VAR is already set, its value is reused and "(cached) " is printed.
+#   Otherwise a program that declares FUNC as "char FUNC ()" and calls it is
+#   written to conftest.$ac_ext and handed to ac_fn_c_try_link; VAR becomes
+#   "yes" if linking succeeds and "no" if it fails.
+# Output:
+#   "checking for FUNC... " and the result go to file descriptor 6; the same
+#   information is logged to file descriptor 5.
+ac_fn_c_check_func ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# "${VAR+:} false" runs ":" (success) when VAR is set and "false" otherwise.
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  # The here-document is unquoted, so $2 is replaced by FUNC in the program
+  # text; the program is appended after the current contents of confdefs.h.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+# Clean up the files produced by this test.
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the (possibly cached) value of VAR.
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno (used in log messages) unless
+#       as_lineno is already set.
+# Result (passed to as_fn_set_status):
+#   0 when "$ac_cpp conftest.$ac_ext" exits 0 and -- only if
+#   $ac_c_preproc_warn_flag or $ac_c_werror_flag is non-empty -- conftest.err
+#   is empty; 1 otherwise.
+# Side effects:
+#   Standard output of the preprocessor run is written to conftest.i, its
+#   filtered stderr stays in conftest.err; command, stderr, exit status and,
+#   on failure, the test program are logged to file descriptor 5.
+ac_fn_c_try_cpp ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  # Build the text to log: the command is expanded for the log only when it
+  # contains no double quote, backquote or backslash.
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  # Strip lines beginning with optional blanks and "+" from the captured
+  # stderr (presumably shell trace output) before logging and judging it.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  # Record the failing test program in the log, each line prefixed with "| ".
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno unless already set.
+#   $2  HEADER, the header name placed inside "#include <...>".
+#   $3  VAR, the name of the shell variable holding the result.
+#   $4  INCLUDES, text emitted before the #include in the compile test.
+# Behavior:
+#   When VAR is already set, only its cached value is reported.  Otherwise the
+#   header is tested twice -- compiled with ac_fn_c_try_compile (with
+#   INCLUDES) and preprocessed with ac_fn_c_try_cpp (without INCLUDES) --
+#   warnings are printed when the two results disagree, and VAR is set to the
+#   result of the compile test.
+ac_fn_c_check_header_mongrel ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  # Fast path: VAR already has a value, so just report it.
+  if eval \${$3+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+# The case word is "<compiler result>:<preprocessor result>:<warn flag>".
+# Only the two disagreeing combinations produce warnings; the first one
+# matches only while $ac_c_preproc_warn_flag is empty.
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ------------------------------ ##
+## Report this to hpl@icl.utk.edu ##
+## ------------------------------ ##"
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# VAR takes the compiler's verdict unless it has been set in the meantime.
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno (used in log messages) unless
+#       as_lineno is already set.
+# Result (passed to as_fn_set_status):
+#   0 when both the link command in $ac_link and the subsequent run of
+#   ./conftest$ac_exeext exit 0; otherwise the non-zero exit status of the
+#   step that failed.
+# Side effects:
+#   stderr of both steps, the commands and their exit statuses are logged to
+#   file descriptor 5; removes conftest.dSYM and conftest_ipa8_conftest.oo.
+ac_fn_c_try_run ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  # Step 1: link.  Step 2 (after the "&&"): run the resulting program.
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  ac_retval=0
+else
+  # Log the status and the test program; unlike the other try_* helpers the
+  # return value is the failing exit status itself rather than a fixed 1.
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=$ac_status
+fi
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+#
+# Arguments:
+#   $1  LINENO of the caller; becomes as_lineno unless already set.
+#   $2  HEADER, the header name placed inside "#include <...>".
+#   $3  VAR, the name of the shell variable receiving "yes" or "no".
+#   $4  INCLUDES, text emitted before the #include in the test program.
+# Behavior:
+#   When VAR is already set, its value is reused and "(cached) " is printed.
+#   Otherwise the test program is compiled with ac_fn_c_try_compile and VAR
+#   becomes "yes" on success, "no" on failure.
+# Output:
+#   "checking for HEADER... " and the result go to file descriptor 6; the
+#   same information is logged to file descriptor 5.
+ac_fn_c_check_header_compile ()
+{
+  # Remember the previous as_lineno_stack as an assignment string so it can
+  # be restored with eval at the end of the function.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+# "${VAR+:} false" runs ":" (success) when VAR is set and "false" otherwise.
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+# Clean up the files produced by this test.
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+# Report the (possibly cached) value of VAR.
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  # Restore the saved as_lineno_stack; as_lineno is unset only when the
+  # restored stack is empty.
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    $as_echo "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+#
+# Result: ac_configure_args holds the kept arguments, each wrapped in single
+# quotes and preceded by a blank.  ac_configure_args0 (all kept arguments,
+# filled in pass 1) and ac_configure_args1 (arguments seen so far in pass 2)
+# are scratch variables that are unset afterwards.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  # "for ac_arg" without a word list iterates over the positional parameters.
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      # The argument right after an option matched by "-*" below is kept
+      # without any duplicate check.
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    # Drop this occurrence when the identical argument shows up again
+	    # later in the full list, so that only its last occurrence is kept.
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+	    esac
+	    ;;
+	  # Any other dash option: make sure the argument following it is kept.
+	  -* ) ac_must_keep_next=true ;;
+	esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# Exit handler (trap on 0).  It appends to file descriptor 5, in this order:
+# the *_cv_* cache variables, the variables named in $ac_subst_vars, those
+# named in $ac_subst_files (if any), the contents of confdefs.h (if
+# non-empty), the signal caught (if $ac_signal is not 0) and the exit status.
+# Afterwards it removes core files, conftest*, confdefs*, conf$$* and
+# $ac_clean_files and exits with the original status.
+# The handler body is one single-quoted string, so it is stored verbatim and
+# its variables are expanded only when the trap runs.
+#
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	$as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+# For signals 1, 2, 13 and 15 install a handler that stores the signal number
+# in ac_signal and calls as_fn_exit 1.  The number is spliced into the handler
+# text when the trap is installed, not when it fires.
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+# 0 means that no signal has been caught (see the test in the exit handler).
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+# Up to two candidates are considered; "NONE" marks an unused slot.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  # We do not want a PATH search for config.site.
+  # A name that starts with "-" or contains no slash gets "./" prepended.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;
+  esac
+# Without CONFIG_SITE, look in share/ and etc/ below $prefix, or below
+# $ac_default_prefix when prefix is still NONE.
+elif test "x$prefix" != xNONE; then
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  # Only readable files other than /dev/null are loaded.
+  if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    # Copy the script into the log (prefixed with "| "), then source it in the
+    # current shell; if sourcing fails, report the error via as_fn_error.
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+done
+
+# Load the results cache named by $cache_file when it is readable; otherwise
+# create it as an empty file.
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special files
+  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    # Absolute names are sourced as given; any other name gets "./" prepended
+    # before it is passed to the "." command.
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+# For every name in $ac_precious_vars the cached state (ac_cv_env_<var>_set
+# and ac_cv_env_<var>_value) is compared with the current one
+# (ac_env_<var>_set and ac_env_<var>_value).  Any mismatch other than a pure
+# whitespace difference marks the cache as corrupted.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  # The case word is "<old set flag>,<new set flag>".
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	# differences in whitespace do not lead to failure.
+	# The unquoted expansions let the shell collapse runs of whitespace
+	# before the two values are compared again.
+	ac_old_val_w=`echo x $ac_old_val`
+	ac_new_val_w=`echo x $ac_new_val`
+	if test "$ac_old_val_w" != "$ac_new_val_w"; then
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	  ac_cache_corrupted=:
+	else
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+	  # Keep using the value recorded in the cache.
+	  eval $ac_var=\$ac_old_val
+	fi
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  # A currently set variable is appended to ac_configure_args as a quoted
+  # VAR=value word unless that exact word is already present.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+# $ac_cache_corrupted is either "false" or ":" and is executed as a command.
+if $ac_cache_corrupted; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+# Select C as the current test language.  The command templates are
+# single-quoted, so their variables are expanded only when the ac_fn_c_*
+# helpers eval them.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# Register include/hplconfig.h in the list of configuration headers.
+ac_config_headers="$ac_config_headers include/hplconfig.h"
+
+
+# Locate the auxiliary directory: the first of $srcdir, $srcdir/.. and
+# $srcdir/../.. that contains install-sh, install.sh or shtool.  The matching
+# install command is stored in ac_install_sh.
+ac_aux_dir=
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+  if test -f "$ac_dir/install-sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
+done
+# None of the three directories qualified: report the error via as_fn_error.
+if test -z "$ac_aux_dir"; then
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
+
+
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+
+
+  # MPI compiler selection.  The "wanted" flag is set to yes right here, so
+  # the branch below is always taken.
+  _ax_prog_cc_mpi_mpi_wanted=yes
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    # $MPICC is used only while CC is empty; otherwise (or when MPICC is empty
+    # too) a list of compiler names is searched for in PATH.
+    if test -z "$CC" && test -n "$MPICC"; then
+      CC="$MPICC"
+    else
+      # First try every candidate with $ac_tool_prefix prepended.
+      if test -n "$ac_tool_prefix"; then
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+# A cached ac_cv_prog_CC is reused; a non-empty CC takes precedence over the
+# PATH search.
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+# CC is still empty: search for the same names without the tool prefix.  The
+# result is kept in ac_ct_CC / ac_cv_prog_ac_ct_CC.
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    # When cross compiling, warn about the unprefixed tool -- but only while
+    # ac_tool_warned is still empty; it is set to yes right after the warning.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+    fi
+  fi
+  # Set the C language variables and command templates again (same values as
+  # at the start of the main body).
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+# Look for ${ac_tool_prefix}gcc.  A cached ac_cv_prog_CC or a non-empty CC
+# is used as is and no PATH search takes place.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# ac_cv_prog_CC is still empty: look for plain "gcc".  The result is kept in
+# ac_ct_CC / ac_cv_prog_ac_ct_CC.  Otherwise CC is taken from ac_cv_prog_CC.
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    # When cross compiling, warn about the unprefixed tool -- but only while
+    # ac_tool_warned is still empty; it is set to yes right after the warning.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+# Still no compiler: when a tool prefix is set, look for ${ac_tool_prefix}cc
+# in PATH.  A cached ac_cv_prog_CC is reused without searching.
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+# Walk PATH (an empty component means ".") and try every suffix listed in
+# $ac_executable_extensions, starting with no suffix at all.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then  # CC still empty: search PATH for plain cc, refusing /usr/ucb/cc
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no  # becomes yes if /usr/ucb/cc is encountered during the scan
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue  # skip this candidate and keep scanning
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then  # ac_cv_prog_CC is non-empty, i.e. some other cc was accepted
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"  # full path plus any remaining words
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then  # CC still empty: last resort is cl.exe, prefixed first and then plain
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break  # stop iterating over program names once one is found
+  done
+fi
+if test -z "$CC"; then  # prefixed lookup found nothing (or no prefix): try the plain name
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then  # nothing found: CC stays empty
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)  # cross_compiling is yes and ac_tool_warned is still empty
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5  # CC is empty after every probe above: report and abort
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2  # first word of $ac_compile (position 1 is the X placeholder)
+for ac_option in --version -v -V -qversion; do  # try each version flag in turn; output goes to fd 5
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err  # run in a subshell, stderr captured in conftest.err
+  ac_status=$?
+  if test -s conftest.err; then  # stderr was non-empty
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5  # copy the truncated stderr to fd 5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files  # remember the current list; it is extended on the next line
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"  # add the default linker output names
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and give us a first guess
+# at exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`  # $ac_link with its -o conftest... argument removed
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;  # names with these suffixes are left out of the removal list
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles  # remove stale candidates before linking
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+  test -f "$ac_file" || continue  # skip candidates that do not exist as regular files
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+	;;
+    [ab].out )
+	# We found the default executable, but exeext='' is most
+	# certainly right.
+	break;;
+    *.* )
+	if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+	then :; else
+	   ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`  # everything from the first dot onwards
+	fi
+	# We set ac_cv_exeext here because the later test for it is not
+	# safe: cross compilers may not add the suffix if given an `-o'
+	# argument, so we may need to know it at that point already.
+	# Even if this section looks crufty: it has the advantage of
+	# actually working.
+	break;;
+    * )
+	break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=  # a value of no is normalised to empty
+
+else
+  ac_file=''  # link command failed: no output file
+fi
+if test -z "$ac_file"; then :  # ac_file is empty: report the failing program and abort with status 77
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save  # restore the list saved before this check
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue  # skip names that do not exist as regular files
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	  break;;
+    * ) break;;  # a file without any dot: ac_cv_exeext is left as it was
+  esac
+done
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT  # ac_exeext is the name used inside the ac_link command string
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"  # the test program above writes conftest.out
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then  # the link-and-run test is skipped when cross_compiling is already yes
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no  # the freshly linked program ran and exited with status 0
+  else
+    if test "$cross_compiling" = maybe; then
+	cross_compiling=yes  # could not run it and the setting was maybe: treat as cross compiling
+    else
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save  # drop conftest.out from the clean list again
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :  # cache variable is already set: skip the compile test
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj  # remove stale objects so that only fresh output is detected
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`  # the text after the last dot of the file name
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT  # ac_objext is the name used by the later rm -f conftest.$ac_objext cleanups
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :  # cache variable is already set: skip the compile test
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # presumably compiles conftest.$ac_ext; the choke line is only present when __GNUC__ is undefined
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=  # GCC is left empty, not set to no, for non-GNU compilers
+fi
+ac_test_CFLAGS=${CFLAGS+set}  # becomes set when CFLAGS is already defined, even if empty
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :  # cache variable is already set: skip the compile tests
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes  # first attempt: -g with ac_c_werror_flag forced to yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+else
+  CFLAGS=""  # second attempt: same program with empty CFLAGS
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  ac_c_werror_flag=$ac_save_c_werror_flag  # third attempt: -g again with the original werror setting
+	 CFLAGS="-g"
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then  # CFLAGS was defined on entry: restore it unchanged
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :  # cache variable is already set: skip the compile tests
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no  # stays no unless one of the candidate flags below compiles the test
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"  # try the compiler with this candidate flag appended
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break  # stop at the first candidate that worked
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC  # restore CC; the flag is appended for real in the case statement below
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)  # the empty candidate worked: no extra flag is required
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)  # no candidate worked
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"  # append the required flag to CC
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c  # reset the C test settings used by later checks
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c  # C test settings for the check below; single quotes defer expansion until eval time
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
+$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
+if ${am_cv_prog_cc_c_o+:} false; then :  # cache variable is already set: skip the compile test
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes  # assume success; cleared on the first failing attempt
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+$as_echo "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"  # prefix CC with the compile script from $am_aux_dir
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+# Check for compiler
+# Needs to be split off into an extra macro to ensure right expansion
+# order.
+
+
+if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then :  # MPI explicitly not wanted: skip the link and header probes
+   _ax_prog_cc_mpi_mpi_found=no
+else
+
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+    # test whether MPI_Init is available
+    # We do not use AC_SEARCH_LIBS here, as it caches its outcome and
+    # thus disallows corresponding calls in the other AX_PROG_*_MPI
+    # macros.
+    for lib in NONE mpi mpich; do  # NONE means no extra library; then -lmpi, then -lmpich
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5
+$as_echo_n "checking for function MPI_Init... " >&6; }
+      else
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5
+$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; }
+        LIBS="-l$lib $LIBS"
+      fi
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+int
+main ()
+{
+return MPI_Init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+   _ax_prog_cc_mpi_mpi_found=yes
+else
+   _ax_prog_cc_mpi_mpi_found=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5
+$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; }
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break;  # leaves LIBS as modified, so a successful -l$lib stays in LIBS
+      fi
+      LIBS=$save_LIBS  # this candidate failed: undo the -l addition before the next one
+    done
+
+    # Check for header
+    if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5
+$as_echo_n "checking for mpi.h... " >&6; }
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <mpi.h>
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+         _ax_prog_cc_mpi_mpi_found=no  # NOTE(review): LIBS is not restored here, so a -l$lib added above stays in LIBS; confirm intended
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :  # MPI_Init linked and mpi.h compiled
+
+
+$as_echo "#define HAVE_MPI 1" >>confdefs.h
+
+        :
+
+else
+
+
+        :  # no action when MPI was not found
+
+fi
+
+
+
+if test -n "$ac_tool_prefix"; then  # a tool prefix is set: look for the prefixed ranlib first
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_RANLIB+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then  # ac_cv_prog_RANLIB is empty: search PATH for plain ranlib
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":"  # no ranlib found: RANLIB becomes the shell no-op command
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)  # cross_compiling is yes and ac_tool_warned is still empty
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+
+# Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has completely different semantics
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then  # a non-empty INSTALL is used as is and skips the search
+if ${ac_cv_path_install+:} false; then :  # cache variable is already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+  ./ | .// | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+	  if test $ac_prog = install &&
+	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # AIX install.  It has an incompatible calling convention.
+	    :
+	  elif test $ac_prog = install &&
+	    grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # program-specific install script used by HP pwplus--don't use.
+	    :
+	  else
+	    rm -rf conftest.one conftest.two conftest.dir  # functional test: install two files into a directory
+	    echo one > conftest.one
+	    echo two > conftest.two
+	    mkdir conftest.dir
+	    if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+	      test -s conftest.one && test -s conftest.two &&
+	      test -s conftest.dir/conftest.one &&
+	      test -s conftest.dir/conftest.two
+	    then
+	      ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+	      break 3  # leave the extension, program-name and PATH loops
+	    fi
+	  fi
+	fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test "${ac_cv_path_install+set}" = set; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'  # single quotes keep ${INSTALL} literal in the stored value
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'  # data files default to mode 644
+
+
+am__api_version='1.16'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+$as_echo_n "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \	]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (e.g., FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+	&& test "$*" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=  # stays empty when no background sleep is started
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then  # conftest.file holds the "slept: yes/no" marker written by the sanity loop
+  ( sleep 1 ) &  # run the one-second delay in the background
+  am_sleep_pid=$!  # PID of that background subshell
+fi
+
+rm -f conftest.file
+
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"  # prepend a sed command that inserts the prefix at the start of a name
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"  # prepend a sed command that adds the suffix at the end of a name
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'  # two sed commands: double each \ or $, then drop a trailing ";s,x,x,"
+program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
+
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
+$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However, 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
+$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if ${ac_cv_path_mkdir+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in mkdir gmkdir; do
+	 for ac_exec_ext in '' $ac_executable_extensions; do
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
+	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+	     'mkdir (GNU coreutils) '* | \
+	     'mkdir (coreutils) '* | \
+	     'mkdir (fileutils) '4.1*)
+	       ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+	       break 3;;
+	   esac
+	 done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  test -d ./--version && rmdir ./--version
+  if test "${ac_cv_path_mkdir+set}" = set; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+$as_echo "$MKDIR_P" >&6; }
+
+for ac_prog in gawk mawk nawk awk
+do  # candidates are tried in the order listed; the loop ends at the first one found
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :  # expands to ": false" (success) when the cache variable is already set
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"  # records the bare program name, not the full path that matched
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break  # stop trying further candidates once AWK is non-empty
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+	@echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  SET_MAKE=
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+rm -rf .tst 2>/dev/null  # start from a clean slate; error output is discarded
+mkdir .tst 2>/dev/null  # probe: can a directory whose name starts with a dot be created here?
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_  # fallback prefix when the dot-named directory does not exist after mkdir
+fi
+rmdir .tst 2>/dev/null  # remove the probe directory again
+
+DEPDIR="${am__leading_dot}deps"  # therefore ".deps" or "_deps"
+
+ac_config_commands="$ac_config_commands depfiles"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5
+$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; }
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
+   (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); }
+  case $?:`cat confinc.out 2>/dev/null` in #(
+  '0:this is the am__doit target') :
+    case $s in #(
+  BSD) :
+    am__include='.include' am__quote='"' ;; #(
+  *) :
+    am__include='include' am__quote='' ;;
+esac ;; #(
+  *) :
+     ;;
+esac
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
+$as_echo "${_am_result}" >&6; }
+
+# Check whether --enable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then :
+  enableval=$enable_dependency_tracking;
+fi
+
+if test "x$enable_dependency_tracking" != xno; then  # unset counts as enabled; only the value "no" disables
+  am_depcomp="$ac_aux_dir/depcomp"  # path later tested with "test -f" and copied by the dependency-style probe
+  AMDEPBACKSLASH='\'  # a single literal backslash
+  am__nodep='_no'
+fi
+ if test "x$enable_dependency_tracking" != xno; then
+  AMDEP_TRUE=  # enabled: TRUE is empty, FALSE is '#'
+  AMDEP_FALSE='#'
+else
+  AMDEP_TRUE='#'  # disabled: the pair is swapped
+  AMDEP_FALSE=
+fi
+
+
+# Check whether --enable-silent-rules was given.
+if test "${enable_silent_rules+set}" = set; then :
+  enableval=$enable_silent_rules;
+fi
+
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;  # enable_silent_rules=yes selects verbosity 0
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;  # unset or any other value behaves like "no"
+esac
+am_make=${MAKE-make}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5
+$as_echo_n "checking whether $am_make supports nested variables... " >&6; }
+if ${am_cv_make_support_nested_variables+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if $as_echo 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5
+$as_echo "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+if test "`cd $srcdir && pwd`" != "`pwd`"; then  # true when the source directory is not the current (build) directory
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'  # single quotes keep $(srcdir) literal; note the leading space
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then  # a config.status in the source tree means configure already ran there
+    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then  # a non-empty CYGPATH_W from the caller is kept as is
+  if (cygpath --version) >/dev/null 2>/dev/null; then  # run in a subshell with all output discarded; only the exit status matters
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo  # cygpath could not be run: fall back to plain echo
+  fi
+fi
+
+
+# Define the identity of the package.
+ PACKAGE='hpl'  # appended to confdefs.h below as the PACKAGE macro
+ VERSION='2.3'  # appended to confdefs.h below as the VERSION macro
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE "$PACKAGE"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define VERSION "$VERSION"
+_ACEOF
+
+# Some tools Automake needs.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}  # ${VAR-default}: the default is used only when ACLOCAL is unset
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}  # same pattern, without a version suffix
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}  # versioned like aclocal
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+mkdir_p='$(MKDIR_P)'  # stored as a literal make-variable reference, not expanded here
+
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'  # stored verbatim, including the doubled $
+
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'  # two assignments on one line; both values are stored unexpanded
+
+
+
+
+
+depcc="$CC"   am_compiler_list=
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
+$as_echo_n "checking dependency style of $depcc... " >&6; }
+if ${am_cv_CC_dependencies_compiler_type+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnostic changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5
+$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else  # all three operand-less invocations must succeed, otherwise the notice below goes to stderr
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then  # explicit opt-in: print a note and carry on
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
+
+
+
+
+
+
+
+
+hpl_blas_ok=no  # flips to "yes" as soon as one candidate below links
+
+
+current_LIBS="$LIBS"  # saved so LIBS can be restored after every link attempt
+
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+for hpl_i in 1 2 3 4 5 6 7 8 9 10;
+do  # one pass per nameN/routN/libsN triple written to hplvars.txt above, in numeric order
+if test  x$hpl_blas_ok = xno; then  # once a candidate has linked, the remaining passes do nothing
+  name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`"  # display name; the "=" in the pattern keeps entry 1 from also matching entry 10
+  rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`"  # symbol the test program references
+  libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`"  # linker flags for this candidate
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5
+$as_echo_n "checking for $rout in $name... " >&6; }
+
+  LIBS="$libs"  # the probe links with this candidate's flags only; the saved caller LIBS is not included
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $rout ();
+int
+main ()
+{
+return $rout ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  hpl_blas_ok=yes;BLAS_LIBS="$libs"  # remember the flags that linked
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  LIBS="$current_LIBS"  # restore the caller's LIBS whether or not the link succeeded
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+done
+rm hplvars.txt  # the candidate table is not read after the loop
+
+if test  x$hpl_blas_ok = xno; then  # runs only while no BLAS has been accepted yet
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5
+$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5  # NOTE(review): second "checking" message is emitted before the first one gets its result
+$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; }
+if ${ac_cv_lib_openblas_dgemm_+:} false; then :  # cache variable already set: skip the link test
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lopenblas  $LIBS"  # here the existing LIBS stays on the link line, after -lopenblas
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dgemm_ ();
+int
+main ()
+{
+return dgemm_ ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_openblas_dgemm_=yes
+else
+  ac_cv_lib_openblas_dgemm_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS  # put LIBS back to its value from before the test
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5
+$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; }
+if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then :
+  hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"  # accepted with -lopenblas alone
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+
+
+
+# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$hpl_blas_ok" = xyes; then
+        LIBS="$BLAS_LIBS $LIBS"  # put the accepted BLAS flags in front of the existing LIBS
+        :
+else
+        hpl_blas_ok=no  # normalises any value other than "yes" to "no"
+        as_fn_error $? "BLAS not found" "$LINENO" 5  # as_fn_error is defined outside this view; presumably it aborts configure
+fi
+
+
+
+
+for ac_func in dgemm_  # single-element list; the body names dgemm_ directly rather than using $ac_func
+do :
+  ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_"  # helper defined outside this view; its result is read from ac_cv_func_dgemm_ below
+if test "x$ac_cv_func_dgemm_" = xyes; then :  # on "yes", HAVE_DGEMM_ is appended to confdefs.h
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DGEMM_ 1
+_ACEOF
+
+fi
+done
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if ${ac_cv_prog_CPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :  # cache hit: with the variable set this runs ": false", which succeeds
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1  # first choice: the grep found earlier, if it accepts -E with alternation
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then  # nothing supplied via EGREP: search for a standalone egrep program
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH element means the current directory
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;  # GNU egrep is accepted without further scoring
+*)
+  ac_count=0  # score = number of doublings of the test line this egrep copes with
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"  # double the test input on every pass
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break  # stop scoring once the candidate fails
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break  # ... or does not reproduce its input exactly
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3  # leave all three enclosing for-loops as soon as a GNU egrep was chosen
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP  # a non-empty EGREP is taken as-is, without any testing
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+		   (('a' <= (c) && (c) <= 'i') \
+		     || ('j' <= (c) && (c) <= 'r') \
+		     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+		  inttypes.h stdint.h unistd.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`  # name of the per-header result variable, passed through $as_tr_sh
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :  # eval dereferences the variable whose name is in as_ac_Header
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in mpi.h  # one-element loop: only mpi.h is probed here
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default"
+if test "x$ac_cv_header_mpi_h" = xyes; then :  # on success HAVE_MPI_H is appended to confdefs.h; otherwise nothing is recorded
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_MPI_H 1
+_ACEOF
+
+fi
+
+done
+
+
+ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile"
+
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    if test "x$cache_file" != "x/dev/null"; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+	cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+	  mv -f confcache "$cache_file"$$ &&
+	  mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+	  mv -f confcache "$cache_file" ;;
+	esac
+      fi
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix  # NONE is the "not given" marker tested here
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'  # single quotes keep the text ${prefix} unexpanded
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue  # the leading ':' is a placeholder word and is skipped
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"  # \$ keeps ${LIBOBJDIR} and $U literal in the stored value
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5
+$as_echo_n "checking that generated files are newer than configure... " >&6; }
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null  # block until the process recorded in am_sleep_pid has ended
+   fi
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
+$as_echo "done" >&6; }
+if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then  # both halves empty: the conditional was never initialised
+  as_fn_error $? "conditional \"AMDEP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then  # same check for am__fastdepCC
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+ if test -n "$EXEEXT"; then  # exactly one of the _TRUE/_FALSE pair is set to '#', the other stays empty
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then  # probe only when PATH_SEPARATOR is not already set
+  PATH_SEPARATOR=:  # default, kept unless the probe below says otherwise
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {  # subshells, so the trial PATH values do not leak
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'  # reached when sh ran with the ';' form of PATH but not with the ':' form
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;  # $0 already contains a '/' or '\': use it unchanged
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH element means the current directory
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break  # first readable match wins
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then  # final sanity check: the resolved name must be a regular file
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1  # a STATUS of 0 is replaced by 1
+  if test "$4"; then  # LOG_FD given: write a line-numbered copy of the message there first
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2  # the message always goes to stderr as well
+  as_fn_exit $as_status  # terminates the script; control does not return to the caller
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1  # the function's return value is what the caller then sees in $?
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e  # switch errexit off so the non-zero status set on the next line cannot end the shell early
+  as_fn_set_status $1  # load STATUS into $? before exiting
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}  # first assign the empty string to VAR, then unset it
+}
+as_unset=as_fn_unset  # variable holding the function name, for callers that invoke $as_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :  # subshell probe: does VAR+=VALUE work in this shell?
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2  # fallback: after eval this reads VAR=$VAR$2, i.e. old value followed by VALUE
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :  # subshell probe: is $(( )) arithmetic available?
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`  # expr exits 1 for a zero/null result; only that case is mapped back to success
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr  # expr handles ':' matching and returns the \(...\) capture as text ("001", not 1)
+else
+  as_expr=false  # stand-in that always fails, so "$as_expr ... ||" chains fall through to their next alternative
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename  # basename accepts "--" and prints "/" for "/"
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname  # dirname accepts "--" and prints "/" for "/"
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file  # clear leftovers; every scratch name embeds the shell PID ($$)
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then  # can a scratch file be created here at all?
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln  # no symlinks, but hard links work
+  else
+    as_ln_s='cp -pR'  # neither kind of link works: copy instead
+  fi
+else
+  as_ln_s='cp -pR'  # scratch file could not be written, so nothing was tested
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;  # prefix ./ so a leading '-' is not taken as an option
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {  # fast paths: already present, or the as_mkdir_p command succeeds
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"  # prepend, so the list ends up ordered parent-first
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break  # stop climbing at the first ancestor that already exists
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"  # one mkdir call creates all collected directories
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'  # stored unexpanded; as_fn_mkdir_p runs it through eval
+else
+  test -d ./-p && rmdir ./-p  # remove a directory literally named -p if the probe left one behind
+  as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"  # -f rejects directories, which would otherwise pass -x
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p  # variable holding the function name, for callers that invoke $as_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+      --header=FILE[:TEMPLATE]
+                   instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration commands:
+$config_commands
+
+Report bugs to <hpl@icl.utk.edu>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+hpl config.status 2.3
+configured by $0, generated by GNU Autoconf 2.69,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=?*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:  # the value came attached with '=', so there is no extra word to consume
+    ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift  # options that take a value run $ac_shift to consume the following word
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    $as_echo "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"  # stored single-quoted; embedded quotes were escaped just above
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_HEADERS " '$ac_optarg'"  # note: unlike --file, an empty argument is not rejected here
+    ac_need_defaults=false;;
+  --he | --h)
+    # Conflict between --help and --header
+    as_fn_error $? "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"  # any non-option word is collected as a target
+     ac_need_defaults=false ;;
+
+  esac
+  shift  # consume the option word itself
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;;
+    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+    "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;;
+
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp= ac_tmp=
+  trap 'exit_status=$?
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=[	 ]*/{
+h
+s///
+s/^/:/
+s/[	 ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[	 ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[	 ]*#[	 ]*define[	 ][	 ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  for (key in D) D_is_set[key] = 1
+  FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  prefix = substr(line, 1, index(line, defundef) - 1)
+  if (D_is_set[macro]) {
+    # Preserve the white space surrounding the "#".
+    print prefix "define", macro P[macro] D[macro]
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", prefix defundef, macro, "*/"
+      next
+    }
+  }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$ac_tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+	  $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+	`' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&2;}
+
+  rm -f "$ac_tmp/stdin"
+  case $ac_file in
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+  :H)
+  #
+  # CONFIG_HEADER
+  #
+  if test x"$ac_file" != x-; then
+    {
+      $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+$as_echo "$as_me: $ac_file is unchanged" >&6;}
+    else
+      rm -f "$ac_file"
+      mv "$ac_tmp/config.h" "$ac_file" \
+	|| as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    fi
+  else
+    $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
+  fi
+# Compute "$ac_file"'s index in $config_headers.
+_am_arg="$ac_file"
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
+$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$_am_arg" : 'X\(//\)[^/]' \| \
+	 X"$_am_arg" : 'X\(//\)$' \| \
+	 X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$_am_arg" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`/stamp-h$_am_stamp_count
+ ;;
+
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  case $CONFIG_FILES in #(
+  *\'*) :
+    eval set x "$CONFIG_FILES" ;; #(
+  *) :
+    set x $CONFIG_FILES ;; #(
+  *) :
+     ;;
+esac
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know have understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`$as_dirname -- "$am_mf" ||
+$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$am_mf" : 'X\(//\)[^/]' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$am_mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    am_filepart=`$as_basename -- "$am_mf" ||
+$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$am_mf" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    { echo "$as_me:$LINENO: cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles" >&5
+   (cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+  { am_dirpart=; unset am_dirpart;}
+  { am_filepart=; unset am_filepart;}
+  { am_mf=; unset am_mf;}
+  { am_rc=; unset am_rc;}
+  rm -f conftest-deps.mk
+}
+ ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac
new file mode 100644
index 000000000..eb91dc590
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac
@@ -0,0 +1,34 @@
+AC_PREREQ([2.69])
+
+AC_INIT([hpl], [2.3], [hpl@icl.utk.edu])
+AC_CONFIG_SRCDIR([include/hpl.h])
+AC_CONFIG_HEADERS([include/hplconfig.h])
+
+AX_PROG_CC_MPI
+
+AC_PROG_RANLIB
+
+AC_PROG_INSTALL
+
+AM_INIT_AUTOMAKE([subdir-objects])
+
+AM_PROG_CC_C_O
+
+dnl BLAS detection.  The AX_BLAS call below is kept disabled because
+dnl AX_BLAS requires a Fortran compiler and detects Fortran libraries in $FLIBS
+dnl
+dnl AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS")
+dnl Each macro argument is quoted so it is only expanded inside the macro.
+
+HPL_BLAS([LIBS="$BLAS_LIBS $LIBS"], [AC_MSG_ERROR([BLAS not found])])
+
+dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ...
+dnl FIXME: test for GSL CBLAS
+
+AC_CHECK_FUNCS([dgemm_])
+
+AC_CHECK_HEADERS([mpi.h])
+
+AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile])
+
+AC_OUTPUT
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp
new file mode 100755
index 000000000..65cbf7093
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp
@@ -0,0 +1,791 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+  '')
+    echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+    exit 1;
+    ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by 'PROGRAMS ARGS'.
+  object      Object file output by 'PROGRAMS ARGS'.
+  DEPDIR      directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+  libtool     Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit $?
+    ;;
+esac
+
+# Store the directory component of the path given as $1 in the global
+# variable '$dir'.  The result is either empty (path has no '/') or ends
+# with a '/' character.  This is deliberate.
+set_dir_from ()
+{
+  case $1 in
+    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
+      *) dir=;;
+  esac
+}
+
+# Strip the directory part and the last '.suffix' from the path given as
+# $1, and save the result in the global variable '$base'.
+set_base_from ()
+{
+  base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
+}
+
+# If no dependency file was actually created by the compiler invocation,
+# write a placeholder "$depfile" containing only '#dummy', to avoid errors
+# with the Makefile "include basename.Plo" scheme.
+make_dummy_depfile ()
+{
+  echo "#dummy" > "$depfile"
+}
+
+# Factor out some common post-processing of the generated depfile.  Reads
+# the globals '$tmpdepfile', '$object', '$depfile', '$lower' and '$tab'.
+aix_post_process_depfile ()
+{
+  # If the compiler actually managed to produce a dependency file,
+  # post-process it.
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form 'foo.o: dependency.h'.
+    # Do two passes, one to just change these to
+    #   $object: dependency.h
+    # and one to simply output
+    #   dependency.h:
+    # which is needed to avoid the deleted-header problem.
+    { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
+      sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
+    } > "$depfile"
+    rm -f "$tmpdepfile"
+  else
+    make_dummy_depfile
+  fi
+}
+
+# A tabulation character.
+tab='	'
+# A newline character.
+nl='
+'
+# Character ranges might be problematic outside the C locale.
+# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
+lower=abcdefghijklmnopqrstuvwxyz
+digits=0123456789
+alpha=${upper}${lower}
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+  # This is just like dashmstdout with a different argument.
+  dashmflag=-xM
+  depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+  # This is just like msvisualcpp but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+  # This is just like msvc7 but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+  # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+  gccflag=-qmakedep=gcc,-MF
+  depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am.  Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+  for arg
+  do
+    case $arg in
+    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+    *)  set fnord "$@" "$arg" ;;
+    esac
+    shift # fnord
+    shift # $arg
+  done
+  "$@"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  mv "$tmpdepfile" "$depfile"
+  ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).  Also, it might not be
+##   supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The second -e expression handles DOS-style file names with drive
+  # letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+## Some versions of gcc put a space before the ':'.  On the theory
+## that the space means something, we add a space to the output as
+## well.  hp depmode also adds that space, but also prefixes the VPATH
+## to the object.  Take care to not repeat it in the output.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+sgi)
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
+    echo "$object : \\" > "$depfile"
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like '#:fec' to the end of the
+    # dependency line.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
+      | tr "$nl" ' ' >> "$depfile"
+    echo >> "$depfile"
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+      >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+xlc)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  In older versions, this file always lives in the
+  # current directory.  Also, the AIX compiler puts '$object:' at the
+  # start of each line; $object doesn't have directory information.
+  # Version 6 uses the directory in both cases.
+  set_dir_from "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$base.u
+    tmpdepfile3=$dir.libs/$base.u
+    "$@" -Wc,-M
+  else
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$dir$base.u
+    tmpdepfile3=$dir$base.u
+    "$@" -M
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  aix_post_process_depfile
+  ;;
+
+tcc)
+  # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
+  # FIXME: That version is still under development at the moment of writing.
+  #        Make sure that this statement remains true also for stable,
+  #        released versions.
+  # It will wrap lines (doesn't matter whether long or short) with a
+  # trailing '\', as in:
+  #
+  #   foo.o : \
+  #    foo.c \
+  #    foo.h \
+  #
+  # It will put a trailing '\' even on the last line, and will use leading
+  # spaces rather than leading tabs (at least since its commit 0394caf7
+  # "Emit spaces for -MD").
+  "$@" -MD -MF "$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
+  # We have to change lines of the first kind to '$object: \'.
+  sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
+  # And for each line of the second kind, we have to emit a 'dep.h:'
+  # dummy dependency, to avoid the deleted-header problem.
+  sed -n -e 's|^  *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+## The order of this option in the case statement is important, since the
+## shell code in configure will try each of these formats in the order
+## listed in this file.  A plain '-MD' option would be understood by many
+## compilers, so we must ensure this comes after the gcc and icc options.
+pgcc)
+  # Portland's C compiler understands '-MD'.
+  # Will always output deps to 'file.d' where file is the root name of the
+  # source file under compilation, even if file resides in a subdirectory.
+  # The object file name does not affect the name of the '.d' file.
+  # pgcc 10.2 will output
+  #    foo.o: sub/foo.c sub/foo.h
+  # and will wrap long lines using '\' :
+  #    foo.o: sub/foo.c ... \
+  #     sub/foo.h ... \
+  #     ...
+  set_dir_from "$object"
+  # Use the source, not the object, to determine the base name, since
+  # that's sadly what pgcc will do too.
+  set_base_from "$source"
+  tmpdepfile=$base.d
+
+  # For projects that build the same source file twice into different object
+  # files, the pgcc approach of using the *source* file root name can cause
+  # problems in parallel builds.  Use a locking strategy to avoid stomping on
+  # the same $tmpdepfile.
+  lockdir=$base.d-lock
+  trap "
+    echo '$0: caught signal, cleaning up...' >&2
+    rmdir '$lockdir'
+    exit 1
+  " 1 2 13 15
+  numtries=100
+  i=$numtries
+  while test $i -gt 0; do
+    # mkdir is a portable test-and-set.
+    if mkdir "$lockdir" 2>/dev/null; then
+      # This process acquired the lock.
+      "$@" -MD
+      stat=$?
+      # Release the lock.
+      rmdir "$lockdir"
+      break
+    else
+      # If the lock is being held by a different process, wait
+      # until the winning process is done or we timeout.
+      while test -d "$lockdir" && test $i -gt 0; do
+        sleep 1
+        i=`expr $i - 1`
+      done
+    fi
+    i=`expr $i - 1`
+  done
+  trap - 1 2 13 15
+  if test $i -le 0; then
+    echo "$0: failed to acquire lock after $numtries attempts" >&2
+    echo "$0: check lockdir '$lockdir'" >&2
+    exit 1
+  fi
+
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each line is of the form `foo.o: dependent.h',
+  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Do two passes, one to just change these to
+  # `$object: dependent.h' and one to simply `dependent.h:'.
+  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp2)
+  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+  # compilers, which have integrated preprocessors.  The correct option
+  # to use with these is +Maked; it writes dependencies to a file named
+  # 'foo.d', which lands next to the object file, wherever that
+  # happens to be.
+  # Much of this is similar to the tru64 case; see comments there.
+  set_dir_from  "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir.libs/$base.d
+    "$@" -Wc,+Maked
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    "$@" +Maked
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+     rm -f "$tmpdepfile1" "$tmpdepfile2"
+     exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  if test -f "$tmpdepfile"; then
+    sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
+    # Add 'dependent.h:' lines.
+    sed -ne '2,${
+               s/^ *//
+               s/ \\*$//
+               s/$/:/
+               p
+             }' "$tmpdepfile" >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile" "$tmpdepfile2"
+  ;;
+
+tru64)
+  # The Tru64 compiler uses -MD to generate dependencies as a side
+  # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
+  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+  # dependencies in 'foo.d' instead, so we check for that too.
+  # Subdirectories are respected.
+  set_dir_from  "$object"
+  set_base_from "$object"
+
+  if test "$libtool" = yes; then
+    # Libtool generates 2 separate objects for the 2 libraries.  These
+    # two compilations output dependencies in $dir.libs/$base.o.d and
+    # in $dir$base.o.d.  We have to check for both files, because
+    # one of the two compilations can be disabled.  We should prefer
+    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+    # automatically cleaned when .libs/ is deleted, while ignoring
+    # the former would cause a distcleancheck panic.
+    tmpdepfile1=$dir$base.o.d          # libtool 1.5
+    tmpdepfile2=$dir.libs/$base.o.d    # Likewise.
+    tmpdepfile3=$dir.libs/$base.d      # Compaq CCC V6.2-504
+    "$@" -Wc,-MD
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    tmpdepfile3=$dir$base.d
+    "$@" -MD
+  fi
+
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  # Same post-processing that is required for AIX mode.
+  aix_post_process_depfile
+  ;;
+
+msvc7)
+  if test "$libtool" = yes; then
+    showIncludes=-Wc,-showIncludes
+  else
+    showIncludes=-showIncludes
+  fi
+  "$@" $showIncludes > "$tmpdepfile"
+  stat=$?
+  grep -v '^Note: including file: ' "$tmpdepfile"
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The first sed program below extracts the file names and escapes
+  # backslashes for cygpath.  The second sed program outputs the file
+  # name when reading, but also accumulates all include files in the
+  # hold buffer in order to output them again at the end.  This only
+  # works with sed implementations that can handle large buffers.
+  sed < "$tmpdepfile" -n '
+/^Note: including file:  *\(.*\)/ {
+  s//\1/
+  s/\\/\\\\/g
+  p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+  s/.*/'"$tab"'/
+  G
+  p
+}' >> "$depfile"
+  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
+  rm -f "$tmpdepfile"
+  ;;
+
+msvc7msys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  test -z "$dashmflag" && dashmflag=-M
+  # Require at least two characters before searching for ':'
+  # in the target name.  This is to cope with DOS-style filenames:
+  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
+  "$@" $dashmflag |
+    sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this sed invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+
+makedepend)
+  "$@" || exit $?
+  # Remove any Libtool call
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # X makedepend
+  shift
+  cleared=no eat=no
+  for arg
+  do
+    case $cleared in
+    no)
+      set ""; shift
+      cleared=yes ;;
+    esac
+    if test $eat = yes; then
+      eat=no
+      continue
+    fi
+    case "$arg" in
+    -D*|-I*)
+      set fnord "$@" "$arg"; shift ;;
+    # Strip any option that makedepend may not understand.  Remove
+    # the object too, otherwise makedepend will parse it as a source file.
+    -arch)
+      eat=yes ;;
+    -*|$object)
+      ;;
+    *)
+      set fnord "$@" "$arg"; shift ;;
+    esac
+  done
+  obj_suffix=`echo "$object" | sed 's/^.*\././'`
+  touch "$tmpdepfile"
+  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  rm -f "$depfile"
+  # makedepend may prepend the VPATH from the source file name to the object.
+  # No need to regex-escape $object, excess matching of '.' is harmless.
+  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process the last invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed '1,2d' "$tmpdepfile" \
+    | tr ' ' "$nl" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  "$@" -E \
+    | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+             -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+    | sed '$ s: \\$::' > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  IFS=" "
+  for arg
+  do
+    case "$arg" in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+        set fnord "$@"
+        shift
+        shift
+        ;;
+    *)
+        set fnord "$@" "$arg"
+        shift
+        shift
+        ;;
+    esac
+  done
+  "$@" -E 2>/dev/null |
+  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+  echo "$tab" >> "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvcmsys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+none)
+  exec "$@"
+  ;;
+
+*)
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h
new file mode 100644
index 000000000..6d131963f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_H
+#define HPL_H
+/*
+ * ---------------------------------------------------------------------
+ * HPL default compile options that can be overridden in the Make.<arch>
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_NO_MPI_DATATYPE         /* Use MPI user-defined data type */
+#define HPL_USE_MPI_DATATYPE
+#endif
+ 
+#ifndef HPL_COPY_L  /* do not copy L, use MPI user-defined data types */
+#define HPL_NO_COPY_L
+#endif
+ 
+#ifndef HPL_DETAILED_TIMING         /* Do not enable detailed timings */
+#define HPL_NO_DETAILED_TIMING
+#endif
+ 
+#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
+#ifndef HPL_CALL_CBLAS                       /* there can be only one */
+#define HPL_CALL_FBLAS
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_timer.h"
+#include "hpl_matgen.h"
+#include "hpl_test.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+#include "hpl_ptest.h"
+
+#endif
+/*
+ * End of hpl.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h
new file mode 100644
index 000000000..861caf380
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_AUXIL_H
+#define HPL_AUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM;
+
+typedef enum
+{
+   HPL_MACH_EPS   = 900,                /* relative machine precision */
+   HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */
+   HPL_MACH_BASE  = 902,                /* base = base of the machine */
+   HPL_MACH_PREC  = 903,                          /* prec  = eps*base */
+   HPL_MACH_MLEN  = 904,   /* number of (base) digits in the mantissa */
+   HPL_MACH_RND   = 905,        /* 1.0 if rounding occurs in addition */
+   HPL_MACH_EMIN  = 906,   /* min exponent before (gradual) underflow */
+   HPL_MACH_RMIN  = 907,        /* underflow threshold base**(emin-1) */
+   HPL_MACH_EMAX  = 908,          /* largest exponent before overflow */
+   HPL_MACH_RMAX  = 909  /* overflow threshold - (base**emax)*(1-eps) */
+ 
+} HPL_T_MACH;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_fprintf
+STDC_ARGS( (
+   FILE *,
+   const char *,
+   ...
+) );
+void                             HPL_warn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_abort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_dlacpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlatcpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlaprnt
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_dlange
+STDC_ARGS( (
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+double                           HPL_dlamch
+STDC_ARGS( (
+   const HPL_T_MACH
+) );
+
+#endif
+/*
+ * End of hpl_auxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h
new file mode 100644
index 000000000..2a510471a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h
@@ -0,0 +1,630 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_BLAS_H
+#define HPL_BLAS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+
+
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+enum HPL_ORDER
+{  HplRowMajor = 101,  HplColumnMajor  = 102 };
+enum HPL_TRANS
+{  HplNoTrans  = 111,  HplTrans        = 112,  HplConjTrans    = 113 };
+enum HPL_UPLO
+{  HplUpper    = 121,  HplLower        = 122 };
+enum HPL_DIAG
+{  HplNonUnit  = 131,  HplUnit         = 132 };
+enum HPL_SIDE
+{  HplLeft     = 141,  HplRight        = 142 }; 
+
+
+#ifdef HPL_CALL_CBLAS
+
+
+/*
+ * ---------------------------------------------------------------------
+ * The C interface of the BLAS is available ...
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    CBLAS_INDEX         int
+ 
+#define    CBLAS_ORDER         HPL_ORDER      /* CBLAS storage-order enum */
+#define    CblasRowMajor       HplRowMajor
+#define    CblasColMajor       HplColumnMajor /* name used in enum HPL_ORDER */
+ 
+#define    CBLAS_TRANSPOSE     HPL_TRANS
+#define    CblasNoTrans        HplNoTrans
+#define    CblasTrans          HplTrans
+#define    CblasConjTrans      HplConjTrans
+ 
+#define    CBLAS_UPLO          HPL_UPLO
+#define    CblasUpper          HplUpper
+#define    CblasLower          HplLower
+ 
+#define    CBLAS_DIAG          HPL_DIAG
+#define    CblasNonUnit        HplNonUnit
+#define    CblasUnit           HplUnit
+ 
+#define    CBLAS_SIDE          HPL_SIDE
+#define    CblasLeft           HplLeft
+#define    CblasRight          HplRight
+/*
+ * ---------------------------------------------------------------------
+ * CBLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+CBLAS_INDEX       cblas_idamax
+STDC_ARGS(
+(  const int,       const double *,  const int ) );
+void              cblas_dswap
+STDC_ARGS(
+(  const int,       double *,        const int,       double *,
+   const int ) );
+void              cblas_dcopy
+STDC_ARGS(
+(  const int,       const double *,  const int,       double *,
+   const int ) );
+void              cblas_daxpy
+STDC_ARGS(
+(  const int,       const double,    const double *,  const int,
+   double *,        const int ) );
+void              cblas_dscal
+STDC_ARGS(
+(  const int,       const double,    double *,        const int ) );
+
+void              cblas_dgemv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const int,       const int,       const double,    const double *,
+   const int,       const double *,  const int,       const double,
+   double *,        const int ) );
+
+void              cblas_dger
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const int,       const int,
+   const double,    const double *,  const int,       const double *,
+   const int,       double *,        const int ) );
+void              cblas_dtrsv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_UPLO,
+   const enum CBLAS_TRANSPOSE,       const enum CBLAS_DIAG,
+   const int,       const double *,  const int,       double *,
+   const int ) );
+
+void              cblas_dgemm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void              cblas_dtrsm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+void             dpcpp_dgemm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void             dpcpp_dtrsm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+/*
+ * ---------------------------------------------------------------------
+ * HPL C BLAS macro definition
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_dswap           cblas_dswap
+#define    HPL_dcopy           cblas_dcopy
+#define    HPL_daxpy           cblas_daxpy
+#define    HPL_dscal           cblas_dscal
+#define    HPL_idamax          cblas_idamax
+
+#define    HPL_dgemv           cblas_dgemv
+#define    HPL_dtrsv           cblas_dtrsv
+#define    HPL_dger            cblas_dger
+
+/* HPL_dgemm / HPL_dtrsm map to the dpcpp_* wrappers declared above,   */
+/* not to cblas_dgemm / cblas_dtrsm as the other HPL_* macros here do.  */
+#define    HPL_dgemm           dpcpp_dgemm
+#define    HPL_dtrsm           dpcpp_dtrsm  
+
+#endif
+
+//#define    HPL_hello           sss_gemm 
+
+#ifdef HPL_CALL_FBLAS
+/*
+ * ---------------------------------------------------------------------
+ * Use the Fortran 77 interface of the BLAS ...
+ * ---------------------------------------------------------------------
+ * Defaults: Add_, F77_INTEGER=int, StringSunStyle
+ * ---------------------------------------------------------------------
+ */
+#ifndef NoChange
+#ifndef UpCase
+#ifndef Add__
+#ifndef Add_
+
+#define Add_
+
+#endif
+#endif
+#endif
+#endif
+
+#ifndef F77_INTEGER
+#define    F77_INTEGER         int
+#else
+#define    HPL_USE_F77_INTEGER_DEF
+#endif
+
+#ifndef StringCrayStyle
+#ifndef StringStructVal
+#ifndef StringStructPtr
+#ifndef StringSunStyle
+
+#define StringSunStyle
+
+#endif
+#endif
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 <-> C interface
+ * ---------------------------------------------------------------------
+ *
+ * These macros identify how Fortran routines will be called.
+ *
+ * Add_     : the Fortran compiler expects the name of C functions to be
+ * in all lower case and to have an underscore postfixed to it (Suns, Intel
+ * compilers expect this).
+ *
+ * NoChange : the Fortran compiler expects the name of C functions to be
+ * in all lower case (IBM RS6K compilers do this).
+ *
+ * UpCase   : the Fortran compiler expects the name of C functions to be
+ * in all upcase. (Cray compilers expect this).
+ *
+ * Add__    : the Fortran compiler in use is f2c, a Fortran to C conver-
+ * ter.
+ */
+#ifdef NoChange
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm(...)
+ */
+#define    F77dswap               dswap
+#define    F77dscal               dscal
+#define    F77dcopy               dcopy
+#define    F77daxpy               daxpy
+#define    F77idamax              idamax
+
+#define    F77dgemv               dgemv
+#define    F77dtrsv               dtrsv
+#define    F77dger                dger
+
+#define    F77dgemm               dgemm
+#define    F77dtrsm               dtrsm
+
+#endif
+
+#ifdef UpCase
+/*
+ * These defines set up the naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following FORTRAN to C
+ * interface (under CRAY_BLAS the F77d* names map to S-prefixed symbols):
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          DGEMM(...)
+ */
+#ifdef CRAY_BLAS
+/* CRAY_BLAS: upper-case S-prefixed symbols (F77idamax -> ISAMAX). */
+#define    F77dswap               SSWAP
+#define    F77dscal               SSCAL
+#define    F77dcopy               SCOPY
+#define    F77daxpy               SAXPY
+#define    F77idamax              ISAMAX
+                                                                                
+#define    F77dgemv               SGEMV
+#define    F77dtrsv               STRSV
+#define    F77dger                SGER
+                                                                                
+#define    F77dgemm               SGEMM
+#define    F77dtrsm               STRSM
+                                                                                
+#else
+/* Default: upper-case D-prefixed symbols (F77idamax -> IDAMAX). */
+#define    F77dswap               DSWAP
+#define    F77dscal               DSCAL
+#define    F77dcopy               DCOPY
+#define    F77daxpy               DAXPY
+#define    F77idamax              IDAMAX
+
+#define    F77dgemv               DGEMV
+#define    F77dtrsv               DTRSV
+#define    F77dger                DGER
+
+#define    F77dgemm               DGEMM
+#define    F77dtrsm               DTRSM
+
+#endif /* CRAY_BLAS */
+
+#endif /* UpCase */
+
+#ifdef Add_
+/*
+ * These defines set up the naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following FORTRAN to C
+ * interface (lower-case name with one trailing underscore):
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+
+#endif /* Add_ */
+
+#ifdef Add__
+/*
+ * These defines set up the naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following FORTRAN to C
+ * interface.  NOTE(review): despite the macro name, every mapping here
+ * uses a single trailing underscore, exactly as in the Add_ case above.
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+ 
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+ 
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+//#define    F77hello               sss_gemm
+ 
+#endif /* Add__ */
+//#define    F77hello               sss_gemm
+/*
+ * ---------------------------------------------------------------------
+ * Typedef definitions and conversion utilities
+ * ---------------------------------------------------------------------
+ */
+#ifdef StringCrayStyle
+/* _fcd, _fcdtocp and _cptofcd are presumably supplied by <fortran.h>. */
+#include <fortran.h>
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            _fcd
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(_fcdtocp(c) ))
+#define    HPL_C2F_CHAR(c)     (_cptofcd(&(c), 1))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif /* StringCrayStyle */
+/* ------------------------------------------------------------------ */
+#ifdef StringStructVal
+          /* Type of character argument in a FORTRAN call (by value) */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+    /* Character conversion utility: first character of the string */
+#define    HPL_F2C_CHAR(c)     (*((c).cp))
+   /* (c) is parenthesized so that any expression argument is safe */
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif /* StringStructVal */
+/* ------------------------------------------------------------------ */
+#ifdef StringStructPtr
+        /* Type of character argument in a FORTRAN call (by pointer) */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+    /* Character conversion utility: first character of the string */
+#define    HPL_F2C_CHAR(c)     (*((c)->cp))
+   /* (c) is parenthesized so that any expression argument is safe */
+#define    F77_CHAR_DECL       F77_CHAR *        /* input CHARACTER*1 */
+
+#endif /* StringStructPtr */
+/* ------------------------------------------------------------------ */
+#ifdef StringSunStyle
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            char *
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c))
+#define    HPL_C2F_CHAR(c)     (&(c))
+/* F77_n_CHAR appends n ", F77_INTEGER" parameters to a prototype. */
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+#define    F77_1_CHAR          , F77_INTEGER
+#define    F77_2_CHAR          F77_1_CHAR F77_1_CHAR
+#define    F77_3_CHAR          F77_2_CHAR F77_1_CHAR
+#define    F77_4_CHAR          F77_3_CHAR F77_1_CHAR
+
+#endif /* StringSunStyle */
+/* ------------------------------------------------------------------ */
+/* Empty unless F77_1_CHAR was already defined (e.g. by StringSunStyle). */
+#ifndef F77_1_CHAR
+#define    F77_1_CHAR
+#define    F77_2_CHAR
+#define    F77_3_CHAR
+#define    F77_4_CHAR
+#endif /* F77_1_CHAR */
+
+#define    F77_INT_DECL        const F77_INTEGER *   /* input integer */
+#define    F77_SIN_DECL        const double *         /* input scalar */
+#define    F77_VIN_DECL        const double *         /* input vector */
+#define    F77_VINOUT_DECL     double *        /* input/output vector */
+#define    F77_MIN_DECL        const double *         /* input matrix */
+#define    F77_MINOUT_DECL     double *        /* input/output matrix */
+ 
+#ifdef CRAY_PVP_ENV                      /* Type of FORTRAN functions */
+#define    F77_VOID_FUN        extern fortran void      /* subroutine */
+#define    F77_INT_FUN         extern fortran int /* integer function */
+#else
+#define    F77_VOID_FUN        extern void              /* subroutine */
+#define    F77_INT_FUN         extern int         /* integer function */
+#endif /* CRAY_PVP_ENV */
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 BLAS function prototypes
+ * ---------------------------------------------------------------------
+ */
+F77_VOID_FUN    F77dswap
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dscal
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_VOID_FUN    F77dcopy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77daxpy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_INT_FUN     F77idamax
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL ) );
+/* A prototype with n F77_CHAR_DECL parameters ends with F77_n_CHAR.  */
+F77_VOID_FUN    F77dgemv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL     F77_1_CHAR ) );
+F77_VOID_FUN    F77dger
+STDC_ARGS(
+(  F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,
+   F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dtrsv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL
+   F77_3_CHAR ) );
+
+F77_VOID_FUN    F77dgemm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,
+   F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,    F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL     F77_2_CHAR ) );
+F77_VOID_FUN    F77dtrsm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,
+   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,
+   F77_INT_DECL,    F77_MINOUT_DECL, F77_INT_DECL     F77_4_CHAR ) );
+
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * HPL BLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_CALL_CBLAS
+/* Declared only without HPL_CALL_CBLAS; the guard ends after HPL_hello. */
+int                              HPL_idamax
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int
+) );
+void                             HPL_daxpy
+STDC_ARGS( (
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dcopy
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dscal
+STDC_ARGS( (
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dswap
+STDC_ARGS( (
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dger
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dtrsv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_hello
+STDC_ARGS( (
+) );
+#endif /* !HPL_CALL_CBLAS */
+void                             HPL_dtrsm /* not under HPL_CALL_CBLAS */
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_SIDE,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+
+//#endif  (disabled: the HPL_CALL_CBLAS guard is closed before HPL_dtrsm)
+
+#endif
+/*
+ * End of hpl_blas.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h
new file mode 100644
index 000000000..e3ba51a57
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h
@@ -0,0 +1,161 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_COMM_H
+#define HPL_COMM_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures.  HPL_T_TOP has one value per family of
+ * HPL_b{init,cast,wait}_* routines declared at the end of this header.
+ */
+typedef enum
+{
+   HPL_1RING         = 401,                        /* Increasing ring */
+   HPL_1RING_M       = 402,             /* Increasing ring (modified) */
+   HPL_2RING         = 403,                      /* Increasing 2-ring */
+   HPL_2RING_M       = 404,           /* Increasing 2-ring (modified) */
+   HPL_BLONG         = 405,                         /* long broadcast */
+   HPL_BLONG_M       = 406               /* long broadcast (modified) */
+} HPL_T_TOP;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_FAILURE            0   /* NOTE(review): presumably the  */
+#define    HPL_SUCCESS            1   /* status codes returned by the  */
+#define    HPL_KEEP_TESTING       2   /* int routines below -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * comm function prototypes.  STDC_ARGS, MPI_Comm and HPL_T_panel are not
+ * defined here; presumably they come from hpl_pmisc.h / hpl_panel.h.
+ */
+int                              HPL_send
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_recv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_sdrv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_binit
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_bcast
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *
+) );
+int                              HPL_bwait
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_packL
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_copyL
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+/* Six binit/bcast/bwait triples; presumably one per HPL_T_TOP value. */
+int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blong STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+
+#endif
+/*
+ * End of hpl_comm.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h
new file mode 100644
index 000000000..ce671cf2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h
@@ -0,0 +1,87 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GESV_H
+#define HPL_GESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures.  HPL_T_FACT names an LU factorization
+ * variant; HPL_dgesv (declared below) takes two parameters of this type.
+ */
+typedef enum
+{
+   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
+   HPL_CROUT         = 302,                  /* Crout lu fact variant */
+   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
+} HPL_T_FACT;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes.  STDC_ARGS is not defined in this header; it is
+ * presumably provided by one of the hpl_*.h files included above.
+ */
+void              HPL_dgesv
+STDC_ARGS(
+(  const int,       const int,       const int,       const HPL_T_FACT,
+   const HPL_T_FACT,                 const int,       double *,
+   const int,       int * ) );
+void              HPL_ipid
+STDC_ARGS(
+(  const int,       double *,        int *,           int *,
+   int *,           int *,           int *,           int *,
+   const int,       const int,       const int,       const int,
+   const int ) );
+
+#endif
+/*
+ * End of hpl_gesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h
new file mode 100644
index 000000000..1895a5ed4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h
@@ -0,0 +1,212 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GRID_H
+#define HPL_GRID_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures.  HPL_T_TYPE is the element-type tag
+ * consumed by HPL_2_MPI_TYPE and by the HPL_T_OP callbacks further down.
+ */
+typedef enum { HPL_INT       = 100, HPL_DOUBLE       = 101 } HPL_T_TYPE;
+/* Ordering of the processes in the grid (stored in HPL_T_grid.order). */
+typedef enum
+{
+   HPL_ROW_MAJOR     = 201,
+   HPL_COLUMN_MAJOR  = 202
+} HPL_T_ORDER;
+/* Process grid: communicators, own coordinates, extents, hypercube data */
+typedef struct HPL_S_grid
+{
+   MPI_Comm        all_comm;                     /* grid communicator */
+   MPI_Comm        row_comm;                      /* row communicator */
+   MPI_Comm        col_comm;                   /* column communicator */
+   HPL_T_ORDER     order;        /* ordering of the procs in the grid */
+   int             iam;                        /* my rank in the grid */
+   int             myrow;                /* my row number in the grid */
+   int             mycol;             /* my column number in the grid */
+   int             nprow;          /* the total # of rows in the grid */
+   int             npcol;       /* the total # of columns in the grid */
+   int             nprocs;        /* the total # of procs in the grid */
+   int             row_ip2;          /* largest power of two <= nprow */
+   int             row_hdim;     /* row_ip2 procs hypercube dimension */
+   int             row_ip2m1;      /* largest power of two <= nprow-1 */
+   int             row_mask;        /* row_ip2m1 procs hypercube mask */
+   int             col_ip2;          /* largest power of two <= npcol */
+   int             col_hdim;     /* col_ip2 procs hypercube dimension */
+   int             col_ip2m1;      /* largest power of two <= npcol-1 */
+   int             col_mask;        /* col_ip2m1 procs hypercube mask */
+} HPL_T_grid;
+
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures.  HPL_T_OP is the callback type taken by HPL_reduce
+ * and HPL_all_reduce; HPL_max, HPL_min and HPL_sum match its signature.
+ */
+typedef void (*HPL_T_OP)
+(  const int,       const void *,    void *,          const HPL_T_TYPE );
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions.  HPL_2_MPI_TYPE yields MPI_INT when typ
+ * equals HPL_INT and MPI_DOUBLE otherwise; typ may be any expression.
+ */
+#define    HPL_2_MPI_TYPE( typ ) \
+                         ( ( (typ) == HPL_INT ? MPI_INT : MPI_DOUBLE ) )
+/*
+ * The following macros perform common modulo operations.  All of them
+ * except MPosMod assume their arguments are already within the modulo
+ * range (< d).  MModInc / MModDec are statements: write `MModInc(I,d);`.
+ */
+                    /* increment with mod (safe inside if / else) */
+#define    MModInc(I, d)       do { if(++(I) == (d)) (I) = 0; } while(0)
+                    /* decrement with mod (safe inside if / else) */
+#define    MModDec(I, d)       do { if(--(I) == -1) (I) = (d)-1; } while(0)
+                               /* positive modulo (for I >= 0, d > 0) */
+#define    MPosMod(I, d)       ( (I) - ((I)/(d))*(d) )
+                                                   /* add two numbers */
+#define    MModAdd(I1, I2, d) \
+           ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) )
+                                                        /* add 1 to # */
+#define    MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 )
+                                              /* subtract two numbers */
+#define    MModSub(I1, I2, d) \
+           ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) )
+                                                      /* sub 1 from # */
+#define    MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 )
+/*
+ * ---------------------------------------------------------------------
+ * grid function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_grid_init
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_ORDER,
+   const int,
+   const int,
+   HPL_T_grid *
+) );
+int                              HPL_grid_exit
+STDC_ARGS( (
+   HPL_T_grid *
+) );
+
+int                              HPL_grid_info
+STDC_ARGS( (
+   const HPL_T_grid *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_pnum
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int
+) );
+/* The four routines below each take an MPI_Comm as last parameter. */
+int                              HPL_barrier
+STDC_ARGS( (
+   MPI_Comm
+) );
+int                              HPL_broadcast
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_all_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   MPI_Comm
+) );
+/* HPL_max, HPL_min and HPL_sum have the HPL_T_OP callback signature. */
+void                             HPL_max
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_min
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_sum
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+
+#endif
+/*
+ * End of hpl_grid.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h
new file mode 100644
index 000000000..de6503eea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MATGEN_H
+#define HPL_MATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MULT0         1284865837     /* NOTE(review): MULT0/MULT1 presumably form the generator multiplier - confirm */
+#define    HPL_MULT1         1481765933
+#define    HPL_IADD0         1              /* NOTE(review): IADD0/IADD1 presumably form the generator increment - confirm */
+#define    HPL_IADD1         0
+#define    HPL_DIVFAC        2147483648.0   /* 2^31 as a double */
+#define    HPL_POW16         65536.0        /* 2^16 as a double */
+#define    HPL_HALF          0.5
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dmatgen /* NOTE(review): presumably (m, n, A, lda, seed) - confirm against the definition */
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+void                             HPL_lmul /* HPL_lmul and HPL_ladd share the list ( int *, int *, int * ) */
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_ladd
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_xjumpm /* one const int followed by six int * */
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_setran
+STDC_ARGS( (
+   const int,
+   int *
+) );
+void                             HPL_jumpit /* four int * */
+STDC_ARGS( (
+   int *,
+   int *,
+   int *,
+   int *
+) );
+double                           HPL_rand STDC_ARGS( ( void ) ); /* no arguments, returns a double */
+
+#endif
+/*
+ * End of hpl_matgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h
new file mode 100644
index 000000000..ea421a403
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h
@@ -0,0 +1,110 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MISC_H
+#define HPL_MISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#ifdef __STDC__
+#define STDC_HEADERS
+#endif
+/* The four headers below are included whether or not STDC_HEADERS is set. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+/* STDC_ARGS(p) keeps the parameter list p of a prototype, or replaces it by (). */
+#ifdef STDC_HEADERS
+#include <stdarg.h>
+#define STDC_ARGS(p)           p
+#else
+#include <varargs.h>
+#define STDC_ARGS(p)           ()
+#endif
+
+#ifdef HPL_CALL_VSIPL /* vsip.h is pulled in only when HPL_CALL_VSIPL is defined */
+#include <vsip.h>
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_rone             1.0    /* floating-point literals for one, two and zero */
+#define    HPL_rtwo             2.0
+#define    HPL_rzero            0.0
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    Mabs( a_ )          ( ( (a_) <   0  ) ? -(a_) : (a_) ) /* arguments of these macros appear twice: avoid side effects */
+#define    Mmin( a_, b_ )      ( ( (a_) < (b_) ) ?  (a_) : (b_) )
+#define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
+/* With integer operands and b > 0: Mfloor and Miceil branch on the sign of a, Mceil does not. */
+#define    Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b))))
+#define    Mceil(a,b)           ( ( (a)+(b)-1 ) / (b) )
+#define    Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b))))
+/* Case conversion on character codes: 97..122 are masked with 0xDF, 65..90 are OR-ed with 32. */
+#define    Mupcase(C)          (((C)>96 && (C)<123) ? (C) & 0xDF : (C))
+#define    Mlowcase(C)         (((C)>64 && (C)< 91) ? (C) | 32   : (C))
+/*
+ * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and
+ * also less silly errors: a_ + i_ + j_*lda_, with i_, j_, lda_ cast to size_t.
+ */
+#define    Mptr( a_, i_, j_, lda_ ) \
+   ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) )
+/*
+ * Align pointer: (size_t)(ptr_) rounded up to a multiple of al_; the value is an integer, not a pointer.
+ */
+#define    HPL_PTR( ptr_, al_ ) \
+                      ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) 
+#endif
+/*
+ * End of hpl_misc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h
new file mode 100644
index 000000000..d5ba2939c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PANEL_H
+#define HPL_PANEL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_panel /* per-panel state: array pointers, bcast bookkeeping, sizes and indexes */
+{
+   struct HPL_S_grid   * grid;             /* ptr to the process grid */
+   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
+   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
+   double              * A;              /* ptr to trailing part of A */
+   double              * WORK;                          /* work space */
+   double              * L2;                              /* ptr to L */
+   double              * L1;       /* ptr to jb x jb upper block of A */
+   double              * DPIV;    /* ptr to replicated jb pivot array */
+   double              * DINFO;      /* ptr to replicated scalar info */
+   double              * U;                               /* ptr to U */
+   int                 * IWORK;     /* integer workspace for swapping */
+   void                * * * buffers[2];   /* buffers for panel bcast */
+   int                 counts [2];          /* counts for panel bcast */
+   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
+   MPI_Request         request[1];        /* requests for panel bcast */
+   MPI_Status          status [1];          /* status for panel bcast */
+   int                 nb;            /* distribution blocking factor */
+   int                 jb;                             /* panel width */
+   int                 m;   /* global # of rows of trailing part of A */
+   int                 n;   /* global # of cols of trailing part of A */
+   int                 ia;  /* global row index of trailing part of A */
+   int                 ja;  /* global col index of trailing part of A */
+   int                 mp;   /* local # of rows of trailing part of A */
+   int                 nq;   /* local # of cols of trailing part of A */
+   int                 ii;   /* local row index of trailing part of A */
+   int                 jj;   /* local col index of trailing part of A */
+   int                 lda;           /* local leading dim of array A */
+   int                 prow;  /* proc. row owning 1st row of trail. A */
+   int                 pcol;  /* proc. col owning 1st col of trail. A */
+   int                 msgid;           /* message id for panel bcast */
+   int                 ldl2;         /* local leading dim of array L2 */
+   int                 len;      /* length of the buffer to broadcast */
+#ifdef HPL_CALL_VSIPL /* the four members below exist only when HPL_CALL_VSIPL is defined */
+   vsip_block_d        * Ablock;                           /* A block */
+   vsip_block_d        * L1block;                         /* L1 block */
+   vsip_block_d        * L2block;                         /* L2 block */
+   vsip_block_d        * Ublock;                           /* U block */
+#endif
+} HPL_T_panel;
+
+/*
+ * ---------------------------------------------------------------------
+ * panel function prototypes
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pgesv.h"
+
+void                             HPL_pdpanel_new /* same list as HPL_pdpanel_init except the last argument: HPL_T_panel * * here */
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel * *
+) );
+void                             HPL_pdpanel_init /* last argument is a HPL_T_panel *, supplied by the caller */
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel *
+) );
+int                              HPL_pdpanel_disp /* takes HPL_T_panel * *, like HPL_pdpanel_new */
+STDC_ARGS( (
+   HPL_T_panel * *
+) );
+int                              HPL_pdpanel_free /* takes HPL_T_panel *, like HPL_pdpanel_init */
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+
+#endif
+/*
+ * End of hpl_panel.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h
new file mode 100644
index 000000000..1fd0ee457
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h
@@ -0,0 +1,505 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PAUXIL_H
+#define HPL_PAUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Mindxg2p  returns the process coordinate owning the entry globally in-
+ * dexed by ig_.
+ */
+#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
+           {                                                           \
+              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
+                  ( (nprocs_) > 1 ) )                                  \
+              { /* proc_ = (src_ + 1 + (ig_-inb_)/nb_) mod nprocs_ */  \
+                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+              else /* ig_ < inb_, src_ < 0 or nprocs_ <= 1 */          \
+              {                                                        \
+                 proc_ = (src_);                                       \
+              }                                                        \
+           }
+
+#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
+           { /* sets il_ from ig_; proc_ is not referenced below */    \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
+              else                                                     \
+              { /* i__ = (ig_-inb_)/nb_, j__ = i__/nprocs_ */          \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*( j__ - i__ ) +                           \
+                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
+                         (ig_) - (inb_) : (ig_) );                     \
+              }                                                        \
+           }
+
+#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
+           { /* il_ and proc_ formulas match Mindxg2l, Mindxg2p */     \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) )                                 \
+              { il_ = (ig_); proc_ = (src_); }                         \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*(j__-i__) +                               \
+                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
+                         (ig_) - (inb_) : (ig_) );                     \
+                 proc_  = (src_) + 1 + i__;                            \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+           }
+/*
+ * Mindxl2g computes the global index ig_ corresponding to the local
+ * index il_ in process proc_.
+ */
+#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 if( (proc_) == (src_) )                               \
+                 { /* proc_ == src_: first inb_ indexes map 1:1 */     \
+                    if( (il_) < (inb_) ) ig_ = (il_);                  \
+                    else                 ig_ = (il_) +                 \
+                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
+                 }                                                     \
+                 else if( (proc_) < (src_) )                           \
+                 { /* nprocs_ is added to proc_-src_-1 here only */    \
+                    ig_ = (il_) + (inb_) +                             \
+                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
+                                   (proc_)-(src_)-1+(nprocs_) );       \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    ig_ =  (il_) + (inb_) +                            \
+                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
+                           (proc_)-(src_)-1 );                         \
+                 }                                                     \
+              }                                                        \
+              else /* src_ < 0 or nprocs_ <= 1 */                      \
+              {                                                        \
+                 ig_ = (il_);                                          \
+              }                                                        \
+           }
+/*
+ * MnumrocI computes the # of local indexes  np_ residing in the process
+ * of coordinate  proc_  corresponding to the interval of global indexes
+ * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
+ * src_,  and that the indexes are distributed from src_ using the para-
+ * meters inb_, nb_ and nprocs_.
+ */
+#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
+                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
+                 { /* i_ >= inb_: move src__, inb__ to block of i_ */  \
+                    nblk__ = (-inb__) / (nb_) + 1;                     \
+                    src__  = (src_) + nblk__;                          \
+                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
+                    inb__ += nblk__*(nb_);                             \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    { /* n_ <= inb__: one process holds all */         \
+                       if( (proc_) == src__ ) np_ = (n_);              \
+                       else                   np_ = 0;                 \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+                 else                                                  \
+                 { /* i_ < inb_: same steps with src_ for src__ */     \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == (src_) ) np_ = (n_);             \
+                       else                    np_ = 0;                \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+              }                                                        \
+              else /* src_ < 0 or nprocs_ <= 1 */                      \
+              {                                                        \
+                 np_ = (n_);                                           \
+              }                                                        \
+           }
+
+#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
+           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_indxg2lp
+STDC_ARGS( (
+   int *,
+   int *,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2l
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2p
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxl2g
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_infog2l
+STDC_ARGS( (
+   int,
+   int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_numroc
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_numrocI
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+
+void                             HPL_dlaswp00N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp10N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp01N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp01T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp02N
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp03N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp03T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp04N
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp04T
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp06N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp06T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+
+void                             HPL_pabort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pwarn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pdlaprnt
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_pdlamch
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_MACH
+) );
+double                           HPL_pdlange
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pauxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h
new file mode 100644
index 000000000..09eee79ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PFACT_H
+#define HPL_PFACT_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_PFA_FUN)
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_RFA_FUN)
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_UPD_FUN)
+(  HPL_T_panel *,   int *,           HPL_T_panel *,   const int ); 
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dlocmax
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_dlocswpN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_dlocswpT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdmxswp
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdrpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdfact
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+#endif
+/*
+ * End of hpl_pfact.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h
new file mode 100644
index 000000000..3ca576c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h
@@ -0,0 +1,346 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PGESV_H
+#define HPL_PGESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+#include "hpl_comm.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_SWAP00        = 451,                      /* Use HPL_pdlaswp00 */
+   HPL_SWAP01        = 452,                      /* Use HPL_pdlaswp01 */
+   HPL_SW_MIX        = 453, /* Use HPL_pdlaswp00_ for small number of */
+                            /* columns, and HPL_pdlaswp01_ otherwise. */
+   HPL_NO_SWP        = 499
+} HPL_T_SWAP;
+
+typedef struct HPL_S_palg
+{
+   HPL_T_TOP           btopo;               /* row broadcast topology */
+   int                 depth;                     /* look-ahead depth */
+   int                 nbdiv;            /* recursive division factor */
+   int                 nbmin;         /* recursion stopping criterion */
+   HPL_T_FACT          pfact;                   /* panel fact variant */
+   HPL_T_FACT          rfact;               /* recursive fact variant */
+   HPL_T_PFA_FUN       pffun;              /* panel fact function ptr */
+   HPL_T_RFA_FUN       rffun;          /* recursive fact function ptr */
+   HPL_T_UPD_FUN       upfun;                      /* update function */
+   HPL_T_SWAP          fswap;                   /* Swapping algorithm */
+   int                 fsthr;                   /* Swapping threshold */
+   int                 equil;                        /* Equilibration */
+   int                 align;              /* data alignment constant */
+} HPL_T_palg;
+
+typedef struct HPL_S_pmat
+{
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * block;
+#endif
+   double              * A;            /* pointer to local piece of A */
+   double              * X;             /* pointer to solution vector */
+   int                 n;                      /* global problem size */
+   int                 nb;                         /* blocking factor */
+   int                 ld;                 /* local leading dimension */
+   int                 mp;                    /* local number of rows */
+   int                 nq;                 /* local number of columns */
+   int                 info;                    /* computational flag */
+} HPL_T_pmat;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    MSGID_BEGIN_PFACT   1001              /* message id ranges */
+#define    MSGID_END_PFACT     2000
+#define    MSGID_BEGIN_FACT    2001
+#define    MSGID_END_FACT      3000
+#define    MSGID_BEGIN_PTRSV   3001
+#define    MSGID_END_PTRSV     4000
+ 
+#define    MSGID_BEGIN_COLL    9001
+#define    MSGID_END_COLL     10000
+/*
+ * ---------------------------------------------------------------------
+ * #define macro definitions
+ * ---------------------------------------------------------------------
+ */
+#define    MNxtMgid( id_, beg_, end_ ) /* id_+1, or beg_ past end_ */ \
+                             (( (id_)+1 > (end_) ?  (beg_) : (id_)+1 ))
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pipid
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   int *
+) );
+void                             HPL_plindx0
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdlaswp00N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp00T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_perm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_logsort
+STDC_ARGS( (
+   const int,
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx10
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx1
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_spreadN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_spreadT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_equil
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_TRANS,
+   const int,
+   double *,
+   const int,
+   int *,
+   const int *,
+   const int *,
+   int *
+) );
+void                             HPL_rollN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_rollT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_pdlaswp01N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp01T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdupdateNN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateNT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdgesv0
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK1
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK2
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+ 
+void                             HPL_pdtrsv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_pmat *
+) );
+
+#endif
+/*
+ * End of hpl_pgesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h
new file mode 100644
index 000000000..1091b0f60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h
@@ -0,0 +1,77 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMATGEN_H
+#define HPL_PMATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_matgen.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdmatgen
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pmatgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h
new file mode 100644
index 000000000..23550d47b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h
@@ -0,0 +1,59 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMISC_H
+#define HPL_PMISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "mpi.h"
+
+#endif
+/*
+ * End of hpl_pmisc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h
new file mode 100644
index 000000000..5777bd536
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTEST_H
+#define HPL_PTEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_test   /* test bookkeeping: tolerances, output stream, result tallies */
+{
+   double              epsil;                      /* machine epsilon */
+   double              thrsh; /* threshold (presumably for the residual checks -- confirm) */
+   FILE *              outfp;       /* output stream (only in proc 0) */
+   int                 kfail;                    /* # of tests failed */
+   int                 kpass;                    /* # of tests passed */
+   int                 kskip;                   /* # of tests skipped */
+   int                 ktest;                /* total number of tests */
+} HPL_T_test;
+
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants for testing only
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_LINE_MAX         256 /* presumably max length of an input line -- confirm */
+#define    HPL_MAX_PARAM         20 /* presumably max values per input parameter -- confirm */
+#define    HPL_ISEED            100 /* presumably the matrix generator seed -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * global timers for timing analysis only
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_DETAILED_TIMING
+#define    HPL_TIMING_BEG        11 /* first timer id below; timer 0 reserved, used by main */
+#define    HPL_TIMING_N           6 /* number of timers defined below (ids 11..16) */
+#define    HPL_TIMING_RPFACT     11 /* same value as HPL_TIMING_BEG; ids are contiguous from here */
+#define    HPL_TIMING_PFACT      12
+#define    HPL_TIMING_MXSWP      13
+#define    HPL_TIMING_UPDATE     14
+#define    HPL_TIMING_LASWP      15
+#define    HPL_TIMING_PTRSV      16 /* last id: HPL_TIMING_BEG + HPL_TIMING_N - 1 */
+#endif /* HPL_DETAILED_TIMING */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdinfo /* 27 unnamed pointer arguments; presumably filled from the run's input parameters -- confirm against the definition */
+STDC_ARGS( (
+   HPL_T_test *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_ORDER *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   HPL_T_TOP *,
+   int *,
+   int *,
+   HPL_T_SWAP *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdtest /* the two trailing const ints are presumably problem size and block size -- confirm */
+STDC_ARGS( (
+   HPL_T_test *,
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_ptest.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h
new file mode 100644
index 000000000..43c8fe33a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h
@@ -0,0 +1,96 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTIMER_H
+#define HPL_PTIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NPTIMER             64 /* presumably the number of timer slots -- confirm */
+#define    HPL_PTIMER_STARTFLAG   5.0 /* NOTE(review): use is not visible in this header */
+#define    HPL_PTIMER_ERROR      -1.0 /* presumably returned on a timer error -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum /* second-to-last kind of argument of HPL_ptimer_inquire; names suggest wall-clock vs CPU time -- confirm */
+{  HPL_WALL_PTIME = 101, HPL_CPU_PTIME  = 102 } HPL_T_PTIME;
+
+typedef enum /* passed to HPL_ptimer_combine; names suggest max / min / sum combination -- confirm */
+{ HPL_AMAX_PTIME  = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME  = 203 }
+HPL_T_PTIME_OP;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_ptimer_cputime   STDC_ARGS(     ( void      ) ); /* presumably the current CPU time -- confirm */
+double          HPL_ptimer_walltime  STDC_ARGS(     ( void      ) ); /* presumably the current wall-clock time -- confirm */
+
+void            HPL_ptimer           STDC_ARGS(     ( const int ) ); /* const int is presumably a timer index below HPL_NPTIMER -- confirm */
+void            HPL_ptimer_boot      STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_combine /* comm is the only parameter named in this header; the rest are positional */
+STDC_ARGS(
+(  MPI_Comm comm,   const HPL_T_PTIME_OP,             const HPL_T_PTIME,
+   const int,       const int,       double * ) );
+void            HPL_ptimer_disable   STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_enable    STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_inquire /* takes an HPL_T_PTIME selector and a const int (presumably a timer index -- confirm) */
+STDC_ARGS(
+(  const HPL_T_PTIME,                const int ) );
+
+#endif
+/*
+ * End of hpl_ptimer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h
new file mode 100644
index 000000000..1eedc97e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h
@@ -0,0 +1,80 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TEST_H
+#define HPL_TEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_matgen.h"
+#include "hpl_timer.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_dinfo /* 14 unnamed pointer arguments; see the definition for their meaning */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   HPL_T_FACT *,    int *,           int *,           int *, 
+   int *,           int *,           HPL_T_FACT *,    int *,
+   double *,        double * ) );
+void            HPL_dtest /* 12 unnamed arguments; only the three trailing int * are non-const pointers besides the FILE * */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   HPL_T_FACT,      HPL_T_FACT,      const int,       const double,
+   const double,    int *,           int *,           int * ) );
+
+#endif
+/*
+ * End of hpl_test.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h
new file mode 100644
index 000000000..4c91700ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TIMER_H
+#define HPL_TIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NTIMER              64 /* presumably the number of timer slots -- confirm */
+#define    HPL_TIMER_STARTFLAG    5.0 /* NOTE(review): use is not visible in this header */
+#define    HPL_TIMER_ERROR       -1.0 /* presumably returned on a timer error -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum /* first argument of HPL_timer_inquire; names suggest wall-clock vs CPU time -- confirm */
+{  HPL_WALL_TIME = 101, HPL_CPU_TIME  = 102 } HPL_T_TIME;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_timer_cputime    STDC_ARGS(     ( void      ) ); /* presumably the current CPU time -- confirm */
+double          HPL_timer_walltime   STDC_ARGS(     ( void      ) ); /* presumably the current wall-clock time -- confirm */
+
+void            HPL_timer            STDC_ARGS(     ( const int ) ); /* const int is presumably a timer index below HPL_NTIMER -- confirm */
+void            HPL_timer_boot       STDC_ARGS(     ( void      ) );
+void            HPL_timer_enable     STDC_ARGS(     ( void      ) );
+void            HPL_timer_disable    STDC_ARGS(     ( void      ) );
+double          HPL_timer_inquire /* takes an HPL_T_TIME selector and a const int (presumably a timer index -- confirm) */
+STDC_ARGS(
+(  const HPL_T_TIME,                 const int ) );
+
+#endif
+/*
+ * End of hpl_timer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h
new file mode 100644
index 000000000..a96956497
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h
@@ -0,0 +1,135 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_UNITS_H
+#define HPL_UNITS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MAXROUT       50 /* presumably max number of routines under test -- confirm */
+#define    HPL_MAXRNAME      15 /* row width of the char [][HPL_MAXRNAME] argument of HPL_unit_info */
+
+#define    HPL_TRUE         'T' /* char flags: both values are non-zero, so compare explicitly */
+#define    HPL_FALSE        'F'
+
+#define    HPL_INDXG2P_ROUT   "HPL_indxg2p" /* routine names as string literals */
+#define    HPL_INDXG2L_ROUT   "HPL_indxg2l"
+#define    HPL_INDXL2G_ROUT   "HPL_indxl2g"
+#define    HPL_NUMROC_ROUT    "HPL_numroc"
+#define    HPL_NUMROCI_ROUT   "HPL_numrocI"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_unit_info /* 13 unnamed arguments; the char [][HPL_MAXRNAME] one presumably holds routine names -- confirm */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   int *,           int *,           int *,           int *,
+   int *,           int *,           int *,           char [][HPL_MAXRNAME],
+   int [] ) );
+ 
+void            HPL_unit_indxg2l /* FILE *, eight const int, two long * (all unnamed) */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2l /* FILE *, const char * (presumably the routine name -- confirm), seven const int, two long * */
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxl2g /* FILE *, eight const int, two long * (all unnamed) */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxl2g /* FILE *, const char * (presumably the routine name -- confirm), seven const int, two long * */
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxg2p /* FILE *, eight const int, two long * (all unnamed) */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2p /* FILE *, const char * (presumably the routine name -- confirm), seven const int, two long * */
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_numroc /* FILE *, eight const int, two long *; this header declares no HPL_chek_numroc */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+void            HPL_unit_numrocI /* one more const int than HPL_unit_numroc: FILE *, nine const int, two long * */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+int             HPL_chek_numrocI /* FILE *, const char * (presumably the routine name -- confirm), eight const int, two long * */
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+
+#endif
+/*
+ * End of hpl_units.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in
new file mode 100644
index 000000000..b4b3b9a35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in
@@ -0,0 +1,67 @@
+/* include/hplconfig.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if you have a BLAS library. */
+#undef HAVE_BLAS
+
+/* Define to 1 if you have the `dgemm_' function. */
+#undef HAVE_DGEMM_
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have the MPI library. */
+#undef HAVE_MPI
+
+/* Define to 1 if you have the <mpi.h> header file. */
+#undef HAVE_MPI_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Version number of package */
+#undef VERSION
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh
new file mode 100755
index 000000000..8175c640f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh
@@ -0,0 +1,518 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2018-03-11.20; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+tab='	'
+nl='
+'
+IFS=" $tab$nl"
+
+# Set DOITPROG to "echo" to test this script.
+
+doit=${DOITPROG-}
+doit_exec=${doit:-exec}
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp}
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_mkdir=
+
+# Desired mode of installed file.
+mode=0755
+
+chgrpcmd=
+chmodcmd=$chmodprog
+chowncmd=
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd=
+
+src=
+dst=
+dir_arg=
+dst_arg=
+
+copy_on_change=false
+is_target_a_directory=possibly
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+   or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+     --help     display this help and exit.
+     --version  display version info and exit.
+
+  -c            (ignored)
+  -C            install only if different (preserve the last data modification time)
+  -d            create directories instead of installing files.
+  -g GROUP      $chgrpprog installed files to GROUP.
+  -m MODE       $chmodprog installed files to MODE.
+  -o USER       $chownprog installed files to USER.
+  -s            $stripprog installed files.
+  -t DIRECTORY  install into DIRECTORY.
+  -T            report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+  RMPROG STRIPPROG
+"
+
+while test $# -ne 0; do
+  case $1 in
+    -c) ;;
+
+    -C) copy_on_change=true;;
+
+    -d) dir_arg=true;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift;;
+
+    --help) echo "$usage"; exit $?;;
+
+    -m) mode=$2
+        case $mode in
+          *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
+            echo "$0: invalid mode: $mode" >&2
+            exit 1;;
+        esac
+        shift;;
+
+    -o) chowncmd="$chownprog $2"
+        shift;;
+
+    -s) stripcmd=$stripprog;;
+
+    -t)
+        is_target_a_directory=always
+        dst_arg=$2
+        # Protect names problematic for 'test' and other utilities.
+        case $dst_arg in
+          -* | [=\(\)!]) dst_arg=./$dst_arg;;
+        esac
+        shift;;
+
+    -T) is_target_a_directory=never;;
+
+    --version) echo "$0 $scriptversion"; exit $?;;
+
+    --) shift
+        break;;
+
+    -*) echo "$0: invalid option: $1" >&2
+        exit 1;;
+
+    *)  break;;
+  esac
+  shift
+done
+
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then
+  if test -n "$dst_arg"; then
+    echo "$0: target directory not allowed when installing a directory." >&2
+    exit 1
+  fi
+fi
+
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+  # When -d is used, all remaining arguments are directories to create.
+  # When -t is used, the destination is already specified.
+  # Otherwise, the last argument is the destination.  Remove it from $@.
+  for arg
+  do
+    if test -n "$dst_arg"; then
+      # $@ is not empty: it contains at least $arg.
+      set fnord "$@" "$dst_arg"
+      shift # fnord
+    fi
+    shift # arg
+    dst_arg=$arg
+    # Protect names problematic for 'test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
+  done
+fi
+
+if test $# -eq 0; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call 'install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
+
+if test -z "$dir_arg"; then
+  if test $# -gt 1 || test "$is_target_a_directory" = always; then
+    if test ! -d "$dst_arg"; then
+      echo "$0: $dst_arg: Is not a directory." >&2
+      exit 1
+    fi
+  fi
+fi
+
+if test -z "$dir_arg"; then
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1
+  trap "ret=130; $do_exit" 2
+  trap "ret=141; $do_exit" 13
+  trap "ret=143; $do_exit" 15
+
+  # Set umask so as not to create temps with too-generous modes.
+  # However, 'strip' requires both read and write access to temps.
+  case $mode in
+    # Optimize common cases.
+    *644) cp_umask=133;;
+    *755) cp_umask=22;;
+
+    *[0-7])
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw='% 200'
+      fi
+      cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
+    *)
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw=,u+rw
+      fi
+      cp_umask=$mode$u_plus_rw;;
+  esac
+fi
+
+for src
+do
+  # Protect names problematic for 'test' and other utilities.
+  case $src in
+    -* | [=\(\)!]) src=./$src;;
+  esac
+
+  if test -n "$dir_arg"; then
+    dst=$src
+    dstdir=$dst
+    test -d "$dstdir"
+    dstdir_status=$?
+  else
+
+    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dst_arg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+    dst=$dst_arg
+
+    # If destination is a directory, append the input filename.
+    if test -d "$dst"; then
+      if test "$is_target_a_directory" = never; then
+        echo "$0: $dst_arg: Is a directory" >&2
+        exit 1
+      fi
+      dstdir=$dst
+      dstbase=`basename "$src"`
+      case $dst in
+	*/) dst=$dst$dstbase;;
+	*)  dst=$dst/$dstbase;;
+      esac
+      dstdir_status=0
+    else
+      dstdir=`dirname "$dst"`
+      test -d "$dstdir"
+      dstdir_status=$?
+    fi
+  fi
+
+  case $dstdir in
+    */) dstdirslash=$dstdir;;
+    *)  dstdirslash=$dstdir/;;
+  esac
+
+  obsolete_mkdir_used=false
+
+  if test $dstdir_status != 0; then
+    case $posix_mkdir in
+      '')
+        # Create intermediate dirs using mode 755 as modified by the umask.
+        # This is like FreeBSD 'install' as of 1997-10-28.
+        umask=`umask`
+        case $stripcmd.$umask in
+          # Optimize common cases.
+          *[2367][2367]) mkdir_umask=$umask;;
+          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+          *[0-7])
+            mkdir_umask=`expr $umask + 22 \
+              - $umask % 100 % 40 + $umask % 20 \
+              - $umask % 10 % 4 + $umask % 2
+            `;;
+          *) mkdir_umask=$umask,go-w;;
+        esac
+
+        # With -d, create the new directory with the user-specified mode.
+        # Otherwise, rely on $mkdir_umask.
+        if test -n "$dir_arg"; then
+          mkdir_mode=-m$mode
+        else
+          mkdir_mode=
+        fi
+
+        posix_mkdir=false
+        case $umask in
+          *[123567][0-7][0-7])
+            # POSIX mkdir -p sets u+wx bits regardless of umask, which
+            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+            ;;
+          *)
+            # Note that $RANDOM variable is not portable (e.g. dash);  Use it
+            # here however when possible just to lower collision chance.
+            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+
+            trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+            # Because "mkdir -p" follows existing symlinks and we likely work
+            # directly in world-writeable /tmp, make sure that the '$tmpdir'
+            # directory is successfully created first before we actually test
+            # 'mkdir -p' feature.
+            if (umask $mkdir_umask &&
+                $mkdirprog $mkdir_mode "$tmpdir" &&
+                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
+            then
+              if test -z "$dir_arg" || {
+                   # Check for POSIX incompatibilities with -m.
+                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+                   # other-writable bit of parent directory when it shouldn't.
+                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+                   test_tmpdir="$tmpdir/a"
+                   ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
+                   case $ls_ld_tmpdir in
+                     d????-?r-*) different_mode=700;;
+                     d????-?--*) different_mode=755;;
+                     *) false;;
+                   esac &&
+                   $mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
+                     ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
+                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+                   }
+                 }
+              then posix_mkdir=:
+              fi
+              rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
+            else
+              # Remove any dirs left behind by ancient mkdir implementations.
+              rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
+            fi
+            trap '' 0;;
+        esac;;
+    esac
+
+    if
+      $posix_mkdir && (
+        umask $mkdir_umask &&
+        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+      )
+    then :
+    else
+
+      # The umask is ridiculous, or mkdir does not conform to POSIX,
+      # or it failed possibly due to a race condition.  Create the
+      # directory the slow way, step by step, checking for races as we go.
+
+      case $dstdir in
+        /*) prefix='/';;
+        [-=\(\)!]*) prefix='./';;
+        *)  prefix='';;
+      esac
+
+      oIFS=$IFS
+      IFS=/
+      set -f
+      set fnord $dstdir
+      shift
+      set +f
+      IFS=$oIFS
+
+      prefixes=
+
+      for d
+      do
+        test X"$d" = X && continue
+        # Walk $dstdir one component at a time; mkdir here runs under $mkdir_umask.
+        prefix=$prefix$d
+        if test -d "$prefix"; then
+          prefixes=
+        else
+          if $posix_mkdir; then
+            (umask $mkdir_umask &&
+             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+            # Don't fail if two instances are running concurrently.
+            test -d "$prefix" || exit 1
+          else
+            case $prefix in
+              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+              *) qprefix=$prefix;;
+            esac
+            prefixes="$prefixes '$qprefix'"
+          fi
+        fi
+        prefix=$prefix/
+      done
+
+      if test -n "$prefixes"; then
+        # Don't fail if two instances are running concurrently.
+        (umask $mkdir_umask &&
+         eval "\$doit_exec \$mkdirprog $prefixes") ||
+          test -d "$dstdir" || exit 1
+        obsolete_mkdir_used=true
+      fi
+    fi
+  fi
+
+  if test -n "$dir_arg"; then
+    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
+      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+  else
+
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=${dstdirslash}_inst.$$_
+    rmtmp=${dstdirslash}_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+    # Copy the file name to the temp name.
+    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $cpprog $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+    # If -C, don't bother to copy if it wouldn't change the file.
+    if $copy_on_change &&
+       old=`LC_ALL=C ls -dlL "$dst"     2>/dev/null` &&
+       new=`LC_ALL=C ls -dlL "$dsttmp"  2>/dev/null` &&
+       set -f &&
+       set X $old && old=:$2:$4:$5:$6 &&
+       set X $new && new=:$2:$4:$5:$6 &&
+       set +f &&
+       test "$old" = "$new" &&
+       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+    then
+      rm -f "$dsttmp"
+    else
+      # Rename the file to the real destination.
+      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+      # The rename failed, perhaps because mv can't rename something else
+      # to itself, or perhaps because mv is so ancient that it does not
+      # support -f.
+      {
+        # Now remove or move aside any old file at destination location.
+        # We try this two ways since rm can't unlink itself on some
+        # systems and the destination file might be busy for other
+        # reasons.  In this case, the final cleanup might fail but the new
+        # file should still install successfully.
+        {
+          test ! -f "$dst" ||
+          $doit $rmcmd -f "$dst" 2>/dev/null ||
+          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+          } ||
+          { echo "$0: cannot unlink or rename $dst" >&2
+            (exit 1); exit 1
+          }
+        } &&
+
+        # Now rename the file to the real destination.
+        $doit $mvcmd "$dsttmp" "$dst"
+      }
+    fi || exit 1
+
+    trap '' 0
+  fi
+done
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a
new file mode 100644
index 000000000..f9f3f32c2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to define INCdir, HPLlib, CCFLAGS, ARCHIVER, etc.
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+#
+## Object files ########################################################
+# HPL_au0obj is compiled with $(CCFLAGS), HPL_au1obj with $(CCNOOPT).
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files, so they are declared phony.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp: touched only after the objects are archived.
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; the sources live in the parent directory.
+HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlacpy.c
+HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlatcpy.c
+HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_fprintf.c
+HPL_warn.o             : ../HPL_warn.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_warn.c
+HPL_abort.o            : ../HPL_abort.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_abort.c
+HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaprnt.c
+HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlange.c
+HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT)  ../HPL_dlamch.c
+#
+# ######################################################################
+# Removes the objects and the lib.grd stamp; $(HPLlib) is not touched.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to define INCdir, HPLlib, CCFLAGS, ARCHIVER, etc.
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+#
+## Object files ########################################################
+# Objects archived into $(HPLlib) by the lib.grd rule below.
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files, so they are declared phony.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp: touched only after the objects are archived.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; the sources live in the parent directory.
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+# Removes the objects and the lib.grd stamp; $(HPLlib) is not touched.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to define INCdir, HPLlib, CCFLAGS, ARCHIVER, etc.
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_pmisc.h   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects archived into $(HPLlib) by the lib.grd rule below.
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files, so they are declared phony.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp: touched only after the objects are archived.
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; the sources live in the parent directory.
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1ring.c
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1rinM.c
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2ring.c
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2rinM.c
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blong.c
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blonM.c
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_packL.c
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_copyL.c
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_binit.c
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bcast.c
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bwait.c
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_send.c
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_recv.c
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sdrv.c
+#
+# ######################################################################
+# Removes the objects and the lib.grd stamp; $(HPLlib) is not touched.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv
new file mode 100644
index 000000000..2a8722559
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv
@@ -0,0 +1,83 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = $(addprefix $(INCdir)/, \
+   hpl_misc.h hpl_blas.h hpl_auxil.h \
+   hpl_gesv.h)
+#
+## Object files ########################################################
+# Objects that the lib.grd rule hands to the archiver.
+HPL_gesobj       = $(addsuffix .o, \
+   HPL_dgesv HPL_ipid)
+#
+## Targets #############################################################
+# all and lib only forward to the lib.grd stamp, so they are phony.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the stamp.
+lib.grd : $(HPL_gesobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: recompile when its source or $(INCdep) changes.
+HPL_dgesv.o : ../HPL_dgesv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ipid.o : ../HPL_ipid.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# clean is a command, not a file; it deletes the *.o and *.grd files here.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = $(addprefix $(INCdir)/, \
+   hpl_misc.h hpl_pmisc.h hpl_grid.h)
+#
+## Object files ########################################################
+# Objects that the lib.grd rule hands to the archiver.
+HPL_griobj       = $(addsuffix .o, \
+   HPL_grid_init HPL_pnum HPL_grid_info \
+   HPL_grid_exit HPL_broadcast HPL_reduce \
+   HPL_all_reduce HPL_barrier HPL_min \
+   HPL_max HPL_sum)
+#
+## Targets #############################################################
+# all and lib only forward to the lib.grd stamp, so they are phony.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the stamp.
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: recompile when its source or $(INCdep) changes.
+HPL_grid_init.o : ../HPL_grid_init.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pnum.o : ../HPL_pnum.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_grid_info.o : ../HPL_grid_info.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_broadcast.o : ../HPL_broadcast.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_reduce.o : ../HPL_reduce.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_barrier.o : ../HPL_barrier.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_min.o : ../HPL_min.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_max.o : ../HPL_max.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_sum.o : ../HPL_sum.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# clean is a command, not a file; it deletes the *.o and *.grd files here.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = $(addprefix $(INCdir)/, \
+   hpl_misc.h hpl_blas.h hpl_auxil.h \
+   hpl_matgen.h)
+#
+## Object files ########################################################
+# Objects that the lib.grd rule hands to the archiver.
+HPL_matobj       = $(addsuffix .o, \
+   HPL_dmatgen HPL_ladd HPL_lmul \
+   HPL_xjumpm HPL_jumpit HPL_rand \
+   HPL_setran)
+#
+## Targets #############################################################
+# all and lib only forward to the lib.grd stamp, so they are phony.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the stamp.
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: recompile when its source or $(INCdep) changes.
+HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ladd.o : ../HPL_ladd.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_lmul.o : ../HPL_lmul.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_jumpit.o : ../HPL_jumpit.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rand.o : ../HPL_rand.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_setran.o : ../HPL_setran.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# clean is a command, not a file; it deletes the *.o and *.grd files here.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = $(addprefix $(INCdir)/, \
+   hpl_misc.h hpl_blas.h hpl_auxil.h \
+   hpl_pmisc.h hpl_grid.h hpl_comm.h \
+   hpl_pauxil.h hpl_panel.h hpl_pfact.h \
+   hpl_pgesv.h)
+#
+## Object files ########################################################
+# Objects that the lib.grd rule hands to the archiver.
+HPL_panobj       = $(addsuffix .o, \
+   HPL_pdpanel_new HPL_pdpanel_init HPL_pdpanel_disp \
+   HPL_pdpanel_free)
+#
+## Targets #############################################################
+# all and lib only forward to the lib.grd stamp, so they are phony.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the stamp.
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: recompile when its source or $(INCdep) changes.
+HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# clean is a command, not a file; it deletes the *.o and *.grd files here.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built here lists as a prerequisite.
+INCdep           = $(addprefix $(INCdir)/, \
+   hpl_misc.h hpl_blas.h hpl_auxil.h \
+   hpl_pmisc.h hpl_grid.h hpl_pauxil.h)
+#
+## Object files ########################################################
+# Objects that the lib.grd rule hands to the archiver.
+HPL_pauobj       = $(addsuffix .o, \
+   HPL_indxg2l HPL_indxg2lp HPL_indxg2p \
+   HPL_indxl2g HPL_infog2l HPL_numroc \
+   HPL_numrocI HPL_dlaswp00N HPL_dlaswp10N \
+   HPL_dlaswp01N HPL_dlaswp01T HPL_dlaswp02N \
+   HPL_dlaswp03N HPL_dlaswp03T HPL_dlaswp04N \
+   HPL_dlaswp04T HPL_dlaswp05N HPL_dlaswp05T \
+   HPL_dlaswp06N HPL_dlaswp06T HPL_pwarn \
+   HPL_pabort HPL_pdlaprnt HPL_pdlamch \
+   HPL_pdlange)
+#
+## Targets #############################################################
+# all and lib only forward to the lib.grd stamp, so they are phony.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the stamp.
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: recompile when its source or $(INCdep) changes.
+HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_infog2l.o : ../HPL_infog2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numroc.o : ../HPL_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numrocI.o : ../HPL_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pwarn.o : ../HPL_pwarn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pabort.o : ../HPL_pabort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlange.o : ../HPL_pdlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# clean is a command, not a file; it deletes the *.o and *.grd files here.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably defines INCdir, CC, CCFLAGS, HPLlib, etc. (confirm).
+# ######################################################################
+# INCdep: headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+# HPL_pfaobj: the objects that the lib.grd rule archives into $(HPLlib).
+HPL_pfaobj       = \
+   HPL_dlocmax.o          HPL_dlocswpN.o         HPL_dlocswpT.o         \
+   HPL_pdmxswp.o          HPL_pdpancrN.o         HPL_pdpancrT.o         \
+   HPL_pdpanllN.o         HPL_pdpanllT.o         HPL_pdpanrlN.o         \
+   HPL_pdpanrlT.o         HPL_pdrpanllN.o        HPL_pdrpanllT.o        \
+   HPL_pdrpancrN.o        HPL_pdrpancrT.o        HPL_pdrpanrlN.o        \
+   HPL_pdrpanrlT.o        HPL_pdfact.o
+#
+## Targets #############################################################
+# Chain: all -> lib -> lib.grd.
+all              : lib 
+#
+lib              : lib.grd
+# lib.grd is touched last, recording when the objects were last archived.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the same-named .c file in the parent dir.
+HPL_dlocmax.o          : ../HPL_dlocmax.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocmax.c
+HPL_dlocswpN.o         : ../HPL_dlocswpN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpN.c
+HPL_dlocswpT.o         : ../HPL_dlocswpT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpT.c
+HPL_pdmxswp.o          : ../HPL_pdmxswp.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmxswp.c
+HPL_pdpancrN.o         : ../HPL_pdpancrN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrN.c
+HPL_pdpancrT.o         : ../HPL_pdpancrT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrT.c
+HPL_pdpanllN.o         : ../HPL_pdpanllN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllN.c
+HPL_pdpanllT.o         : ../HPL_pdpanllT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllT.c
+HPL_pdpanrlN.o         : ../HPL_pdpanrlN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlN.c
+HPL_pdpanrlT.o         : ../HPL_pdpanrlT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlT.c
+HPL_pdrpanllN.o        : ../HPL_pdrpanllN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllN.c
+HPL_pdrpanllT.o        : ../HPL_pdrpanllT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllT.c
+HPL_pdrpancrN.o        : ../HPL_pdrpancrN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrN.c
+HPL_pdrpancrT.o        : ../HPL_pdrpancrT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrT.c
+HPL_pdrpanrlN.o        : ../HPL_pdrpanrlN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlN.c
+HPL_pdrpanrlT.o        : ../HPL_pdrpanrlT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlT.c
+HPL_pdfact.o           : ../HPL_pdfact.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdfact.c
+#
+# ######################################################################
+# clean: runs $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably defines INCdir, CC, CCFLAGS, HPLlib, etc. (confirm).
+# ######################################################################
+# INCdep: headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# HPL_pgeobj: the objects that the lib.grd rule archives into $(HPLlib).
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+# Chain: all -> lib -> lib.grd.
+all     : lib 
+#
+lib     : lib.grd
+# lib.grd is touched last, recording when the objects were last archived.
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the same-named .c file in the parent dir.
+HPL_pipid.o            : ../HPL_pipid.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pipid.c
+HPL_plindx0.o          : ../HPL_plindx0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx0.c
+HPL_pdlaswp00N.o       : ../HPL_pdlaswp00N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00N.c
+HPL_pdlaswp00T.o       : ../HPL_pdlaswp00T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00T.c
+HPL_perm.o             : ../HPL_perm.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_perm.c
+HPL_logsort.o          : ../HPL_logsort.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_logsort.c
+HPL_plindx10.o         : ../HPL_plindx10.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx10.c
+HPL_plindx1.o          : ../HPL_plindx1.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx1.c
+HPL_spreadN.o          : ../HPL_spreadN.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadN.c
+HPL_spreadT.o          : ../HPL_spreadT.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadT.c
+HPL_rollN.o            : ../HPL_rollN.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollN.c
+HPL_rollT.o            : ../HPL_rollT.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollT.c
+HPL_equil.o            : ../HPL_equil.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_equil.c
+HPL_pdlaswp01N.o       : ../HPL_pdlaswp01N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01N.c
+HPL_pdlaswp01T.o       : ../HPL_pdlaswp01T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01T.c
+HPL_pdupdateNN.o       : ../HPL_pdupdateNN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNN.c
+HPL_pdupdateNT.o       : ../HPL_pdupdateNT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNT.c
+HPL_pdupdateTN.o       : ../HPL_pdupdateTN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTN.c
+HPL_pdupdateTT.o       : ../HPL_pdupdateTT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTT.c
+HPL_pdtrsv.o           : ../HPL_pdtrsv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtrsv.c
+HPL_pdgesv0.o          : ../HPL_pdgesv0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv0.c
+HPL_pdgesvK1.o         : ../HPL_pdgesvK1.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK1.c
+HPL_pdgesvK2.o         : ../HPL_pdgesvK2.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK2.c
+HPL_pdgesv.o           : ../HPL_pdgesv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv.c
+#
+# ######################################################################
+# clean: runs $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably defines INCdir, CC, CCFLAGS, HPLlib, etc. (confirm).
+# ######################################################################
+# INCdep: headers listed as prerequisites of the object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+# HPL_pmaobj: the object that the lib.grd rule archives into $(HPLlib).
+HPL_pmaobj       = \
+   HPL_pdmatgen.o
+#
+## Targets #############################################################
+# Chain: all -> lib -> lib.grd.
+all     : lib 
+#
+lib     : lib.grd
+# lib.grd is touched last, recording when the objects were last archived.
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# The object is compiled from the same-named .c file in the parent dir.
+HPL_pdmatgen.o         : ../HPL_pdmatgen.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmatgen.c
+#
+# ######################################################################
+# clean: runs $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably defines INCdir, BINdir, CC, LINKER, etc. (confirm).
+# ######################################################################
+# INCdep: headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h   \
+   $(INCdir)/hpl_gesv.h   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h  \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+   $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+# xhpl: output path handed to the linker's -o in the dexe.grd rule.
+xhpl             = $(BINdir)/xhpl
+#
+## Object files ########################################################
+# HPL_pteobj: the objects linked into $(xhpl) by the dexe.grd rule.
+HPL_pteobj       = \
+   HPL_pddriver.o         HPL_pdinfo.o           HPL_pdtest.o
+#
+## Targets #############################################################
+# Chain: all -> dexe -> dexe.grd.
+all     : dexe
+#
+dexe    : dexe.grd
+# Runs $(CP) to place ../HPL.dat in $(BINdir) when the source is newer.
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Runs $(LINKER) for $(xhpl), re-invokes make for HPL.dat, then touches.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Each object is compiled from the same-named .c file in the parent dir.
+HPL_pddriver.o         : ../HPL_pddriver.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pddriver.c
+HPL_pdinfo.o           : ../HPL_pdinfo.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdinfo.c
+HPL_pdtest.o           : ../HPL_pdtest.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtest.c
+#
+# ######################################################################
+# clean: runs $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably defines INCdir, CC, CCFLAGS, HPLlib, etc. (confirm).
+# ######################################################################
+# INCdep: headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+# HPL_ptiobj: the objects that the lib.grd rule archives into $(HPLlib).
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+# Chain: all -> lib -> lib.grd.
+all     : lib 
+#
+lib     : lib.grd
+# lib.grd is touched last, recording when the objects were last archived.
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the same-named .c file in the parent dir.
+HPL_ptimer.o           : ../HPL_ptimer.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer.c
+HPL_ptimer_cputime.o   : ../HPL_ptimer_cputime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_cputime.c
+HPL_ptimer_walltime.o  : ../HPL_ptimer_walltime.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_walltime.c
+#
+# ######################################################################
+# clean: runs $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test
new file mode 100644
index 000000000..514d445b8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test
@@ -0,0 +1,93 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_gesv.h  $(INCdir)/hpl_matgen.h $(INCdir)/hpl_timer.h \
+   $(INCdir)/hpl_test.h
+#  INCdep lists the headers every object file below depends on.
+## Executable names ####################################################
+#  Path of the linked test driver.
+xlinpack         = $(BINdir)/xlinpack
+#
+## Object files ########################################################
+#
+HPL_tesobj       = \
+   HPL_ddriver.o          HPL_dinfo.o            HPL_dtest.o
+#
+## Targets #############################################################
+#  all -> dexe -> dexe.grd; dexe.grd is touched after a successful link.
+all     : dexe
+#
+dexe    : dexe.grd
+#  Copy the LINPACK.dat input file into $(BINdir), next to the executable.
+$(BINdir)/LINPACK.dat : ../LINPACK.dat
+	( $(CP) ../LINPACK.dat $(BINdir) )
+#  Link line uses $(HPL_LIBS); Make.inc is expected to define it -- confirm.
+dexe.grd: $(HPL_tesobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/LINPACK.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+#  Each object is compiled from the like-named source in the parent dir.
+HPL_ddriver.o          : ../HPL_ddriver.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ddriver.c
+HPL_dinfo.o            : ../HPL_dinfo.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dinfo.c
+HPL_dtest.o            : ../HPL_dtest.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtest.c
+#
+# ######################################################################
+#  Remove object files and .grd stamp files from the working directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = $(INCdir)/hpl_pmisc.h \
+                   $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+#  Objects that the lib.grd rule archives into $(HPLlib).
+HPL_timobj       = HPL_timer.o HPL_timer_cputime.o \
+                   HPL_timer_walltime.o
+#
+## Targets #############################################################
+#  all -> lib -> lib.grd; lib.grd is touched once the archive is updated.
+all     : lib
+#
+lib     : lib.grd
+#
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $^
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#  Each object is compiled from the like-named source in the parent dir.
+HPL_timer.o            : ../HPL_timer.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_timer_cputime.o    : ../HPL_timer_cputime.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_timer_walltime.o   : ../HPL_timer_walltime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+#  Remove object files and .grd stamp files from the working directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units
new file mode 100644
index 000000000..1c447f204
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units
@@ -0,0 +1,112 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_pauxil.h \
+   $(INCdir)/hpl_units.h
+#  INCdep lists the headers every object file below depends on.
+## Executable names ####################################################
+#  Path of the linked unit-test driver.
+xunits           = $(BINdir)/xunits
+#
+## Object files ########################################################
+#
+HPL_uniobj       = \
+   HPL_unit_driver.o      HPL_unit_info.o        HPL_unit_indxg2l.o     \
+   HPL_chek_indxg2l.o     HPL_unit_indxg2p.o     HPL_chek_indxg2p.o     \
+   HPL_unit_indxl2g.o     HPL_chek_indxl2g.o     HPL_unit_numroc.o      \
+   HPL_unit_numrocI.o     HPL_chek_numrocI.o
+#
+## Targets #############################################################
+#  all -> dexe -> dexe.grd; dexe.grd is touched after a successful link.
+all              : dexe
+#
+dexe             : dexe.grd
+#  Copy the UNITS.dat input file into $(BINdir), next to the executable.
+$(BINdir)/UNITS.dat : ../UNITS.dat
+	( $(CP) ../UNITS.dat $(BINdir) )
+#  Link line uses $(HPL_LIBS); Make.inc is expected to define it -- confirm.
+dexe.grd         : $(HPL_uniobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/UNITS.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+#  Each object is compiled from the like-named source in the parent dir.
+HPL_unit_driver.o      : ../HPL_unit_driver.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_driver.c
+HPL_unit_info.o        : ../HPL_unit_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_info.c
+HPL_unit_indxg2l.o     : ../HPL_unit_indxg2l.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_indxg2l.c
+HPL_chek_indxg2l.o     : ../HPL_chek_indxg2l.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_chek_indxg2l.c
+HPL_unit_indxg2p.o     : ../HPL_unit_indxg2p.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_indxg2p.c
+HPL_chek_indxg2p.o     : ../HPL_chek_indxg2p.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_chek_indxg2p.c
+HPL_unit_indxl2g.o     : ../HPL_unit_indxl2g.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_indxl2g.c
+HPL_chek_indxl2g.o     : ../HPL_chek_indxl2g.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_chek_indxl2g.c
+HPL_unit_numroc.o      : ../HPL_unit_numroc.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_numroc.c
+HPL_unit_numrocI.o     : ../HPL_unit_numrocI.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_unit_numrocI.c
+HPL_chek_numrocI.o     : ../HPL_chek_numrocI.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_chek_numrocI.c
+#
+# ######################################################################
+#  Remove object files and .grd stamp files from the working directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3
new file mode 100644
index 000000000..c6a2c7a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3
@@ -0,0 +1,52 @@
+.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_abort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_abort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_abort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_abort( __LINE__, __FILE__, "Halt.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3
new file mode 100644
index 000000000..70ec6c4ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3
@@ -0,0 +1,49 @@
+.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_all_reduce \- All reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_all_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_all_reduce\fR
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/global output) void *
+On entry,  BUFFER  points to  the  buffer to be combined.  On
+exit, this array contains the combined data and  is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3
new file mode 100644
index 000000000..ffee7f291
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3
@@ -0,0 +1,27 @@
+.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_barrier \- Barrier operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_barrier(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_barrier\fR
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3
new file mode 100644
index 000000000..54eb54b25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3
@@ -0,0 +1,31 @@
+.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bcast \- Perform the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bcast(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bcast\fR
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+IFLAG   (output)                int *
+On exit,  IFLAG  indicates  whether  or not the broadcast has
+occurred.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3
new file mode 100644
index 000000000..083776ab6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3
@@ -0,0 +1,23 @@
+.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_binit \- Initialize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_binit(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_binit\fR
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3
new file mode 100644
index 000000000..317d374cf
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3
@@ -0,0 +1,49 @@
+.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_broadcast(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_broadcast\fR
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be broadcast. On
+exit, this array contains the broadcast data and is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the source process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3
new file mode 100644
index 000000000..0dac6fe58
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3
@@ -0,0 +1,24 @@
+.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bwait \- Finalize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bwait(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bwait\fR
+waits  for  the  row  broadcast  of  the current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3
new file mode 100644
index 000000000..d60619a06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3
@@ -0,0 +1,28 @@
+.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_copyL \- Copy the current panel into a contiguous workspace.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_copyL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_copyL\fR
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3
new file mode 100644
index 000000000..50bd0b0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3
@@ -0,0 +1,76 @@
+.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_daxpy \- y := y + alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_daxpy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_daxpy\fR
+scales the vector x by alpha and adds it to y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the scaled entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3
new file mode 100644
index 000000000..f2759ced9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3
@@ -0,0 +1,69 @@
+.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dcopy \- y := x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dcopy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dcopy\fR
+copies the vector x into the vector y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dcopy( 3, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3
new file mode 100644
index 000000000..57c69f78c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3
@@ -0,0 +1,160 @@
+.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSA\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSB\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&C\fR,
+\fB\&const int\fR
+\fI\&LDC\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemm\fR
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANSA  (local input)           const enum HPL_TRANS
+On entry, TRANSA  specifies the form of  op(A)  to be used in
+the matrix-matrix operation as follows:                      
+   TRANSA==HplNoTrans    : op( A ) = A,                     
+   TRANSA==HplTrans      : op( A ) = A^T,                   
+   TRANSA==HplConjTrans  : op( A ) = A^T.                   
+.TP 8
+TRANSB  (local input)           const enum HPL_TRANS
+On entry, TRANSB  specifies the form of  op(B)  to be used in
+the matrix-matrix operation as follows:                      
+   TRANSB==HplNoTrans    : op( B ) = B,                     
+   TRANSB==HplTrans      : op( B ) = B^T,                   
+   TRANSB==HplConjTrans  : op( B ) = B^T.                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the  number  of rows  of the  matrix
+op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the number  of columns of the matrix
+op(B)  and  the number of columns of the matrix  C. N must be
+at least zero.
+.TP 8
+K       (local input)           const int
+On entry,  K  specifies  the  number of columns of the matrix
+op(A) and the number of rows of the matrix op(B).  K  must be
+at least  zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero  then the elements of the matrices A and B
+need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  is an array of dimension (LDA,ka),  where ka is
+k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+the array  A must contain the matrix A, otherwise the leading
+k  by  m  part of the array  A  must  contain the  matrix  A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA  specifies the first dimension of A as declared
+in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+LDA must be at least max(1,m), otherwise LDA must be at least
+max(1,k).
+.TP 8
+B       (local input)           const double *
+On entry, B is an array of dimension (LDB,kb),  where  kb  is
+n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+the array  B must contain the matrix B, otherwise the leading
+n  by  k  part of the array  B  must  contain  the matrix  B.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB  specifies the first dimension of B as declared
+in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+LDB must be at least max(1,k), otherwise LDB must be at least
+max(1,n).
+.TP 8
+BETA    (local input)           const double
+On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+supplied  as  zero  then  the  elements of the matrix C  need
+not be set on input.
+.TP 8
+C       (local input/output)    double *
+On entry,  C  is an array of dimension (LDC,n). Before entry,
+the  leading m by n part  of  the  array  C  must contain the
+matrix C,  except when beta is zero, in which case C need not
+be set on entry. On exit, the array  C  is overwritten by the
+m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+.TP 8
+LDC     (local input)           const int
+On entry, LDC  specifies the first dimension of C as declared
+in  the   calling  (sub)  program.   LDC  must  be  at  least
+max(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2], c[2*2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+.br
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+.br
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+.br
+   printf("  [%f,%f]\en", c[0], c[2]);
+.br
+   printf("c=[%f,%f]\en", c[1], c[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dtrsm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3
new file mode 100644
index 000000000..f85db57fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3
@@ -0,0 +1,128 @@
+.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemv \- y := beta * y + alpha * op(A) * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemv\fR
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies the  operation to be performed as
+follows:   
+   TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+   TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  A and X  need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+BETA    (local input)           const double
+On entry, BETA  specifies the scalar beta.    When  BETA   is
+supplied as zero then  Y  need not be set on input.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+Before entry with BETA non-zero, the incremented array Y must
+contain the vector  y.  On exit,  Y  is  overwritten  by  the
+updated vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+.br
+              a, 2, x, 1, -1.0, y, 1 );
+.br
+   printf("y=[%f,%f]\en", y[0], y[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3
new file mode 100644
index 000000000..da9ddf495
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3
@@ -0,0 +1,108 @@
+.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dger \- A := alpha * x * y^T + A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dger(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dger\fR
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  X and Y  need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input)           double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.TP 8
+A       (local input/output)    double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients. On exit, A is
+overwritten by the updated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+.br
+             a, 2 );
+.br
+   printf("y=[%f,%f]\en", y[0], y[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemv \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3
new file mode 100644
index 000000000..8da8b1316
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3
@@ -0,0 +1,72 @@
+.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlacpy \- B := A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlacpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlacpy\fR
+copies an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of rows of the arrays A and
+B. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies  the number of columns of the arrays A
+and B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlatcpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3
new file mode 100644
index 000000000..9bf41b68a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3
@@ -0,0 +1,76 @@
+.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlamch(\fR
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlamch\fR
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+CMACH   (local input)           const HPL_T_MACH
+Specifies the value to be returned by HPL_dlamch             
+   = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+   = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+   = HPL_MACH_BASE,  HPL_dlamch := base                      
+   = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+   = HPL_MACH_MLEN,  HPL_dlamch := t                         
+   = HPL_MACH_RND,   HPL_dlamch := rnd                       
+   = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+   = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+   = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+   = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double eps;
+.br
+   eps = HPL_dlamch( HPL_MACH_EPS );
+.br
+   printf("eps=%18.8e\en", eps);
+.br
+   exit(0); return(0);
+.br
+}
+.SH REFERENCES
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3
new file mode 100644
index 000000000..ffbab554f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3
@@ -0,0 +1,73 @@
+.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlange(\fR
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+NORM    (local input)           const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an  array of dimension  (LDA,N), that
+contains the matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], norm;
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+.br
+   printf("norm=%f\en", norm);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3
new file mode 100644
index 000000000..8fdd89b8c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3
@@ -0,0 +1,70 @@
+.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaprnt \- Print the matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaprnt(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaprnt\fR
+prints to standard error an M-by-N matrix A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of rows of A. M must be at
+least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the number of columns of A. N must be
+at least zero.
+.TP 8
+A       (local input)           double *
+On entry, A  points to an array of dimension (LDA,N).
+.TP 8
+IA      (local input)           const int
+On entry, IA specifies the starting row index to be printed.
+.TP 8
+JA      (local input)           const int
+On entry,  JA  specifies  the  starting  column index  to be
+printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+CMATNM  (local input)           const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3
new file mode 100644
index 000000000..efe3580b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3
@@ -0,0 +1,60 @@
+.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp00N \- performs a series of row interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp00N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp00N\fR
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M specifies the number of rows of the array A to be
+interchanged. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies  the number of columns of the array A.
+N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an array of dimension (LDA,N) to which
+the row interchanges will be  applied.  On exit, the permuted
+matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry,  IPIV  is  an  array of size  M  that  contains the
+pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+implies that local rows k and l are to be interchanged.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3
new file mode 100644
index 000000000..662913e54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3
@@ -0,0 +1,88 @@
+.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01N \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01N\fR
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the positions indicated by positive values of LINDXAU.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3
new file mode 100644
index 000000000..738507755
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3
@@ -0,0 +1,89 @@
+.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01T \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01T\fR
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the  positions indicated by positive values of LINDXAU.  The
+rows of A are stored as columns in U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3
new file mode 100644
index 000000000..600449c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3
@@ -0,0 +1,85 @@
+.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp02N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&W0\fR,
+\fB\&double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp02N\fR
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+copied into W. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+copied into W. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be copied into W.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+W0      (local input/output)    double *
+On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local output)          double *
+On entry, W  is an array of size (LDW,M). On exit, W contains
+the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+in W(:,i).
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied into W.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M  that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the rows of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3
new file mode 100644
index 000000000..1ba0b3208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03N \- copy rows of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03N\fR
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N).  Columns
+of W are copied as rows within this array U at  the positions
+specified in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3
new file mode 100644
index 000000000..d8bd11ec1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03T \- copy columns of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03T\fR
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M).  Columns
+of W are copied within the array U at the positions specified
+in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3
new file mode 100644
index 000000000..9f12d79ab
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3
@@ -0,0 +1,106 @@
+.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04N(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04N\fR
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of rows of U that should be
+copied into  A  and replaced by columns of  W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies the number of columns of W that should
+be copied into rows of U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points to  an array of dimension (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M1).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the row W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes A into which rows of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3
new file mode 100644
index 000000000..448334148
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3
@@ -0,0 +1,107 @@
+.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04T(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04T\fR
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of columns of U that should
+be copied into A and replaced by columns of W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies  the number of columns of  W that will
+be copied into U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the length of the columns of  U  that
+will be copied into rows of A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes A into which columns of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the  local column indexes of  U  that should be copied into A
+and replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3
new file mode 100644
index 000000000..371dd0b92
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05N \- copy rows of U into A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05N\fR
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of U that should be
+copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points to an array of dimension  (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local row indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3
new file mode 100644
index 000000000..5d70a7a16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05T \- copy columns of U into rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05T\fR
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the columns of U that will
+be copied into rows of A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local column indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3
new file mode 100644
index 000000000..7fa19d41a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06N \- swap rows of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06N\fR
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with rows of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with rows of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,N).  This
+array contains the rows of U that are to be swapped with rows
+of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3
new file mode 100644
index 000000000..41fa3d6ee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06T \- swap columns of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06T\fR
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with columns of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with columns of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns of  U  that are to be swapped with
+rows of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3
new file mode 100644
index 000000000..23465895c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3
@@ -0,0 +1,59 @@
+.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp10N \- performs a series of column interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp10N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp10N\fR
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the array A. M
+must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of the array A. N
+must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an  array of  dimension (LDA,N).  This
+array contains the columns onto which the interchanges should
+be applied. On exit, A contains the permuted matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry, IPIV is an array of dimension N that contains the pivoting information for the column interchanges.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3
new file mode 100644
index 000000000..dc940e321
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3
@@ -0,0 +1,70 @@
+.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlatcpy \- B := A^T
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlatcpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlatcpy\fR
+copies the transpose of an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of  rows of the array B and
+the number of columns of A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of  rows of the array A and
+the number of columns of B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,M).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,N).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with the transpose of A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlacpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3
new file mode 100644
index 000000000..f68f887c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3
@@ -0,0 +1,69 @@
+.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocmax \- finds the maximum entry in matrix column.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocmax(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocmax\fR
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of rows of the column
+of A on which we operate.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is  a workarray of size at least 4.  On exit,
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.
+.SH SEE ALSO
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3
new file mode 100644
index 000000000..367e37e36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpN \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpN\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3
new file mode 100644
index 000000000..f864de535
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpT \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpT\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3
new file mode 100644
index 000000000..c287fb0fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3
@@ -0,0 +1,55 @@
+.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dmatgen \- random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dmatgen(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dmatgen\fR
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+M       (input)                 const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (input)                 const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (output)                double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+this  array  contains   the   coefficients  of  the  randomly
+generated matrix.
+.TP 8
+LDA     (input)                 const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+ISEED   (input)                 const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3
new file mode 100644
index 000000000..8f42a10f5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3
@@ -0,0 +1,62 @@
+.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dscal \- x = alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dscal(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dscal\fR
+scales the vector x by alpha.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are  scaled
+by the scalar alpha.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   HPL_dscal( 3, 2.0, x, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3
new file mode 100644
index 000000000..a398f795a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3
@@ -0,0 +1,73 @@
+.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dswap \- y <-> x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dswap(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dswap\fR
+swaps the vectors x and y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are updated
+with the entries of the incremented array Y.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dswap( 3, x, 1, y, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3
new file mode 100644
index 000000000..ad099eb83
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3
@@ -0,0 +1,152 @@
+.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsm \- B := A^{-1} * B  or  B := B * A^{-1}.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsm\fR
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+SIDE    (local input)           const enum HPL_SIDE
+On entry, SIDE  specifies  whether  op(A) appears on the left
+or right of X as follows:
+   SIDE==HplLeft    op( A ) * X = alpha * B,
+   SIDE==HplRight   X * op( A ) = alpha * B.
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry, TRANS  specifies  the form of  op(A)  to be used in
+the matrix-matrix operation as follows:
+   TRANS==HplNoTrans    : op( A ) = A,
+   TRANS==HplTrans      : op( A ) = A^T,
+   TRANS==HplConjTrans  : op( A ) = A^T.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the  matrix B.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix B.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero then the elements of the matrix B need not
+be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+k by k upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower on  entry,
+the  leading k by k lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note that when DIAG==HplUnit, the diagonal elements of A  are
+not referenced  either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+.TP 8
+B       (local input/output)    double *
+On entry,  B  points  to an array of size equal to or greater
+than LDB * n.  Before entry, the leading  m by n  part of the
+array B must contain the matrix B, except when alpha is zero,
+in which case B need not be set on entry.  On exit, the array
+B is overwritten by the m by n solution matrix.
+.TP 8
+LDB     (local input)           const int
+On entry,  LDB  specifies  the  leading  dimension  of  B  as
+declared  in  the  calling  (sub) program.  LDB  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+.br
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+.br
+              a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3
new file mode 100644
index 000000000..5df37c78b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3
@@ -0,0 +1,121 @@
+.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsv \- x := A^{-1} x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsv\fR
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies  the equations  to  be  solved as
+follows:
+   TRANS==HplNoTrans     A   * x = b,
+   TRANS==HplTrans       A^T * x = b.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the order of the matrix A. N must be at
+least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+n by n upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower  on entry,
+the  leading n by n lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note that when DIAG==HplUnit, the diagonal elements of A  are
+not referenced  either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,n).
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+Before entry,  the  incremented array  X  must contain  the n
+element right-hand side vector b. On exit,  X  is overwritten
+with the solution vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   x[0] = 2.0; x[1] = 1.0;
+.br
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+.br
+              HplNonUnit, 2, a, 2, x, 1 );
+.br
+   printf("x=[%f,%f]\en", x[0], x[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dgemv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3
new file mode 100644
index 000000000..817780e44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3
@@ -0,0 +1,91 @@
+.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_equil \- Equilibrate U and forward the column panel L.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_equil(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_equil\fR
+equilibrates  the  local  pieces  of U, so that on exit to
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be equilibrated) information.
+.TP 8
+TRANS   (global input)          const enum HPL_TRANS
+On entry, TRANS specifies whether  U  is stored in transposed
+or non-transposed form.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of rows or columns of  U. N
+must be at least 0.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+non-transposed form, and MAX(1,N) otherwise.
+.TP 8
+IPLEN   (global input)          int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry, IPMAPM1  is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0..NPROW), IPMAPM1[IPMAP[i]] = i.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension NPROW+1.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3
new file mode 100644
index 000000000..8a81c0bfb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3
@@ -0,0 +1,44 @@
+.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_fprintf \- fprintf + fflush wrapper.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_fprintf(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_fprintf\fR
+is a wrapper around fprintf flushing the output stream.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_fprintf( stdout, "Hello World.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3
new file mode 100644
index 000000000..dab8067e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3
@@ -0,0 +1,25 @@
+.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_exit \- Exit process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_exit(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_exit\fR
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid to be released.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3
new file mode 100644
index 000000000..53c6a214b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3
@@ -0,0 +1,52 @@
+.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_info \- Retrieve grid information.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_info(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&int *\fR
+\fI\&NPROW\fR,
+\fB\&int *\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&MYROW\fR,
+\fB\&int *\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_info\fR
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NPROW   (global output)         int *
+On exit,   NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global output)         int *
+On exit,   NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+MYROW   (global output)         int *
+On exit,  MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (global output)         int *
+On exit,  MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3
new file mode 100644
index 000000000..7792a522d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3
@@ -0,0 +1,55 @@
+.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_init \- Create a process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_init(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_init\fR
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+On entry,  COMM  is  the  MPI  communicator  identifying  the
+initial  collection  of  processes out of which  the  grid is
+formed.
+.TP 8
+ORDER   (global input)          const HPL_T_ORDER
+On entry, ORDER specifies how the processes should be ordered
+in the grid as follows:
+   ORDER = HPL_ROW_MAJOR    row-major    ordering;
+   ORDER = HPL_COLUMN_MAJOR column-major ordering;
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid to be created. NPROW must be at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid to be created. NPCOL must be at least one.
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information to be initialized.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3
new file mode 100644
index 000000000..c00292a02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3
@@ -0,0 +1,59 @@
+.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|).
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_idamax(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_idamax\fR
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   int    imax;
+.br
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+.br
+   imax = HPL_idamax( 3, x, 1 );
+.br
+   printf("imax=%d\en", imax);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3
new file mode 100644
index 000000000..32c4d9e07
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3
@@ -0,0 +1,53 @@
+.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2l \- Map a global index into a local one.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2l(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2l\fR
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3
new file mode 100644
index 000000000..ca2004031
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3
@@ -0,0 +1,66 @@
+.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2lp \- Map a global index into a local one and a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_indxg2lp(\fR
+\fB\&int *\fR
+\fI\&IL\fR,
+\fB\&int *\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2lp\fR
+computes the local index of a matrix entry pointed to by
+the global  index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+.SH ARGUMENTS
+.TP 8
+IL      (output)                int *
+On exit, IL specifies the local index corresponding to IG. IL
+is at least zero.
+.TP 8
+PROC    (output)                int *
+On exit,  PROC  is the  coordinate of the process  owning the
+entry specified by the global index IG. PROC is at least zero
+and less than NPROCS.
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3
new file mode 100644
index 000000000..5e0273feb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3
@@ -0,0 +1,52 @@
+.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2p \- Map a global index into a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2p(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2p\fR
+computes the process coordinate  which possesses the entry
+of a matrix specified by a global index IG.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3
new file mode 100644
index 000000000..ba6da53a7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3
@@ -0,0 +1,59 @@
+.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxl2g \- Map an index-process pair into a global index.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxl2g(\fR
+\fB\&const int\fR
+\fI\&IL\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxl2g\fR
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+.SH ARGUMENTS
+.TP 8
+IL      (input)                 const int
+On entry, IL specifies the local  index of the matrix  entry.
+IL must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC  specifies the coordinate of the process whose
+local array row or column is to be determined. PROC  must  be
+at least zero and strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3
new file mode 100644
index 000000000..c07f276d5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3
@@ -0,0 +1,126 @@
+.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_infog2l \- global to local index translation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_infog2l(\fR
+\fB\&int\fR
+\fI\&I\fR,
+\fB\&int\fR
+\fI\&J\fR,
+\fB\&const int\fR
+\fI\&IMB\fR,
+\fB\&const int\fR
+\fI\&MB\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&RSRC\fR,
+\fB\&const int\fR
+\fI\&CSRC\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&II\fR,
+\fB\&int *\fR
+\fI\&JJ\fR,
+\fB\&int *\fR
+\fI\&PROW\fR,
+\fB\&int *\fR
+\fI\&PCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_infog2l\fR
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed to by I, J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          int
+On entry,  I  specifies  the  global  row index of the matrix
+entry. I must be at least zero.
+.TP 8
+J       (global input)          int
+On entry,  J  specifies the global column index of the matrix
+entry. J must be at least zero.
+.TP 8
+IMB     (global input)          const int
+On entry,  IMB  specifies  the size of the first row block of
+the global matrix. IMB must be at least one.
+.TP 8
+MB      (global input)          const int
+On entry,  MB specifies the blocking factor used to partition
+and  distribute the rows of the matrix A.  MB  must be larger
+than one.
+.TP 8
+INB     (global input)          const int
+On entry, INB specifies the size of the first column block of
+the global matrix. INB must be at least one.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the columns of the matrix A. NB must be larger
+than one.
+.TP 8
+RSRC    (global input)          const int
+On entry,  RSRC  specifies  the row coordinate of the process
+that possesses the row  I.  RSRC  must  be at least zero  and
+strictly less than NPROW.
+.TP 8
+CSRC    (global input)          const int
+On entry, CSRC specifies the column coordinate of the process
+that possesses the column J. CSRC  must be at least zero  and
+strictly less than NPCOL.
+.TP 8
+MYROW   (local input)           const int
+On entry, MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry, MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+II      (local output)          int *
+On exit, II  specifies the  local  starting  row index of the
+submatrix. On exit, II is at least 0.
+.TP 8
+JJ      (local output)          int *
+On exit, JJ  specifies the local starting column index of the
+submatrix. On exit, JJ is at least 0.
+.TP 8
+PROW    (global output)         int *
+On exit, PROW is the row coordinate of the process owning the
+entry specified by the global index I.  PROW is at least zero
+and less than NPROW.
+.TP 8
+PCOL    (global output)         int *
+On exit, PCOL  is the column coordinate of the process owning
+the entry specified by the global index J.  PCOL  is at least
+zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3
new file mode 100644
index 000000000..66e77ac32
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3
@@ -0,0 +1,48 @@
+.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_jumpit \- jump into the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_jumpit(\fR
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_jumpit\fR
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+.SH ARGUMENTS
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant A.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant C.
+.TP 8
+IRANN   (local input)           int *
+On entry,  IRANN  is an array of dimension 2,  that contains 
+the 16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.  On exit, this
+array contains respectively the 16-lower and  15-higher bits
+of the encoding of X(m).
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3
new file mode 100644
index 000000000..9fd6805d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3
@@ -0,0 +1,41 @@
+.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ladd \- Adds two long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ladd(\fR
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ladd\fR
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+.SH ARGUMENTS
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3
new file mode 100644
index 000000000..8be7380e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3
@@ -0,0 +1,42 @@
+.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_lmul \- multiplies 2 long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_lmul(\fR
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_lmul\fR
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the intrinsic modulo function
+is inlined.
+.SH ARGUMENTS
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3
new file mode 100644
index 000000000..e7e80062a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3
@@ -0,0 +1,65 @@
+.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_logsort \- Sort the processes in logarithmic order.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_logsort(\fR
+\fB\&const int\fR
+\fI\&NPROCS\fR,
+\fB\&const int\fR
+\fI\&ICURROC\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_logsort\fR
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain the logarithmic sorted processes id with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words, the largest pieces
+of U will be sent a minimal number of times.
+.SH ARGUMENTS
+.TP 8
+NPROCS  (global input)          const int
+On entry, NPROCS  specifies the number of process rows in the
+process grid. NPROCS is at least one.
+.TP 8
+ICURROC (global input)          const int
+On entry, ICURROC is the source process row.
+.TP 8
+IPLEN   (global input/output)   int *
+On entry, IPLEN is an array of dimension NPROCS+1,  such that
+IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+rows of U  in the processes before process IPMAP[i] after the
+sort,  with  the convention that  IPLEN[NPROCS] is  the total
+number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+IPLEN[i] is  the  number of rows of A that should be moved to
+the process IPMAP[i].  IPLEN  is such that the number of rows
+of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myroc] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROCS)
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3
new file mode 100644
index 000000000..16d8aecc6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3
@@ -0,0 +1,43 @@
+.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_max \- Combine (max) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_max(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_max\fR
+combines (max) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3
new file mode 100644
index 000000000..a816d61b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3
@@ -0,0 +1,43 @@
+.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_min \- Combine (min) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_min(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_min\fR
+combines (min) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3
new file mode 100644
index 000000000..34c8acfa9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3
@@ -0,0 +1,60 @@
+.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numroc \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numroc(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numroc\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3
new file mode 100644
index 000000000..1891f1ac9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3
@@ -0,0 +1,66 @@
+.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numrocI \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numrocI(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&I\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numrocI\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+I       (input)                 const int
+On entry, I  specifies the global index of the matrix  entry.
+I must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3
new file mode 100644
index 000000000..044e87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3
@@ -0,0 +1,40 @@
+.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pabort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pabort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pabort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_pwarn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3
new file mode 100644
index 000000000..c79019c37
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3
@@ -0,0 +1,42 @@
+.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_packL \- Form the MPI structure for the row ring broadcasts.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_packL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&INDEX\fR,
+\fB\&const int\fR
+\fI\&LEN\fR,
+\fB\&const int\fR
+\fI\&IBUF\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_packL\fR
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+INDEX   (input)                 const int
+On entry,  INDEX  points  to  the  first entry of the  packed
+buffer being broadcast.
+.TP 8
+LEN     (input)                 const int
+On entry, LEN is the length of the packed buffer.
+.TP 8
+IBUF    (input)                 const int
+On entry, IBUF  specifies the panel buffer/count/type entries
+that should be initialized.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3
new file mode 100644
index 000000000..30e55b62e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3
@@ -0,0 +1,15 @@
+.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+main \- HPL main timing program.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&main();\fR
+.SH DESCRIPTION
+\fB\&main\fR
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+.SH SEE ALSO
+.BR HPL_pdinfo \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3
new file mode 100644
index 000000000..e3db5fb8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3
@@ -0,0 +1,64 @@
+.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdfact \- recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdfact(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdfact\fR
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows varying the
+recursive stopping criterion in terms of the number of columns in the
+panel, and  NDIV  allows specifying the number of subpanels each panel
+should be divided into. Usually a value of 2 will be chosen. Finally
+PFACT is a function pointer specifying the non-recursive algorithm to
+be used on at most NBMIN columns. One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3
new file mode 100644
index 000000000..ab4b62c4e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3
@@ -0,0 +1,40 @@
+.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv \- Solve A x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3
new file mode 100644
index 000000000..180f191f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv0 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv0(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv0\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3
new file mode 100644
index 000000000..64cee67ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3
@@ -0,0 +1,46 @@
+.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK1 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK1(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK1\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3
new file mode 100644
index 000000000..9f389b9dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK2 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK2(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK2\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3
new file mode 100644
index 000000000..eed541159
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3
@@ -0,0 +1,212 @@
+.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdinfo \- Read input parameter file.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdinfo(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&int *\fR
+\fI\&NS\fR,
+\fB\&int *\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&NBS\fR,
+\fB\&int *\fR
+\fI\&NB\fR,
+\fB\&HPL_T_ORDER *\fR
+\fI\&PMAPPIN\fR,
+\fB\&int *\fR
+\fI\&NPQS\fR,
+\fB\&int *\fR
+\fI\&P\fR,
+\fB\&int *\fR
+\fI\&Q\fR,
+\fB\&int *\fR
+\fI\&NPFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&PF\fR,
+\fB\&int *\fR
+\fI\&NBMS\fR,
+\fB\&int *\fR
+\fI\&NBM\fR,
+\fB\&int *\fR
+\fI\&NDVS\fR,
+\fB\&int *\fR
+\fI\&NDV\fR,
+\fB\&int *\fR
+\fI\&NRFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&RF\fR,
+\fB\&int *\fR
+\fI\&NTPS\fR,
+\fB\&HPL_T_TOP *\fR
+\fI\&TP\fR,
+\fB\&int *\fR
+\fI\&NDHS\fR,
+\fB\&int *\fR
+\fI\&DH\fR,
+\fB\&HPL_T_SWAP *\fR
+\fI\&FSWAP\fR,
+\fB\&int *\fR
+\fI\&TSWAP\fR,
+\fB\&int *\fR
+\fI\&L1NOTRAN\fR,
+\fB\&int *\fR
+\fI\&UNOTRAN\fR,
+\fB\&int *\fR
+\fI\&EQUIL\fR,
+\fB\&int *\fR
+\fI\&ALIGN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdinfo\fR
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+.SH ARGUMENTS
+.TP 8
+TEST    (global output)         HPL_T_test *
+On entry, TEST  points to a testing data structure.  On exit,
+the fields of this data structure are initialized as follows:
+TEST->outfp  specifies the output file where the results will
+be printed.  It is only defined and used by  the process 0 of
+the grid.  TEST->thrsh specifies the threshold value for the
+test ratio.  TEST->epsil is the relative machine precision of
+the distributed computer.  Finally  the test counters, kfail,
+kpass, kskip, ktest are initialized to zero.
+.TP 8
+NS      (global output)         int *
+On exit,  NS  specifies the number of different problem sizes
+to be tested. NS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+N       (global output)         int *
+On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+the first NS entries of this array contain the  problem sizes
+to run the code with.
+.TP 8
+NBS     (global output)         int *
+On exit,  NBS  specifies the number of different distribution
+blocking factors to be tested. NBS must be less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NB      (global output)         int *
+On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+the first NBS entries of this array contain the values of the
+various distribution blocking factors, to run the code with.
+.TP 8
+PMAPPIN (global output)         HPL_T_ORDER *
+On exit,  PMAPPIN  specifies the process mapping onto the nodes
+of the  MPI machine configuration.  PMAPPIN  defaults  to
+row-major ordering.
+.TP 8
+NPQS    (global output)         int *
+On exit, NPQS  specifies the  number of different values that
+can be used for P and Q, i.e., the number of process grids to
+run  the  code with.  NPQS must be  less  than  or  equal  to
+HPL_MAX_PARAM.
+.TP 8
+P       (global output)         int *
+On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of P,
+the number of process rows of the  NPQS grids to run the code
+with.
+.TP 8
+Q       (global output)         int *
+On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of Q,
+the number of process columns of the  NPQS  grids to  run the
+code with.
+.TP 8
+NPFS    (global output)         int *
+On exit, NPFS  specifies the  number of different values that
+can be used for PF : the panel factorization algorithm to run
+the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+PF      (global output)         HPL_T_FACT *
+On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NPFS  entries  of this array  contain  the various
+panel factorization algorithms to run the code with.
+.TP 8
+NBMS    (global output)         int *
+On exit,  NBMS  specifies  the  number  of  various recursive
+stopping criteria  to be tested.  NBMS  must be  less than or
+equal to HPL_MAX_PARAM.
+.TP 8
+NBM     (global output)         int *
+On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NBMS entries of this array contain the values
+of the various recursive stopping criteria to be tested.
+.TP 8
+NDVS    (global output)         int *
+On exit,  NDVS  specifies  the number  of various numbers  of
+panels in recursion to be tested.  NDVS is less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NDV     (global output)         int *
+On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDVS entries of this array contain the values
+of the various numbers of panels in recursion to be tested.
+.TP 8
+NRFS    (global output)         int *
+On exit, NRFS  specifies the  number of different values that
+can be used for RF : the recursive factorization algorithm to
+be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+RF      (global output)         HPL_T_FACT *
+On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NRFS  entries  of  this array contain  the various
+recursive factorization algorithms to run the code with.
+.TP 8
+NTPS    (global output)         int *
+On exit, NTPS  specifies the  number of different values that
+can be used for the  broadcast topologies  to be tested. NTPS
+is less than or equal to HPL_MAX_PARAM.
+.TP 8
+TP      (global output)         HPL_T_TOP *
+On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+the  first NTPS  entries of this  array  contain  the various
+broadcast (along rows) topologies to run the code with.
+.TP 8
+NDHS    (global output)         int *
+On exit, NDHS  specifies the  number of different values that
+can be used for the  lookahead depths to be  tested.  NDHS is
+less than or equal to HPL_MAX_PARAM.
+.TP 8
+DH      (global output)         int *
+On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDHS entries of this array contain the values
+of lookahead depths to run the code with.  Such a value is
+either 0 (no-lookahead) or greater than zero.
+.TP 8
+FSWAP   (global output)         HPL_T_SWAP *
+On exit, FSWAP specifies the swapping algorithm to be used in
+all tests.
+.TP 8
+TSWAP   (global output)         int *
+On exit,  TSWAP  specifies the swapping threshold as a number
+of columns when the mixed swapping algorithm was chosen.
+.TP 8
+L1NOTRAN (global output)        int *
+On exit, L1NOTRAN specifies whether the upper triangle of the
+panels of columns  should  be stored  in  no-transposed  form
+(L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+.TP 8
+UNOTRAN (global output)         int *
+On exit, UNOTRAN  specifies whether the panels of rows should
+be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+form (UNOTRAN=0) during their broadcast.
+.TP 8
+EQUIL   (global output)         int *
+On exit,  EQUIL  specifies  whether  equilibration during the
+swap-broadcast  of  the  panel of rows  should  be  performed
+(EQUIL=1) or not (EQUIL=0).
+.TP 8
+ALIGN   (global output)         int *
+On exit,  ALIGN  specifies the alignment  of  the dynamically
+allocated buffers in double precision words. ALIGN is greater
+than zero.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3
new file mode 100644
index 000000000..7ce46c23e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3
@@ -0,0 +1,53 @@
+.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlamch(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlamch\fR
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps),  the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin)- base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax)  - (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.TP 8
+CMACH   (global input)          const HPL_T_MACH
+Specifies the value to be returned by HPL_pdlamch            
+   = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+   = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+   = HPL_MACH_BASE,  HPL_pdlamch := base                     
+   = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+   = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+   = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+   = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+   = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+   = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+   = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3
new file mode 100644
index 000000000..30593401b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3
@@ -0,0 +1,68 @@
+.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlange(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NORM    (global input)          const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+that contains the local pieces of the distributed matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.SH SEE ALSO
+.BR HPL_pdlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3
new file mode 100644
index 000000000..feb010a67
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3
@@ -0,0 +1,72 @@
+.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaprnt \- Print a distributed matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaprnt(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&IAROW\fR,
+\fB\&const int\fR
+\fI\&IACOL\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaprnt\fR
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies the number of rows of the coefficient
+matrix A. M must be at least zero.
+.TP 8
+N       (global input)          const int
+On  entry,   N   specifies  the  number  of  columns  of  the
+coefficient matrix A. N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           double *
+On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+This array contains the coefficient matrix to be printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+IAROW   (global input)          const int
+On entry,  IAROW  specifies the row process coordinate owning
+the  first row of A.  IAROW  must be  larger than or equal to
+zero and less than NPROW.
+.TP 8
+IACOL   (global input)          const int
+On entry,  IACOL  specifies  the  column  process  coordinate
+owning the  first column  of A. IACOL  must be larger than or
+equal to zero and less than NPCOL.
+.TP 8
+CMATNM  (global input)          const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3
new file mode 100644
index 000000000..3875400e3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp05N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3
new file mode 100644
index 000000000..39901ba4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3
new file mode 100644
index 000000000..1ee14c0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadN \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollN \ (3),
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp06N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3
new file mode 100644
index 000000000..e5c5de024
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadT \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollT \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3
new file mode 100644
index 000000000..5b4675c6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3
@@ -0,0 +1,67 @@
+.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmatgen \- Parallel random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmatgen(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmatgen\fR
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+A       (local output)          double *
+On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+On exit, this array contains the coefficients of the randomly
+generated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+ISEED   (global input)          const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_drand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3
new file mode 100644
index 000000000..41c604373
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3
@@ -0,0 +1,78 @@
+.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmxswp \- swaps and broadcasts the pivot row.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmxswp(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmxswp\fR
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of the matrix
+column on which this function operates.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+It  is assumed that  HPL_dlocmax  was called  prior  to  this
+routine to  initialize  the first four entries of this array.
+On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+Note that this is also the  JJth  row  (or column) of L1. The
+remaining part is used as a temporary array.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3
new file mode 100644
index 000000000..2e94a36a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrN \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3
new file mode 100644
index 000000000..035e60d60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrT \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3
new file mode 100644
index 000000000..94a212ced
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_disp \- Deallocate a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_disp(\fR
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_disp\fR
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3
new file mode 100644
index 000000000..cfad40c3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_free(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_free\fR
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points  to  the  panel data  structure from
+which the resources should be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3
new file mode 100644
index 000000000..cbb0e7e3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_init \- Initialize the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_init(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_init\fR
+initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry,  JB  specifies the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_disp \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3
new file mode 100644
index 000000000..ed9fe1053
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_new(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_new\fR
+creates and initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry,  JB  specifies the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to create and initialize.
+.SH SEE ALSO
+.BR HPL_pdpanel_free \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3
new file mode 100644
index 000000000..eca1f4a34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllN \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3
new file mode 100644
index 000000000..a18d52c61
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3
new file mode 100644
index 000000000..cae2b5b5b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlN \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlN\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3
new file mode 100644
index 000000000..434444bf7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlT \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlT\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of the  main loop is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column.  This allows the current
+column to be brought through cache only once at each step. The current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3
new file mode 100644
index 000000000..fc6dd25f8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrN \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrN\fR
+recursively  factorizes  a  panel  of  columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3
new file mode 100644
index 000000000..ea0a57bc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrT \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrT\fR
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3
new file mode 100644
index 000000000..29b6db40a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllN \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllN\fR
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3
new file mode 100644
index 000000000..18db5c1fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllT \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3
new file mode 100644
index 000000000..441560c14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlN \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlN\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3
new file mode 100644
index 000000000..e5bd9d110
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlT \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3
new file mode 100644
index 000000000..eaaff2bff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3
@@ -0,0 +1,63 @@
+.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtest \- Perform one test.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtest(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtest\fR
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+.SH ARGUMENTS
+.TP 8
+TEST    (global input)          HPL_T_test *
+On entry,  TEST  points  to a testing data structure:  outfp
+specifies the output file where the results will be printed.
+It is only defined and used by the process  0  of the  grid.
+thrsh  specifies  the  threshold  value  for the test ratio.
+Concretely, a test is declared "PASSED"  if and only if  the
+following inequality is satisfied:
+||Ax-b||_oo / ( epsil *
+                ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                 N )  < thrsh.
+epsil  is the  relative machine precision of the distributed
+computer. Finally the test counters, kfail, kpass, kskip and
+ktest are updated as follows:  if the test passes,  kpass is
+incremented by one;  if the test fails, kfail is incremented
+by one; if the test is skipped, kskip is incremented by one.
+ktest is left unchanged.
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters to be used for this test.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the order of the coefficient matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdinfo \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3
new file mode 100644
index 000000000..5d2d14dcd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3
@@ -0,0 +1,49 @@
+.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtrsv \- Solve triu( A ) x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtrsv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&AMAT\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtrsv\fR
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns,  implemented  in  terms  of  synchronous  point-to-point
+communication primitives.  Lookahead of depth 1 is used to minimize the
+critical path. This entire operation is essentially ``latency'' bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+AMAT    (local input/output)    HPL_T_pmat *
+On entry,  AMAT  points  to the data structure containing the
+local array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3
new file mode 100644
index 000000000..e20929a27
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNN\fR
+broadcasts (forwards) the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry.  Otherwise,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3
new file mode 100644
index 000000000..276c2ceda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNT\fR
+broadcasts (forwards) the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry.  Otherwise,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3
new file mode 100644
index 000000000..091859d01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTN\fR
+broadcasts (forwards) the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry.  Otherwise,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3
new file mode 100644
index 000000000..34502c6ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTT\fR
+broadcasts  (forwards)  the panel  PBCST  and  simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. Otherwise (when
+PBCST is NULL), IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3
new file mode 100644
index 000000000..9476b5eff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3
@@ -0,0 +1,50 @@
+.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_perm \- Combine 2 index arrays - Generate the permutation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_perm(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_perm\fR
+combines  two  index  arrays  and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU. Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutation  producing  the  same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+.SH ARGUMENTS
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the length of the arrays  LINDXA  and
+LINDXAU. N should be at least zero.
+.TP 8
+LINDXA  (global input/output)   int *
+On entry,  LINDXA  is an array of dimension N  containing the
+source indexes. On exit,  LINDXA  contains the combined index
+array.
+.TP 8
+LINDXAU (global input/output)   int *
+On entry,  LINDXAU is an array of dimension N  containing the
+target indexes.  On exit,  LINDXAU  contains  the sequence of
+permutation,  that  should be applied  in increasing order to
+permute the underlying array U in place.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension N.
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3
new file mode 100644
index 000000000..6a8f5f277
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3
@@ -0,0 +1,79 @@
+.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pipid \- Simplify the pivot vector.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pipid(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pipid\fR
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global output)         int *
+On exit, K specifies the number of entries in  IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global output)         int *
+On entry, IPID is an array of length 4*N.  On exit, the first
+K entries of that array contain the src and final destination
+resulting  from  the  application of the  N  interchanges  as
+specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+in [0..N)
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3
new file mode 100644
index 000000000..2b889947a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3
@@ -0,0 +1,168 @@
+.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx0 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx0(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&LLEN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx0\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA < N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after each other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would rather be cumbersome
+to  figure it on  the fly  during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+LINDXA  (local output)          int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (local output)          int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+LLEN    (global output)         int *
+On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+contains how many rows every process has.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3
new file mode 100644
index 000000000..7d4f8feba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3
@@ -0,0 +1,106 @@
+.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx1 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx1(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPA\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&PERMU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx1\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPA     (global output)         int *
+On exit,  IPA  specifies  the number of rows that the current
+process row has that either belong to U  or should be swapped
+with remote rows of A.
+.TP 8
+LINDXA  (global output)         int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (global output)         int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in  the  processes  before  process  IPMAP[i]  after the sort
+with the convention that IPLEN[nprow]  is the total number of
+rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+local number of rows of A that should be moved to the process
+IPMAP[i]. IPLEN is such that the number of rows of the source
+process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW).
+.TP 8
+PERMU   (global output)         int *
+On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+contains  a sequence of permutations,  that should be applied
+in increasing order to permute in place the row panel U.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension 2*JB.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3
new file mode 100644
index 000000000..d22d64f36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3
@@ -0,0 +1,68 @@
+.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx10 \- Compute the logarithmic maps for the spreading.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx10(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx10\fR
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in the processes before process IPMAP[i] after the sort, with
+the convention that IPLEN[nprow] is the total number of rows.
+In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+rows of  A  that should be moved for each process.  IPLEN  is
+such that the number of rows of the source process row can be
+computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+this  array are sorted  so  that  the quantities IPLEN[i+1] -
+IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW)
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3
new file mode 100644
index 000000000..38956c5a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3
@@ -0,0 +1,38 @@
+.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pnum \- Rank determination.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pnum(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pnum\fR
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+MYROW   (local input)           const int
+On entry,  MYROW  specifies the row coordinate of the process
+whose rank is to be determined. MYROW must be greater than or
+equal to zero and less than NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry,  MYCOL  specifies  the  column  coordinate  of  the
+process whose rank is to be determined. MYCOL must be greater
+than or equal to zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3
new file mode 100644
index 000000000..550703aee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3
@@ -0,0 +1,35 @@
+.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ptimer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be disabled  by calling  HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3
new file mode 100644
index 000000000..a93a1c208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_cputime\fR
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_ptimer_walltime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3
new file mode 100644
index 000000000..37e5e8c54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3
new file mode 100644
index 000000000..14e4a65d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3
@@ -0,0 +1,45 @@
+.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pwarn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pwarn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pwarn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_pabort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3
new file mode 100644
index 000000000..8b1918fea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3
@@ -0,0 +1,28 @@
+.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rand \- random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_rand();\fR
+.SH DESCRIPTION
+\fB\&HPL_rand\fR
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3
new file mode 100644
index 000000000..d9136c14b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3
@@ -0,0 +1,49 @@
+.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_recv \- Receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_recv(\fR
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&SRC\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_recv\fR
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+SRC     (local input)           int
+On entry, SRC  specifies the rank of the  sending  process in
+the communication space defined by COMM.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_sendrecv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3
new file mode 100644
index 000000000..c48f04ded
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3
@@ -0,0 +1,56 @@
+.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_reduce \- Reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_reduce\fR
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array, so its
+original data is corrupted in all processes but the accumulating one.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+exit,  and  in process of rank  ROOT  this array contains the
+reduced data.  This  buffer  is also used as workspace during
+the operation in the other processes of the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the accumulating process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3
new file mode 100644
index 000000000..eac4deb66
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3
@@ -0,0 +1,77 @@
+.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollN \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollN\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of  U.  N must be
+at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,IPLEN[NPROW]).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3
new file mode 100644
index 000000000..bab5bdffd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3
@@ -0,0 +1,77 @@
+.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollT \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollT\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the local number of rows of  U.  N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,N).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3
new file mode 100644
index 000000000..a11252d6a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3
@@ -0,0 +1,67 @@
+.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sdrv \- Send and receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_sdrv(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&int\fR
+\fI\&PARTNER\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sdrv\fR
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages of length  less than or equal to zero  are neither sent nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number  of double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG  specifies the message tag to be used for the
+sending communication operation.
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG  specifies the message tag to be used for the
+receiving communication operation.
+.TP 8
+PARTNER (local input)           int
+On entry,  PARTNER  specifies  the rank of the  collaborative
+process in the communication space defined by COMM.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_recv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3
new file mode 100644
index 000000000..48ffc5d62
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3
@@ -0,0 +1,49 @@
+.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_send \- Send a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_send(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&DEST\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_send\fR
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number of  double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+DEST    (local input)           int
+On entry, DEST specifies the rank of the receiving process in
+the communication space defined by COMM.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_recv \ (3),
+.BR HPL_sdrv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3
new file mode 100644
index 000000000..e9a9433ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3
@@ -0,0 +1,37 @@
+.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_setran \- Manage the random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_setran(\fR
+\fB\&const int\fR
+\fI\&OPTION\fR,
+\fB\&int *\fR
+\fI\&IRAN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_setran\fR
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2),  irand (resp. ias and ics) is set to the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+.SH ARGUMENTS
+.TP 8
+OPTION  (local input)           const int
+On entry, OPTION  is an integer that specifies the operations
+to be performed on the random generator as specified above.
+.TP 8
+IRAN    (local input/output)    int *
+On entry,  IRAN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of a random number.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3
new file mode 100644
index 000000000..452b8da34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadN \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadN\fR
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies  the  local number of columns of U. N
+must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3
new file mode 100644
index 000000000..54f7dda31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadT \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadT\fR
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the local number of rows of U. N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,N).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3
new file mode 100644
index 000000000..a3c4e2190
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3
@@ -0,0 +1,44 @@
+.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sum \- Combine (sum) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_sum(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sum\fR
+combines (sum) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3
new file mode 100644
index 000000000..61f3f7cb1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3
@@ -0,0 +1,35 @@
+.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_timer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_timer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be  disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3
new file mode 100644
index 000000000..1f8987ca2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_cputime\fR
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_timer_walltime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3
new file mode 100644
index 000000000..9a6e898e7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3
new file mode 100644
index 000000000..6b051acb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3
@@ -0,0 +1,59 @@
+.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_warn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_warn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_warn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_warn( stderr, __LINE__, __FILE__,
+.br
+             "Demo.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3
new file mode 100644
index 000000000..df3e0a954
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3
@@ -0,0 +1,77 @@
+.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_xjumpm \- Compute constants to jump in the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_xjumpm(\fR
+\fB\&const int\fR
+\fI\&JUMPM\fR,
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR,
+\fB\&int *\fR
+\fI\&IAM\fR,
+\fB\&int *\fR
+\fI\&ICM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_xjumpm\fR
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+.SH ARGUMENTS
+.TP 8
+JUMPM   (local input)           const int
+On entry,  JUMPM  specifies  the  number  of entries  in  the
+sequence to jump over. When JUMPM is less than or equal to zero,
+A and C are not computed, IRANM is set to IRANN corresponding
+to a jump of size zero.
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  a  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  c  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IRANN   (local input)           int *
+On entry, IRANN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.   On exit, this
+array  contains respectively  the 16-lower and 15-higher bits
+of the encoding of X(n+JUMPM).
+.TP 8
+IAM     (local output)          int *
+On entry, IAM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+sequence. IAM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits of this constant  A.  When JUMPM is less than
+or equal to zero, this array is not referenced.
+.TP 8
+ICM     (local output)          int *
+On entry, ICM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+sequence. ICM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits of this constant  C.  When JUMPM is less than
+or equal to zero, this array is not referenced.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing
new file mode 100755
index 000000000..625aeb118
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing
@@ -0,0 +1,215 @@
+#! /bin/sh
+# Common wrapper for a few potentially missing GNU programs.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then  # called with no arguments at all
+  echo 1>&2 "Try '$0 --help' for more information"
+  exit 1
+fi
+# Handle the wrapper's own options; a first word matching none of them falls through.
+case $1 in
+
+  --is-lightweight)
+    # Used by our autoconf macros to check whether the available missing
+    # script is modern enough.
+    exit 0
+    ;;
+
+  --run)
+    # Back-compat with the calling convention used by older automake.
+    shift  # drop '--run' so the remaining words are PROGRAM [ARGUMENT]...
+    ;;
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
+to PROGRAM being missing or too old.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+
+Supported PROGRAM values:
+  aclocal   autoconf  autoheader   autom4te  automake  makeinfo
+  bison     yacc      flex         lex       help2man
+
+Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
+'g' are ignored when checking the name.
+
+Send bug reports to <bug-automake@gnu.org>."
+    exit $?  # exit with the status of the echo above
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit $?  # exit with the status of the echo above
+    ;;
+
+  -*)  # any other dash-prefixed first argument is rejected
+    echo 1>&2 "$0: unknown '$1' option"
+    echo 1>&2 "Try '$0 --help' for more information"
+    exit 1
+    ;;
+
+esac
+
+# Run the given program, remember its exit status.
+"$@"; st=$?
+
+# If it succeeded, we are done.
+test $st -eq 0 && exit 0
+
+# Also exit now if it failed (or wasn't found), and '--version' or '--help'
+# was passed as its first argument; such an option is passed most likely to
+# detect whether the program is present and works.
+case $2 in --version|--help) exit $st;; esac
+
+# Exit code 63 means version mismatch.  This often happens when the user
+# tries to use an ancient version of a tool on a file that requires a
+# minimum version.
+if test $st -eq 63; then
+  msg="probably too old"  # completes the sentence "'PROGRAM' is ..."
+elif test $st -eq 127; then
+  # Program was missing.
+  msg="missing on your system"
+else
+  # Program was found and executed, but failed.  Give up.
+  exit $st
+fi
+# Home pages quoted in the advice text.
+perl_URL=https://www.perl.org/
+flex_URL=https://github.com/westes/flex
+gnu_software_URL=https://www.gnu.org/software
+
+program_details ()
+{  # $1: tool name. Prints its GNU package and that package's needs; silent for other names.
+  case $1 in
+    aclocal | automake)
+      printf '%s\n' "The '$1' program is part of the GNU Automake package:" \
+        "<$gnu_software_URL/automake>" \
+        "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" \
+        "<$gnu_software_URL/autoconf>" \
+        "<$gnu_software_URL/m4/>" \
+        "<$perl_URL>"
+      ;;
+    autoconf | autom4te | autoheader)
+      printf '%s\n' "The '$1' program is part of the GNU Autoconf package:" \
+        "<$gnu_software_URL/autoconf/>" \
+        "It also requires GNU m4 and Perl in order to run:" \
+        "<$gnu_software_URL/m4/>" \
+        "<$perl_URL>"
+      ;;
+  esac
+}
+
+give_advice ()
+{  # $1: program name as invoked. Writes the advice text to stdout; reads $msg.
+  # Drop one leading 'gnu-', 'gnu' or 'g' (first match wins) before matching.
+  tool=`echo "$1" | sed '
+    s/^gnu-//; t
+    s/^gnu//; t
+    s/^g//; t'`
+  cfg_inputs="'configure.ac' or m4 files included by 'configure.ac'"
+
+  printf '%s\n' "'$1' is $msg."
+
+  case $tool in
+    autoconf*)
+      printf '%s\n' "You should only need it if you modified 'configure.ac'," \
+        "or m4 files included by it."
+      program_details 'autoconf'
+      ;;
+    autoheader*)
+      printf '%s\n' "You should only need it if you modified 'acconfig.h' or" \
+        "$cfg_inputs."
+      program_details 'autoheader'
+      ;;
+    automake*)
+      printf '%s\n' "You should only need it if you modified 'Makefile.am' or" \
+        "$cfg_inputs."
+      program_details 'automake'
+      ;;
+    aclocal*)
+      printf '%s\n' "You should only need it if you modified 'acinclude.m4' or" \
+        "$cfg_inputs."
+      program_details 'aclocal'
+      ;;
+    autom4te*)
+      printf '%s\n' "You might have modified some maintainer files that require" \
+        "the 'autom4te' program to be rebuilt."
+      program_details 'autom4te'
+      ;;
+    bison* | yacc*)
+      printf '%s\n' "You should only need it if you modified a '.y' file." \
+        "You may want to install the GNU Bison package:" \
+        "<$gnu_software_URL/bison/>"
+      ;;
+    lex* | flex*)
+      printf '%s\n' "You should only need it if you modified a '.l' file." \
+        "You may want to install the Fast Lexical Analyzer package:" \
+        "<$flex_URL>"
+      ;;
+    help2man*)
+      # The first sentence is emitted as a single output line.
+      printf '%s\n' "You should only need it if you modified a dependency of a man page." \
+        "You may want to install the GNU Help2man package:" \
+        "<$gnu_software_URL/help2man/>"
+      ;;
+    makeinfo*)
+      printf '%s\n' "You should only need it if you modified a '.texi' file, or" \
+        "any other file indirectly affecting the aspect of the manual." \
+        "You might want to install the Texinfo package:" \
+        "<$gnu_software_URL/texinfo/>" \
+        "The spurious makeinfo call might also be the consequence of" \
+        "using a buggy 'make' (AIX, DU, IRIX), in which case you might" \
+        "want to install GNU make:" \
+        "<$gnu_software_URL/make/>"
+      ;;
+    *)
+      printf '%s\n' "You might have modified some files without having the proper" \
+        "tools for further handling them.  Check the 'README' file, it" \
+        "often tells you about the needed prerequisites for installing" \
+        "this package.  You may also peek at any GNU archive site, in"
+      echo "case some other package contains this missing '$1' program."  # $1 is caller-supplied
+      ;;
+  esac
+}
+# Send the advice to stderr: line 1 is tagged 'WARNING: ', later lines get a matching indent.
+give_advice "$1" | sed -e '1s/^/WARNING: /' \
+                       -e '2,$s/^/         /' >&2
+
+# Propagate the correct exit status (expected to be 127 for a program
+# not found, 63 for a program that failed due to version mismatch).
+exit $st
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp
new file mode 100644
index 000000000..b7e1c370d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml
new file mode 100644
index 000000000..19e73e079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml
@@ -0,0 +1,1000 @@
+---
+MainSourceFile:  MainSrcFiles_placehold
+Replacements:
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6545
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6822
+    Length:          0
+    ReplacementText: "\n#include <dpct/blas_utils.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6825
+    Length:          18
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6843
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6869
+    Length:          20
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6956
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7044
+    Length:          197
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7334
+    Length:          0
+    ReplacementText: ' try '
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7364
+    Length:          0
+    ReplacementText: "        /*\n        DPCT1010:1: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n        */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7372
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7388
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7416
+    Length:          199
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7739
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7772
+    Length:          208
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          8006
+    Length:          0
+    ReplacementText: "\ncatch (sycl::exception const &exc) {\n  std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n  std::exit(1);\n}"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          8954
+    Length:          0
+    ReplacementText: "\n   dpct::device_ext &dev_ct1 = dpct::get_current_device();\n   sycl::queue &q_ct1 = dev_ct1.in_order_queue();"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9513
+    Length:          54
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device<double>(K * LDA, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9587
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9637
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9662
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9687
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device<double>(N *  LDB, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9762
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9813
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9838
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9863
+    Length:          54
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrC = sycl::malloc_device<double>(N * LDC, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9937
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9987
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10012
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10025
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10054
+    Length:          85
+    ReplacementText: 'oneapi::mkl::blas::column_major::gemm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC).wait()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10145
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10187
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10237
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10262
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10269
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10299
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrA, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10322
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrB, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10345
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrC, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10637
+    Length:          0
+    ReplacementText: "\n   dpct::device_ext &dev_ct1 = dpct::get_current_device();\n   sycl::queue &q_ct1 = dev_ct1.in_order_queue();"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11149
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device<double>(M * LDA, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11224
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11271
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11296
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11334
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device<double>(N * LDB, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11409
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11456
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11481
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11488
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11522
+    Length:          62
+    ReplacementText: 'oneapi::mkl::blas::column_major::trsm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB).wait()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11595
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11642
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11689
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11714
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11726
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11755
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrA, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11778
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrB, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+MainSourceFilesDigest:
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Digest:          c9ea63d69505b8c70080ff9792b77dd8
+DpctVersion:     18.0.0
+MainHelperFileName: ''
+USMLevel:        ''
+FeatureMap:      {}
+CompileTargets:
+  /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl:
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+  /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a:
+    - MigratedFileName: './src/auxil/HPL_dlacpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlatcpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_fprintf.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_warn.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_abort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlaprnt.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlange.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlamch.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dcopy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_daxpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dscal.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_idamax.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dgemv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dtrsv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dger.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dgemm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dtrsm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_1ring.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_1rinM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_2ring.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_2rinM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_blong.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_blonM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_packL.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_copyL.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_binit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_bcast.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_bwait.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_send.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_recv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_sdrv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_init.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_pnum.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_info.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_exit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_broadcast.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_reduce.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_all_reduce.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_barrier.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_min.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_max.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_sum.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_new.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_init.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_disp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_free.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2l.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2lp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2p.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxl2g.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_infog2l.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_numroc.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_numrocI.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp00N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp10N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp01N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp01T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp02N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp03N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp03T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp04N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp04T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp05N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp05T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp06N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp06T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pwarn.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pabort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlaprnt.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlamch.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlange.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocmax.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocswpN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocswpT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdmxswp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpancrN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpancrT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanllN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanllT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanrlN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanrlT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanllN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanllT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpancrN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpancrT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanrlN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanrlT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdfact.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pipid.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx0.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp00N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp00T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_perm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_logsort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx10.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx1.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_spreadN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_spreadT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_rollN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_rollT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_equil.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp01N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp01T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateNN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateNT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateTN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateTT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdtrsv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesv0.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesvK1.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesvK2.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_dmatgen.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_ladd.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_lmul.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_xjumpm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_jumpit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_rand.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_setran.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer_cputime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer_walltime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/pmatgen/HPL_pdmatgen.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer_cputime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer_walltime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+  libdgemm.so.1.0.1:
+    - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp'
+      CompileOptions:  '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) '
+      Compiler:        cc
+    - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp'
+      CompileOptions:  '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) '
+      Compiler:        cc
+OptionMap:
+  AnalysisScopePath:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3'
+    Specified:       false
+  AsyncHandler:
+    Value:           'false'
+    Specified:       false
+  CommentsEnabled:
+    Value:           'false'
+    Specified:       false
+  CompilationsDir:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3'
+    Specified:       true
+  CtadEnabled:
+    Value:           'false'
+    Specified:       false
+  EnablepProfiling:
+    Value:           'false'
+    Specified:       false
+  ExperimentalFlag:
+    Value:           '0'
+    Specified:       false
+  ExplicitClNamespace:
+    Value:           'false'
+    Specified:       false
+  ExplicitNamespace:
+    Value:           '20'
+    Specified:       false
+  ExtensionDDFlag:
+    Value:           '0'
+    Specified:       false
+  ExtensionDEFlag:
+    Value:           '4294967295'
+    Specified:       false
+  HelperFuncPreferenceFlag:
+    Value:           '0'
+    Specified:       false
+  NDRangeDim:
+    Value:           '3'
+    Specified:       false
+  NoDRYPattern:
+    Value:           'false'
+    Specified:       false
+  NoUseGenericSpace:
+    Value:           ''
+    Specified:       true
+  OptimizeMigration:
+    Value:           'false'
+    Specified:       false
+  ProcessAll:
+    Value:           'false'
+    Specified:       false
+  RuleFile:
+    Value:           ''
+    Specified:       false
+  SyclNamedLambda:
+    Value:           'false'
+    Specified:       false
+  UsmLevel:
+    Value:           '1'
+    Specified:       false
+...
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct
new file mode 100644
index 000000000..dfae1e89e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct
@@ -0,0 +1,1019 @@
+CC := icpx
+# Linker command: reuses whatever command CC is set to.
+LD := $(CC)
+
+#DPCT2001:4: You can link with more libraries by adding them here.
+LIB := 
+# Extra compile options; each TARGET_*_FLAG_* value below ends with ${FLAGS}.
+FLAGS := 
+# Stop parsing with an error when `which` cannot locate $(CC).
+ifeq ($(shell which $(CC)),)
+    $(error ERROR - $(CC) compiler not found)
+endif
+# ROOT_DIR is the directory holding the $(CC) executable; both include paths are derived from it.
+ROOT_DIR     := $(shell dirname $(shell which $(CC)))
+INCLUDE_SYCL := $(ROOT_DIR)/../include
+INCLUDE_CL   := $(ROOT_DIR)/../include/sycl
+# TARGET_0 inputs (source/object/flags per index); the same three ptest sources recur at indices 0-2, 3-5, 6-8 and 9-11.
+TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_0 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_1 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_2 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_3 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_4 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_5 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_6 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_7 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_8 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_9 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_10 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_11 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# TARGET_1 inputs start here: one SRC/OBJ/FLAG triple per source file, indices 0-125.
+TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c
+TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o
+TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c
+TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o
+TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_2 = ./src/auxil/HPL_fprintf.c
+TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o
+TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c
+TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o
+TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_4 = ./src/auxil/HPL_abort.c
+TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o
+TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c
+TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o
+TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c
+TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o
+TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# NOTE(review): unlike the neighbouring FLAG values, this one carries no -O3 - confirm that is intentional for HPL_dlamch.c.
+TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c
+TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o
+TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS}
+
+TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c
+TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o
+TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c
+TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o
+TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c
+TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o
+TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c
+TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o
+TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c
+TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o
+TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c
+TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o
+TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_14 = ./src/blas/HPL_dger.c
+TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o
+TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c
+TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o
+TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c
+TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o
+TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c
+TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o
+TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c
+TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o
+TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c
+TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o
+TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c
+TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o
+TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_21 = ./src/comm/HPL_blong.c
+TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o
+TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c
+TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o
+TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_23 = ./src/comm/HPL_packL.c
+TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o
+TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c
+TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o
+TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_25 = ./src/comm/HPL_binit.c
+TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o
+TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c
+TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o
+TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c
+TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o
+TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_28 = ./src/comm/HPL_send.c
+TARGET_1_OBJ_28 = ./src/comm/HPL_send.o
+TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_29 = ./src/comm/HPL_recv.c
+TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o
+TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c
+TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o
+TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c
+TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o
+TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c
+TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o
+TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c
+TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o
+TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c
+TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o
+TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c
+TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o
+TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c
+TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o
+TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c
+TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o
+TARGET_1_FLAG_37 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c
+TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o
+TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_39 = ./src/grid/HPL_min.c
+TARGET_1_OBJ_39 = ./src/grid/HPL_min.o
+TARGET_1_FLAG_39 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_40 = ./src/grid/HPL_max.c
+TARGET_1_OBJ_40 = ./src/grid/HPL_max.o
+TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_41 = ./src/grid/HPL_sum.c
+TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o
+TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c
+TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o
+TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c
+TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o
+TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c
+TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o
+TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c
+TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o
+TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_46 = ./src/pauxil/HPL_indxg2l.c
+TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o
+TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c
+TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o
+TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_48 = ./src/pauxil/HPL_indxg2p.c
+TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o
+TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c
+TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o
+TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c
+TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o
+TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c
+TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o
+TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c
+TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o
+TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c
+TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o
+TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c
+TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o
+TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c
+TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o
+TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c
+TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o
+TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c
+TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o
+TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c
+TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o
+TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c
+TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o
+TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c
+TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o
+TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c
+TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o
+TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c
+TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o
+TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_63 = ./src/pauxil/HPL_dlaswp05T.c
+TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o
+TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c
+TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o
+TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_65 = ./src/pauxil/HPL_dlaswp06T.c
+TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o
+TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c
+TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o
+TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c
+TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o
+TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c
+TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o
+TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c
+TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o
+TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c
+TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o
+TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c
+TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o
+TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c
+TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o
+TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c
+TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o
+TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c
+TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o
+TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c
+TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o
+TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c
+TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o
+TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c
+TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o
+TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c
+TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o
+TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c
+TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o
+TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c
+TARGET_1_OBJ_80 = ./src/pfact/HPL_pdpanrlT.o
+TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c
+TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o
+TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c
+TARGET_1_OBJ_82 = ./src/pfact/HPL_pdrpanllT.o
+TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c
+TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o
+TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c
+TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o
+TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c
+TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o
+TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c
+TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o
+TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c
+TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o
+TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c
+TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o
+TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c
+TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o
+TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c
+TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o
+TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c
+TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o
+TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c
+TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o
+TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c
+TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o
+TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c
+TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o
+TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c
+TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o
+TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c
+TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o
+TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c
+TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o
+TARGET_1_FLAG_97 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c
+TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o
+TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c
+TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o
+TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c
+TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o
+TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c
+TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o
+TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c
+TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o
+TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c
+TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o
+TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c
+TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o
+TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c
+TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o
+TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c
+TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o
+TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c
+TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o
+TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c
+TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o
+TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c
+TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o
+TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c
+TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o
+TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c
+TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o
+TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c
+TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o
+TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c
+TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o
+TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c
+TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o
+TARGET_1_FLAG_114 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c
+TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o
+TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c
+TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o
+TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c
+TARGET_1_OBJ_117 = $(TARGET_1_SRC_117:.c=.o)
+TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include $(FLAGS)
+
+TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c
+TARGET_1_OBJ_118 = $(TARGET_1_SRC_118:.c=.o)
+TARGET_1_FLAG_118 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c
+TARGET_1_OBJ_119 = $(TARGET_1_SRC_119:.c=.o)
+TARGET_1_FLAG_119 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c
+TARGET_1_OBJ_120 = $(TARGET_1_SRC_120:.c=.o)
+TARGET_1_FLAG_120 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c
+TARGET_1_OBJ_121 = $(TARGET_1_SRC_121:.c=.o)
+TARGET_1_FLAG_121 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c
+TARGET_1_OBJ_122 = $(TARGET_1_SRC_122:.c=.o)
+TARGET_1_FLAG_122 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c
+TARGET_1_OBJ_123 = $(TARGET_1_SRC_123:.c=.o)
+TARGET_1_FLAG_123 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c
+TARGET_1_OBJ_124 = $(TARGET_1_SRC_124:.c=.o)
+TARGET_1_FLAG_124 = $(TARGET_1_FLAG_117)
+
+TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c
+TARGET_1_OBJ_125 = $(TARGET_1_SRC_125:.c=.o)
+TARGET_1_FLAG_125 = $(TARGET_1_FLAG_117)
+
+TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_0 = $(TARGET_2_SRC_0:.cpp=.o)
+TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) $(FLAGS)
+
+TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_1 = $(TARGET_2_SRC_1:.cpp=.o)
+TARGET_2_FLAG_1 = $(TARGET_2_FLAG_0)
+
+TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl
+TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a
+TARGET_2 := libdgemm.so.1.0.1
+
+TARGET :=  ${TARGET_0} ${TARGET_1} ${TARGET_2}
+
+.PHONY:all clean
+OBJS_0 := $(foreach n,0 1 2 3 4 5 6 7 8 9 10 11,$(TARGET_0_OBJ_$(n)))
+OBJS_1 := $(foreach n,0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125,$(TARGET_1_OBJ_$(n)))
+OBJS_2 := $(foreach n,0 1,$(TARGET_2_OBJ_$(n)))
+all: $(TARGET)
+$(TARGET_0): $(OBJS_0)
+	$(CC) -fsycl -o $@ $^ $(LIB) -qmkl
+
+$(TARGET_0_OBJ_0): $(TARGET_0_SRC_0)
+	cc -c $< -o $@ $(TARGET_0_FLAG_0)
+
+$(TARGET_0_OBJ_1): $(TARGET_0_SRC_1)
+	cc -c $< -o $@ $(TARGET_0_FLAG_1)
+
+$(TARGET_0_OBJ_2): $(TARGET_0_SRC_2)
+	cc -c $< -o $@ $(TARGET_0_FLAG_2)
+
+$(TARGET_0_OBJ_3): $(TARGET_0_SRC_3)
+	cc -c $< -o $@ $(TARGET_0_FLAG_3)
+
+$(TARGET_0_OBJ_4): $(TARGET_0_SRC_4)
+	cc -c $< -o $@ $(TARGET_0_FLAG_4)
+
+$(TARGET_0_OBJ_5): $(TARGET_0_SRC_5)
+	cc -c $< -o $@ $(TARGET_0_FLAG_5)
+
+$(TARGET_0_OBJ_6): $(TARGET_0_SRC_6)
+	cc -c $< -o $@ $(TARGET_0_FLAG_6)
+
+$(TARGET_0_OBJ_7): $(TARGET_0_SRC_7)
+	cc -c $< -o $@ $(TARGET_0_FLAG_7)
+
+$(TARGET_0_OBJ_8): $(TARGET_0_SRC_8)
+	cc -c $< -o $@ $(TARGET_0_FLAG_8)
+
+$(TARGET_0_OBJ_9): $(TARGET_0_SRC_9)
+	cc -c $< -o $@ $(TARGET_0_FLAG_9)
+
+$(TARGET_0_OBJ_10): $(TARGET_0_SRC_10)
+	cc -c $< -o $@ $(TARGET_0_FLAG_10)
+
+$(TARGET_0_OBJ_11): $(TARGET_0_SRC_11)
+	cc -c $< -o $@ $(TARGET_0_FLAG_11)
+
+$(TARGET_1): $(OBJS_1)
+	ar -r $@ $^
+
+$(TARGET_1_OBJ_0): $(TARGET_1_SRC_0)
+	cc -c $< -o $@ $(TARGET_1_FLAG_0)
+
+$(TARGET_1_OBJ_1): $(TARGET_1_SRC_1)
+	cc -c $< -o $@ $(TARGET_1_FLAG_1)
+
+$(TARGET_1_OBJ_2): $(TARGET_1_SRC_2)
+	cc -c $< -o $@ $(TARGET_1_FLAG_2)
+
+$(TARGET_1_OBJ_3): $(TARGET_1_SRC_3)
+	cc -c $< -o $@ $(TARGET_1_FLAG_3)
+
+$(TARGET_1_OBJ_4): $(TARGET_1_SRC_4)
+	cc -c $< -o $@ $(TARGET_1_FLAG_4)
+
+$(TARGET_1_OBJ_5): $(TARGET_1_SRC_5)
+	cc -c $< -o $@ $(TARGET_1_FLAG_5)
+
+$(TARGET_1_OBJ_6): $(TARGET_1_SRC_6)
+	cc -c $< -o $@ $(TARGET_1_FLAG_6)
+
+$(TARGET_1_OBJ_7): $(TARGET_1_SRC_7)
+	cc -c $< -o $@ $(TARGET_1_FLAG_7)
+
+$(TARGET_1_OBJ_8): $(TARGET_1_SRC_8)
+	cc -c $< -o $@ $(TARGET_1_FLAG_8)
+
+$(TARGET_1_OBJ_9): $(TARGET_1_SRC_9)
+	cc -c $< -o $@ $(TARGET_1_FLAG_9)
+
+$(TARGET_1_OBJ_10): $(TARGET_1_SRC_10)
+	cc -c $< -o $@ $(TARGET_1_FLAG_10)
+
+$(TARGET_1_OBJ_11): $(TARGET_1_SRC_11)
+	cc -c $< -o $@ $(TARGET_1_FLAG_11)
+
+$(TARGET_1_OBJ_12): $(TARGET_1_SRC_12)
+	cc -c $< -o $@ $(TARGET_1_FLAG_12)
+
+$(TARGET_1_OBJ_13): $(TARGET_1_SRC_13)
+	cc -c $< -o $@ $(TARGET_1_FLAG_13)
+
+$(TARGET_1_OBJ_14): $(TARGET_1_SRC_14)
+	cc -c $< -o $@ $(TARGET_1_FLAG_14)
+
+$(TARGET_1_OBJ_15): $(TARGET_1_SRC_15)
+	cc -c $< -o $@ $(TARGET_1_FLAG_15)
+
+$(TARGET_1_OBJ_16): $(TARGET_1_SRC_16)
+	cc -c $< -o $@ $(TARGET_1_FLAG_16)
+
+$(TARGET_1_OBJ_17): $(TARGET_1_SRC_17)
+	cc -c $< -o $@ $(TARGET_1_FLAG_17)
+
+$(TARGET_1_OBJ_18): $(TARGET_1_SRC_18)
+	cc -c $< -o $@ $(TARGET_1_FLAG_18)
+
+$(TARGET_1_OBJ_19): $(TARGET_1_SRC_19)
+	cc -c $< -o $@ $(TARGET_1_FLAG_19)
+
+$(TARGET_1_OBJ_20): $(TARGET_1_SRC_20)
+	cc -c $< -o $@ $(TARGET_1_FLAG_20)
+
+$(TARGET_1_OBJ_21): $(TARGET_1_SRC_21)
+	cc -c $< -o $@ $(TARGET_1_FLAG_21)
+
+$(TARGET_1_OBJ_22): $(TARGET_1_SRC_22)
+	cc -c $< -o $@ $(TARGET_1_FLAG_22)
+
+$(TARGET_1_OBJ_23): $(TARGET_1_SRC_23)
+	cc -c $< -o $@ $(TARGET_1_FLAG_23)
+
+$(TARGET_1_OBJ_24): $(TARGET_1_SRC_24)
+	cc -c $< -o $@ $(TARGET_1_FLAG_24)
+
+$(TARGET_1_OBJ_25): $(TARGET_1_SRC_25)
+	cc -c $< -o $@ $(TARGET_1_FLAG_25)
+
+$(TARGET_1_OBJ_26): $(TARGET_1_SRC_26)
+	cc -c $< -o $@ $(TARGET_1_FLAG_26)
+
+$(TARGET_1_OBJ_27): $(TARGET_1_SRC_27)
+	cc -c $< -o $@ $(TARGET_1_FLAG_27)
+
+$(TARGET_1_OBJ_28): $(TARGET_1_SRC_28)
+	cc -c $< -o $@ $(TARGET_1_FLAG_28)
+
+$(TARGET_1_OBJ_29): $(TARGET_1_SRC_29)
+	cc -c $< -o $@ $(TARGET_1_FLAG_29)
+
+$(TARGET_1_OBJ_30): $(TARGET_1_SRC_30)
+	cc -c $< -o $@ $(TARGET_1_FLAG_30)
+
+$(TARGET_1_OBJ_31): $(TARGET_1_SRC_31)
+	cc -c $< -o $@ $(TARGET_1_FLAG_31)
+
+$(TARGET_1_OBJ_32): $(TARGET_1_SRC_32)
+	cc -c $< -o $@ $(TARGET_1_FLAG_32)
+
+$(TARGET_1_OBJ_33): $(TARGET_1_SRC_33)
+	cc -c $< -o $@ $(TARGET_1_FLAG_33)
+
+$(TARGET_1_OBJ_34): $(TARGET_1_SRC_34)
+	cc -c $< -o $@ $(TARGET_1_FLAG_34)
+
+$(TARGET_1_OBJ_35): $(TARGET_1_SRC_35)
+	cc -c $< -o $@ $(TARGET_1_FLAG_35)
+
+$(TARGET_1_OBJ_36): $(TARGET_1_SRC_36)
+	cc -c $< -o $@ $(TARGET_1_FLAG_36)
+
+$(TARGET_1_OBJ_37): $(TARGET_1_SRC_37)
+	cc -c $< -o $@ $(TARGET_1_FLAG_37)
+
+$(TARGET_1_OBJ_38): $(TARGET_1_SRC_38)
+	cc -c $< -o $@ $(TARGET_1_FLAG_38)
+
+$(TARGET_1_OBJ_39): $(TARGET_1_SRC_39)
+	cc -c $< -o $@ $(TARGET_1_FLAG_39)
+
+$(TARGET_1_OBJ_40): $(TARGET_1_SRC_40)
+	cc -c $< -o $@ $(TARGET_1_FLAG_40)
+
+$(TARGET_1_OBJ_41): $(TARGET_1_SRC_41)
+	cc -c $< -o $@ $(TARGET_1_FLAG_41)
+
+$(TARGET_1_OBJ_42): $(TARGET_1_SRC_42)
+	cc -c $< -o $@ $(TARGET_1_FLAG_42)
+
+$(TARGET_1_OBJ_43): $(TARGET_1_SRC_43)
+	cc -c $< -o $@ $(TARGET_1_FLAG_43)
+
+$(TARGET_1_OBJ_44): $(TARGET_1_SRC_44)
+	cc -c $< -o $@ $(TARGET_1_FLAG_44)
+
+$(TARGET_1_OBJ_45): $(TARGET_1_SRC_45)
+	cc -c $< -o $@ $(TARGET_1_FLAG_45)
+
+$(TARGET_1_OBJ_46): $(TARGET_1_SRC_46)
+	cc -c $< -o $@ $(TARGET_1_FLAG_46)
+
+$(TARGET_1_OBJ_47): $(TARGET_1_SRC_47)
+	cc -c $< -o $@ $(TARGET_1_FLAG_47)
+
+$(TARGET_1_OBJ_48): $(TARGET_1_SRC_48)
+	cc -c $< -o $@ $(TARGET_1_FLAG_48)
+
+$(TARGET_1_OBJ_49): $(TARGET_1_SRC_49)
+	cc -c $< -o $@ $(TARGET_1_FLAG_49)
+
+$(TARGET_1_OBJ_50): $(TARGET_1_SRC_50)
+	cc -c $< -o $@ $(TARGET_1_FLAG_50)
+
+$(TARGET_1_OBJ_51): $(TARGET_1_SRC_51)
+	cc -c $< -o $@ $(TARGET_1_FLAG_51)
+
+$(TARGET_1_OBJ_52): $(TARGET_1_SRC_52)
+	cc -c $< -o $@ $(TARGET_1_FLAG_52)
+
+$(TARGET_1_OBJ_53): $(TARGET_1_SRC_53)
+	cc -c $< -o $@ $(TARGET_1_FLAG_53)
+
+$(TARGET_1_OBJ_54): $(TARGET_1_SRC_54)
+	cc -c $< -o $@ $(TARGET_1_FLAG_54)
+
+$(TARGET_1_OBJ_55): $(TARGET_1_SRC_55)
+	cc -c $< -o $@ $(TARGET_1_FLAG_55)
+
+$(TARGET_1_OBJ_56): $(TARGET_1_SRC_56)
+	cc -c $< -o $@ $(TARGET_1_FLAG_56)
+
+$(TARGET_1_OBJ_57): $(TARGET_1_SRC_57)
+	cc -c $< -o $@ $(TARGET_1_FLAG_57)
+
+$(TARGET_1_OBJ_58): $(TARGET_1_SRC_58)
+	cc -c $< -o $@ $(TARGET_1_FLAG_58)
+
+$(TARGET_1_OBJ_59): $(TARGET_1_SRC_59)
+	cc -c $< -o $@ $(TARGET_1_FLAG_59)
+
+$(TARGET_1_OBJ_60): $(TARGET_1_SRC_60)
+	cc -c $< -o $@ $(TARGET_1_FLAG_60)
+
+$(TARGET_1_OBJ_61): $(TARGET_1_SRC_61)
+	cc -c $< -o $@ $(TARGET_1_FLAG_61)
+
+$(TARGET_1_OBJ_62): $(TARGET_1_SRC_62)
+	cc -c $< -o $@ $(TARGET_1_FLAG_62)
+
+$(TARGET_1_OBJ_63): $(TARGET_1_SRC_63)
+	cc -c $< -o $@ $(TARGET_1_FLAG_63)
+
+$(TARGET_1_OBJ_64): $(TARGET_1_SRC_64)
+	cc -c $< -o $@ $(TARGET_1_FLAG_64)
+
+$(TARGET_1_OBJ_65): $(TARGET_1_SRC_65)
+	cc -c $< -o $@ $(TARGET_1_FLAG_65)
+
+$(TARGET_1_OBJ_66): $(TARGET_1_SRC_66)
+	cc -c $< -o $@ $(TARGET_1_FLAG_66)
+
+$(TARGET_1_OBJ_67): $(TARGET_1_SRC_67)
+	cc -c $< -o $@ $(TARGET_1_FLAG_67)
+
+$(TARGET_1_OBJ_68): $(TARGET_1_SRC_68)
+	cc -c $< -o $@ $(TARGET_1_FLAG_68)
+
+$(TARGET_1_OBJ_69): $(TARGET_1_SRC_69)
+	cc -c $< -o $@ $(TARGET_1_FLAG_69)
+
+$(TARGET_1_OBJ_70): $(TARGET_1_SRC_70)
+	cc -c $< -o $@ $(TARGET_1_FLAG_70)
+
+$(TARGET_1_OBJ_71): $(TARGET_1_SRC_71)
+	cc -c $< -o $@ $(TARGET_1_FLAG_71)
+
+$(TARGET_1_OBJ_72): $(TARGET_1_SRC_72)
+	cc -c $< -o $@ $(TARGET_1_FLAG_72)
+
+$(TARGET_1_OBJ_73): $(TARGET_1_SRC_73)
+	cc -c $< -o $@ $(TARGET_1_FLAG_73)
+
+$(TARGET_1_OBJ_74): $(TARGET_1_SRC_74)
+	cc -c $< -o $@ $(TARGET_1_FLAG_74)
+
+$(TARGET_1_OBJ_75): $(TARGET_1_SRC_75)
+	cc -c $< -o $@ $(TARGET_1_FLAG_75)
+
+$(TARGET_1_OBJ_76): $(TARGET_1_SRC_76)
+	cc -c $< -o $@ $(TARGET_1_FLAG_76)
+
+$(TARGET_1_OBJ_77): $(TARGET_1_SRC_77)
+	cc -c $< -o $@ $(TARGET_1_FLAG_77)
+
+$(TARGET_1_OBJ_78): $(TARGET_1_SRC_78)
+	cc -c $< -o $@ $(TARGET_1_FLAG_78)
+
+$(TARGET_1_OBJ_79): $(TARGET_1_SRC_79)
+	cc -c $< -o $@ $(TARGET_1_FLAG_79)
+
+$(TARGET_1_OBJ_80): $(TARGET_1_SRC_80)
+	cc -c $< -o $@ $(TARGET_1_FLAG_80)
+
+$(TARGET_1_OBJ_81): $(TARGET_1_SRC_81)
+	cc -c $< -o $@ $(TARGET_1_FLAG_81)
+
+$(TARGET_1_OBJ_82): $(TARGET_1_SRC_82)
+	cc -c $< -o $@ $(TARGET_1_FLAG_82)
+
+$(TARGET_1_OBJ_83): $(TARGET_1_SRC_83)
+	cc -c $< -o $@ $(TARGET_1_FLAG_83)
+
+$(TARGET_1_OBJ_84): $(TARGET_1_SRC_84)
+	cc -c $< -o $@ $(TARGET_1_FLAG_84)
+
+$(TARGET_1_OBJ_85): $(TARGET_1_SRC_85)
+	cc -c $< -o $@ $(TARGET_1_FLAG_85)
+
+$(TARGET_1_OBJ_86): $(TARGET_1_SRC_86)
+	cc -c $< -o $@ $(TARGET_1_FLAG_86)
+
+$(TARGET_1_OBJ_87): $(TARGET_1_SRC_87)
+	cc -c $< -o $@ $(TARGET_1_FLAG_87)
+
+$(TARGET_1_OBJ_88): $(TARGET_1_SRC_88)
+	cc -c $< -o $@ $(TARGET_1_FLAG_88)
+
+$(TARGET_1_OBJ_89): $(TARGET_1_SRC_89)
+	cc -c $< -o $@ $(TARGET_1_FLAG_89)
+
+$(TARGET_1_OBJ_90): $(TARGET_1_SRC_90)
+	cc -c $< -o $@ $(TARGET_1_FLAG_90)
+
+$(TARGET_1_OBJ_91): $(TARGET_1_SRC_91)
+	cc -c $< -o $@ $(TARGET_1_FLAG_91)
+
+$(TARGET_1_OBJ_92): $(TARGET_1_SRC_92)
+	cc -c $< -o $@ $(TARGET_1_FLAG_92)
+
+$(TARGET_1_OBJ_93): $(TARGET_1_SRC_93)
+	cc -c $< -o $@ $(TARGET_1_FLAG_93)
+
+$(TARGET_1_OBJ_94): $(TARGET_1_SRC_94)
+	cc -c $< -o $@ $(TARGET_1_FLAG_94)
+
+$(TARGET_1_OBJ_95): $(TARGET_1_SRC_95)
+	cc -c $< -o $@ $(TARGET_1_FLAG_95)
+
+$(TARGET_1_OBJ_96): $(TARGET_1_SRC_96)
+	cc -c $< -o $@ $(TARGET_1_FLAG_96)
+
+$(TARGET_1_OBJ_97): $(TARGET_1_SRC_97)
+	cc -c $< -o $@ $(TARGET_1_FLAG_97)
+
+$(TARGET_1_OBJ_98): $(TARGET_1_SRC_98)
+	cc -c $< -o $@ $(TARGET_1_FLAG_98)
+
+$(TARGET_1_OBJ_99): $(TARGET_1_SRC_99)
+	cc -c $< -o $@ $(TARGET_1_FLAG_99)
+
+$(TARGET_1_OBJ_100): $(TARGET_1_SRC_100)
+	cc -c $< -o $@ $(TARGET_1_FLAG_100)
+
+$(TARGET_1_OBJ_101): $(TARGET_1_SRC_101)
+	cc -c $< -o $@ $(TARGET_1_FLAG_101)
+
+$(TARGET_1_OBJ_102): $(TARGET_1_SRC_102)
+	cc -c $< -o $@ $(TARGET_1_FLAG_102)
+
+$(TARGET_1_OBJ_103): $(TARGET_1_SRC_103)
+	cc -c $< -o $@ $(TARGET_1_FLAG_103)
+
+$(TARGET_1_OBJ_104): $(TARGET_1_SRC_104)
+	cc -c $< -o $@ $(TARGET_1_FLAG_104)
+
+$(TARGET_1_OBJ_105): $(TARGET_1_SRC_105)
+	cc -c $< -o $@ $(TARGET_1_FLAG_105)
+
+$(TARGET_1_OBJ_106): $(TARGET_1_SRC_106)
+	cc -c $< -o $@ $(TARGET_1_FLAG_106)
+
+$(TARGET_1_OBJ_107): $(TARGET_1_SRC_107)
+	cc -c $< -o $@ $(TARGET_1_FLAG_107)
+
+$(TARGET_1_OBJ_108): $(TARGET_1_SRC_108)
+	cc -c $< -o $@ $(TARGET_1_FLAG_108)
+
+$(TARGET_1_OBJ_109): $(TARGET_1_SRC_109)
+	cc -c $< -o $@ $(TARGET_1_FLAG_109)
+
+$(TARGET_1_OBJ_110): $(TARGET_1_SRC_110)
+	cc -c $< -o $@ $(TARGET_1_FLAG_110)
+
+$(TARGET_1_OBJ_111): $(TARGET_1_SRC_111)
+	cc -c $< -o $@ $(TARGET_1_FLAG_111)
+
+$(TARGET_1_OBJ_112): $(TARGET_1_SRC_112)
+	cc -c $< -o $@ $(TARGET_1_FLAG_112)
+
+$(TARGET_1_OBJ_113): $(TARGET_1_SRC_113)
+	cc -c $< -o $@ $(TARGET_1_FLAG_113)
+
+$(TARGET_1_OBJ_114): $(TARGET_1_SRC_114)
+	cc -c $< -o $@ $(TARGET_1_FLAG_114)
+
+$(TARGET_1_OBJ_115): $(TARGET_1_SRC_115)
+	cc -c $< -o $@ $(TARGET_1_FLAG_115)
+
+$(TARGET_1_OBJ_116): $(TARGET_1_SRC_116)
+	cc -c $< -o $@ $(TARGET_1_FLAG_116)
+
+$(TARGET_1_OBJ_117): $(TARGET_1_SRC_117)
+	cc -c $< -o $@ $(TARGET_1_FLAG_117)
+
+$(TARGET_1_OBJ_118): $(TARGET_1_SRC_118)
+	cc -c $< -o $@ $(TARGET_1_FLAG_118)
+
+$(TARGET_1_OBJ_119): $(TARGET_1_SRC_119)
+	cc -c $< -o $@ $(TARGET_1_FLAG_119)
+
+$(TARGET_1_OBJ_120): $(TARGET_1_SRC_120)
+	cc -c $< -o $@ $(TARGET_1_FLAG_120)
+
+$(TARGET_1_OBJ_121): $(TARGET_1_SRC_121)
+	cc -c $< -o $@ $(TARGET_1_FLAG_121)
+
+$(TARGET_1_OBJ_122): $(TARGET_1_SRC_122)
+	cc -c $< -o $@ $(TARGET_1_FLAG_122)
+
+$(TARGET_1_OBJ_123): $(TARGET_1_SRC_123)
+	cc -c $< -o $@ $(TARGET_1_FLAG_123)
+
+$(TARGET_1_OBJ_124): $(TARGET_1_SRC_124)
+	cc -c $< -o $@ $(TARGET_1_FLAG_124)
+
+$(TARGET_1_OBJ_125): $(TARGET_1_SRC_125)
+	cc -c $< -o $@ $(TARGET_1_FLAG_125)
+
+$(TARGET_2): $(OBJS_2)
+	$(CC) -fsycl -shared -o $@ $^ $(LIB) -qmkl
+
+$(TARGET_2_OBJ_0): $(TARGET_2_SRC_0)
+	cc -c $< -o $@ $(TARGET_2_FLAG_0)
+
+$(TARGET_2_OBJ_1): $(TARGET_2_SRC_1)
+	cc -c $< -o $@ $(TARGET_2_FLAG_1)
+
+clean:
+	$(RM) $(OBJS_0) $(OBJS_1) $(OBJS_2) ${TARGET}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched
new file mode 100644
index 000000000..08159b6dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched
@@ -0,0 +1,1019 @@
+CC := icpx
+
+LD := $(CC)
+
+#DPCT2001:4: You can link with more libraries by adding them here.
+LIB := -lmpi
+
+FLAGS := -fPIC
+
+CC_PATH := $(shell which $(CC))
+ifeq ($(CC_PATH),)
+    $(error ERROR - $(CC) compiler not found)
+endif
+ROOT_DIR     := $(shell dirname $(CC_PATH))
+INCLUDE_SYCL := $(ROOT_DIR)/../include
+INCLUDE_CL   := $(ROOT_DIR)/../include/sycl
+
+TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_0 = $(TARGET_0_SRC_0:.c=.o)
+TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include $(FLAGS)
+
+TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_1 = $(TARGET_0_SRC_1:.c=.o)
+TARGET_0_FLAG_1 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_2 = $(TARGET_0_SRC_2:.c=.o)
+TARGET_0_FLAG_2 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_3 = $(TARGET_0_SRC_3:.c=.o)
+TARGET_0_FLAG_3 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_4 = $(TARGET_0_SRC_4:.c=.o)
+TARGET_0_FLAG_4 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_5 = $(TARGET_0_SRC_5:.c=.o)
+TARGET_0_FLAG_5 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_6 = $(TARGET_0_SRC_6:.c=.o)
+TARGET_0_FLAG_6 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_7 = $(TARGET_0_SRC_7:.c=.o)
+TARGET_0_FLAG_7 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_8 = $(TARGET_0_SRC_8:.c=.o)
+TARGET_0_FLAG_8 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_9 = $(TARGET_0_SRC_9:.c=.o)
+TARGET_0_FLAG_9 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_10 = $(TARGET_0_SRC_10:.c=.o)
+TARGET_0_FLAG_10 = $(TARGET_0_FLAG_0)
+
+TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_11 = $(TARGET_0_SRC_11:.c=.o)
+TARGET_0_FLAG_11 = $(TARGET_0_FLAG_0)
+
+TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c
+TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o
+TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c
+TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o
+TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_2 = ./src/auxil/HPL_fprintf.c
+TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o
+TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c
+TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o
+TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_4 = ./src/auxil/HPL_abort.c
+TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o
+TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c
+TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o
+TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c
+TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o
+TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c
+TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o
+TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS}
+
+TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c
+TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o
+TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c
+TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o
+TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c
+TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o
+TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c
+TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o
+TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c
+TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o
+TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c
+TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o
+TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_14 = ./src/blas/HPL_dger.c
+TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o
+TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c
+TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o
+TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c
+TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o
+TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c
+TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o
+TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c
+TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o
+TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c
+TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o
+TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c
+TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o
+TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_21 = ./src/comm/HPL_blong.c
+TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o
+TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c
+TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o
+TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_23 = ./src/comm/HPL_packL.c
+TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o
+TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c
+TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o
+TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_25 = ./src/comm/HPL_binit.c
+TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o
+TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c
+TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o
+TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c
+TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o
+TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_28 = ./src/comm/HPL_send.c
+TARGET_1_OBJ_28 = ./src/comm/HPL_send.o
+TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_29 = ./src/comm/HPL_recv.c
+TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o
+TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c
+TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o
+TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c
+TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o
+TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c
+TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o
+TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# Remaining ./src/grid inputs of TARGET_1 (entries 33-41): source path, object path and compile flags for each entry.
+TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c
+TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o
+TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c
+TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o
+TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c
+TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o
+TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c
+TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o
+TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c
+TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o
+TARGET_1_FLAG_37 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c
+TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o
+TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_39 = ./src/grid/HPL_min.c
+TARGET_1_OBJ_39 = ./src/grid/HPL_min.o
+TARGET_1_FLAG_39 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_40 = ./src/grid/HPL_max.c
+TARGET_1_OBJ_40 = ./src/grid/HPL_max.o
+TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_41 = ./src/grid/HPL_sum.c
+TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o
+TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./src/panel inputs of TARGET_1 (entries 42-45): source path, object path and compile flags for each entry.
+TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c
+TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o
+TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c
+TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o
+TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c
+TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o
+TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c
+TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o
+TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./src/pauxil inputs of TARGET_1 (entries 46-70): source path, object path and compile flags for each entry.
+TARGET_1_SRC_46 = ./src/pauxil/HPL_indxg2l.c
+TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o
+TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c
+TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o
+TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_48 = ./src/pauxil/HPL_indxg2p.c
+TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o
+TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c
+TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o
+TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c
+TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o
+TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c
+TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o
+TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c
+TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o
+TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c
+TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o
+TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c
+TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o
+TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c
+TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o
+TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c
+TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o
+TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c
+TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o
+TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c
+TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o
+TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c
+TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o
+TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c
+TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o
+TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c
+TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o
+TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c
+TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o
+TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_63 = ./src/pauxil/HPL_dlaswp05T.c
+TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o
+TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c
+TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o
+TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_65 = ./src/pauxil/HPL_dlaswp06T.c
+TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o
+TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c
+TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o
+TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c
+TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o
+TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c
+TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o
+TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c
+TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o
+TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c
+TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o
+TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./src/pfact inputs of TARGET_1 (entries 71-87): source path, object path and compile flags for each entry.
+TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c
+TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o
+TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c
+TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o
+TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c
+TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o
+TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c
+TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o
+TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c
+TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o
+TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c
+TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o
+TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c
+TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o
+TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c
+TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o
+TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c
+TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o
+TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c
+TARGET_1_OBJ_80 = ./src/pfact/HPL_pdpanrlT.o
+TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c
+TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o
+TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c
+TARGET_1_OBJ_82 = ./src/pfact/HPL_pdrpanllT.o
+TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c
+TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o
+TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c
+TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o
+TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c
+TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o
+TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c
+TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o
+TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c
+TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o
+TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./src/pgesv inputs of TARGET_1 (entries 88-111): source path, object path and compile flags for each entry.
+TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c
+TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o
+TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c
+TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o
+TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c
+TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o
+TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c
+TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o
+TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c
+TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o
+TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c
+TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o
+TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c
+TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o
+TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c
+TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o
+TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c
+TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o
+TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c
+TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o
+TARGET_1_FLAG_97 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c
+TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o
+TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c
+TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o
+TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c
+TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o
+TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c
+TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o
+TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c
+TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o
+TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c
+TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o
+TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c
+TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o
+TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c
+TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o
+TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c
+TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o
+TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c
+TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o
+TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c
+TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o
+TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c
+TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o
+TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c
+TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o
+TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c
+TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o
+TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./testing/matgen inputs of TARGET_1 (entries 112-118): source path, object path and compile flags for each entry.
+TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c
+TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o
+TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c
+TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o
+TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c
+TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o
+TARGET_1_FLAG_114 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c
+TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o
+TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c
+TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o
+TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c
+TARGET_1_OBJ_117 = ./testing/matgen/HPL_rand.o
+TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c
+TARGET_1_OBJ_118 = ./testing/matgen/HPL_setran.o
+TARGET_1_FLAG_118 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# ./testing/timer, ./testing/pmatgen and ./testing/ptimer inputs of TARGET_1 (entries 119-125, the last ones listed in OBJS_1).
+TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c
+TARGET_1_OBJ_119 = ./testing/timer/HPL_timer.o
+TARGET_1_FLAG_119 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c
+TARGET_1_OBJ_120 = ./testing/timer/HPL_timer_cputime.o
+TARGET_1_FLAG_120 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c
+TARGET_1_OBJ_121 = ./testing/timer/HPL_timer_walltime.o
+TARGET_1_FLAG_121 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c
+TARGET_1_OBJ_122 = ./testing/pmatgen/HPL_pdmatgen.o
+TARGET_1_FLAG_122 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c
+TARGET_1_OBJ_123 = ./testing/ptimer/HPL_ptimer.o
+TARGET_1_FLAG_123 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c
+TARGET_1_OBJ_124 = ./testing/ptimer/HPL_ptimer_cputime.o
+TARGET_1_FLAG_124 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c
+TARGET_1_OBJ_125 = ./testing/ptimer/HPL_ptimer_walltime.o
+TARGET_1_FLAG_125 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+# Inputs of TARGET_2. NOTE(review): entries 0 and 1 name the same source, object and flags - looks like one compile command recorded twice; confirm.
+TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_0 = ./src/cuda/cuda_dgemm.cpp.dp.o
+TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS}
+
+TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_1 = ./src/cuda/cuda_dgemm.cpp.dp.o
+TARGET_2_FLAG_1 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS}
+# Build products: TARGET_0 is the linked xhpl binary, TARGET_1 the libhpl.a archive, TARGET_2 libdgemm.so.1.0.1. NOTE(review): the first two are absolute /home/local_user paths - confirm that is intended.
+TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl
+TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a
+TARGET_2 := libdgemm.so.1.0.1
+# Aggregate consumed by `all`; both libraries are listed ahead of the binary whose link command names them.
+TARGET :=   ${TARGET_1} ${TARGET_2} ${TARGET_0}
+
+.PHONY:all clean  # all and clean are command names, never files
+OBJS_0 :=  ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11}
+OBJS_1 :=  ${TARGET_1_OBJ_0} ${TARGET_1_OBJ_1} ${TARGET_1_OBJ_2} ${TARGET_1_OBJ_3} ${TARGET_1_OBJ_4} ${TARGET_1_OBJ_5} ${TARGET_1_OBJ_6} ${TARGET_1_OBJ_7} ${TARGET_1_OBJ_8} ${TARGET_1_OBJ_9} ${TARGET_1_OBJ_10} ${TARGET_1_OBJ_11} ${TARGET_1_OBJ_12} ${TARGET_1_OBJ_13} ${TARGET_1_OBJ_14} ${TARGET_1_OBJ_15} ${TARGET_1_OBJ_16} ${TARGET_1_OBJ_17} ${TARGET_1_OBJ_18} ${TARGET_1_OBJ_19} ${TARGET_1_OBJ_20} ${TARGET_1_OBJ_21} ${TARGET_1_OBJ_22} ${TARGET_1_OBJ_23} ${TARGET_1_OBJ_24} ${TARGET_1_OBJ_25} ${TARGET_1_OBJ_26} ${TARGET_1_OBJ_27} ${TARGET_1_OBJ_28} ${TARGET_1_OBJ_29} ${TARGET_1_OBJ_30} ${TARGET_1_OBJ_31} ${TARGET_1_OBJ_32} ${TARGET_1_OBJ_33} ${TARGET_1_OBJ_34} ${TARGET_1_OBJ_35} ${TARGET_1_OBJ_36} ${TARGET_1_OBJ_37} ${TARGET_1_OBJ_38} ${TARGET_1_OBJ_39} ${TARGET_1_OBJ_40} ${TARGET_1_OBJ_41} ${TARGET_1_OBJ_42} ${TARGET_1_OBJ_43} ${TARGET_1_OBJ_44} ${TARGET_1_OBJ_45} ${TARGET_1_OBJ_46} ${TARGET_1_OBJ_47} ${TARGET_1_OBJ_48} ${TARGET_1_OBJ_49} ${TARGET_1_OBJ_50} ${TARGET_1_OBJ_51} ${TARGET_1_OBJ_52} ${TARGET_1_OBJ_53} ${TARGET_1_OBJ_54} ${TARGET_1_OBJ_55} ${TARGET_1_OBJ_56} ${TARGET_1_OBJ_57} ${TARGET_1_OBJ_58} ${TARGET_1_OBJ_59} ${TARGET_1_OBJ_60} ${TARGET_1_OBJ_61} ${TARGET_1_OBJ_62} ${TARGET_1_OBJ_63} ${TARGET_1_OBJ_64} ${TARGET_1_OBJ_65} ${TARGET_1_OBJ_66} ${TARGET_1_OBJ_67} ${TARGET_1_OBJ_68} ${TARGET_1_OBJ_69} ${TARGET_1_OBJ_70} ${TARGET_1_OBJ_71} ${TARGET_1_OBJ_72} ${TARGET_1_OBJ_73} ${TARGET_1_OBJ_74} ${TARGET_1_OBJ_75} ${TARGET_1_OBJ_76} ${TARGET_1_OBJ_77} ${TARGET_1_OBJ_78} ${TARGET_1_OBJ_79} ${TARGET_1_OBJ_80} ${TARGET_1_OBJ_81} ${TARGET_1_OBJ_82} ${TARGET_1_OBJ_83} ${TARGET_1_OBJ_84} ${TARGET_1_OBJ_85} ${TARGET_1_OBJ_86} ${TARGET_1_OBJ_87} ${TARGET_1_OBJ_88} ${TARGET_1_OBJ_89} ${TARGET_1_OBJ_90} ${TARGET_1_OBJ_91} ${TARGET_1_OBJ_92} ${TARGET_1_OBJ_93} ${TARGET_1_OBJ_94} ${TARGET_1_OBJ_95} ${TARGET_1_OBJ_96} ${TARGET_1_OBJ_97} ${TARGET_1_OBJ_98} ${TARGET_1_OBJ_99} ${TARGET_1_OBJ_100} ${TARGET_1_OBJ_101} ${TARGET_1_OBJ_102} ${TARGET_1_OBJ_103} \
+${TARGET_1_OBJ_104} ${TARGET_1_OBJ_105} ${TARGET_1_OBJ_106} ${TARGET_1_OBJ_107} ${TARGET_1_OBJ_108} ${TARGET_1_OBJ_109} ${TARGET_1_OBJ_110} ${TARGET_1_OBJ_111} ${TARGET_1_OBJ_112} ${TARGET_1_OBJ_113} ${TARGET_1_OBJ_114} ${TARGET_1_OBJ_115} ${TARGET_1_OBJ_116} ${TARGET_1_OBJ_117} ${TARGET_1_OBJ_118} ${TARGET_1_OBJ_119} ${TARGET_1_OBJ_120} ${TARGET_1_OBJ_121} ${TARGET_1_OBJ_122} ${TARGET_1_OBJ_123} ${TARGET_1_OBJ_124} ${TARGET_1_OBJ_125}
+OBJS_2 :=  ${TARGET_2_OBJ_0} ${TARGET_2_OBJ_1}
+all: $(TARGET)  # builds every product named in TARGET; OBJS_1 above is continued with a backslash so objects 104-125 stay in the list
+$(TARGET_0): $(OBJS_0) $(TARGET_2) $(TARGET_1)  # the two libraries named on the link line are prerequisites, so they are built first and a change to either relinks
+	$(CC) -fsycl -o $@ $(filter-out $(TARGET_2) $(TARGET_1),$^) $(LIB) -qmkl $(TARGET_2) $(TARGET_1)
+# Object rules for TARGET_0 entries 0-11: compile the rule's single source prerequisite ($<) into the rule target ($@) with that entry's flags.
+$(TARGET_0_OBJ_0): $(TARGET_0_SRC_0)
+	cc -c $< -o $@ $(TARGET_0_FLAG_0)
+
+$(TARGET_0_OBJ_1): $(TARGET_0_SRC_1)
+	cc -c $< -o $@ $(TARGET_0_FLAG_1)
+
+$(TARGET_0_OBJ_2): $(TARGET_0_SRC_2)
+	cc -c $< -o $@ $(TARGET_0_FLAG_2)
+
+$(TARGET_0_OBJ_3): $(TARGET_0_SRC_3)
+	cc -c $< -o $@ $(TARGET_0_FLAG_3)
+
+$(TARGET_0_OBJ_4): $(TARGET_0_SRC_4)
+	cc -c $< -o $@ $(TARGET_0_FLAG_4)
+
+$(TARGET_0_OBJ_5): $(TARGET_0_SRC_5)
+	cc -c $< -o $@ $(TARGET_0_FLAG_5)
+
+$(TARGET_0_OBJ_6): $(TARGET_0_SRC_6)
+	cc -c $< -o $@ $(TARGET_0_FLAG_6)
+
+$(TARGET_0_OBJ_7): $(TARGET_0_SRC_7)
+	cc -c $< -o $@ $(TARGET_0_FLAG_7)
+
+$(TARGET_0_OBJ_8): $(TARGET_0_SRC_8)
+	cc -c $< -o $@ $(TARGET_0_FLAG_8)
+
+$(TARGET_0_OBJ_9): $(TARGET_0_SRC_9)
+	cc -c $< -o $@ $(TARGET_0_FLAG_9)
+
+$(TARGET_0_OBJ_10): $(TARGET_0_SRC_10)
+	cc -c $< -o $@ $(TARGET_0_FLAG_10)
+
+$(TARGET_0_OBJ_11): $(TARGET_0_SRC_11)
+	cc -c $< -o $@ $(TARGET_0_FLAG_11)
+# Archive every object in OBJS_1 into TARGET_1 with ar. NOTE(review): $(LIB) is also passed to ar as further arguments - confirm it is empty or intended here.
+$(TARGET_1): $(OBJS_1)
+	ar -r $@ $^ $(LIB) 
+# Object rules for TARGET_1 entries 0-31: compile the rule's single source prerequisite ($<) into the rule target ($@) with that entry's flags.
+$(TARGET_1_OBJ_0): $(TARGET_1_SRC_0)
+	cc -c $< -o $@ $(TARGET_1_FLAG_0)
+
+$(TARGET_1_OBJ_1): $(TARGET_1_SRC_1)
+	cc -c $< -o $@ $(TARGET_1_FLAG_1)
+
+$(TARGET_1_OBJ_2): $(TARGET_1_SRC_2)
+	cc -c $< -o $@ $(TARGET_1_FLAG_2)
+
+$(TARGET_1_OBJ_3): $(TARGET_1_SRC_3)
+	cc -c $< -o $@ $(TARGET_1_FLAG_3)
+
+$(TARGET_1_OBJ_4): $(TARGET_1_SRC_4)
+	cc -c $< -o $@ $(TARGET_1_FLAG_4)
+
+$(TARGET_1_OBJ_5): $(TARGET_1_SRC_5)
+	cc -c $< -o $@ $(TARGET_1_FLAG_5)
+
+$(TARGET_1_OBJ_6): $(TARGET_1_SRC_6)
+	cc -c $< -o $@ $(TARGET_1_FLAG_6)
+
+$(TARGET_1_OBJ_7): $(TARGET_1_SRC_7)
+	cc -c $< -o $@ $(TARGET_1_FLAG_7)
+
+$(TARGET_1_OBJ_8): $(TARGET_1_SRC_8)
+	cc -c $< -o $@ $(TARGET_1_FLAG_8)
+
+$(TARGET_1_OBJ_9): $(TARGET_1_SRC_9)
+	cc -c $< -o $@ $(TARGET_1_FLAG_9)
+
+$(TARGET_1_OBJ_10): $(TARGET_1_SRC_10)
+	cc -c $< -o $@ $(TARGET_1_FLAG_10)
+
+$(TARGET_1_OBJ_11): $(TARGET_1_SRC_11)
+	cc -c $< -o $@ $(TARGET_1_FLAG_11)
+
+$(TARGET_1_OBJ_12): $(TARGET_1_SRC_12)
+	cc -c $< -o $@ $(TARGET_1_FLAG_12)
+
+$(TARGET_1_OBJ_13): $(TARGET_1_SRC_13)
+	cc -c $< -o $@ $(TARGET_1_FLAG_13)
+
+$(TARGET_1_OBJ_14): $(TARGET_1_SRC_14)
+	cc -c $< -o $@ $(TARGET_1_FLAG_14)
+
+$(TARGET_1_OBJ_15): $(TARGET_1_SRC_15)
+	cc -c $< -o $@ $(TARGET_1_FLAG_15)
+
+$(TARGET_1_OBJ_16): $(TARGET_1_SRC_16)
+	cc -c $< -o $@ $(TARGET_1_FLAG_16)
+
+$(TARGET_1_OBJ_17): $(TARGET_1_SRC_17)
+	cc -c $< -o $@ $(TARGET_1_FLAG_17)
+
+$(TARGET_1_OBJ_18): $(TARGET_1_SRC_18)
+	cc -c $< -o $@ $(TARGET_1_FLAG_18)
+
+$(TARGET_1_OBJ_19): $(TARGET_1_SRC_19)
+	cc -c $< -o $@ $(TARGET_1_FLAG_19)
+
+$(TARGET_1_OBJ_20): $(TARGET_1_SRC_20)
+	cc -c $< -o $@ $(TARGET_1_FLAG_20)
+
+$(TARGET_1_OBJ_21): $(TARGET_1_SRC_21)
+	cc -c $< -o $@ $(TARGET_1_FLAG_21)
+
+$(TARGET_1_OBJ_22): $(TARGET_1_SRC_22)
+	cc -c $< -o $@ $(TARGET_1_FLAG_22)
+
+$(TARGET_1_OBJ_23): $(TARGET_1_SRC_23)
+	cc -c $< -o $@ $(TARGET_1_FLAG_23)
+
+$(TARGET_1_OBJ_24): $(TARGET_1_SRC_24)
+	cc -c $< -o $@ $(TARGET_1_FLAG_24)
+
+$(TARGET_1_OBJ_25): $(TARGET_1_SRC_25)
+	cc -c $< -o $@ $(TARGET_1_FLAG_25)
+
+$(TARGET_1_OBJ_26): $(TARGET_1_SRC_26)
+	cc -c $< -o $@ $(TARGET_1_FLAG_26)
+
+$(TARGET_1_OBJ_27): $(TARGET_1_SRC_27)
+	cc -c $< -o $@ $(TARGET_1_FLAG_27)
+
+$(TARGET_1_OBJ_28): $(TARGET_1_SRC_28)
+	cc -c $< -o $@ $(TARGET_1_FLAG_28)
+
+$(TARGET_1_OBJ_29): $(TARGET_1_SRC_29)
+	cc -c $< -o $@ $(TARGET_1_FLAG_29)
+
+$(TARGET_1_OBJ_30): $(TARGET_1_SRC_30)
+	cc -c $< -o $@ $(TARGET_1_FLAG_30)
+
+$(TARGET_1_OBJ_31): $(TARGET_1_SRC_31)
+	cc -c $< -o $@ $(TARGET_1_FLAG_31)
+# Object rules for TARGET_1 entries 32-83: compile the rule's single source prerequisite ($<) into the rule target ($@) with that entry's flags.
+$(TARGET_1_OBJ_32): $(TARGET_1_SRC_32)
+	cc -c $< -o $@ $(TARGET_1_FLAG_32)
+
+$(TARGET_1_OBJ_33): $(TARGET_1_SRC_33)
+	cc -c $< -o $@ $(TARGET_1_FLAG_33)
+
+$(TARGET_1_OBJ_34): $(TARGET_1_SRC_34)
+	cc -c $< -o $@ $(TARGET_1_FLAG_34)
+
+$(TARGET_1_OBJ_35): $(TARGET_1_SRC_35)
+	cc -c $< -o $@ $(TARGET_1_FLAG_35)
+
+$(TARGET_1_OBJ_36): $(TARGET_1_SRC_36)
+	cc -c $< -o $@ $(TARGET_1_FLAG_36)
+
+$(TARGET_1_OBJ_37): $(TARGET_1_SRC_37)
+	cc -c $< -o $@ $(TARGET_1_FLAG_37)
+
+$(TARGET_1_OBJ_38): $(TARGET_1_SRC_38)
+	cc -c $< -o $@ $(TARGET_1_FLAG_38)
+
+$(TARGET_1_OBJ_39): $(TARGET_1_SRC_39)
+	cc -c $< -o $@ $(TARGET_1_FLAG_39)
+
+$(TARGET_1_OBJ_40): $(TARGET_1_SRC_40)
+	cc -c $< -o $@ $(TARGET_1_FLAG_40)
+
+$(TARGET_1_OBJ_41): $(TARGET_1_SRC_41)
+	cc -c $< -o $@ $(TARGET_1_FLAG_41)
+
+$(TARGET_1_OBJ_42): $(TARGET_1_SRC_42)
+	cc -c $< -o $@ $(TARGET_1_FLAG_42)
+
+$(TARGET_1_OBJ_43): $(TARGET_1_SRC_43)
+	cc -c $< -o $@ $(TARGET_1_FLAG_43)
+
+$(TARGET_1_OBJ_44): $(TARGET_1_SRC_44)
+	cc -c $< -o $@ $(TARGET_1_FLAG_44)
+
+$(TARGET_1_OBJ_45): $(TARGET_1_SRC_45)
+	cc -c $< -o $@ $(TARGET_1_FLAG_45)
+
+$(TARGET_1_OBJ_46): $(TARGET_1_SRC_46)
+	cc -c $< -o $@ $(TARGET_1_FLAG_46)
+
+$(TARGET_1_OBJ_47): $(TARGET_1_SRC_47)
+	cc -c $< -o $@ $(TARGET_1_FLAG_47)
+
+$(TARGET_1_OBJ_48): $(TARGET_1_SRC_48)
+	cc -c $< -o $@ $(TARGET_1_FLAG_48)
+
+$(TARGET_1_OBJ_49): $(TARGET_1_SRC_49)
+	cc -c $< -o $@ $(TARGET_1_FLAG_49)
+
+$(TARGET_1_OBJ_50): $(TARGET_1_SRC_50)
+	cc -c $< -o $@ $(TARGET_1_FLAG_50)
+
+$(TARGET_1_OBJ_51): $(TARGET_1_SRC_51)
+	cc -c $< -o $@ $(TARGET_1_FLAG_51)
+
+$(TARGET_1_OBJ_52): $(TARGET_1_SRC_52)
+	cc -c $< -o $@ $(TARGET_1_FLAG_52)
+
+$(TARGET_1_OBJ_53): $(TARGET_1_SRC_53)
+	cc -c $< -o $@ $(TARGET_1_FLAG_53)
+
+$(TARGET_1_OBJ_54): $(TARGET_1_SRC_54)
+	cc -c $< -o $@ $(TARGET_1_FLAG_54)
+
+$(TARGET_1_OBJ_55): $(TARGET_1_SRC_55)
+	cc -c $< -o $@ $(TARGET_1_FLAG_55)
+
+$(TARGET_1_OBJ_56): $(TARGET_1_SRC_56)
+	cc -c $< -o $@ $(TARGET_1_FLAG_56)
+
+$(TARGET_1_OBJ_57): $(TARGET_1_SRC_57)
+	cc -c $< -o $@ $(TARGET_1_FLAG_57)
+
+$(TARGET_1_OBJ_58): $(TARGET_1_SRC_58)
+	cc -c $< -o $@ $(TARGET_1_FLAG_58)
+
+$(TARGET_1_OBJ_59): $(TARGET_1_SRC_59)
+	cc -c $< -o $@ $(TARGET_1_FLAG_59)
+
+$(TARGET_1_OBJ_60): $(TARGET_1_SRC_60)
+	cc -c $< -o $@ $(TARGET_1_FLAG_60)
+
+$(TARGET_1_OBJ_61): $(TARGET_1_SRC_61)
+	cc -c $< -o $@ $(TARGET_1_FLAG_61)
+
+$(TARGET_1_OBJ_62): $(TARGET_1_SRC_62)
+	cc -c $< -o $@ $(TARGET_1_FLAG_62)
+
+$(TARGET_1_OBJ_63): $(TARGET_1_SRC_63)
+	cc -c $< -o $@ $(TARGET_1_FLAG_63)
+
+$(TARGET_1_OBJ_64): $(TARGET_1_SRC_64)
+	cc -c $< -o $@ $(TARGET_1_FLAG_64)
+
+$(TARGET_1_OBJ_65): $(TARGET_1_SRC_65)
+	cc -c $< -o $@ $(TARGET_1_FLAG_65)
+
+$(TARGET_1_OBJ_66): $(TARGET_1_SRC_66)
+	cc -c $< -o $@ $(TARGET_1_FLAG_66)
+
+$(TARGET_1_OBJ_67): $(TARGET_1_SRC_67)
+	cc -c $< -o $@ $(TARGET_1_FLAG_67)
+
+$(TARGET_1_OBJ_68): $(TARGET_1_SRC_68)
+	cc -c $< -o $@ $(TARGET_1_FLAG_68)
+
+$(TARGET_1_OBJ_69): $(TARGET_1_SRC_69)
+	cc -c $< -o $@ $(TARGET_1_FLAG_69)
+
+$(TARGET_1_OBJ_70): $(TARGET_1_SRC_70)
+	cc -c $< -o $@ $(TARGET_1_FLAG_70)
+
+$(TARGET_1_OBJ_71): $(TARGET_1_SRC_71)
+	cc -c $< -o $@ $(TARGET_1_FLAG_71)
+
+$(TARGET_1_OBJ_72): $(TARGET_1_SRC_72)
+	cc -c $< -o $@ $(TARGET_1_FLAG_72)
+
+$(TARGET_1_OBJ_73): $(TARGET_1_SRC_73)
+	cc -c $< -o $@ $(TARGET_1_FLAG_73)
+
+$(TARGET_1_OBJ_74): $(TARGET_1_SRC_74)
+	cc -c $< -o $@ $(TARGET_1_FLAG_74)
+
+$(TARGET_1_OBJ_75): $(TARGET_1_SRC_75)
+	cc -c $< -o $@ $(TARGET_1_FLAG_75)
+
+$(TARGET_1_OBJ_76): $(TARGET_1_SRC_76)
+	cc -c $< -o $@ $(TARGET_1_FLAG_76)
+
+$(TARGET_1_OBJ_77): $(TARGET_1_SRC_77)
+	cc -c $< -o $@ $(TARGET_1_FLAG_77)
+
+$(TARGET_1_OBJ_78): $(TARGET_1_SRC_78)
+	cc -c $< -o $@ $(TARGET_1_FLAG_78)
+
+$(TARGET_1_OBJ_79): $(TARGET_1_SRC_79)
+	cc -c $< -o $@ $(TARGET_1_FLAG_79)
+
+$(TARGET_1_OBJ_80): $(TARGET_1_SRC_80)
+	cc -c $< -o $@ $(TARGET_1_FLAG_80)
+
+$(TARGET_1_OBJ_81): $(TARGET_1_SRC_81)
+	cc -c $< -o $@ $(TARGET_1_FLAG_81)
+
+$(TARGET_1_OBJ_82): $(TARGET_1_SRC_82)
+	cc -c $< -o $@ $(TARGET_1_FLAG_82)
+
+$(TARGET_1_OBJ_83): $(TARGET_1_SRC_83)
+	cc -c $< -o $@ $(TARGET_1_FLAG_83)
+
+$(TARGET_1_OBJ_84):$(TARGET_1_SRC_84)
+	cc -c ${TARGET_1_SRC_84} -o ${TARGET_1_OBJ_84} $(TARGET_1_FLAG_84)
+
+$(TARGET_1_OBJ_85):$(TARGET_1_SRC_85)
+	cc -c ${TARGET_1_SRC_85} -o ${TARGET_1_OBJ_85} $(TARGET_1_FLAG_85)
+
+$(TARGET_1_OBJ_86):$(TARGET_1_SRC_86)
+	cc -c ${TARGET_1_SRC_86} -o ${TARGET_1_OBJ_86} $(TARGET_1_FLAG_86)
+
+$(TARGET_1_OBJ_87):$(TARGET_1_SRC_87)
+	cc -c ${TARGET_1_SRC_87} -o ${TARGET_1_OBJ_87} $(TARGET_1_FLAG_87)
+
+$(TARGET_1_OBJ_88):$(TARGET_1_SRC_88)
+	cc -c ${TARGET_1_SRC_88} -o ${TARGET_1_OBJ_88} $(TARGET_1_FLAG_88)
+
+$(TARGET_1_OBJ_89):$(TARGET_1_SRC_89)
+	cc -c ${TARGET_1_SRC_89} -o ${TARGET_1_OBJ_89} $(TARGET_1_FLAG_89)
+
+$(TARGET_1_OBJ_90):$(TARGET_1_SRC_90)
+	cc -c ${TARGET_1_SRC_90} -o ${TARGET_1_OBJ_90} $(TARGET_1_FLAG_90)
+
+$(TARGET_1_OBJ_91):$(TARGET_1_SRC_91)
+	cc -c ${TARGET_1_SRC_91} -o ${TARGET_1_OBJ_91} $(TARGET_1_FLAG_91)
+
+$(TARGET_1_OBJ_92):$(TARGET_1_SRC_92)
+	cc -c ${TARGET_1_SRC_92} -o ${TARGET_1_OBJ_92} $(TARGET_1_FLAG_92)
+
+$(TARGET_1_OBJ_93):$(TARGET_1_SRC_93)
+	cc -c ${TARGET_1_SRC_93} -o ${TARGET_1_OBJ_93} $(TARGET_1_FLAG_93)
+
+$(TARGET_1_OBJ_94):$(TARGET_1_SRC_94)
+	cc -c ${TARGET_1_SRC_94} -o ${TARGET_1_OBJ_94} $(TARGET_1_FLAG_94)
+
+$(TARGET_1_OBJ_95):$(TARGET_1_SRC_95)
+	cc -c ${TARGET_1_SRC_95} -o ${TARGET_1_OBJ_95} $(TARGET_1_FLAG_95)
+
+$(TARGET_1_OBJ_96):$(TARGET_1_SRC_96)
+	cc -c ${TARGET_1_SRC_96} -o ${TARGET_1_OBJ_96} $(TARGET_1_FLAG_96)
+
+$(TARGET_1_OBJ_97):$(TARGET_1_SRC_97)
+	cc -c ${TARGET_1_SRC_97} -o ${TARGET_1_OBJ_97} $(TARGET_1_FLAG_97)
+
+$(TARGET_1_OBJ_98):$(TARGET_1_SRC_98)
+	cc -c ${TARGET_1_SRC_98} -o ${TARGET_1_OBJ_98} $(TARGET_1_FLAG_98)
+
+$(TARGET_1_OBJ_99):$(TARGET_1_SRC_99)
+	cc -c ${TARGET_1_SRC_99} -o ${TARGET_1_OBJ_99} $(TARGET_1_FLAG_99)
+
+$(TARGET_1_OBJ_100):$(TARGET_1_SRC_100)
+	cc -c ${TARGET_1_SRC_100} -o ${TARGET_1_OBJ_100} $(TARGET_1_FLAG_100)
+
+$(TARGET_1_OBJ_101):$(TARGET_1_SRC_101)
+	cc -c ${TARGET_1_SRC_101} -o ${TARGET_1_OBJ_101} $(TARGET_1_FLAG_101)
+
+$(TARGET_1_OBJ_102):$(TARGET_1_SRC_102)
+	cc -c ${TARGET_1_SRC_102} -o ${TARGET_1_OBJ_102} $(TARGET_1_FLAG_102)
+
+$(TARGET_1_OBJ_103):$(TARGET_1_SRC_103)
+	cc -c ${TARGET_1_SRC_103} -o ${TARGET_1_OBJ_103} $(TARGET_1_FLAG_103)
+
+$(TARGET_1_OBJ_104):$(TARGET_1_SRC_104)
+	cc -c ${TARGET_1_SRC_104} -o ${TARGET_1_OBJ_104} $(TARGET_1_FLAG_104)
+
+$(TARGET_1_OBJ_105):$(TARGET_1_SRC_105)
+	cc -c ${TARGET_1_SRC_105} -o ${TARGET_1_OBJ_105} $(TARGET_1_FLAG_105)
+
+$(TARGET_1_OBJ_106):$(TARGET_1_SRC_106)
+	cc -c ${TARGET_1_SRC_106} -o ${TARGET_1_OBJ_106} $(TARGET_1_FLAG_106)
+
+$(TARGET_1_OBJ_107):$(TARGET_1_SRC_107)
+	cc -c ${TARGET_1_SRC_107} -o ${TARGET_1_OBJ_107} $(TARGET_1_FLAG_107)
+
+$(TARGET_1_OBJ_108):$(TARGET_1_SRC_108)
+	cc -c ${TARGET_1_SRC_108} -o ${TARGET_1_OBJ_108} $(TARGET_1_FLAG_108)
+
+$(TARGET_1_OBJ_109):$(TARGET_1_SRC_109)
+	cc -c ${TARGET_1_SRC_109} -o ${TARGET_1_OBJ_109} $(TARGET_1_FLAG_109)
+
+$(TARGET_1_OBJ_110):$(TARGET_1_SRC_110)
+	cc -c ${TARGET_1_SRC_110} -o ${TARGET_1_OBJ_110} $(TARGET_1_FLAG_110)
+
+$(TARGET_1_OBJ_111):$(TARGET_1_SRC_111)
+	cc -c ${TARGET_1_SRC_111} -o ${TARGET_1_OBJ_111} $(TARGET_1_FLAG_111)
+
+$(TARGET_1_OBJ_112):$(TARGET_1_SRC_112)
+	cc -c ${TARGET_1_SRC_112} -o ${TARGET_1_OBJ_112} $(TARGET_1_FLAG_112)
+
+$(TARGET_1_OBJ_113):$(TARGET_1_SRC_113)
+	cc -c ${TARGET_1_SRC_113} -o ${TARGET_1_OBJ_113} $(TARGET_1_FLAG_113)
+
+$(TARGET_1_OBJ_114):$(TARGET_1_SRC_114)
+	cc -c ${TARGET_1_SRC_114} -o ${TARGET_1_OBJ_114} $(TARGET_1_FLAG_114)
+
+$(TARGET_1_OBJ_115):$(TARGET_1_SRC_115)
+	cc -c ${TARGET_1_SRC_115} -o ${TARGET_1_OBJ_115} $(TARGET_1_FLAG_115)
+
+$(TARGET_1_OBJ_116):$(TARGET_1_SRC_116)
+	cc -c ${TARGET_1_SRC_116} -o ${TARGET_1_OBJ_116} $(TARGET_1_FLAG_116)
+
+$(TARGET_1_OBJ_117):$(TARGET_1_SRC_117)
+	cc -c ${TARGET_1_SRC_117} -o ${TARGET_1_OBJ_117} $(TARGET_1_FLAG_117)
+
+$(TARGET_1_OBJ_118):$(TARGET_1_SRC_118)
+	cc -c ${TARGET_1_SRC_118} -o ${TARGET_1_OBJ_118} $(TARGET_1_FLAG_118)
+
+$(TARGET_1_OBJ_119):$(TARGET_1_SRC_119)
+	cc -c ${TARGET_1_SRC_119} -o ${TARGET_1_OBJ_119} $(TARGET_1_FLAG_119)
+
+$(TARGET_1_OBJ_120):$(TARGET_1_SRC_120)
+	cc -c ${TARGET_1_SRC_120} -o ${TARGET_1_OBJ_120} $(TARGET_1_FLAG_120)
+
+$(TARGET_1_OBJ_121):$(TARGET_1_SRC_121)
+	cc -c ${TARGET_1_SRC_121} -o ${TARGET_1_OBJ_121} $(TARGET_1_FLAG_121)
+
+$(TARGET_1_OBJ_122):$(TARGET_1_SRC_122)
+	cc -c ${TARGET_1_SRC_122} -o ${TARGET_1_OBJ_122} $(TARGET_1_FLAG_122)
+
+$(TARGET_1_OBJ_123):$(TARGET_1_SRC_123)
+	cc -c ${TARGET_1_SRC_123} -o ${TARGET_1_OBJ_123} $(TARGET_1_FLAG_123)
+
+$(TARGET_1_OBJ_124):$(TARGET_1_SRC_124)
+	cc -c ${TARGET_1_SRC_124} -o ${TARGET_1_OBJ_124} $(TARGET_1_FLAG_124)
+
+$(TARGET_1_OBJ_125):$(TARGET_1_SRC_125)
+	cc -c ${TARGET_1_SRC_125} -o ${TARGET_1_OBJ_125} $(TARGET_1_FLAG_125)
+
+$(TARGET_2): $(OBJS_2)
+	$(CC) -fPIC -shared -fsycl -o $@ $^ $(LIB) -qmkl
+# TARGET_2 objects: _OBJ_0 is compiled with cc, _OBJ_1 with icpx.
+$(TARGET_2_OBJ_0): $(TARGET_2_SRC_0)
+	cc -c $(TARGET_2_SRC_0) -o $(TARGET_2_OBJ_0) $(TARGET_2_FLAG_0)
+
+$(TARGET_2_OBJ_1): $(TARGET_2_SRC_1)
+	icpx -c $(TARGET_2_SRC_1) -o $(TARGET_2_OBJ_1) $(TARGET_2_FLAG_1)
+
+clean:
+	rm -f $(OBJS_0) $(OBJS_1) $(OBJS_2) $(TARGET)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h
new file mode 100644
index 000000000..6d131963f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_H
+#define HPL_H
+/*
+ * ---------------------------------------------------------------------
+ * HPL default compile options that can be overridden in the Make.<arch>
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_NO_MPI_DATATYPE         /* Use MPI user-defined data type */
+#define HPL_USE_MPI_DATATYPE
+#endif
+ 
+#ifndef HPL_COPY_L  /* do not copy L, use MPI user-defined data types */
+#define HPL_NO_COPY_L
+#endif
+ 
+#ifndef HPL_DETAILED_TIMING         /* Do not enable detailed timings */
+#define HPL_NO_DETAILED_TIMING
+#endif
+ 
+#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
+#ifndef HPL_CALL_CBLAS                       /* there can be only one */
+#define HPL_CALL_FBLAS
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_timer.h"
+#include "hpl_matgen.h"
+#include "hpl_test.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+#include "hpl_ptest.h"
+
+#endif
+/*
+ * End of hpl.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h
new file mode 100644
index 000000000..861caf380
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_AUXIL_H
+#define HPL_AUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM;
+
+typedef enum
+{
+   HPL_MACH_EPS   = 900,                /* relative machine precision */
+   HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */
+   HPL_MACH_BASE  = 902,                /* base = base of the machine */
+   HPL_MACH_PREC  = 903,                          /* prec  = eps*base */
+   HPL_MACH_MLEN  = 904,   /* number of (base) digits in the mantissa */
+   HPL_MACH_RND   = 905,        /* 1.0 if rounding occurs in addition */
+   HPL_MACH_EMIN  = 906,   /* min exponent before (gradual) underflow */
+   HPL_MACH_RMIN  = 907,        /* underflow threshold base**(emin-1) */
+   HPL_MACH_EMAX  = 908,          /* largest exponent before overflow */
+   HPL_MACH_RMAX  = 909  /* overflow threshold - (base**emax)*(1-eps) */
+ 
+} HPL_T_MACH;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_fprintf
+STDC_ARGS( (
+   FILE *,
+   const char *,
+   ...
+) );
+void                             HPL_warn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_abort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_dlacpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlatcpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlaprnt
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_dlange
+STDC_ARGS( (
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+double                           HPL_dlamch
+STDC_ARGS( (
+   const HPL_T_MACH
+) );
+
+#endif
+/*
+ * End of hpl_auxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h
new file mode 100644
index 000000000..2a510471a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h
@@ -0,0 +1,630 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_BLAS_H
+#define HPL_BLAS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+
+
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+enum HPL_ORDER
+{  HplRowMajor = 101,  HplColumnMajor  = 102 };
+enum HPL_TRANS
+{  HplNoTrans  = 111,  HplTrans        = 112,  HplConjTrans    = 113 };
+enum HPL_UPLO
+{  HplUpper    = 121,  HplLower        = 122 };
+enum HPL_DIAG
+{  HplNonUnit  = 131,  HplUnit         = 132 };
+enum HPL_SIDE
+{  HplLeft     = 141,  HplRight        = 142 }; 
+
+
+#ifdef HPL_CALL_CBLAS
+
+
+/*
+ * ---------------------------------------------------------------------
+ * The C interface of the BLAS is available ...
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    CBLAS_INDEX         int
+ 
+#define    CBLAS_ORDER         HPL_ORDER
+#define    CblasRowMajor       HplRowMajor
+#define    CblasColMajor       HplColumnMajor /* enumerator of HPL_ORDER */
+ 
+#define    CBLAS_TRANSPOSE     HPL_TRANS
+#define    CblasNoTrans        HplNoTrans
+#define    CblasTrans          HplTrans
+#define    CblasConjTrans      HplConjTrans
+ 
+#define    CBLAS_UPLO          HPL_UPLO
+#define    CblasUpper          HplUpper
+#define    CblasLower          HplLower
+ 
+#define    CBLAS_DIAG          HPL_DIAG
+#define    CblasNonUnit        HplNonUnit
+#define    CblasUnit           HplUnit
+ 
+#define    CBLAS_SIDE          HPL_SIDE
+#define    CblasLeft           HplLeft
+#define    CblasRight          HplRight
+/*
+ * ---------------------------------------------------------------------
+ * CBLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+CBLAS_INDEX       cblas_idamax
+STDC_ARGS(
+(  const int,       const double *,  const int ) );
+void              cblas_dswap
+STDC_ARGS(
+(  const int,       double *,        const int,       double *,
+   const int ) );
+void              cblas_dcopy
+STDC_ARGS(
+(  const int,       const double *,  const int,       double *,
+   const int ) );
+void              cblas_daxpy
+STDC_ARGS(
+(  const int,       const double,    const double *,  const int,
+   double *,        const int ) );
+void              cblas_dscal
+STDC_ARGS(
+(  const int,       const double,    double *,        const int ) );
+
+void              cblas_dgemv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const int,       const int,       const double,    const double *,
+   const int,       const double *,  const int,       const double,
+   double *,        const int ) );
+
+void              cblas_dger
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const int,       const int,
+   const double,    const double *,  const int,       const double *,
+   const int,       double *,        const int ) );
+void              cblas_dtrsv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_UPLO,
+   const enum CBLAS_TRANSPOSE,       const enum CBLAS_DIAG,
+   const int,       const double *,  const int,       double *,
+   const int ) );
+
+void              cblas_dgemm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void              cblas_dtrsm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+void             dpcpp_dgemm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void             dpcpp_dtrsm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+/*
+ * ---------------------------------------------------------------------
+ * HPL C BLAS macro definition
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_dswap           cblas_dswap
+#define    HPL_dcopy           cblas_dcopy
+#define    HPL_daxpy           cblas_daxpy
+#define    HPL_dscal           cblas_dscal
+#define    HPL_idamax          cblas_idamax
+
+#define    HPL_dgemv           cblas_dgemv
+#define    HPL_dtrsv           cblas_dtrsv
+#define    HPL_dger            cblas_dger
+
+/* HPL_dgemm and HPL_dtrsm are mapped to the dpcpp_* wrappers below,   */
+/* not to cblas_dgemm / cblas_dtrsm.                                   */
+#define    HPL_dgemm           dpcpp_dgemm
+#define    HPL_dtrsm           dpcpp_dtrsm  
+
+#endif
+
+//#define    HPL_hello           sss_gemm 
+
+#ifdef HPL_CALL_FBLAS
+/*
+ * ---------------------------------------------------------------------
+ * Use the Fortran 77 interface of the BLAS ...
+ * ---------------------------------------------------------------------
+ * Defaults: Add_, F77_INTEGER=int, StringSunStyle
+ * ---------------------------------------------------------------------
+ */
+#ifndef NoChange
+#ifndef UpCase
+#ifndef Add__
+#ifndef Add_
+
+#define Add_
+
+#endif
+#endif
+#endif
+#endif
+
+#ifndef F77_INTEGER
+#define    F77_INTEGER         int
+#else
+#define    HPL_USE_F77_INTEGER_DEF
+#endif
+
+#ifndef StringCrayStyle
+#ifndef StringStructVal
+#ifndef StringStructPtr
+#ifndef StringSunStyle
+
+#define StringSunStyle
+
+#endif
+#endif
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 <-> C interface
+ * ---------------------------------------------------------------------
+ *
+ * These macros identify how Fortran routines will be called.
+ *
+ * Add_     : the Fortran compiler expects the name of C functions to be
+ * in all lower case and to have an underscore appended (Suns, Intel
+ * compilers expect this).
+ *
+ * NoChange : the Fortran compiler expects the name of C functions to be
+ * in all lower case (IBM RS6K compilers do this).
+ *
+ * UpCase   : the Fortran compiler expects the name of C functions to be
+ * in all upcase. (Cray compilers expect this).
+ *
+ * Add__    : the Fortran compiler in use is f2c, a Fortran to C conver-
+ * ter.
+ */
+#ifdef NoChange
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm(...)
+ */
+#define    F77dswap               dswap
+#define    F77dscal               dscal
+#define    F77dcopy               dcopy
+#define    F77daxpy               daxpy
+#define    F77idamax              idamax
+
+#define    F77dgemv               dgemv
+#define    F77dtrsv               dtrsv
+#define    F77dger                dger
+
+#define    F77dgemm               dgemm
+#define    F77dtrsm               dtrsm
+
+#endif
+
+#ifdef UpCase
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          DGEMM(...)
+ */
+#ifdef CRAY_BLAS
+                                                                                
+#define    F77dswap               SSWAP
+#define    F77dscal               SSCAL
+#define    F77dcopy               SCOPY
+#define    F77daxpy               SAXPY
+#define    F77idamax              ISAMAX
+                                                                                
+#define    F77dgemv               SGEMV
+#define    F77dtrsv               STRSV
+#define    F77dger                SGER
+                                                                                
+#define    F77dgemm               SGEMM
+#define    F77dtrsm               STRSM
+                                                                                
+#else
+
+#define    F77dswap               DSWAP
+#define    F77dscal               DSCAL
+#define    F77dcopy               DCOPY
+#define    F77daxpy               DAXPY
+#define    F77idamax              IDAMAX
+
+#define    F77dgemv               DGEMV
+#define    F77dtrsv               DTRSV
+#define    F77dger                DGER
+
+#define    F77dgemm               DGEMM
+#define    F77dtrsm               DTRSM
+
+#endif
+
+#endif
+
+#ifdef Add_
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+
+#endif
+
+#ifdef Add__
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+ 
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+ 
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+//#define    F77hello               sss_gemm
+ 
+#endif
+//#define    F77hello               sss_gemm
+/*
+ * ---------------------------------------------------------------------
+ * Typedef definitions and conversion utilities
+ * ---------------------------------------------------------------------
+ */
+#ifdef StringCrayStyle
+
+#include <fortran.h>
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            _fcd
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(_fcdtocp(c) ))
+#define    HPL_C2F_CHAR(c)     (_cptofcd(&(c), 1))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructVal
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+     /* Character conversion: first char pointed to by the cp member */
+#define    HPL_F2C_CHAR(c)     (*((c).cp))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructPtr
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+     /* Character conversion: first char pointed to by the cp member */
+#define    HPL_F2C_CHAR(c)     (*((c)->cp))
+
+#define    F77_CHAR_DECL       F77_CHAR *        /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringSunStyle
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            char *
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c))
+#define    HPL_C2F_CHAR(c)     (&(c))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+#define    F77_1_CHAR          , F77_INTEGER
+#define    F77_2_CHAR          F77_1_CHAR F77_1_CHAR
+#define    F77_3_CHAR          F77_2_CHAR F77_1_CHAR
+#define    F77_4_CHAR          F77_3_CHAR F77_1_CHAR
+
+#endif
+/* ------------------------------------------------------------------ */
+
+#ifndef F77_1_CHAR
+#define    F77_1_CHAR
+#define    F77_2_CHAR
+#define    F77_3_CHAR
+#define    F77_4_CHAR
+#endif
+
+#define    F77_INT_DECL        const F77_INTEGER *   /* input integer */
+#define    F77_SIN_DECL        const double *         /* input scalar */
+#define    F77_VIN_DECL        const double *         /* input vector */
+#define    F77_VINOUT_DECL     double *        /* input/output vector */
+#define    F77_MIN_DECL        const double *         /* input matrix */
+#define    F77_MINOUT_DECL     double *        /* input/output matrix */
+ 
+#ifdef CRAY_PVP_ENV                      /* Type of FORTRAN functions */
+#define    F77_VOID_FUN        extern fortran void      /* subroutine */
+#define    F77_INT_FUN         extern fortran int /* integer function */
+#else
+#define    F77_VOID_FUN        extern void              /* subroutine */
+#define    F77_INT_FUN         extern int         /* integer function */
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 BLAS function prototypes
+ * ---------------------------------------------------------------------
+ */
+F77_VOID_FUN    F77dswap
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dscal
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_VOID_FUN    F77dcopy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77daxpy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_INT_FUN     F77idamax
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL ) );
+
+F77_VOID_FUN    F77dgemv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL     F77_1_CHAR ) );
+F77_VOID_FUN    F77dger
+STDC_ARGS(
+(  F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,
+   F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dtrsv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL
+   F77_3_CHAR ) );
+
+F77_VOID_FUN    F77dgemm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,
+   F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,    F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL     F77_2_CHAR ) );
+F77_VOID_FUN    F77dtrsm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,
+   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,
+   F77_INT_DECL,    F77_MINOUT_DECL, F77_INT_DECL     F77_4_CHAR ) );
+
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * HPL BLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_CALL_CBLAS
+
+int                              HPL_idamax
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int
+) );
+void                             HPL_daxpy
+STDC_ARGS( (
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dcopy
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dscal
+STDC_ARGS( (
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dswap
+STDC_ARGS( (
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dger
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dtrsv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_hello
+STDC_ARGS( (
+) );
+#endif
+void                             HPL_dtrsm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_SIDE,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+
+/* HPL_dtrsm is declared regardless of HPL_CALL_CBLAS (guard closed above) */
+
+#endif
+/*
+ * End of hpl_blas.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h
new file mode 100644
index 000000000..e3ba51a57
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h
@@ -0,0 +1,161 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_COMM_H
+#define HPL_COMM_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_1RING         = 401,                        /* Increasing ring */
+   HPL_1RING_M       = 402,             /* Increasing ring (modified) */
+   HPL_2RING         = 403,                      /* Increasing 2-ring */
+   HPL_2RING_M       = 404,           /* Increasing 2-ring (modified) */
+   HPL_BLONG         = 405,                         /* long broadcast */
+   HPL_BLONG_M       = 406               /* long broadcast (modified) */
+} HPL_T_TOP;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_FAILURE            0
+#define    HPL_SUCCESS            1
+#define    HPL_KEEP_TESTING       2
+/*
+ * ---------------------------------------------------------------------
+ * comm function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_send
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_recv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_sdrv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_binit
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_bcast
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *
+) );
+int                              HPL_bwait
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_packL
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_copyL
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blong STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+
+#endif
+/*
+ * End of hpl_comm.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h
new file mode 100644
index 000000000..ce671cf2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h
@@ -0,0 +1,87 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GESV_H
+#define HPL_GESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
+   HPL_CROUT         = 302,                  /* Crout lu fact variant */
+   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
+} HPL_T_FACT;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void              HPL_dgesv
+STDC_ARGS(
+(  const int,       const int,       const int,       const HPL_T_FACT,
+   const HPL_T_FACT,                 const int,       double *,
+   const int,       int * ) );
+void              HPL_ipid
+STDC_ARGS(
+(  const int,       double *,        int *,           int *,
+   int *,           int *,           int *,           int *,
+   const int,       const int,       const int,       const int,
+   const int ) );
+
+#endif
+/*
+ * End of hpl_gesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h
new file mode 100644
index 000000000..1895a5ed4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h
@@ -0,0 +1,212 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GRID_H
+#define HPL_GRID_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum { HPL_INT       = 100, HPL_DOUBLE       = 101 } HPL_T_TYPE;
+ 
+typedef enum
+{
+   HPL_ROW_MAJOR     = 201,
+   HPL_COLUMN_MAJOR  = 202
+} HPL_T_ORDER;
+
+typedef struct HPL_S_grid
+{
+   MPI_Comm        all_comm;                     /* grid communicator */
+   MPI_Comm        row_comm;                      /* row communicator */
+   MPI_Comm        col_comm;                   /* column communicator */
+   HPL_T_ORDER     order;        /* ordering of the procs in the grid */
+   int             iam;                        /* my rank in the grid */
+   int             myrow;                /* my row number in the grid */
+   int             mycol;             /* my column number in the grid */
+   int             nprow;          /* the total # of rows in the grid */
+   int             npcol;       /* the total # of columns in the grid */
+   int             nprocs;        /* the total # of procs in the grid */
+   int             row_ip2;          /* largest power of two <= nprow */
+   int             row_hdim;     /* row_ip2 procs hypercube dimension */
+   int             row_ip2m1;      /* largest power of two <= nprow-1 */
+   int             row_mask;        /* row_ip2m1 procs hypercube mask */
+   int             col_ip2;          /* largest power of two <= npcol */
+   int             col_hdim;     /* col_ip2 procs hypercube dimension */
+   int             col_ip2m1;      /* largest power of two <= npcol-1 */
+   int             col_mask;        /* col_ip2m1 procs hypercube mask */
+} HPL_T_grid;
+
+/*
+ * ---------------------------------------------------------------------
+ * Operator function type accepted by HPL_reduce / HPL_all_reduce
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_OP)
+(  const int,       const void *,    void *,          const HPL_T_TYPE );
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_2_MPI_TYPE( typ )  /* HPL_INT -> MPI_INT, else MPI_DOUBLE */ \
+                         ( ( (typ) == HPL_INT ? MPI_INT : MPI_DOUBLE ) )
+/*
+ * The following macros perform common modulo operations;  All functions
+ * except MPosMod assume arguments are < d (i.e., arguments are themsel-
+ * ves within modulo range).
+ */
+      /* increment with mod (one void expression; safe before an else) */
+#define    MModInc(I, d)       ((void)((++(I) == (d)) ? ((I) = 0) : 0))
+      /* decrement with mod (one void expression; safe before an else) */
+#define    MModDec(I, d)       ((void)((--(I) == -1) ? ((I) = (d)-1) : 0))
+                                                   /* positive modulo */
+#define    MPosMod(I, d)       ( (I) - ((I)/(d))*(d) )
+                                                   /* add two numbers */
+#define    MModAdd(I1, I2, d) \
+           ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) )
+                                                        /* add 1 to # */
+#define    MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 )
+                                              /* subtract two numbers */
+#define    MModSub(I1, I2, d) \
+           ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) )
+                                                      /* sub 1 from # */
+#define    MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 )
+/*
+ * ---------------------------------------------------------------------
+ * grid function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_grid_init
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_ORDER,
+   const int,
+   const int,
+   HPL_T_grid *
+) );
+int                              HPL_grid_exit
+STDC_ARGS( (
+   HPL_T_grid *
+) );
+
+int                              HPL_grid_info
+STDC_ARGS( (
+   const HPL_T_grid *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_pnum
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int
+) );
+
+int                              HPL_barrier
+STDC_ARGS( (
+   MPI_Comm
+) );
+int                              HPL_broadcast
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_all_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   MPI_Comm
+) );
+
+void                             HPL_max
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_min
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_sum
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+
+#endif
+/*
+ * End of hpl_grid.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h
new file mode 100644
index 000000000..de6503eea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MATGEN_H
+#define HPL_MATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MULT0         1284865837   /* 0x4C957F2D */
+#define    HPL_MULT1         1481765933   /* 0x5851F42D */
+#define    HPL_IADD0         1            /* NOTE(review): MULTn/IADDn presumably hold the     */
+#define    HPL_IADD1         0            /* generator's multiplier/increment words -- confirm */
+#define    HPL_DIVFAC        2147483648.0 /* 2^31 */
+#define    HPL_POW16         65536.0      /* 2^16 */
+#define    HPL_HALF          0.5
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dmatgen
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): M, N, A, LDA, ISEED */
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+void                             HPL_lmul
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): K, J, I */
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_ladd
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): J, K, I */
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_xjumpm
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM */
+   const int,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_setran
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): OPTION, IRAN */
+   const int,
+   int *
+) );
+void                             HPL_jumpit
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): MULT, IADD, IRANN, IRANM */
+   int *,
+   int *,
+   int *,
+   int *
+) );
+double                           HPL_rand STDC_ARGS( ( void ) ); /* takes no arguments */
+
+#endif
+/*
+ * End of hpl_matgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h
new file mode 100644
index 000000000..ea421a403
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h
@@ -0,0 +1,110 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MISC_H
+#define HPL_MISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#ifdef __STDC__                   /* set by ISO C conforming compilers */
+#define STDC_HEADERS
+#endif /* __STDC__ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef STDC_HEADERS
+#include <stdarg.h>
+#define STDC_ARGS(p)           p    /* keep the prototype's parameter list */
+#else
+#include <varargs.h>
+#define STDC_ARGS(p)           ()   /* declare the function without a parameter list */
+#endif /* STDC_HEADERS */
+
+#ifdef HPL_CALL_VSIPL
+#include <vsip.h>
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_rone             1.0   /* double constant one  */
+#define    HPL_rtwo             2.0   /* double constant two  */
+#define    HPL_rzero            0.0   /* double constant zero */
+/*
+ * ---------------------------------------------------------------------
+ * #define macro definitions (arguments may be evaluated more than once)
+ * ---------------------------------------------------------------------
+ */
+#define    Mabs( a_ )          ( ( (a_) <   0  ) ? -(a_) : (a_) )
+#define    Mmin( a_, b_ )      ( ( (a_) < (b_) ) ?  (a_) : (b_) )
+#define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
+/* Integer a, b > 0: Mfloor/Miceil = floor/ceil of a/b for any sign of a; Mceil needs a >= 0 */
+#define    Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b))))
+#define    Mceil(a,b)           ( ( (a)+(b)-1 ) / (b) )
+#define    Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b))))
+/* ASCII-only case mapping: codes 97..122 get bit 5 cleared, codes 65..90 get it set */
+#define    Mupcase(C)          (((C)>96 && (C)<123) ? (C) & 0xDF : (C))
+#define    Mlowcase(C)         (((C)>64 && (C)< 91) ? (C) | 32   : (C))
+/*
+ * Mptr yields a_ + i_ + j_*lda_, i.e. a pointer to a_( i_, j_ ) in an
+ * array with leading dimension lda_; the offsets are computed in size_t.
+ */
+#define    Mptr( a_, i_, j_, lda_ ) \
+   ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) )
+/*
+ * Align pointer: (size_t)ptr_ rounded up to a multiple of al_ (integer result)
+ */
+#define    HPL_PTR( ptr_, al_ ) \
+                      ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) 
+#endif
+/*
+ * End of hpl_misc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h
new file mode 100644
index 000000000..d5ba2939c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PANEL_H
+#define HPL_PANEL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures -- HPL_T_panel: per-panel pointers, sizes, bcast state
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_panel
+{
+   struct HPL_S_grid   * grid;             /* ptr to the process grid */
+   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
+   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
+   double              * A;              /* ptr to trailing part of A */
+   double              * WORK;                          /* work space */
+   double              * L2;                              /* ptr to L */
+   double              * L1;       /* ptr to jb x jb upper block of A */
+   double              * DPIV;    /* ptr to replicated jb pivot array */
+   double              * DINFO;      /* ptr to replicated scalar info */
+   double              * U;                               /* ptr to U */
+   int                 * IWORK;     /* integer workspace for swapping */
+   void                * * * buffers[2];   /* buffers for panel bcast */
+   int                 counts [2];          /* counts for panel bcast */
+   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
+   MPI_Request         request[1];        /* requests for panel bcast */
+   MPI_Status          status [1];          /* status for panel bcast */
+   int                 nb;            /* distribution blocking factor */
+   int                 jb;                             /* panel width */
+   int                 m;   /* global # of rows of trailing part of A */
+   int                 n;   /* global # of cols of trailing part of A */
+   int                 ia;  /* global row index of trailing part of A */
+   int                 ja;  /* global col index of trailing part of A */
+   int                 mp;   /* local # of rows of trailing part of A */
+   int                 nq;   /* local # of cols of trailing part of A */
+   int                 ii;   /* local row index of trailing part of A */
+   int                 jj;   /* local col index of trailing part of A */
+   int                 lda;           /* local leading dim of array A */
+   int                 prow;  /* proc. row owning 1st row of trail. A */
+   int                 pcol;  /* proc. col owning 1st col of trail. A */
+   int                 msgid;           /* message id for panel bcast */
+   int                 ldl2;         /* local leading dim of array L2 */
+   int                 len;      /* length of the buffer to broadcast */
+#ifdef HPL_CALL_VSIPL      /* extra members only for the VSIPL build */
+   vsip_block_d        * Ablock;                           /* A block */
+   vsip_block_d        * L1block;                         /* L1 block */
+   vsip_block_d        * L2block;                         /* L2 block */
+   vsip_block_d        * Ublock;                           /* U block */
+#endif /* HPL_CALL_VSIPL */
+} HPL_T_panel;
+
+/*
+ * ---------------------------------------------------------------------
+ * panel function prototypes
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pgesv.h"
+
+void                             HPL_pdpanel_new
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL */
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel * *
+) );
+void                             HPL_pdpanel_init
+STDC_ARGS( (   /* as HPL_pdpanel_new, but the last argument is HPL_T_panel * (one indirection less) */
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel *
+) );
+int                              HPL_pdpanel_disp
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): PANEL (address of the panel pointer) */
+   HPL_T_panel * *
+) );
+int                              HPL_pdpanel_free
+STDC_ARGS( (   /* args per upstream HPL docs (TODO confirm): PANEL */
+   HPL_T_panel *
+) );
+
+#endif
+/*
+ * End of hpl_panel.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h
new file mode 100644
index 000000000..1fd0ee457
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h
@@ -0,0 +1,505 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PAUXIL_H
+#define HPL_PAUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro definitions
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Mindxg2p  stores in proc_ the process coordinate owning the entry
+ * globally indexed by ig_; proc_ = src_ if ig_<inb_, src_<0 or nprocs_<=1.
+ */
+#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
+           {                                                           \
+              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
+                  ( (nprocs_) > 1 ) )                                  \
+              {                                                        \
+                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 proc_ = (src_);                                       \
+              }                                                        \
+           }
+/* Mindxg2l stores in il_ the local index matching global index ig_ (proc_ is unused). */
+#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*( j__ - i__ ) +                           \
+                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
+                         (ig_) - (inb_) : (ig_) );                     \
+              }                                                        \
+           }
+/* Mindxg2lp sets both il_ (local index) and proc_ (owning process) for global index ig_. */
+#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) )                                 \
+              { il_ = (ig_); proc_ = (src_); }                         \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*(j__-i__) +                               \
+                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
+                         (ig_) - (inb_) : (ig_) );                     \
+                 proc_  = (src_) + 1 + i__;                            \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+           }
+/*
+ * Mindxl2g computes the global index ig_ corresponding to the local
+ * index il_ in process proc_; ig_ = il_ when src_ < 0 or nprocs_ <= 1.
+ */
+#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 if( (proc_) == (src_) )                               \
+                 {                                                     \
+                    if( (il_) < (inb_) ) ig_ = (il_);                  \
+                    else                 ig_ = (il_) +                 \
+                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
+                 }                                                     \
+                 else if( (proc_) < (src_) )                           \
+                 {                                                     \
+                    ig_ = (il_) + (inb_) +                             \
+                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
+                                   (proc_)-(src_)-1+(nprocs_) );       \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    ig_ =  (il_) + (inb_) +                            \
+                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
+                           (proc_)-(src_)-1 );                         \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 ig_ = (il_);                                          \
+              }                                                        \
+           }
+/*
+ * MnumrocI stores in np_ the # of local indexes residing in the process
+ * of coordinate proc_ for the interval of global indexes i_:i_+n_-1,
+ * assuming that the global index 0 resides in the process src_ and that
+ * the indexes are distributed from src_ using the parameters inb_, nb_
+ * and nprocs_.  np_ = n_ when src_ < 0 or nprocs_ <= 1.
+ */
+#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
+                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
+                 {                                                     \
+                    nblk__ = (-inb__) / (nb_) + 1;                     \
+                    src__  = (src_) + nblk__;                          \
+                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
+                    inb__ += nblk__*(nb_);                             \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == src__ ) np_ = (n_);              \
+                       else                   np_ = 0;                 \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == (src_) ) np_ = (n_);             \
+                       else                    np_ = 0;                \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 np_ = (n_);                                           \
+              }                                                        \
+           }
+
+#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
+           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) /* MnumrocI, 3rd arg = 0 */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_indxg2lp
+STDC_ARGS( (
+   int *,
+   int *,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2l
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2p
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxl2g
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_infog2l
+STDC_ARGS( (
+   int,
+   int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_numroc
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_numrocI
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+
+void                             HPL_dlaswp00N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp10N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp01N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp01T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp02N
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp03N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp03T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp04N
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp04T
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp06N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp06T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+
+void                             HPL_pabort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pwarn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pdlaprnt
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_pdlamch
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_MACH
+) );
+double                           HPL_pdlange
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pauxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h
new file mode 100644
index 000000000..09eee79ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PFACT_H
+#define HPL_PFACT_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_PFA_FUN)     /* pointer to a void function taking */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );      /* same parameter list as HPL_pdpanllN, declared below */
+typedef void (*HPL_T_RFA_FUN)     /* pointer to a void function taking */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );     /* same parameter list as HPL_pdrpanllN, declared below */
+typedef void (*HPL_T_UPD_FUN) /* params match HPL_pdupdateNN in hpl_pgesv.h */
+(  HPL_T_panel *,   int *,           HPL_T_panel *,   const int ); 
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dlocmax
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_dlocswpN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_dlocswpT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdmxswp
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdrpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdfact
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+#endif /* HPL_PFACT_H */
+/*
+ * End of hpl_pfact.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h
new file mode 100644
index 000000000..3ca576c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h
@@ -0,0 +1,346 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PGESV_H
+#define HPL_PGESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+#include "hpl_comm.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum                /* type of the HPL_T_palg.fswap field below */
+{
+   HPL_SWAP00        = 451,                      /* Use HPL_pdlaswp00 */
+   HPL_SWAP01        = 452,                      /* Use HPL_pdlaswp01 */
+   HPL_SW_MIX        = 453, /* Use HPL_pdlaswp00_ for small number of */
+                            /* columns, and HPL_pdlaswp01_ otherwise. */
+   HPL_NO_SWP        = 499  /* NOTE(review): presumably "none"; confirm */
+} HPL_T_SWAP;
+
+typedef struct HPL_S_palg        /* algorithm selection and tuning knobs */
+{
+   HPL_T_TOP           btopo;               /* row broadcast topology */
+   int                 depth;                     /* look-ahead depth */
+   int                 nbdiv;            /* recursive division factor */
+   int                 nbmin;         /* recursion stopping criterion */
+   HPL_T_FACT          pfact;                   /* panel fact variant */
+   HPL_T_FACT          rfact;               /* recursive fact variant */
+   HPL_T_PFA_FUN       pffun;              /* panel fact function ptr */
+   HPL_T_RFA_FUN       rffun;          /* recursive fact function ptr */
+   HPL_T_UPD_FUN       upfun;                      /* update function */
+   HPL_T_SWAP          fswap;                   /* Swapping algorithm */
+   int                 fsthr;                   /* Swapping threshold */
+   int                 equil;                        /* Equilibration */
+   int                 align;              /* data alignment constant */
+} HPL_T_palg;
+
+typedef struct HPL_S_pmat      /* matrix/solution storage and dimensions */
+{
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * block; /* member exists only with HPL_CALL_VSIPL */
+#endif
+   double              * A;            /* pointer to local piece of A */
+   double              * X;             /* pointer to solution vector */
+   int                 n;                      /* global problem size */
+   int                 nb;                         /* blocking factor */
+   int                 ld;                 /* local leading dimension */
+   int                 mp;                    /* local number of rows */
+   int                 nq;                 /* local number of columns */
+   int                 info;                    /* computational flag */
+} HPL_T_pmat;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    MSGID_BEGIN_PFACT   1001              /* message id ranges */
+#define    MSGID_END_PFACT     2000          /* PFACT ids: 1001..2000 */
+#define    MSGID_BEGIN_FACT    2001
+#define    MSGID_END_FACT      3000           /* FACT ids: 2001..3000 */
+#define    MSGID_BEGIN_PTRSV   3001
+#define    MSGID_END_PTRSV     4000          /* PTRSV ids: 3001..4000 */
+ 
+#define    MSGID_BEGIN_COLL    9001  /* 4001..9000 has no range here */
+#define    MSGID_END_COLL     10000          /* COLL ids: 9001..10000 */
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    MNxtMgid( id_, beg_, end_ ) /* id_+1, or beg_ once id_+1 exceeds end_; id_ is evaluated twice */ \
+                             (( (id_)+1 > (end_) ?  (beg_) : (id_)+1 ))
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pipid
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   int *
+) );
+void                             HPL_plindx0
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdlaswp00N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp00T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_perm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_logsort
+STDC_ARGS( (
+   const int,
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx10
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx1
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_spreadN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_spreadT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_equil
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_TRANS,
+   const int,
+   double *,
+   const int,
+   int *,
+   const int *,
+   const int *,
+   int *
+) );
+void                             HPL_rollN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_rollT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_pdlaswp01N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp01T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdupdateNN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateNT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdgesv0
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK1
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK2
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+ 
+void                             HPL_pdtrsv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_pmat *
+) );
+
+#endif /* HPL_PGESV_H */
+/*
+ * End of hpl_pgesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h
new file mode 100644
index 000000000..1091b0f60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h
@@ -0,0 +1,77 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMATGEN_H
+#define HPL_PMATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_matgen.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdmatgen
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+
+#endif /* HPL_PMATGEN_H */
+/*
+ * End of hpl_pmatgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h
new file mode 100644
index 000000000..23550d47b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h
@@ -0,0 +1,59 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMISC_H
+#define HPL_PMISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "mpi.h"
+
+#endif /* HPL_PMISC_H */
+/*
+ * End of hpl_pmisc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h
new file mode 100644
index 000000000..5777bd536
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTEST_H
+#define HPL_PTEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures: HPL_T_test (epsilon, threshold, output, counters)
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_test
+{
+   double              epsil;                      /* machine epsilon */
+   double              thrsh;                            /* threshold */
+   FILE *              outfp;       /* output stream (only in proc 0) */
+   int                 kfail;                    /* # of tests failed */
+   int                 kpass;                    /* # of tests passed */
+   int                 kskip;                   /* # of tests skipped */
+   int                 ktest;                /* total number of tests */
+} HPL_T_test;
+
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants for testing only
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_LINE_MAX         256
+#define    HPL_MAX_PARAM         20
+#define    HPL_ISEED            100
+/*
+ * ---------------------------------------------------------------------
+ * global timers for timing analysis (HPL_DETAILED_TIMING builds only)
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_DETAILED_TIMING
+#define    HPL_TIMING_BEG        11 /* timer 0 reserved, used by main */
+#define    HPL_TIMING_N           6 /* number of timers defined below */
+#define    HPL_TIMING_RPFACT     11 /* starting from here, contiguous */
+#define    HPL_TIMING_PFACT      12
+#define    HPL_TIMING_MXSWP      13
+#define    HPL_TIMING_UPDATE     14
+#define    HPL_TIMING_LASWP      15
+#define    HPL_TIMING_PTRSV      16
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (argument lists are wrapped in STDC_ARGS)
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdinfo
+STDC_ARGS( (
+   HPL_T_test *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_ORDER *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   HPL_T_TOP *,
+   int *,
+   int *,
+   HPL_T_SWAP *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdtest
+STDC_ARGS( (
+   HPL_T_test *,
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_ptest.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h
new file mode 100644
index 000000000..43c8fe33a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h
@@ -0,0 +1,96 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTIMER_H
+#define HPL_PTIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NPTIMER             64
+#define    HPL_PTIMER_STARTFLAG   5.0
+#define    HPL_PTIMER_ERROR      -1.0
+/*
+ * ---------------------------------------------------------------------
+ * type definitions: HPL_T_PTIME (wall/CPU) and HPL_T_PTIME_OP selectors
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{  HPL_WALL_PTIME = 101, HPL_CPU_PTIME  = 102 } HPL_T_PTIME;
+
+typedef enum
+{ HPL_AMAX_PTIME  = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME  = 203 }
+HPL_T_PTIME_OP;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (MPI_Comm is available through hpl_pmisc.h)
+ * ---------------------------------------------------------------------
+ */
+double          HPL_ptimer_cputime   STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_walltime  STDC_ARGS(     ( void      ) );
+
+void            HPL_ptimer           STDC_ARGS(     ( const int ) );
+void            HPL_ptimer_boot      STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_combine
+STDC_ARGS(
+(  MPI_Comm comm,   const HPL_T_PTIME_OP,             const HPL_T_PTIME,
+   const int,       const int,       double * ) );
+void            HPL_ptimer_disable   STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_enable    STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_inquire
+STDC_ARGS(
+(  const HPL_T_PTIME,                const int ) );
+
+#endif
+/*
+ * End of hpl_ptimer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h
new file mode 100644
index 000000000..1eedc97e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h
@@ -0,0 +1,80 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TEST_H
+#define HPL_TEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_matgen.h"
+#include "hpl_timer.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (argument lists are wrapped in STDC_ARGS)
+ * ---------------------------------------------------------------------
+ */
+void            HPL_dinfo
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   HPL_T_FACT *,    int *,           int *,           int *, 
+   int *,           int *,           HPL_T_FACT *,    int *,
+   double *,        double * ) );
+void            HPL_dtest
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   HPL_T_FACT,      HPL_T_FACT,      const int,       const double,
+   const double,    int *,           int *,           int * ) );
+
+#endif
+/*
+ * End of hpl_test.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h
new file mode 100644
index 000000000..4c91700ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TIMER_H
+#define HPL_TIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NTIMER              64
+#define    HPL_TIMER_STARTFLAG    5.0
+#define    HPL_TIMER_ERROR       -1.0
+/*
+ * ---------------------------------------------------------------------
+ * type definitions: HPL_T_TIME distinguishes wall and CPU timings
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{  HPL_WALL_TIME = 101, HPL_CPU_TIME  = 102 } HPL_T_TIME;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (argument lists are wrapped in STDC_ARGS)
+ * ---------------------------------------------------------------------
+ */
+double          HPL_timer_cputime    STDC_ARGS(     ( void      ) );
+double          HPL_timer_walltime   STDC_ARGS(     ( void      ) );
+
+void            HPL_timer            STDC_ARGS(     ( const int ) );
+void            HPL_timer_boot       STDC_ARGS(     ( void      ) );
+void            HPL_timer_enable     STDC_ARGS(     ( void      ) );
+void            HPL_timer_disable    STDC_ARGS(     ( void      ) );
+double          HPL_timer_inquire
+STDC_ARGS(
+(  const HPL_T_TIME,                 const int ) );
+
+#endif
+/*
+ * End of hpl_timer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c
new file mode 100644
index 000000000..bf0c5e727
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c
@@ -0,0 +1,129 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_abort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_abort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_abort displays an error message on stderr and halts execution.
+ * The message is formatted into a 128-byte buffer; longer text is cut.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display an error message and terminate; note the exit status is 0
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR in function", SRNAME, cline );
+   else
+      HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+   exit( 0 );
+/*
+ * End of HPL_abort
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c
new file mode 100644
index 000000000..ec71180eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c
@@ -0,0 +1,343 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factors (the first set below is not compiled):
+ * #ifndef HPL_LACPY_M_DEPTH
+ * #define    HPL_LACPY_M_DEPTH       32
+ * #define    HPL_LACPY_LOG2_M_DEPTH   5
+ * #endif
+ * #ifndef HPL_LACPY_N_DEPTH
+ * #define    HPL_LACPY_N_DEPTH        4
+ * #define    HPL_LACPY_LOG2_N_DEPTH   2
+ * #endif
+ */
+#ifndef HPL_LACPY_M_DEPTH
+#define    HPL_LACPY_M_DEPTH        4   /* rows per unrolled step     */
+#define    HPL_LACPY_LOG2_M_DEPTH   2   /* log2( HPL_LACPY_M_DEPTH ) */
+#endif
+#ifndef HPL_LACPY_N_DEPTH
+#define    HPL_LACPY_N_DEPTH        2   /* columns per unrolled step  */
+#define    HPL_LACPY_LOG2_N_DEPTH   1   /* log2( HPL_LACPY_N_DEPTH ) */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlacpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlacpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlacpy copies an array A into an array B.
+ * Only the leading M-by-N part is copied; columns are LDA (LDB) apart.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of rows of the arrays A and
+ *         B. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies  the number of columns of the arrays A
+ *         and B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables .. (the column-copy path needs its own cursors) */
+#ifdef HPL_LACPY_USE_COPY
+   const double               * A0 = A;
+   double                     * B0 = B;
+   register int               j;
+#else
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+   const double               * A0 = A,              * A1 = A +     LDA;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+   const double               * A0 = A,              * A1 = A +     LDA,
+                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = ( (unsigned int)(LDA) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incA0 = (unsigned int)(LDA) - M,
+                              incB0 = (unsigned int)(LDB) - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* .. Executable Statements .. */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to copy */
+
+#ifdef HPL_LACPY_USE_COPY
+/* Column-by-column copy; A0 and B0 walk the columns of A and B in
+ * steps of their leading dimensions. */
+   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 );
+#else
+/* mu, nu: M and N rounded down to multiples of the unrolling depths. */
+   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
+                                      HPL_LACPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
+                                      HPL_LACPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 0] = A0[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
+#endif
+
+#if ( HPL_LACPY_M_DEPTH >  1 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 1] = A0[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
+         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
+         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
+         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
+         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
+         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
+         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
+         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
+         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
+         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
+         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
+         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
+         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
+         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
+         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
+         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
+#endif
+
+#endif
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
+         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         *B0 = *A0; B0++; A0++;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
+#endif
+      }
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
+#endif
+   }
+
+/* Remaining N - nu columns are copied one column at a time. */
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
+           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
+      {
+         B0[ 0] = A0[ 0];
+#if ( HPL_LACPY_M_DEPTH >  1 )
+         B0[ 1] = A0[ 1];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#endif
+      }
+      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
+   }
+#endif
+/* End of HPL_dlacpy */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c
new file mode 100644
index 000000000..c685f0d5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c
@@ -0,0 +1,876 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static function prototypes (helpers private to this file)
+ * ---------------------------------------------------------------------
+ */
+static void     HPL_dlamc1            /* outputs: BETA, T, RND, IEEE1 */
+STDC_ARGS(
+(  int *,           int *,           int *,           int * ) );
+static void     HPL_dlamc2  /* BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX */
+STDC_ARGS(
+(  int *,           int *,           int *,           double *,
+   int *,           double *,        int *,           double * ) );
+static double   HPL_dlamc3   /* used to force values out of registers */
+STDC_ARGS(
+(  const double,    const double ) );
+static void     HPL_dlamc4   /* used while finding EMIN -- TODO confirm */
+STDC_ARGS(
+(  int *,           const double,    const int ) );
+static void     HPL_dlamc5
+STDC_ARGS(
+(  const int,       const int,       const int,       const int,
+   int *,           double * ) );
+static double   HPL_dipow    /* presumably x**n -- confirm at definition */
+STDC_ARGS(
+(  const double,    const int ) );
+
+#ifdef STDC_HEADERS
+double HPL_dlamch
+(
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_dlamch
+( CMACH )
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlamch returns the machine-specific arithmetic constant selected
+ * by CMACH: the relative machine precision (eps), the safe minimum such
+ * that 1 / sfmin does not overflow (sfmin), the base of the machine
+ * (base), the precision (prec), the number of (base) digits in the
+ * mantissa (t), the flag rnd (1.0 if addition rounds, else 0.0), the
+ * minimum exponent before (gradual) underflow (emin), the underflow
+ * threshold (rmin) base**(emin-1), the largest exponent before overflow
+ * (emax), or the overflow threshold (rmax) (base**emax)*(1-eps).
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * CMACH   (local input)                 const HPL_T_MACH
+ *         Selects the value returned by HPL_dlamch
+ *            = HPL_MACH_EPS,   HPL_dlamch := eps (default)
+ *            = HPL_MACH_SFMIN, HPL_dlamch := sfmin
+ *            = HPL_MACH_BASE,  HPL_dlamch := base
+ *            = HPL_MACH_PREC,  HPL_dlamch := eps*base
+ *            = HPL_MACH_MLEN,  HPL_dlamch := t
+ *            = HPL_MACH_RND,   HPL_dlamch := rnd
+ *            = HPL_MACH_EMIN,  HPL_dlamch := emin
+ *            = HPL_MACH_RMIN,  HPL_dlamch := rmin
+ *            = HPL_MACH_EMAX,  HPL_dlamch := emax
+ *            = HPL_MACH_RMAX,  HPL_dlamch := rmax
+ *
+ *         where
+ *
+ *            eps   = relative machine precision,
+ *            sfmin = safe minimum,
+ *            base  = base of the machine,
+ *            prec  = eps*base,
+ *            t     = number of digits in the mantissa,
+ *            rnd   = 1.0 if rounding occurs in addition,
+ *            emin  = minimum exponent before underflow,
+ *            rmin  = underflow threshold,
+ *            emax  = largest exponent before overflow,
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables .. (the statics keep the values across calls) */
+   static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
+                              rmax, prec;
+   static int                 needinit=1;
+   double                     rcpmax;
+   int                        radix=0, ndigits=0, rounds=0, expmin=0, expmax=0;
+/* .. Executable Statements .. (the machine is probed on the first call) */
+   if( needinit != 0 )
+   {
+      needinit = 0;
+      HPL_dlamc2( &radix, &ndigits, &rounds, &eps, &expmin, &rmin,
+                  &expmax, &rmax );
+      base   = (double)(radix);
+      t      = (double)(ndigits);
+      emin   = (double)(expmin);
+      emax   = (double)(expmax);
+      rnd    = ( rounds != 0 ) ? HPL_rone : HPL_rzero;
+      eps    = ( rounds != 0 ) ? HPL_dipow( base, 1 - ndigits ) / HPL_rtwo
+                               : HPL_dipow( base, 1 - ndigits );
+      prec   = eps * base;
+      sfmin  = rmin;
+      rcpmax = HPL_rone / rmax;
+/*
+ * Take the reciprocal of rmax plus a bit, so that rounding cannot cause
+ * overflow when computing  1/sfmin.
+ */
+      if( rcpmax >= sfmin ) sfmin = rcpmax * ( HPL_rone + eps );
+   }
+
+   switch( CMACH )
+   {
+      case HPL_MACH_SFMIN : return( sfmin );
+      case HPL_MACH_BASE  : return( base  );
+      case HPL_MACH_PREC  : return( prec  );
+      case HPL_MACH_MLEN  : return( t     );
+      case HPL_MACH_RND   : return( rnd   );
+      case HPL_MACH_EMIN  : return( emin  );
+      case HPL_MACH_RMIN  : return( rmin  );
+      case HPL_MACH_EMAX  : return( emax  );
+      case HPL_MACH_RMAX  : return( rmax  );
+      case HPL_MACH_EPS   :
+      default             : return( eps   );
+   }
+/* End of HPL_dlamch */
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc1
+(
+   int                        * BETA,
+   int                        * T,
+   int                        * RND,
+   int                        * IEEE1
+)
+#else
+static void HPL_dlamc1
+( BETA, T, RND, IEEE1 )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * IEEE1, * RND, * T;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
+ * and IEEE1. They are computed on the first call only and then cached.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition.  This may not be a  reliable guide to the
+ *         way in which the machine performs its arithmetic.
+ *
+ * IEEE1   (local output)              int *
+ *         Specifies  whether  rounding  appears  to be done in the IEEE
+ *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables .. (the statics hold the results of the first call)
+ */
+   double                     a, b, c, f, one, qtr, savec, t1, t2;
+   static int                 first=1, lbeta, lieee1, lrnd, lt;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; one = HPL_rone;
+/*
+ * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
+ * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
+ * that relevant values are stored and not held in registers, or are not
+ * affected by optimizers.
+ *
+ * Compute  a = 2.0**m  with the  smallest  positive integer m such that
+ * fl( a + 1.0 ) == a.
+ */
+      a = HPL_rone; c = HPL_rone;
+      do
+      { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
+      while( c == HPL_rone );
+/*
+ * Now compute b = 2.0**m with the smallest positive integer m such that
+ * fl( a + b ) > a.
+ */
+      b = HPL_rone; c = HPL_dlamc3( a, b );
+      while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
+/*
+ * Now compute the base.  a and c  are  neighbouring floating point num-
+ * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
+ * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
+ * BETA and not (BETA-1).
+ */
+      qtr = one / 4.0; savec = c;
+      c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
+/*
+ * Now  determine  whether  rounding or chopping occurs, by adding a bit
+ * less than BETA/2 and a bit more than BETA/2 to a.
+ */
+      b = (double)(lbeta);
+      f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( c == a ) { lrnd = 1; } else { lrnd = 0; }
+      f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
+/*
+ * Try  and decide whether rounding is done in the  IEEE  round to nea-
+ * rest style.  b/2 is half a unit in the last place of the two numbers
+ * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
+ * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
+ * to savec should change savec.
+ */
+      t1 = HPL_dlamc3( b / HPL_rtwo, a );
+      t2 = HPL_dlamc3( b / HPL_rtwo, savec );
+      if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
+      else                                                  lieee1 = 0;
+/*
+ * Now find the mantissa, T. It should be the integer part of log to the
+ * base BETA of a, however it is safer to determine T by powering. The
+ * loop stops at the first T for which  fl( fl( beta**T + 1.0 ) - beta**T )
+ * is not 1.0 (presuming HPL_dlamc3 returns the sum of its arguments).
+ */
+      lt = 0; a = HPL_rone; c = HPL_rone;
+
+      do
+      {
+         lt++; a *= (double)(lbeta);
+         c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
+      } while( c == HPL_rone );
+   }
+
+   *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;  /* cached values */
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc2
+(
+   int                        * BETA, 
+   int                        * T,
+   int                        * RND,
+   double                     * EPS,
+   int                        * EMIN,
+   double                     * RMIN,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * EMAX, * EMIN, * RND, * T;
+   double                     * EPS, * RMAX, * RMIN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc2  determines the machine  parameters specified in its argu-
+ * ment list.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
+ * based on a function PARANOIA  by  W. Kahan of the University of Cali-
+ * fornia at Berkeley for the computation of the  relative machine epsi-
+ * lon eps.
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition. This may not be a reliable  guide to  the
+ *         way in which the machine performs its arithmetic.
+ *
+ * EPS     (local output)              double *
+ *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
+ *         where fl denotes the computed value.
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before (gradual) underflow occurs.
+ *
+ * RMIN    (local output)              double *
+ *         The smallest  normalized  number  for  the  machine, given by
+ *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
+ *         of BETA.
+ *
+ * EMAX    (local output)              int *
+ *         The maximum exponent before overflow occurs.
+ *
+ * RMAX    (local output)              double *
+ *         The  largest  positive  number  for  the  machine,  given  by
+ *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
+ *         value of BETA.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   static double              leps, lrmax, lrmin;
+   double                     a, b, c, half, one, rbase, sixth, small,
+                              third, two, zero;
+   static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
+                              lt=0;
+   int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
+                              lrnd=0, ngnmin=0, ngpmin=0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
+/*
+ * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
+ * T, RND, EPS, EMIN and RMIN.
+ *
+ * Throughout this routine we use the function HPL_dlamc3 to ensure that
+ * relevant values are stored and not held in registers,  or are not af-
+ * fected by optimizers.
+ *
+ * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
+ */
+      HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
+/*
+ * Start to find eps.
+ */
+      b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
+/*
+ * Try some tricks to see whether or not this is the correct  EPS.
+ */
+      b     = two / 3.0; 
+      half  = one / HPL_rtwo;
+      sixth = HPL_dlamc3( b, -half );
+      third = HPL_dlamc3( sixth, sixth );
+      b     = HPL_dlamc3( third, -half );
+      b     = HPL_dlamc3( b, sixth );
+      b     = Mabs( b ); if( b < leps ) b = leps;
+
+      leps = HPL_rone;
+
+      while( ( leps > b ) && ( b > zero ) )
+      {
+         leps = b;
+         c = HPL_dlamc3( half * leps,
+                         HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
+         c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
+         c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
+      }
+      if( a < leps ) leps = a;
+/*
+ * Computation of EPS complete.
+ *
+ * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
+ * dividing a by BETA until (gradual) underflow occurs. This is detected
+ * when we cannot recover the previous a.
+ */
+      rbase = one / (double)(lbeta); small = one;
+      for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
+      a = HPL_dlamc3( one, small );
+      HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
+      HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
+
+      ieee = 0;
+
+      if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
+      {
+         if( ngpmin == gpmin )
+         {
+/*
+ * Non twos-complement machines, no gradual underflow; e.g.,  VAX
+ */
+            lemin = ngpmin;
+         }
+         else if( ( gpmin-ngpmin ) == 3 )
+         {
+/*
+ * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
+ * dard followers
+ */
+            lemin = ngpmin - 1 + lt; ieee = 1;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, gpmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
+      {
+         if( Mabs( ngpmin-ngnmin ) == 1 )
+         {
+/*
+ * Twos-complement machines, no gradual underflow; e.g., CYBER 205
+ */
+            lemin = Mmax( ngpmin, ngnmin );
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
+      {
+         if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
+         {
+/*
+ * Twos-complement machines with gradual underflow; no known machine
+ */
+            lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else
+      {
+/*
+ * A guess; no known machine
+ */
+         lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
+         lemin = Mmin( lemin, gnmin ); iwarn = 1;
+      }
+/*
+ * Comment out this if block if EMIN is ok
+ */
+      if( iwarn != 0 )
+      {
+         first = 1;
+         HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
+"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
+"If, after inspection, the value EMIN looks acceptable, please comment ",
+"out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
+"otherwise supply EMIN explicitly." );
+      }
+/*
+ * Assume IEEE arithmetic if we found denormalised  numbers above, or if
+ * arithmetic seems to round in the  IEEE style,  determined  in routine
+ * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
+ * ever, faulty machines may have one or the other.
+ */
+      if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
+      else                                   ieee = 0;
+/*
+ * Compute  RMIN by successive division by  BETA. We could compute  RMIN
+ * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
+ * tation.
+ */
+      lrmin = HPL_rone;
+      for( i = 0; i < 1 - lemin; i++ )
+         lrmin = HPL_dlamc3( lrmin*rbase, zero );
+/*
+ * Finally, call HPL_dlamc5 to compute emax and rmax.
+ */
+      HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
+   }
+   *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
+   *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dlamc3( const double A, const double B )
+#else
+static double HPL_dlamc3( A, B )
+/*
+ * .. Scalar Arguments ..
+ */
+   const double               A, B;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc3 returns A + B.  It is intended to force  a and b  to be
+ * stored prior to doing the addition,  for use in situations where op-
+ * timizers might hold one of these in a register.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc3.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * A, B    (local input)               const double
+ *         The two values a and b to be added.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   return( A + B );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc4
+(
+   int                        * EMIN,
+   const double               START,
+   const int                  BASE
+)
+#else
+static void HPL_dlamc4( EMIN, START, BASE )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * EMIN;
+   const int                  BASE;
+   const double               START;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc4 is a service function for HPL_dlamc2;  it computes EMIN.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc4.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before  (gradual) underflow, computed by
+ *         setting A = START and dividing  by  BASE until the previous A
+ *         can not be recovered.
+ *
+ * START   (local input)               const double
+ *         The starting point for determining EMIN.
+ *
+ * BASE    (local input)               const int
+ *         The base of the machine.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   a     = START; one = HPL_rone; rbase = one / (double)(BASE);
+   zero  = HPL_rzero;
+   *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
+/* Keep dividing by BASE while a is recovered exactly by all 4 checks. */
+   do
+   {
+      (*EMIN)--; a = b1;
+      b1 = HPL_dlamc3( a /  BASE,  zero );
+      c1 = HPL_dlamc3( b1 *  BASE, zero );
+      d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;
+      b2 = HPL_dlamc3( a * rbase,  zero );
+      c2 = HPL_dlamc3( b2 / rbase, zero );
+      d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
+   } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc5
+(
+   const int                  BETA,
+   const int                  P, 
+   const int                  EMIN,
+   const int                  IEEE,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  BETA, EMIN, IEEE, P; 
+   int                        * EMAX;
+   double                     * RMAX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
+ * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+ * approximately to a power of 2.  It will fail  on machines where  this
+ * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
+ * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
+ * too large (i.e. too close to zero), probably with overflow.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc5.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local input)               const int
+ *         The base of floating-point arithmetic.
+ *
+ * P       (local input)               const int
+ *         The number of base BETA digits in the mantissa of a floating-
+ *         point value.
+ *
+ * EMIN    (local input)               const int
+ *         The minimum exponent before (gradual) underflow.
+ *
+ * IEEE    (local input)               const int
+ *         A logical flag specifying whether or not  the arithmetic sys-
+ *         tem is thought to comply with the IEEE standard.
+ *
+ * EMAX    (local output)              int *
+ *         The largest exponent before overflow.
+ *
+ * RMAX    (local output)              double *
+ *         The largest machine floating-point number.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     oldy=HPL_rzero, recbas, y, z;
+   int                        exbits=1, expsum, i, lexp=1, nbits, try,
+                              uexp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
+ * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
+ * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
+ * required number RMAX).
+ */
+l_10:
+   try = (int)( (unsigned int)(lexp) << 1 );
+   if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
+/* NOTE: local "try" is a C++ keyword, so this must be compiled as C. */
+   if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
+/*
+ * Now lexp is less than or equal to -EMIN, and uexp is greater than or
+ * equal to -EMIN (given EMIN <= -1). exbits is the number of bits nee-
+ * ded to store the exponent.
+ */
+   if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
+   { expsum = (int)( (unsigned int)(lexp) << 1 ); }
+   else
+   { expsum = (int)( (unsigned int)(uexp) << 1 ); }
+/*
+ * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
+ */
+   *EMAX = expsum + EMIN - 1;
+/*
+ * nbits  is  the total number of bits needed to store a  floating-point
+ * number.
+ */
+   nbits = 1 + exbits + P;
+
+   if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
+   {
+/*
+ * Either there are an odd number of bits used to store a floating-point
+ * number, which is unlikely, or some bits are not used in the represen-
+ * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
+ * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
+ * which is perhaps the most likely. We have to assume the last alterna-
+ * tive.  If this is true,  then we need to reduce  EMAX  by one because
+ * there must be some way of representing zero  in an  implicit-bit sys-
+ * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
+ */
+      (*EMAX)--;
+   }
+
+   if( IEEE != 0 )
+   {
+/*
+ * Assume we are on an IEEE  machine which reserves one exponent for in-
+ * finity and NaN.
+ */
+      (*EMAX)--;
+   }
+/*
+ * Now create RMAX, the largest machine number, which should be equal to
+ * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
+ * careful that the result is less than 1.0.
+ */
+   recbas = HPL_rone / (double)(BETA);
+   z      = (double)(BETA) - HPL_rone;
+   y      = HPL_rzero;
+
+   for( i = 0; i < P; i++ )
+   { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
+
+   if( y >= HPL_rone ) y = oldy;
+/*
+ * Now multiply by BETA**EMAX to get RMAX.
+ */
+   for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
+
+   *RMAX = y;
+/*
+ * End of HPL_dlamc5
+ */
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dipow
+(
+   const double               X,
+   const int                  N
+)
+#else
+static double HPL_dipow( X, N )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  N;
+   const double               X;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dipow returns x**n,  built up by repeated multiplication.  A zero
+ * x yields zero for every n;  a negative n uses the reciprocal of x.
+ *
+ * Arguments
+ * =========
+ *
+ * X       (local input)               const double
+ *         On entry, X is the real base x.
+ *
+ * N       (local input)               const int
+ *         On entry, N is the integer exponent n.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     factor, power;
+   int                        left;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( X == HPL_rzero ) return( HPL_rzero );
+   factor = ( N < 0 ? HPL_rone / X : X ); left = ( N < 0 ? -N : N );
+   for( power = HPL_rone; left > 0; left-- ) power *= factor;
+   return( power );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c
new file mode 100644
index 000000000..82f118b6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_dlange
+(
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_dlange
+( NORM, M, N, A, LDA )
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlange computes, for the M-by-N matrix A,  one of three  measures
+ * of magnitude selected by NORM:
+ *  
+ *    HPL_NORM_A :  max(abs(A(i,j))), the largest entry in modulus,
+ *    HPL_NORM_1 :  norm1(A), the largest column sum of abs(A(i,j)),
+ *    HPL_NORM_I :  normI(A), the largest row sum of abs(A(i,j)).
+ *  
+ * HPL_rzero is returned when M or N is not positive, or when NORM is
+ * none of the above.  Note that max(abs(A(i,j))) is not a matrix norm.
+ * A is read column by column, consecutive columns being LDA apart.
+ *
+ * Arguments
+ * =========
+ *
+ * NORM    (local input)                 const HPL_T_NORM
+ *         On entry,  NORM  selects which of the three quantities listed
+ *         above is returned.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  is the row count of the matrix A.  M  should be
+ *         at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  is the column count of the matrix A.  N  should
+ *         be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an  array of dimension  (LDA,N)  that
+ *         holds the matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  is the leading dimension of the array A.  LDA
+ *         should be at least max(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     acc, norm = HPL_rzero, * sums = NULL;
+   int                        ir, jc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M < 1 ) || ( N < 1 ) ) return( HPL_rzero );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest entry in modulus, scanning one column at a time.
+ */
+      for( jc = 0; jc < N; jc++ )
+      {
+         for( ir = 0; ir < M; ir++ ) norm = Mmax( norm, Mabs( A[ir] ) );
+         A += LDA;
+      }
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: accumulate abs() down each column into sums[jc].
+ */
+      sums = (double *)malloc( sizeof( double ) * (size_t)(N) );
+      if( !sums )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( jc = 0; jc < N; jc++ )
+         {
+            acc = HPL_rzero;
+            for( ir = 0; ir < M; ir++ ) acc += Mabs( A[ir] );
+            sums[jc] = acc; A += LDA;
+         }
+/*
+ * The 1-norm is the largest of the column sums.
+ */
+         acc = sums[HPL_idamax( N, sums, 1 )]; norm = Mabs( acc );
+         free( sums );
+      }
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: accumulate abs() along each row into sums[ir].
+ */
+      sums = (double *)malloc( sizeof( double ) * (size_t)(M) );
+      if( !sums )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( ir = 0; ir < M; ir++ ) sums[ir] = HPL_rzero;
+
+         for( jc = 0; jc < N; jc++ )
+         {
+            for( ir = 0; ir < M; ir++ ) sums[ir] += Mabs( A[ir] );
+            A += LDA;
+         }
+/*
+ * The inf-norm is the largest of the row sums.
+ */
+         acc = sums[HPL_idamax( M, sums, 1 )]; norm = Mabs( acc );
+         free( sums );
+      }
+   }
+
+   return( norm );
+/*
+ * End of HPL_dlange
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c
new file mode 100644
index 000000000..f29df3cd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c
@@ -0,0 +1,130 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt writes the entries of the  M-by-N  matrix  A  to standard
+ * error, one entry per line,  column after column.  Each line reads
+ * CMATNM(row,col)=value,  where row and col are the local indexes shif-
+ * ted by IA and JA respectively.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  is the number of rows of A to print.  M  should
+ *         be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  is the number of columns of  A  to print.  N
+ *         should be at least zero.
+ *
+ * A       (local input)                 double *
+ *         On entry, A  points to an array of dimension (LDA,N).
+ *
+ * IA      (local input)                 const int
+ *         On entry, IA is added to the row index in the printed label.
+ *
+ * JA      (local input)                 const int
+ *         On entry,  JA  is added to the column index  in the  printed
+ *         label.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  is the leading dimension of the array A.  LDA
+ *         should be at least max(1,M).
+ *
+ * CMATNM  (local input)                 const char *
+ *         On entry, CMATNM is the matrix name used in the label.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ir, jc;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( jc = 0; jc < N; jc++ )
+      for( ir = 0; ir < M; ir++ )
+      {
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA + ir,
+                      JA + jc, *(Mptr( A, ir, jc, LDA )) );
+      }
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factors for HPL_dlatcpy.  Both can be overridden at
+ * compile time;  each *_DEPTH must be a power of two and the matching
+ * *_LOG2_* macro must be defined alongside it as its base-2 logarithm.
+ * HPL_dlatcpy below has unrolled code for M depths 1, 2, 4, 8, 16, 32
+ * and for N depths 1, 2, 4.
+ *
+ * Alternative (deeper) setting kept for reference:
+ *    HPL_LATCPY_M_DEPTH = 32,  HPL_LATCPY_LOG2_M_DEPTH = 5
+ *    HPL_LATCPY_N_DEPTH =  4,  HPL_LATCPY_LOG2_N_DEPTH = 2
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH       4
+#define    HPL_LATCPY_LOG2_M_DEPTH  2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH       2
+#define    HPL_LATCPY_LOG2_N_DEPTH  1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B.
+ * If HPL_LATCPY_USE_COPY is defined, HPL_dcopy fills B column by column.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of  rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the number of  rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B += LDB ) HPL_dcopy( M, A+j, LDA, B, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_fprintf is a wrapper around fprintf flushing the output stream.
+ * The text is formatted directly into STREAM, without any length limit.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+/*
+ * Format straight into STREAM: no fixed-size staging buffer to overflow
+ */
+   (void) vfprintf( STREAM, FORM, argptr );
+   va_end( argptr ); 
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_warn displays an error message.
+ * Text that does not fit the internal line buffer is truncated.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y.
+ * That is, y := y + alpha * x, using the backend #ifdef-selected below.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;   /* N rounded down to a multiple of 4 */
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+/* Quick return: an empty vector or alpha == 0 leaves y untouched. */
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* Unrolled by 4; StX is where X stands once nu elements are done. */
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+            /* All four loads above happen before any store below. */
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+ 
+            X  += incX4;
+            Y  += incY4;
+ 
+         } while( X != StX );
+      }
+      /* Clean-up loop for the remaining N - nu (at most 3) elements. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         y0  = (*Y);
+ 
+         *Y  = y0 + alpha * x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;  /* local copy; its address is passed below */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ * That is, y := x, using the backend #ifdef-selected below.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;   /* N rounded down to a multiple of 8 */
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+/* Quick return: nothing is copied for an empty vector. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* Unrolled by 8; StX is where X stands once nu elements are done. */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+            /* All eight loads above happen before any store below. */
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+ 
+            X  += incX8;
+            Y  += incY8;
+ 
+         } while( X != StX );
+      }
+      /* Clean-up loop for the remaining N - nu (at most 7) elements. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         *Y  = x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dcopy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            scaled;
+   int                        col, k, row;
+/* Column by column: C(:,col) := BETA*C(:,col) + ALPHA * A * B(:,col). */
+   for( col = 0; col < N; col++ )
+   {
+      HPL_dscal( M, BETA, C + col * LDC, 1 );
+      for( k = 0; k < K; k++ )
+      {
+         scaled = ALPHA * B[k + col * LDB];
+         for( row = 0; row < M; row++ )
+         { C[row + col * LDC] += A[row + k * LDA] * scaled; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            scaled;
+   int                        col, k, row;
+/* Column by column: C(:,col) := BETA*C(:,col) + ALPHA * A * B(col,:)^T. */
+   for( col = 0; col < N; col++ )
+   {
+      HPL_dscal( M, BETA, C + col * LDC, 1 );
+      for( k = 0; k < K; k++ )
+      {
+         scaled = ALPHA * B[col + k * LDB];
+         for( row = 0; row < M; row++ )
+         { C[row + col * LDC] += A[row + k * LDA] * scaled; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;
+   int                        col, k, row;
+/* C(row,col) := BETA*C(row,col) + ALPHA * sum_k A(k,row) * B(k,col). */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         dot = HPL_rzero;
+         for( k = 0; k < K; k++ )
+         { dot += A[k + row * LDA] * B[k + col * LDB]; }
+         if( BETA != HPL_rzero ) C[row + col * LDC] *= BETA;
+         else                    C[row + col * LDC]  = HPL_rzero;
+         C[row + col * LDC] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;
+   int                        col, k, row;
+/* C(row,col) := BETA*C(row,col) + ALPHA * sum_k A(k,row) * B(col,k). */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         dot = HPL_rzero;
+         for( k = 0; k < K; k++ )
+         { dot += A[k + row * LDA] * B[col + k * LDB]; }
+         if( BETA != HPL_rzero ) C[row + col * LDC] *= BETA;
+         else                    C[row + col * LDC]  = HPL_rzero;
+         C[row + col * LDC] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemm0
+(
+   const enum HPL_TRANS       TRANSA,
+   const enum HPL_TRANS       TRANSB,
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+                        BETA, C, LDC )
+   const enum HPL_TRANS       TRANSA, TRANSB;
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   int                        i, j;
+/* Column-major driver: quick returns first, then dispatch on TRANSA/TRANSB. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
+         ( BETA == HPL_rone ) ) ) return;
+/* ALPHA == 0: product term vanishes, C := BETA * C (C unread if BETA == 0). */
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ )
+         for( i = 0; i < M; i++ )
+            C[i+j*LDC] = ( BETA == HPL_rzero ? HPL_rzero : BETA * C[i+j*LDC] );
+      return;
+   }
+   if( TRANSB == HplNoTrans )
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+   else
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANSA,
+   const enum HPL_TRANS             TRANSB,
+   const int                        M,
+   const int                        N,
+   const int                        K,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   B,
+   const int                        LDB,
+   const double                     BETA,
+   double *                         C,
+   const int                        LDC
+)
+#else
+void HPL_dgemm
+( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANSA;
+   const enum HPL_TRANS             TRANSB;
+   const int                        M;
+   const int                        N;
+   const int                        K;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   B;
+   const int                        LDB;
+   const double                     BETA;
+   double *                         C;
+   const int                        LDC;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemm performs one of the matrix-matrix operations
+ *  
+ *     C := alpha * op( A ) * op( B ) + beta * C
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+ * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:
+ *            ORDER = HplRowMajor,
+ *            ORDER = HplColumnMajor.
+ *
+ * TRANSA  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSA  specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation follows:
+ *            TRANSA==HplNoTrans    : op( A ) = A,
+ *            TRANSA==HplTrans      : op( A ) = A^T,
+ *            TRANSA==HplConjTrans  : op( A ) = A^T.
+ *
+ * TRANSB  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSB  specifies the form of  op(B)  to be used in
+ *         the matrix-matrix operation follows:
+ *            TRANSB==HplNoTrans    : op( B ) = B,
+ *            TRANSB==HplTrans      : op( B ) = B^T,
+ *            TRANSB==HplConjTrans  : op( B ) = B^T.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the  number  of rows  of the  matrix
+ *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the number  of columns of the matrix
+ *         op(B)  and  the number of columns of the matrix  C. N must be
+ *         at least zero.
+ *
+ * K       (local input)                 const int
+ *         On entry,  K  specifies  the  number of columns of the matrix
+ *         op(A) and the number of rows of the matrix op(B).  K  must be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero  then the elements of the matrices A and B
+ *         need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
+ *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+ *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+ *         the array  A must contain the matrix A, otherwise the leading
+ *         k  by  m  part of the array  A  must  contain the  matrix  A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA  specifies the first dimension of A as declared
+ *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+ *         LDA must be at least max(1,m), otherwise LDA must be at least
+ *         max(1,k).
+ *
+ * B       (local input)                 const double *
+ *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
+ *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+ *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+ *         the array  B must contain the matrix B, otherwise the leading
+ *         n  by  k  part of the array  B  must  contain  the matrix  B.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB  specifies the first dimension of B as declared
+ *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+ *         LDB must be at least max(1,k), otherwise LDB must be at least
+ *         max(1,n).
+ *
+ * BETA    (local input)                 const double
+ *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+ *         supplied  as  zero  then  the  elements of the matrix C  need
+ *         not be set on input.
+ *
+ * C       (local input/output)          double *
+ *         On entry,  C  is an array of dimension (LDC,n). Before entry,
+ *         the  leading m by n part  of  the  array  C  must contain the
+ *         matrix C,  except when beta is zero, in which case C need not
+ *         be set on entry. On exit, the array  C  is overwritten by the
+ *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+ *
+ * LDC     (local input)                 const int
+ *         On entry, LDC  specifies the first dimension of C as declared
+ *         in  the   calling  (sub)  program.   LDC  must  be  at  least
+ *         max(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   /* Arguments are forwarded to CBLAS unchanged; nothing is printed per call. */
+   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
+                  C, LDC );
+   }
+   else
+   {
+      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
+                  C, LDC );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
+                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77K                 K
+#define F77lda               LDA
+#define F77ldb               LDB
+#define F77ldc               LDC
+#endif
+   char                      ctransa, ctransb;
+/* Translate the HPL_TRANS values into the 'N' / 'T' / 'C' characters. */
+   if(      TRANSA == HplNoTrans ) ctransa = 'N';
+   else if( TRANSA == HplTrans   ) ctransa = 'T';
+   else                            ctransa = 'C';
+ 
+   if(      TRANSB == HplNoTrans ) ctransb = 'N';
+   else if( TRANSB == HplTrans   ) ctransb = 'T';
+   else                            ctransb = 'C';
+/* Column-major is passed as given; row-major swaps A with B and M with N. */
+   if( ORDER == HplColumnMajor )
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+   }
+   else
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dgemm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c
new file mode 100644
index 000000000..6366c5a48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c
@@ -0,0 +1,326 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * X,
+   const int                  INCX,
+   const double               BETA,
+   double                     * Y,
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{
+/*
+ * Column-major  y := ALPHA * op( A ) * x + BETA * y  for an M by N  A.
+ */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/*
+ * Quick return: empty operand, or ALPHA = 0 together with BETA = 1.
+ */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
+/* ALPHA == 0: y := BETA * y;  y has M entries for A,  N for A^T.    */
+   if( ALPHA == HPL_rzero )
+   { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; }
+   if( TRANS == HplNoTrans )
+   {
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else
+   {
+      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
+         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   X;
+   const int                        INCX;
+   const double                     BETA;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemv performs one of the matrix-vector operations
+ *
+ *     y := alpha * op( A ) * x + beta * y,
+ *
+ * where op( A ) is one of
+ *
+ *     op( A ) = A   or   op( A ) = A^T,
+ *
+ * alpha and beta are scalars, x and y are vectors and  A  is an m by n
+ * matrix. The work is done by the backend selected through HPL_CALL_*.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies the  operation to be performed as
+ *         follows:   
+ *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+ *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  A and X  need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * X       (local input)                 const double *
+ *         On entry, X is an incremented array of at least ( 1 + ( k - 1 )
+ *         * abs( INCX ) ) entries, k = n for HplNoTrans, m for HplTrans.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * BETA    (local input)                 const double
+ *         On entry, BETA  specifies the scalar beta.    When  BETA   is
+ *         supplied as zero then  Y  need not be set on input.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( k - 1 ) * abs( INCY ) ),  with  k = m for  HplNoTrans
+ *         and  k = n  for  HplTrans.  Before  entry with BETA non-zero,
+ *         Y must contain the vector y.  On exit,  Y  is  overwritten by
+ *         the updated vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+   else
+   {
+      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftran;
+#endif
+/* Integer operands whose addresses are handed to F77dgemv below.      */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   char                      ctran;
+/* Row-major is served by flipping TRANS and swapping M and N below.   */
+   if( ORDER == HplColumnMajor )
+   {
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+   else
+   {
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+
+#endif
+/*
+ * End of HPL_dgemv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c
new file mode 100644
index 000000000..5ea702778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dger
+
+#ifdef STDC_HEADERS
+void HPL_dger
+(
+   const enum HPL_ORDER             ORDER,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY,
+   double *                         A,
+   const int                        LDA
+)
+#else
+void HPL_dger
+( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+   const enum HPL_ORDER             ORDER;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+   double *                         A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dger performs the rank 1 operation  A := alpha * x * y^T + A,
+ * where alpha is a scalar,  x is an m-element vector, y is an n-element
+ * vector and A is an m by n matrix.
+ *
+ * The update is delegated to cblas_dger or F77dger, or done by explicit
+ * loops, depending on which HPL_CALL_* macro is defined.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  X and Y  need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input)                 double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients. On exit, A is
+ *         overwritten by the updated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           t0;
+   int                       i, iaij, ix, iy, j, jaj, jx, jy;
+/* Nothing to add when A is empty or ALPHA is zero.                    */
+   if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return;
+ 
+   if( ORDER == HplColumnMajor )
+   {
+      for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = ALPHA * Y[jy];
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { A[iaij] += X[ix] * t0; }
+      }
+   }
+   else
+   {
+      for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY )
+         { A[iaij] += Y[iy] * t0; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+/* Row-major: M and N, and x and y, trade places in the F77dger call.  */
+   if( ORDER == HplColumnMajor )
+   {  F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); }
+   else
+   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
+#endif
+/*
+ * End of HPL_dger
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c
new file mode 100644
index 000000000..7e041991f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dscal
+
+#ifdef STDC_HEADERS
+void HPL_dscal
+(
+   const int                        N,
+   const double                     ALPHA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dscal
+( N, ALPHA, X, INCX )
+   const int                        N;
+   const double                     ALPHA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dscal scales the vector x by alpha, in place, using the backend
+ * selected by HPL_CALL_CBLAS, HPL_CALL_VSIPL or HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are  scaled
+ *         by the scalar alpha.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dscal( N, ALPHA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   register const double     alpha = ALPHA;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+/* alpha == 1 leaves x as is; alpha == 0 stores zeros without reading x */
+   if( ( N > 0 ) && ( alpha != HPL_rone ) )
+   {
+      if( alpha == HPL_rzero )
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = (double *)X + nu * INCX;
+/* nu is N rounded down to a multiple of 8: eight entries per pass.    */
+            do
+            {
+               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
+               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
+               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
+               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8;
+
+            } while( X != StX );
+         }
+/* Remaining N - nu entries, one at a time.                            */
+         for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; }
+      }
+      else
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = X + nu * INCX;
+/* Same 8-way unrolling: load eight entries, scale them, store them.   */
+            do
+            {
+               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
+               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;
+ 
+               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
+               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;
+ 
+               X  += incX8;
+ 
+            } while( X != StX );
+         }
+/* Remaining N - nu entries, one at a time.                            */
+         for( i = N - nu; i != 0; i-- )
+         { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+
+   F77dscal( &F77N, &alpha, X, &F77incx );
+#endif
+/*
+ * End of HPL_dscal
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c
new file mode 100644
index 000000000..a336a7d29
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c
@@ -0,0 +1,977 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsm
+
+#ifdef HPL_CALL_VSIPL
+
+/*
+ * Left side, lower triangular A, no transpose, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where A * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ak;
+   int                        col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      for( piv = 0; piv < M; piv++ )
+      {
+         Ak = A + piv * LDA;
+         Bj[piv] /= Ak[piv];
+         for( row = piv + 1; row < M; row++ )
+         { Bj[row] -= Bj[piv] * Ak[row]; }
+      }
+   }
+}
+
+/*
+ * Left side, lower triangular A, no transpose, unit diagonal (the
+ * diagonal of A is never read): B <- X where A * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ak;
+   int                        col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      for( piv = 0; piv < M; piv++ )
+      {
+         Ak = A + piv * LDA;
+         for( row = piv + 1; row < M; row++ )
+         { Bj[row] -= Bj[piv] * Ak[row]; }
+      }
+   }
+}
+
+/*
+ * Left side, lower triangular A, transposed, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where A^T * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ai;
+   register double            acc;
+   int                        col, row, inner;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      /* A^T is upper triangular: solve from the last row upwards. */
+      for( row = M - 1; row >= 0; row-- )
+      {
+         Ai  = A + row * LDA;
+         acc = ALPHA * Bj[row];
+         for( inner = row + 1; inner < M; inner++ )
+         { acc -= Ai[inner] * Bj[inner]; }
+         acc /= Ai[row];
+         Bj[row] = acc;
+      }
+   }
+}
+
+/*
+ * Left side, lower triangular A, transposed, unit diagonal (the
+ * diagonal of A is never read): B <- X where A^T * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ai;
+   register double            acc;
+   int                        col, row, inner;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      /* A^T is upper triangular: solve from the last row upwards. */
+      for( row = M - 1; row >= 0; row-- )
+      {
+         Ai  = A + row * LDA;
+         acc = ALPHA * Bj[row];
+         for( inner = row + 1; inner < M; inner++ )
+         { acc -= Ai[inner] * Bj[inner]; }
+         Bj[row] = acc;
+      }
+   }
+}
+
+/*
+ * Left side, upper triangular A, no transpose, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where A * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ak;
+   int                        col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      /* Back substitution: start from the last unknown. */
+      for( piv = M - 1; piv >= 0; piv-- )
+      {
+         Ak = A + piv * LDA;
+         Bj[piv] /= Ak[piv];
+         for( row = 0; row < piv; row++ )
+         { Bj[row] -= Bj[piv] * Ak[row]; }
+      }
+   }
+}
+
+
+/*
+ * Left side, upper triangular A, no transpose, unit diagonal (the
+ * diagonal of A is never read): B <- X where A * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ak;
+   int                        col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      /* Back substitution: start from the last unknown. */
+      for( piv = M - 1; piv >= 0; piv-- )
+      {
+         Ak = A + piv * LDA;
+         for( row = 0; row < piv; row++ )
+         { Bj[row] -= Bj[piv] * Ak[row]; }
+      }
+   }
+}
+
+
+/*
+ * Left, upper, transposed, non-unit: B <- X with A^T * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ai;
+   register double            acc;
+   int                        col, row, inner;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         Ai  = A + row * LDA;
+         acc = ALPHA * Bj[row];
+         for( inner = 0; inner < row; inner++ )
+         { acc -= Ai[inner] * Bj[inner]; }
+         acc /= Ai[row];
+         Bj[row] = acc;
+      }
+   }
+}
+
+
+/*
+ * Left, upper, transposed, unit diag: B <- X with A^T * X = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   const double               * Ai;
+   register double            acc;
+   int                        col, row, inner;
+
+   for( col = 0; col < N; col++ )
+   {
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         Ai  = A + row * LDA;
+         acc = ALPHA * Bj[row];
+         for( inner = 0; inner < row; inner++ )
+         { acc -= Ai[inner] * Bj[inner]; }
+         Bj[row] = acc;
+      }
+   }
+}
+
+
+/*
+ * Right side, lower triangular A, no transpose, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where X * A = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Aj;
+   int                        col, src, row;
+
+   for( col = N - 1; col >= 0; col-- )
+   {
+      Aj = A + col * LDA;
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      /* Columns right of col already hold their final values. */
+      for( src = col + 1; src < N; src++ )
+      {
+         Bk = B + src * LDB;
+         for( row = 0; row < M; row++ ) { Bj[row] -= Aj[src] * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bj[row] /= Aj[col]; }
+   }
+}
+
+
+/*
+ * Right side, lower triangular A, no transpose, unit diagonal (the
+ * diagonal of A is never read): B <- X where X * A = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Aj;
+   int                        col, src, row;
+
+   for( col = N - 1; col >= 0; col-- )
+   {
+      Aj = A + col * LDA;
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      /* Columns right of col already hold their final values. */
+      for( src = col + 1; src < N; src++ )
+      {
+         Bk = B + src * LDB;
+         for( row = 0; row < M; row++ ) { Bj[row] -= Aj[src] * Bk[row]; }
+      }
+   }
+}
+
+
+/*
+ * Right side, lower triangular A, transposed, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where X * A^T = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Ak;
+   register double            akj;
+   int                        col, piv, row;
+
+   for( piv = 0; piv < N; piv++ )
+   {
+      Ak = A + piv * LDA;
+      Bk = B + piv * LDB;
+      for( row = 0; row < M; row++ ) { Bk[row] /= Ak[piv]; }
+      for( col = piv + 1; col < N; col++ )
+      {
+         Bj  = B + col * LDB;
+         akj = Ak[col];
+         for( row = 0; row < M; row++ ) { Bj[row] -= akj * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bk[row] *= ALPHA; }
+   }
+}
+
+
+/*
+ * Right side, lower triangular A, transposed, unit diagonal (the
+ * diagonal of A is never read): B <- X where X * A^T = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Ak;
+   register double            akj;
+   int                        col, piv, row;
+
+   for( piv = 0; piv < N; piv++ )
+   {
+      Ak = A + piv * LDA;
+      Bk = B + piv * LDB;
+      for( col = piv + 1; col < N; col++ )
+      {
+         Bj  = B + col * LDB;
+         akj = Ak[col];
+         for( row = 0; row < M; row++ ) { Bj[row] -= akj * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bk[row] *= ALPHA; }
+   }
+}
+
+
+/*
+ * Right, upper, no transpose, non-unit: B <- X with X * A = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Aj;
+   int                        col, src, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Aj = A + col * LDA;
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      for( src = 0; src < col; src++ )
+      {
+         Bk = B + src * LDB;
+         for( row = 0; row < M; row++ ) { Bj[row] -= Aj[src] * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bj[row] /= Aj[col]; }
+   }
+}
+
+
+/*
+ * Right, upper, no transpose, unit diag: B <- X with X * A = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Aj;
+   int                        col, src, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      Aj = A + col * LDA;
+      Bj = B + col * LDB;
+      for( row = 0; row < M; row++ ) { Bj[row] *= ALPHA; }
+      for( src = 0; src < col; src++ )
+      {
+         Bk = B + src * LDB;
+         for( row = 0; row < M; row++ ) { Bj[row] -= Aj[src] * Bk[row]; }
+      }
+   }
+}
+
+
+/*
+ * Right side, upper triangular A, transposed, non-unit diagonal:
+ * overwrites B (M x N, column-major) with X where X * A^T = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTN
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Ak;
+   register double            akj;
+   int                        col, piv, row;
+
+   for( piv = N - 1; piv >= 0; piv-- )
+   {
+      Ak = A + piv * LDA;
+      Bk = B + piv * LDB;
+      for( row = 0; row < M; row++ ) { Bk[row] /= Ak[piv]; }
+      for( col = 0; col < piv; col++ )
+      {
+         Bj  = B + col * LDB;
+         akj = Ak[col];
+         for( row = 0; row < M; row++ ) { Bj[row] -= akj * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bk[row] *= ALPHA; }
+   }
+}
+
+/*
+ * Right side, upper triangular A, transposed, unit diagonal (the
+ * diagonal of A is never read): B <- X where X * A^T = ALPHA * B.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTU
+( const int M, const int N, const double ALPHA, const double * A,
+  const int LDA, double * B, const int LDB )
+#else
+static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj, * Bk;
+   const double               * Ak;
+   register double            akj;
+   int                        col, piv, row;
+
+   for( piv = N - 1; piv >= 0; piv-- )
+   {
+      Ak = A + piv * LDA;
+      Bk = B + piv * LDB;
+      for( col = 0; col < piv; col++ )
+      {
+         Bj  = B + col * LDB;
+         akj = Ak[col];
+         for( row = 0; row < M; row++ ) { Bj[row] -= akj * Bk[row]; }
+      }
+      for( row = 0; row < M; row++ ) { Bk[row] *= ALPHA; }
+   }
+}
+
+/*
+ * HPL_dtrsm0 returns at once when M or N is zero, zero-fills B when
+ * ALPHA is zero, and otherwise hands the column-major problem to the
+ * kernel selected by SIDE, UPLO, TRANS and DIAG.  Values other than
+ * HplLeft, HplUpper, HplNoTrans and HplNonUnit select, respectively,
+ * the right-side, lower, transposed and unit-diagonal kernels.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsm0
+(
+   const enum HPL_SIDE        SIDE,   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,  const enum HPL_DIAG        DIAG,
+   const int                  M,      const int                  N,
+   const double               ALPHA,
+   const double               * A,    const int                  LDA,
+   double                     * B,    const int                  LDB
+)
+#else
+static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_SIDE        SIDE;
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   double                     * Bj;
+   int                        i, j, key;
+
+   if( ( M == 0 ) || ( N == 0 ) ) return;
+
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ )
+      {
+         Bj = B + j * LDB;
+         for( i = 0; i < M; i++ ) { Bj[i] = HPL_rzero; }
+      }
+      return;
+   }
+/*
+ * Pack the four options into a 4-bit key: bit 3 = left side, bit 2 =
+ * upper triangle, bit 1 = no transpose, bit 0 = non-unit diagonal.
+ */
+   key = 0;
+   if( SIDE  == HplLeft    ) key |= 8;
+   if( UPLO  == HplUpper   ) key |= 4;
+   if( TRANS == HplNoTrans ) key |= 2;
+   if( DIAG  == HplNonUnit ) key |= 1;
+
+   switch( key )
+   {
+      case 15:  /* left,  upper, no transpose, non-unit diagonal */
+         HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case 14:  /* left,  upper, no transpose, unit diagonal     */
+         HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case 13:  /* left,  upper, transposed,   non-unit diagonal */
+         HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case 12:  /* left,  upper, transposed,   unit diagonal     */
+         HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case 11:  /* left,  lower, no transpose, non-unit diagonal */
+         HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case 10:  /* left,  lower, no transpose, unit diagonal     */
+         HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  9:  /* left,  lower, transposed,   non-unit diagonal */
+         HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  8:  /* left,  lower, transposed,   unit diagonal     */
+         HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  7:  /* right, upper, no transpose, non-unit diagonal */
+         HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  6:  /* right, upper, no transpose, unit diagonal     */
+         HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  5:  /* right, upper, transposed,   non-unit diagonal */
+         HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  4:  /* right, upper, transposed,   unit diagonal     */
+         HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  3:  /* right, lower, no transpose, non-unit diagonal */
+         HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  2:  /* right, lower, no transpose, unit diagonal     */
+         HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      case  1:  /* right, lower, transposed,   non-unit diagonal */
+         HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+      default:  /* key 0: right, lower, transposed, unit diagonal */
+         HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB );
+         break;
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_SIDE              SIDE,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dtrsm
+( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_SIDE              SIDE;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsm solves one of the matrix equations
+ *  
+ *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ *  
+ * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+ * non-unit, upper or lower triangular matrix and op(A) is one of
+ *  
+ *    op( A ) = A   or   op( A ) = A^T.
+ *  
+ * The matrix X is overwritten on B.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * SIDE    (local input)                 const enum HPL_SIDE
+ *         On entry, SIDE  specifies  whether  op(A) appears on the left
+ *         or right of X as follows:
+ *            SIDE==HplLeft    op( A ) * X = alpha * B,
+ *            SIDE==HplRight   X * op( A ) = alpha * B.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry, TRANS  specifies  the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANS==HplNoTrans     : op( A ) = A,
+ *            TRANS==HplTrans       : op( A ) = A^T,
+ *            TRANS==HplConjTrans   : op( A ) = A^T.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the  matrix B.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix B.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero then the elements of the matrix B need not
+ *         be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+ *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+ *         k by k upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower on  entry,
+ *         the  leading k by k lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+ *         are not referenced either, but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+ *
+ * B       (local input/output)          double *
+ *         On entry,  B  points  to an array of size equal to or greater
+ *         than LDB * n.  Before entry, the leading  m by n  part of the
+ *         array B must contain the matrix B, except when alpha is zero,
+ *         in which case B need not be set on entry.  On exit, the array
+ *         B is overwritten by the m by n solution matrix.
+ *
+ * LDB     (local input)                 const int
+ *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
+ *         declared  in  the  calling  (sub) program.  LDB  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+   }
+   else /* not column-major: flip SIDE and UPLO, exchange M and N */
+   {
+      HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
+                  ( UPLO == HplLower ? HplUpper : HplLower ),
+                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef StringSunStyle
+#if defined( HPL_USE_F77_INTEGER_DEF )
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,
+                             F77lda = LDA, F77ldb = LDB;
+#else
+#define  F77M                M
+#define  F77N                N
+#define  F77lda              LDA
+#define  F77ldb              LDB
+#endif
+   char                      cside, cuplo, ctran, cdiag;
+
+   if(      TRANS == HplNoTrans ) ctran = 'N';
+   else if( TRANS == HplTrans   ) ctran = 'T';
+   else                           ctran = 'C';
+   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
+
+   if( ORDER == HplColumnMajor )
+   {
+      cside = ( SIDE == HplRight ? 'R' : 'L' );
+      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+   else /* not column-major: flip side and uplo letters, pass N before M */
+   {
+      cside = ( SIDE == HplRight ? 'L' : 'R' );
+      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dtrsm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c
new file mode 100644
index 000000000..99e84f073
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c
@@ -0,0 +1,520 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Column-oriented forward substitution: L * x = b, non-unit diagonal. */
+   const int                  dstep = LDA + 1;  /* step along A's diagonal */
+   int                        col, row, ad = 0, xc = 0, ar, xr;
+   for( col = 0; col < N; col++, ad += dstep, xc += INCX )
+   {
+      const double            xcol = ( X[xc] /= A[ad] );
+      for( row = col+1, ar = ad+1, xr = xc+INCX; row < N; row++, ar++ )
+      { X[xr] -= xcol * A[ar]; xr += INCX; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Column-oriented forward substitution: L * x = b, A's diagonal unread. */
+   const int                  dstep = LDA + 1;  /* step along A's diagonal */
+   int                        col, row, ad = 0, xc = 0, ar, xr;
+   for( col = 0; col < N; col++, ad += dstep, xc += INCX )
+   {
+      const double            xcol = X[xc];
+      for( row = col+1, ar = ad+1, xr = xc+INCX; row < N; row++, ar++ )
+      { X[xr] -= xcol * A[ar]; xr += INCX; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Backward sweep solving L^T * x = b with a non-unit diagonal.        */
+   const int                  dstep = LDA + 1;  /* step along A's diagonal */
+   int                        col, row, ad, xc, ar, xr;
+   double                     acc;
+   for( col = N-1, ad = (N-1)*dstep, xc = (N-1)*INCX; col >= 0; col-- )
+   {
+      acc = X[xc];
+      for( row = col+1, ar = ad+1, xr = xc+INCX; row < N; row++, ar++ )
+      { acc -= A[ar] * X[xr]; xr += INCX; }
+      X[xc] = acc / A[ad]; ad -= dstep; xc -= INCX;
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Backward sweep solving L^T * x = b; the diagonal of A is not read.  */
+   const int                  dstep = LDA + 1;  /* step along A's diagonal */
+   int                        col, row, ad, xc, ar, xr;
+   double                     acc;
+   for( col = N-1, ad = (N-1)*dstep, xc = (N-1)*INCX; col >= 0; col-- )
+   {
+      acc = X[xc];
+      for( row = col+1, ar = ad+1, xr = xc+INCX; row < N; row++, ar++ )
+      { acc -= A[ar] * X[xr]; xr += INCX; }
+      X[xc] = acc; ad -= dstep; xc -= INCX;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Column-oriented back substitution: U * x = b, non-unit diagonal.    */
+   int                        col, row, ac, xc, xr;
+   double                     xcol;
+   for( col = N-1, ac = (N-1)*LDA, xc = (N-1)*INCX; col >= 0; col-- )
+   {
+      xcol = ( X[xc] /= A[ac+col] );
+      for( row = 0, xr = 0; row < col; row++, xr += INCX )
+      { X[xr] -= xcol * A[ac+row]; }
+      ac -= LDA; xc -= INCX;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Column-oriented back substitution: U * x = b, A's diagonal unread.  */
+   int                        col, row, ac, xc, xr;
+   double                     xcol;
+   for( col = N-1, ac = (N-1)*LDA, xc = (N-1)*INCX; col >= 0; col-- )
+   {
+      xcol = X[xc];
+      for( row = 0, xr = 0; row < col; row++, xr += INCX )
+      { X[xr] -= xcol * A[ac+row]; }
+      ac -= LDA; xc -= INCX;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Forward sweep solving U^T * x = b with a non-unit diagonal.         */
+   int                        col, row, ac = 0, xc = 0, xr;
+   double                     acc;
+   for( col = 0; col < N; col++, ac += LDA, xc += INCX )
+   {
+      acc = X[xc];
+      for( row = 0, xr = 0; row < col; row++, xr += INCX )
+      { acc -= A[ac+row] * X[xr]; }
+      X[xc] = acc / A[ac+col];
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   /* Forward sweep solving U^T * x = b; the diagonal of A is not read.   */
+   int                        col, row, ac = 0, xc = 0, xr;
+   double                     acc;
+   for( col = 0; col < N; col++, ac += LDA, xc += INCX )
+   {
+      acc = X[xc];
+      for( row = 0, xr = 0; row < col; row++, xr += INCX )
+      { acc -= A[ac+row] * X[xr]; }
+      X[xc] = acc;
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsv0
+(
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+) 
+#else
+static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+/*
+ * Select the kernel for ( UPLO, TRANS, DIAG );  nothing to do if N is 0.
+ */
+   const int                  upper   = ( UPLO  == HplUpper   );
+   const int                  notrans = ( TRANS == HplNoTrans );
+   const int                  nonunit = ( DIAG  == HplNonUnit );
+
+   if( N == 0 ) return;
+   if( upper && notrans )
+   {
+      if( nonunit ) HPL_dtrsvUNN( N, A, LDA, X, INCX );
+      else          HPL_dtrsvUNU( N, A, LDA, X, INCX );
+   }
+   else if( upper )
+   {
+      if( nonunit ) HPL_dtrsvUTN( N, A, LDA, X, INCX );
+      else          HPL_dtrsvUTU( N, A, LDA, X, INCX );
+   }
+   else if( notrans )
+   {
+      if( nonunit ) HPL_dtrsvLNN( N, A, LDA, X, INCX );
+      else          HPL_dtrsvLNU( N, A, LDA, X, INCX );
+   }
+   else
+   {
+      if( nonunit ) HPL_dtrsvLTN( N, A, LDA, X, INCX );
+      else          HPL_dtrsvLTU( N, A, LDA, X, INCX );
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dtrsv
+( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsv solves one of the systems of equations
+ *  
+ *     A * x = b,   or   A^T * x = b,
+ *  
+ * where b and x are n-element vectors and  A  is an n by n non-unit, or
+ * unit, upper or lower triangular matrix.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies  the equations  to  be  solved as
+ *         follows:
+ *            TRANS==HplNoTrans     A   * x = b,
+ *            TRANS==HplTrans       A^T * x = b.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the order of the matrix A. N must be at
+ *         least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+ *         n by n upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower  on entry,
+ *         the  leading n by n lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,n).
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         Before entry,  the  incremented array  X  must contain  the n
+ *         element right-hand side vector b. On exit,  X  is overwritten
+ *         with the solution vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+   }
+   else
+   {
+      HPL_dtrsv0( ( UPLO  == HplUpper   ? HplLower : HplUpper   ),
+                  ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  DIAG, N, A, LDA, X, INCX );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF 
+   const F77_INTEGER         F77N = N, F77lda = LDA, F77incx = INCX;
+#else
+#define F77N              N
+#define F77lda            LDA
+#define F77incx           INCX
+#endif
+   char                      cuplo, ctran, cdiag;
+/* For any ORDER other than HplColumnMajor, UPLO and TRANS are both flipped. */
+   if( ORDER == HplColumnMajor )
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+   }
+   else
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+   }
+   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
+/* The call form used depends on which String*Style / StringStruct* macro is set. */
+#ifdef StringSunStyle
+   F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
+             IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+   ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+   fuplo = HPL_C2F_CHAR( cuplo );
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructVal
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructPtr
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
+#endif
+
+#endif
+/*
+ * End of HPL_dtrsv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c
new file mode 100644
index 000000000..5ceabdf25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_idamax
+
+#ifdef STDC_HEADERS
+int HPL_idamax
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX
+)
+#else
+int HPL_idamax
+( N, X, INCX )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_idamax returns  the index in an n-vector  x  of the first element
+ * having maximum absolute value.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   return( (int)(cblas_idamax( N, X, INCX )) );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           absxi, smax = HPL_rzero, x0, x1, x2, x3,
+                             x4, x5, x6, x7;
+   const double              * StX;
+   register int              imax = 0, i = 0, j;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* nu = N rounded down to a multiple of 8; StX ends the unrolled scan */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+            /* Strict '>' keeps the earliest index among equal maxima */
+            absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+ 
+            X    += incX8;
+ 
+         } while( X != StX );
+      }
+      /* Remaining N - nu ( fewer than 8 ) elements, one at a time */
+      for( j = N - nu; j != 0; j-- )
+      {
+         x0    = (*X);
+         absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+         i    += 1;
+         X    += INCX;
+      }
+   }
+   return( imax );
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   int                       imax = 0;
+/* NOTE(review): the -1 presumably maps a 1-based Fortran index to 0-based - confirm */
+   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
+   return( imax );
+#endif
+/*
+ * End of HPL_idamax
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c
new file mode 100644
index 000000000..dd03b79b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+ 
+#ifdef STDC_HEADERS
+int HPL_binit_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Nothing to set up when there is no panel or only one process column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#ifndef HPL_USE_MPI_DATATYPE
+/*
+ * No MPI data type: force the copy of the panel into a contiguous buffer.
+ */
+   HPL_copyL( PANEL );
+
+   return( HPL_SUCCESS );
+#else
+#ifdef HPL_COPY_L
+/*
+ * Copy requested: put the panel into a contiguous buffer before packing.
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type; map its status to an HPL code.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message = first entries of the panel's buffers / counts / dtypes arrays */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message = PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process,  then  send message to its two
+ * next neighbors. Otherwise, probe for message. If the message is here,
+ * then receive it,   and  if I am not the last process of the ring,  or
+ * just after the root process, then forward it to the next.  Otherwise,
+ * inform the caller that the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );
+/* The root sends; every other process probes its partner for the panel. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next,
+                          size ), msgid, comm );
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( size > 2 ) && 
+          ( MModSub1( prev, size ) == root ) ) partner = root;
+      else                                     partner = prev;
+/* Non-blocking probe: go is tested below before the blocking receive. */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( prev != root ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel, or with a single process column, there is nothing
+ * to release.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the data type stored in PANEL->dtypes[0];  a failing
+ * MPI_Type_free is reported as HPL_FAILURE.
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c
new file mode 100644
index 000000000..dd5eb2d12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L     /* sorry, that cannot be done: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_binit_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of HPL_packL
+ */
+   int                        ierr;
+#endif
+/* Purpose: initialization step of the 1ring broadcast of PANEL.  A NULL
+ * panel or npcol <= 1 returns HPL_SUCCESS without doing any work.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer (only when HPL_COPY_L is set)
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type; its status decides the result
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+ 
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message arguments come from slot 0 of PANEL's buffers/counts/dtypes */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* Message is PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* Both variants expand against a variable named PANEL at the use site */
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bcast_1ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * comm, rank, size, root, msgid are read from PANEL; go is the probe flag
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, prev, rank, root,
+                              size;
+/* Purpose: one polling step of the 1ring panel broadcast.  The outcome is
+ * stored in *IFLAG and, on every path of this function, also returned.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process, probe for message. If the message is here,
+ * then receive it, and  if I am not the last process of the ring, then
+ * forward it to the next.  Otherwise, inform the caller that the panel
+ * has still not been received.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank,
+                       size ), msgid, comm );
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      /* go != 0 after MPI_Iprobe means the message can be received */
+      ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
+                             comm, &PANEL->status[0] );
+            next = MModAdd1( rank, size );
+            if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
+                                msgid, comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was sent, or received and forwarded, return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */  
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bwait_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of MPI_Type_free
+ */
+   int                        ierr;
+#endif
+/* Purpose: free the MPI data type in PANEL->dtypes[0] (only when built
+ * with HPL_USE_MPI_DATATYPE). A NULL panel or npcol <= 1 is a no-op.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * The MPI_Type_free status is mapped to HPL_SUCCESS / HPL_FAILURE
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c
new file mode 100644
index 000000000..56581ea0d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c
@@ -0,0 +1,236 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L     /* sorry, that cannot be done: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_binit_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of HPL_packL
+ */
+   int                        ierr;
+#endif
+/* Purpose: initialization step of the 2rinM broadcast of PANEL.  A NULL
+ * panel or npcol <= 1 returns HPL_SUCCESS without doing any work.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer (only when HPL_COPY_L is set)
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type; its status decides the result
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+ 
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+ 
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message arguments come from slot 0 of PANEL's buffers/counts/dtypes */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* Both variants expand against a variable named PANEL at the use site */
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bcast_2rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * comm, rank, size, root, msgid are read from PANEL; go is the probe flag
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, roo2, root, size;
+/* Purpose: one polling step of the 2rinM panel broadcast.  The outcome is
+ * stored in *IFLAG and, on every path of this function, also returned.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: root process send to its two right neighbors and mid-pro-
+ * cess. If I am not the root process, probe for message. If the message
+ * is there, then receive it. If I am not the last process of both rings
+ * then forward it to the next.  Otherwise,  inform  the caller that the
+ * panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2 (the mid-process) is root advanced by ( size + 1 ) / 2 via MModAdd */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         if( MModAdd1( next, size ) != roo2 )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE,
+                             MModAdd1( next, size ), msgid, comm );
+         }
+         /* when next+1 is roo2 itself, the single send below covers it */
+         if( ierr == MPI_SUCCESS )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                             comm );
+         }
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( prev == root ) || ( rank == roo2 ) ||
+          ( MModSub1( prev,  size )  == root ) ) partner = root;
+      else                                       partner = prev;
+      /* partner (root or prev) is the only source this process polls */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) && ( prev != root ) &&
+                ( next != roo2        ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was sent, or received and forwarded, return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bwait_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of MPI_Type_free
+ */
+   int                        ierr;
+#endif
+/* Purpose: free the MPI data type in PANEL->dtypes[0] (only when built
+ * with HPL_USE_MPI_DATATYPE). A NULL panel or npcol <= 1 is a no-op.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * The MPI_Type_free status is mapped to HPL_SUCCESS / HPL_FAILURE
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+ 
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c
new file mode 100644
index 000000000..f0e6e2647
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L     /* sorry, that cannot be done: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_binit_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of HPL_packL
+ */
+   int                        ierr;
+#endif
+/* Purpose: initialization step of the 2ring broadcast of PANEL.  A NULL
+ * panel or npcol <= 1 returns HPL_SUCCESS without doing any work.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer (only when HPL_COPY_L is set)
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type; its status decides the result
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+ 
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+ 
+   return( HPL_SUCCESS );
+#endif
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message arguments come from slot 0 of PANEL's buffers/counts/dtypes */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* Both variants expand against a variable named PANEL at the use site */
+#endif
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bcast_2ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * comm, rank, size, root, msgid are read from PANEL; go is the probe flag
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, rank,
+                              roo2, root, size;
+/* Purpose: one polling step of the 2ring panel broadcast.  The outcome is
+ * stored in *IFLAG and, on every path of this function, also returned.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: root process  send to its right neighbor and mid-process.
+ * If I am not the root process,  probe for message.   If the message is
+ * there,  then receive it,  and  if I am not the last process  of  both
+ * rings, then forward it to the next. Otherwise, inform the caller that
+ * the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2 (the mid-process) is root advanced by ( size + 1 ) / 2 via MModAdd */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                          comm );
+      }
+   }
+   else
+   {
+      partner = MModSub1( rank, size );
+      if( ( partner == root ) || ( rank == roo2 ) ) partner = root;
+      /* partner (root or the previous rank) is the only source polled */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( next != roo2 ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was sent, or received and forwarded, return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS            /* ANSI prototype; #else is K&R style */
+int HPL_bwait_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * ierr: return code of MPI_Type_free
+ */
+   int                        ierr;
+#endif
+/* Purpose: free the MPI data type in PANEL->dtypes[0] (only when built
+ * with HPL_USE_MPI_DATATYPE). A NULL panel or npcol <= 1 is a no-op.
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * The MPI_Type_free status is mapped to HPL_SUCCESS / HPL_FAILURE
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c
new file mode 100644
index 000000000..100161152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bcast
+(
+   HPL_T_panel *                    PANEL,
+   int *                            IFLAG
+)
+#else
+int HPL_bcast
+( PANEL, IFLAG )
+   HPL_T_panel *                    PANEL;
+   int *                            IFLAG;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bcast broadcasts the current panel. IFLAG is set to HPL_SUCCESS on
+ * completion, HPL_FAILURE on failure, and HPL_KEEP_TESTING when not yet
+ * complete (call again).  A NULL panel, a single process column,  or an
+ * unknown topology does no communication and sets IFLAG to HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * IFLAG   (output)                      int *
+ *         On exit,  IFLAG  indicates  whether  or not the broadcast has
+ *         occurred. It is written on every path through this function.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * ierr: value returned to the caller;  top: selected broadcast topology
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+/* default: keep *IFLAG in step with the HPL_SUCCESS return value */
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break;
+      case HPL_1RING   : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break;
+      case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break;
+      case HPL_2RING   : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break;
+      case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
+      case HPL_BLONG   : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
+      default          : ierr = *IFLAG = HPL_SUCCESS;            break;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_bcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c
new file mode 100644
index 000000000..3daf72b7d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_binit
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_binit
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_binit initializes a row broadcast for the selected topology and
+ * returns its error code (HPL_SUCCESS when there is nothing to do).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.  A NULL pointer is accepted and is a no-op.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/*
+ * Nothing to initialize without a panel or with a single process column
+ */
+   if( PANEL == NULL || PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS );
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_binit_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_binit_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_binit_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_binit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c
new file mode 100644
index 000000000..5fa221937
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c
@@ -0,0 +1,445 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#define   I_SEND    0 /* send-side index in PANEL->buffers/counts/dtypes */
+#define   I_RECV    1 /* recv-side index in PANEL->buffers/counts/dtypes */
+
+#ifdef STDC_HEADERS
+int HPL_binit_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Initialization step paired with HPL_bcast_blonM.  Nothing is done
+ * when no panel is given or when the process row has a single column;
+ * HPL_SUCCESS is returned in every case.
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+/*
+ * Copy the panel into a contiguous buffer,  unless MPI data types are
+ * in use and the copy of L was not requested through HPL_COPY_L
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+      HPL_copyL( PANEL );
+#endif
+   }
+/*
+ * Done
+ */
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Cast phase: whole panel sent by the root to the next process column */
+#define   _M_BUFF_S1        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S1       PANEL->counts[I_SEND]
+#define   _M_TYPE_S1        PANEL->dtypes[I_SEND]
+/* Spread phase: piece sent to the partner */
+#define   _M_BUFF_S2        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S2       PANEL->counts[I_SEND]
+#define   _M_TYPE_S2        PANEL->dtypes[I_SEND]
+/* Cast phase: whole panel received from the root */
+#define   _M_BUFF_R1        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R1       PANEL->counts[I_RECV]
+#define   _M_TYPE_R1        PANEL->dtypes[I_RECV]
+/* Spread phase: piece received from the partner */
+#define   _M_BUFF_R2        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R2       PANEL->counts[I_RECV]
+#define   _M_TYPE_R2        PANEL->dtypes[I_RECV]
+/* Roll phase: piece sent to the neighbor */
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+/* Roll phase: piece received from the neighbor */
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+
+#else
+/* No MPI data types: messages are MPI_DOUBLE ranges of PANEL->L2 */
+#define   _M_BUFF_S1        (void *)(PANEL->L2)
+#define   _M_COUNT_S1       PANEL->len
+#define   _M_TYPE_S1        MPI_DOUBLE
+/* Spread phase send: lbuf entries starting at offset ibuf */
+#define   _M_BUFF_S2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S2       lbuf
+#define   _M_TYPE_S2        MPI_DOUBLE
+/* Cast phase receive: the whole panel (PANEL->len entries) */
+#define   _M_BUFF_R1        (void *)(PANEL->L2)
+#define   _M_COUNT_R1       PANEL->len
+#define   _M_TYPE_R1        MPI_DOUBLE
+/* Spread phase receive: lbuf entries starting at offset ibuf */
+#define   _M_BUFF_R2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R2       lbuf
+#define   _M_TYPE_R2        MPI_DOUBLE
+/* Roll phase: lbufS entries sent from ibufS, lbufR received at ibufR */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blonM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blonM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcast PANEL in its process row; *IFLAG mirrors the return value.
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf,
+                              ibufR, ibufS, dummy=0, indx, ip2=1, k, l,
+                              lbuf, lbufR, lbufS, mask=1, msgid, mydist,
+                              mydist2, next, npm1, npm2, partner, prev,
+                              rank, root, size;
+/*
+ * Nothing to broadcast without a panel or with one process column
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  root process  sends to its right neighbor,  then spread
+ * the panel on the other npcol - 2 processes.  If  I  am just after the
+ * root process, receive the whole panel from it and return.  Otherwise,
+ * take part in spreading on those npcol - 2 processes.  Probing for the
+ * message is currently disabled (go stays 1),  so  HPL_KEEP_TESTING  is
+ * never returned by the code as it stands.
+ */
+   comm = PANEL->grid->row_comm; rank  = PANEL->grid->mycol;
+   root = PANEL->pcol;           msgid = PANEL->msgid;
+   prev = MModSub1( rank, size );
+ 
+   if( rank == root )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   HPL_packL( PANEL, 0, PANEL->len, I_SEND );
+#endif
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1,
+                             MModAdd1( rank, size ), msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+   else if( prev == root )
+   {
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ *
+ *    ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] );
+ */
+      if( ierr == MPI_SUCCESS )
+      {                                  /* if panel is here, proceed */
+         if( go != 0 )
+         {
+#ifdef HPL_USE_MPI_DATATYPE
+            ierr =      HPL_packL( PANEL, 0, PANEL->len, I_RECV );
+#endif
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1,
+                                  root, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+      }
+   }
+/*
+ * if I am just after the root, exit now. The message receive  completed
+ * successfully, this guy is done. If there are only 2 processes in each
+ * row of processes, we are done as well.
+ */
+   if( ( prev == root ) || ( size == 2 ) )
+   {
+      *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+      return( *IFLAG );
+   }
+/*
+ * Otherwise, proceed with broadcast -  Spread  the panel across process
+ * columns
+ */
+   npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len;
+/* ip2 = largest power of 2 <= npm2 (npm2 >= 1 here), mask = 2*ip2 - 1 */
+   k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   if( rank == root ) mydist2 = ( mydist = 0 );
+   else   mydist2 = ( mydist  = MModSub( rank, root, size ) - 1 );
+
+   indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 );
+ 
+   do
+   {
+      mask ^= ip2;
+
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+
+         partner = mydist ^ ip2;
+
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+  
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                   msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < npm1 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size );
+   if( MModSub1( prev, size ) == root ) prev = root;
+   next = MModAdd1( rank, size );
+   if( rank == root ) next = MModAdd1( next, size );
+
+   for( k = 0; k < npm2; k++ )
+   {
+      l = ( k >> 1 );
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+ 
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+ 
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Nothing is waited for here: HPL_SUCCESS is returned in all cases
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   { return( HPL_SUCCESS ); }
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c
new file mode 100644
index 000000000..e57f11bcc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c
@@ -0,0 +1,363 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#define   I_SEND    0 /* send-side index in PANEL->buffers/counts/dtypes */
+#define   I_RECV    1 /* recv-side index in PANEL->buffers/counts/dtypes */
+
+#ifdef STDC_HEADERS
+int HPL_binit_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Initialization step paired with HPL_bcast_blong.  Nothing is done
+ * when no panel is given or when the process row has a single column;
+ * HPL_SUCCESS is returned in every case.
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+/*
+ * Copy the panel into a contiguous buffer,  unless MPI data types are
+ * in use and the copy of L was not requested through HPL_COPY_L
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+      HPL_copyL( PANEL );
+#endif
+   }
+/*
+ * Done
+ */
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Spread phase: piece sent to the partner */
+#define   _M_BUFF_S         PANEL->buffers[I_SEND]
+#define   _M_COUNT_S        PANEL->counts[I_SEND]
+#define   _M_TYPE_S         PANEL->dtypes[I_SEND]
+/* Spread phase: piece received from the partner */
+#define   _M_BUFF_R         PANEL->buffers[I_RECV]
+#define   _M_COUNT_R        PANEL->counts[I_RECV]
+#define   _M_TYPE_R         PANEL->dtypes[I_RECV]
+/* Roll phase: piece sent to the neighbor */
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+/* Roll phase: piece received from the neighbor */
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+ 
+#else
+/* No MPI data types: messages are MPI_DOUBLE ranges of PANEL->L2 */
+#define   _M_BUFF_S         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S        lbuf
+#define   _M_TYPE_S         MPI_DOUBLE
+/* Spread phase receive: lbuf entries starting at offset ibuf */
+#define   _M_BUFF_R         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R        lbuf
+#define   _M_TYPE_R         MPI_DOUBLE
+/* Roll phase send: lbufS entries starting at offset ibufS */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+/* Roll phase receive: lbufR entries starting at offset ibufR */
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blong
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blong( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcast PANEL in its process row; *IFLAG mirrors the return value.
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, dummy=0, ierr=MPI_SUCCESS,
+                              ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf,
+                              lbufR, lbufS, mask, msgid, mydist, mydist2,
+                              next, npm1, partner, prev, rank, root, size;
+/*
+ * Nothing to broadcast without a panel or with one process column
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process,  receive my piece from my partner and keep
+ * spreading in a blocking fashion.  The probing  that  would  let  this
+ * function return  HPL_KEEP_TESTING  is currently disabled (see the #if
+ * 0 block below), so a call either completes the broadcast or fails.
+ */
+   comm    = PANEL->grid->row_comm;  rank  = PANEL->grid->mycol;
+   mask    = PANEL->grid->col_mask;  ip2   = PANEL->grid->col_ip2m1;
+   root    = PANEL->pcol;            msgid = PANEL->msgid;
+   COUNT   = PANEL->len;             npm1  = size - 1;
+   mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2;
+   count   = COUNT / size; count = Mmax( count, 1 );
+/*
+ * Spread the panel across process columns
+ */
+   do
+   {
+      mask ^= ip2;
+ 
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+ 
+         partner = mydist ^ ip2;
+ 
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired (re-enabling it also requires declaring `go').
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                     msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( root, partner, size );
+ 
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+ 
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size ); next = MModAdd1( rank, size );
+
+   for( k = 0; k < npm1; k++ )
+   {
+      l = ( k >> 1 ); 
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Nothing is waited for here: HPL_SUCCESS is returned in all cases
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   { return( HPL_SUCCESS ); }
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c
new file mode 100644
index 000000000..a2e0f4df8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bwait
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_bwait
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bwait waits  for  the  row  broadcast  of  the current  panel  to
+ * terminate.  Successful completion is indicated by the returned  error
+ * code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..  (returns at once when npcol <= 1)
+ */
+   if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS );
+/*
+ * Retrieve the selected virtual broadcast topology and dispatch on it
+ */
+   top = PANEL->algo->btopo;
+
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_bwait_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_bwait_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_bwait_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;   /* unlisted topology */
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_bwait
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c
new file mode 100644
index 000000000..04f765a6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_copyL
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_copyL
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_copyL copies  the  panel of columns, the L1 replicated submatrix,
+ * the pivot array  and  the info scalar into a contiguous workspace for
+ * later broadcast.
+ *  
+ * The copy of this panel  into  a contiguous buffer  can be enforced by
+ * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        jb, lda;
+/* ..
+ * .. Executable Statements ..  (nothing is done unless mycol == pcol)
+ */
+   if( PANEL->grid->mycol == PANEL->pcol )
+   {
+      jb = PANEL->jb; lda = PANEL->lda;
+/* myrow == prow: mp-jb rows, Mptr row offset jb; otherwise mp rows, 0 */
+      if( PANEL->grid->myrow == PANEL->prow )
+      {
+         HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ),
+                     lda, PANEL->L2, PANEL->ldl2 );
+      }
+      else
+      {
+         HPL_dlacpy( PANEL->mp,    jb, Mptr( PANEL->A,  0, -jb, lda ),
+                     lda, PANEL->L2, PANEL->ldl2 );
+      }
+   }
+/*
+ * End of HPL_copyL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c
new file mode 100644
index 000000000..8a70ef83d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c
@@ -0,0 +1,245 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_packL
+(
+   HPL_T_panel *                    PANEL,
+   const int                        INDEX,
+   const int                        LEN,
+   const int                        IBUF
+)
+#else
+int HPL_packL
+( PANEL, INDEX, LEN, IBUF )
+   HPL_T_panel *                    PANEL;
+   const int                        INDEX;
+   const int                        LEN;
+   const int                        IBUF;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_packL forms  the MPI data type for the panel to be broadcast.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * MPI_SUCCESS.  Without HPL_USE_MPI_DATATYPE it only returns that code.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * INDEX   (input)                       const int
+ *         On entry,  INDEX  is the offset of the first entry of the
+ *         packed buffer being broadcast.
+ *
+ * LEN     (input)                       const int
+ *         On entry, LEN is the length of the packed buffer in doubles.
+ *
+ * IBUF    (input)                       const int
+ *         On entry, IBUF  specifies the panel buffer/count/type entries
+ *         that should be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+#ifndef HPL_COPY_L
+   MPI_Datatype               * type = NULL;
+   void                       * * * bufs = NULL;
+   double                     * A;
+   int                        * blen = NULL;
+   MPI_Aint                   * disp = NULL;
+   int                        curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1,
+                              jb, jbm, jbp1, lda, len, m, m1, nbufs;
+#else
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_COPY_L
+/*
+ * Panel + L1 + DPIV  have been copied into a contiguous buffer - Create
+ * and commit a contiguous data type
+ */
+   PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX);
+   PANEL->counts [IBUF] = 1;
+
+   ierr =      MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+   return( ierr );
+#else
+/*
+ * Panel is not contiguous (because of LDA and also L1 + DPIV) -  Create
+ * and commit a struct data type
+ */
+   jbp1 = ( jb = PANEL->jb ) + 1;
+/*
+ * Temporaries to create the type struct.
+ */
+   bufs = (void     * * *)malloc( jbp1 * sizeof( void * *     ) );
+   blen = (int          *)malloc( jbp1 * sizeof( int          ) );
+   disp = (MPI_Aint     *)malloc( jbp1 * sizeof( MPI_Aint     ) );
+   type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) );
+ 
+   if( ( bufs != NULL ) && ( blen != NULL ) &&
+       ( disp != NULL ) && ( type != NULL ) )
+   {
+      m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow );
+      if( curr != 0 ) m -= jb;
+/* m is PANEL->mp, reduced by jb when myrow == prow (curr != 0) */
+      len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m;
+ 
+      if( ( m > 0 ) && ( ibuf < jbm ) )
+      {
+/*
+ * Retrieve proper pointers depending on process row and column
+ */
+         if( PANEL->grid->mycol == PANEL->pcol )
+         {
+            lda = PANEL->lda;
+            if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); }
+            else            { A = Mptr( PANEL->A,  0, -jb, lda ); }
+         }
+         else { lda = PANEL->ldl2; A = PANEL->L2; }
+/*
+ * Pack the first (partial) column of L: j1 = ibuf / m, i1 = ibuf % m
+ */
+         m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m );
+         m1 = Mmin( len, m1 );
+ 
+         bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda ));
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = m1;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+         nbufs++; len -= m1; j1++; ibuf += m1;
+/*
+ * Pack the remaining columns of L
+ */
+         while( ( len > 0 ) && ( j1 < jb ) )
+         {
+            m1 = Mmin( len, m );
+ 
+            bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda ));
+            type[nbufs] = MPI_DOUBLE;
+            blen[nbufs] = m1;
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+            nbufs++; len -= m1; j1++; ibuf += m1;
+         }
+      }
+/*
+ * Pack L1, DPIV, DINFO
+ */
+      if( len > 0 )
+      {                                            /* L1, DPIV, DINFO */
+         bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm);
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = len;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+         nbufs++;
+      }
+/* Displacements become relative to the first buffer's address */
+      for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0;
+ 
+      PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1;
+/*
+ * construct the struct type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_create_struct( nbufs, blen, disp, type,
+                                   &PANEL->dtypes[IBUF] );
+/*
+ * release temporaries
+ */
+      if( bufs ) free( bufs );
+      if( blen ) free( blen );
+      if( disp ) free( disp );
+      if( type ) free( type );
+/*
+ * commit the type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+      return( ierr );
+   }
+   else
+   {
+/*
+ * Memory allocation failed -> abort
+ */
+      HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" );
+      return( MPI_SUCCESS );    /* never executed (hopefully ...) */
+   }
+#endif
+#else
+          /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug
+             somewhere, so, just in case  and until I find it ... */
+   return( MPI_SUCCESS );   
+#endif
+/*
+ * End of HPL_packL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c
new file mode 100644
index 000000000..ff426891c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c
@@ -0,0 +1,142 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_recv
+(
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              SRC,
+   int                              RTAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_recv
+( RBUF, RCOUNT, SRC, RTAG, COMM )
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              SRC;
+   int                              RTAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_recv is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * HPL_SUCCESS.  In the case of messages of length less than or equal to
+ * zero, this function returns immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * SRC     (local input)                 int
+ *         On entry, SRC  specifies the rank of the  sending  process in
+ *         the communication space defined by COMM.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+#endif
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT <= 0 ) return( HPL_SUCCESS );
+
+#ifdef HPL_USE_MPI_DATATYPE  /* undefined at the top of this file */
+   ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM,
+                         &status );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_free( &type );
+#else
+   ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG,
+                    COMM, &status );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_recv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c
new file mode 100644
index 000000000..0b2363563
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c
@@ -0,0 +1,239 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_sdrv
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              STAG,
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              RTAG,
+   int                              PARTNER,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_sdrv
+( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              STAG;
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              RTAG;
+   int                              PARTNER;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is
+ * to allow for some experimentation and tuning of this simple function.
+ * Messages  of  length  less than  or  equal to zero  are not sent  nor
+ * received.  Successful completion  is  indicated by the returned error
+ * code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number  of double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG  specifies the message tag to be used for the
+ *         sending communication operation.
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  specifies the message tag to be used for the
+ *         receiving communication operation.
+ *
+ * PARTNER (local input)                 int
+ *         On entry,  PARTNER  specifies  the rank of the  collaborative
+ *         process in the communication space defined by COMM.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type[2];
+#endif
+   MPI_Request                request;
+   MPI_Status                 status;
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT > 0 )
+   {
+      if( SCOUNT > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER,
+                                RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER,
+                               STAG, COMM );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[1] );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                                PARTNER, RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE,
+                               PARTNER, STAG, COMM );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#endif
+      }
+      else
+      {
+/*
+ * Blocking receive
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG,
+                               COMM, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+         ierr =      MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                               PARTNER, RTAG, COMM, &status );
+#endif
+      }
+   }
+   else if( SCOUNT > 0 )
+   {
+/*
+ * Blocking send
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+      ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG,
+                          COMM );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &type[1] );
+#else
+      ierr =      MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER,
+                            STAG, COMM );
+#endif
+   }
+   else { ierr = MPI_SUCCESS; }
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_sdrv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c
new file mode 100644
index 000000000..9e9868594
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_send
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              DEST,
+   int                              STAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_send
+( SBUF, SCOUNT, DEST, STAG, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              DEST;
+   int                              STAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_send is a simple wrapper around  MPI_Send.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * HPL_SUCCESS.  In the case of messages of length less than or equal to
+ * zero, this function returns HPL_SUCCESS immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number of  double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * DEST    (local input)                 int
+ *         On entry, DEST specifies the rank of the receiving process in
+ *         the communication space defined by COMM.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+#endif
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( SCOUNT <= 0 ) return( HPL_SUCCESS );
+
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_free( &type );
+#else
+   ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); 
+/*
+ * End of HPL_send
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp
new file mode 100644
index 000000000..644503181
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp
@@ -0,0 +1,310 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+#define NUMBER_OF_STREAMS 4
+#define CHUNK_SIZE 512
+#define NN 64
+#define NM 128
+#define ERRCODE(e) (-(__LINE__ * 1000 + (e)))
+//#define DEVICE_DEBUG
+//#ifdef MPI
+//#include <mpi.h>
+//#endif
+
+
+#define _GNU_SOURCE
+
+#define CUDA_ERROR_CHECK
+#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
+#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <array>
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/times.h>
+#include <sys/time.h>
+
+#include <iostream>
+#include <chrono>
+#include <dpct/blas_utils.hpp>
+
+#include "mkl.h"
+
+extern "C" {
+
+inline void __cudaSafeCall(dpct::err0 err, const char *file, const int line)
+{
+    #ifdef CUDA_ERROR_CHECK
+    // Empty in this version: err, file and line are accepted but never inspected.
+#endif
+    // No-op either way, so a code passed in through CudaSafeCall() is dropped.
+    return;
+}
+
+inline void __cudaCheckError(const char *file, const int line) try {
+#ifdef CUDA_ERROR_CHECK
+        /*
+        DPCT1010:1: SYCL uses exceptions to report errors and does not use the
+        error codes. The call was replaced with 0. You need to rewrite this
+        code.
+        */
+        dpct::err0 err = 0;
+        // Wait on the queues of the current device (queues_wait_and_throw).
+        // More careful checking. However, this will affect performance.
+        // Comment away if needed. err is assigned here but never read.
+        err = DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw());
+        // NOTE(review): if DPCT_CHECK_ERROR traps exceptions itself, the handler below cannot fire for this call -- confirm.
+#endif
+    // The handler below reports its own __FILE__/__LINE__, not the file/line arguments.
+    return;
+}
+catch (sycl::exception const &exc) {
+  std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+            << ", line:" << __LINE__ << std::endl;
+  std::exit(1);
+}
+
+    void dpcpp_dgemm 
+        (   const int ORDER,
+            const int TRANSA,   const int TRANSB,
+            const int M,        const int N,        const int K,       
+            const double ALPHA, const double *A,    const int LDA,
+            const double *B,    const int LDB,      const double BETA,    
+            double *C,          const int LDC);
+    // Triangular-solve entry point; the unnamed parameters are M, N, ALPHA, A, LDA, B, LDB in the definition further down.
+    void dpcpp_dtrsm(
+       int HPL_ORDER,
+       int HPL_SIDE,
+       int HPL_UPLO,
+       int HPL_TRANS,
+       int HPL_DIAG,
+       const int,
+       const int,
+       const double,
+       const double *,
+       const int,
+       double *,
+       const int);
+}
+
+
+// dpcpp_dgemm computes C := ALPHA*A*B + BETA*C for column-major operands
+// without transposition; ORDER, TRANSA and TRANSB are accepted but not read.
+// Problems with N < NN, M < NM or K < 128 are handed to cblas_dgemm on the
+// host; larger ones are staged through device memory for oneMKL gemm.
+void dpcpp_dgemm 
+(   const int ORDER,   const int TRANSA,    const int TRANSB,       
+    const int M,       const int N,         const int K,       
+    const double ALPHA,const double *A,     const int LDA,
+    const double *B,   const int LDB,       
+    const double BETA, double *C,         const int LDC)
+{
+   dpct::device_ext &dev_ct1 = dpct::get_current_device();
+   sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+
+    if ((M==0)||(K==0)||(N==0)){
+	    return;
+    }
+
+    if ( (N) < NN || (M) < NM || (K) < 128){ 
+         #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on CPU" << std::endl; 
+         #endif
+         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,  M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+          return;
+    }    
+
+    #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on GPU" << std::endl; 
+    #endif
+
+    // Element counts are formed in size_t: the int products K * LDA, N * LDB
+    // and N * LDC overflow when an operand holds more than INT_MAX elements.
+    const size_t elemsA = static_cast<size_t>(K) * static_cast<size_t>(LDA);
+    const size_t elemsB = static_cast<size_t>(N) * static_cast<size_t>(LDB);
+    const size_t elemsC = static_cast<size_t>(N) * static_cast<size_t>(LDC);
+    double *devPtrA, *devPtrB, *devPtrC;
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrA = sycl::malloc_device<double>(elemsA, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrA, &A[0], elemsA * sizeof(double)).wait()));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrB = sycl::malloc_device<double>(elemsB, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrB, &B[0], elemsB * sizeof(double)).wait()));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrC = sycl::malloc_device<double>(elemsC, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrC, &C[0], elemsC * sizeof(double)).wait()));
+
+    dev_ct1.queues_wait_and_throw();
+    oneapi::mkl::blas::column_major::gemm(
+        *dpct::get_current_device().get_saved_queue(),
+        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M,
+        N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC)
+        .wait();
+    dev_ct1.queues_wait_and_throw();
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(&C[0], devPtrC, elemsC * sizeof(double)).wait()));
+    dev_ct1.queues_wait_and_throw();
+    sycl::free(devPtrA, q_ct1);
+    sycl::free(devPtrB, q_ct1);
+    sycl::free(devPtrC, q_ct1);
+}
+  
+// dpcpp_dtrsm solves A*X = ALPHA*B in place in B, treating A as column-major,
+// lower-triangular, unit-diagonal and applied from the left; ORDER, SIDE, UPLO,
+// TRANS and DIAG are accepted but not read. Problems with M < 512 or N < 2*M
+// go to cblas_dtrsm on the host; larger ones are staged for oneMKL trsm.
+void dpcpp_dtrsm
+(  const int ORDER,           const int SIDE,
+   const int UPLO,            const int TRANS,
+   const int DIAG,            const int M,       const int N,
+   const double ALPHA,    const double* A,  const int LDA,       double* B,
+   const int LDB)
+{
+   dpct::device_ext &dev_ct1 = dpct::get_current_device();
+   sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+
+    if ((M==0)||(N==0)){
+        return;
+    }
+
+    if ( (M) < 512 || (N) < 2*(M)){
+        #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on CPU" << std::endl; 
+        #endif
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on GPU" << std::endl; 
+    #endif
+
+    // Element counts are formed in size_t: the int products M * LDA and
+    // N * LDB overflow when an operand holds more than INT_MAX elements.
+    const size_t elemsA = static_cast<size_t>(M) * static_cast<size_t>(LDA);
+    const size_t elemsB = static_cast<size_t>(N) * static_cast<size_t>(LDB);
+    double *devPtrA, *devPtrB;
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrA = sycl::malloc_device<double>(elemsA, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrA, A, elemsA * sizeof(double)).wait()));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrB = sycl::malloc_device<double>(elemsB, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrB, B, elemsB * sizeof(double)).wait()));
+    dev_ct1.queues_wait_and_throw();
+
+    oneapi::mkl::blas::column_major::trsm(
+        *dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left,
+        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
+        oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB)
+        .wait();
+    dev_ct1.queues_wait_and_throw();
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(B, devPtrB, elemsB * sizeof(double)).wait()));
+    dev_ct1.queues_wait_and_throw();
+    sycl::free(devPtrA, q_ct1);
+    sycl::free(devPtrB, q_ct1);
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce performs   a   global   reduce  operation  across  all
+ * processes of a group leaving the results on all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry,  BUFFER  points to  the  buffer to be combined.  On
+ *         exit, this array contains the combined data and  is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffer operands.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         On entry, OP is a pointer to the local combine function.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr;   /* status returned by HPL_reduce */
+/* ..
+ * .. Executable Statements ..
+ */
+   hplerr = HPL_reduce(   BUFFER, COUNT, DTYPE, OP, 0, COMM );   /* 5th arg 0: presumably the root rank */
+   if( hplerr != MPI_SUCCESS ) return( hplerr );   /* reduce failed: return its code, no broadcast */
+   return( HPL_broadcast( BUFFER, COUNT, DTYPE,     0, COMM ) );   /* broadcast BUFFER from ROOT 0 */
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_barrier blocks the caller until all process members have called it.
+ * The  call  returns  at any process  only after all group members have
+ * entered the call.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i=0;   /* dummy one-integer payload for the broadcast below */
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) );   /* implemented as a 1-int broadcast from ROOT 0; its status is returned */
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_broadcast
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_broadcast
+( BUFFER, COUNT, DTYPE, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_broadcast broadcasts  a message from the process with rank ROOT to
+ * all processes in the group.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be broadcast. On
+ *         exit, this array contains the broadcast data and is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffer operands.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the coordinate of the source process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, 
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+   MPI_Status                 status;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );   /* empty message: nothing to transfer */
+   mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr );   /* at most one process: done */
+   mpierr = MPI_Comm_rank( COMM, &rank );
+
+   kk = size - 1;   /* loop below leaves ip2 = largest power of two <= size-1 and mask = 2*ip2-1 */
+   while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   mydist = MModSub( rank, ROOT, size );   /* presumably (rank - ROOT) mod size; macro is defined elsewhere */
+
+   do
+   {
+      mask ^= ip2;                   /* clear the ip2 bit: mask now covers only the bits below ip2 */
+      if( ( mydist & mask ) == 0 )   /* take part in this step only if those lower bits are all zero */
+      {
+         partner = mydist ^ ip2;     /* relative rank differing from mydist in the ip2 bit only */
+
+         if( mydist & ip2 )          /* ip2 bit set: this process receives in this step */
+         {
+            partner = MModAdd( ROOT, partner, size );   /* presumably maps the relative rank back to a rank in COMM */
+            mpierr  = MPI_Recv(  BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                 partner, tag, COMM, &status );
+         }
+         else if( partner < size )   /* ip2 bit clear: send, provided the partner exists */
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                partner, tag, COMM );
+         }
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;   /* mpierr keeps its previous value when neither branch ran */
+      }
+      ip2 >>= 1;                     /* move on to the next lower bit */
+   } while( ip2 );
+
+   return( hplerr );   /* MPI_SUCCESS, or the most recently recorded failure code */
+/*
+ * End of HPL_broadcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c
new file mode 100644
index 000000000..f0d00b065
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_exit
+(
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_exit
+( GRID )
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_exit marks  the process  grid object for  deallocation.  The
+ * returned  error  code  MPI_SUCCESS  indicates  successful completion.
+ * Other error codes are (MPI) implementation dependent.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid to be released.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr = MPI_SUCCESS, mpierr;   /* hplerr: value returned; mpierr: latest MPI status */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->all_comm != MPI_COMM_NULL )   /* free the three communicators only when all_comm is not null */
+   {
+      mpierr = MPI_Comm_free( &(GRID->row_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;   /* record the failure but keep freeing the others */
+      mpierr = MPI_Comm_free( &(GRID->col_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      mpierr = MPI_Comm_free( &(GRID->all_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+   }
+
+   GRID->order   = HPL_COLUMN_MAJOR;   /* the remaining fields are reset unconditionally */
+
+   GRID->iam     = GRID->myrow    = GRID->mycol     = -1;
+   GRID->nprow   = GRID->npcol    = GRID->nprocs    = -1;
+
+   GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1;
+   GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1;
+
+   return( hplerr );   /* MPI_SUCCESS, or the code of the last MPI_Comm_free that failed */
+/*
+ * End of HPL_grid_exit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c
new file mode 100644
index 000000000..95c5a7315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_info
+(
+   const HPL_T_grid *               GRID,
+   int *                            NPROW,
+   int *                            NPCOL,
+   int *                            MYROW,
+   int *                            MYCOL
+)
+#else
+int HPL_grid_info
+( GRID, NPROW, NPCOL, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   int *                            NPROW;
+   int *                            NPCOL;
+   int *                            MYROW;
+   int *                            MYCOL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_info returns  the grid shape and the coordinates in the grid
+ * of the calling process.  Successful  completion  is  indicated by the
+ * returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+ * implementation.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NPROW   (global output)               int *
+ *         On exit,   NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global output)               int *
+ *         On exit,   NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * MYROW   (global output)               int *
+ *         On exit,  MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (global output)               int *
+ *         On exit,  MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Executable Statements ..
+ */
+   *NPROW = GRID->nprow; *NPCOL = GRID->npcol;   /* plain copies of the fields stored in GRID */
+   *MYROW = GRID->myrow; *MYCOL = GRID->mycol;
+   return( MPI_SUCCESS );   /* no MPI call is made here, so this is the only value returned */
+/*
+ * End of HPL_grid_info
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c
new file mode 100644
index 000000000..52111ac52
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_init
+(
+   MPI_Comm                         COMM,
+   const HPL_T_ORDER                ORDER,
+   const int                        NPROW,
+   const int                        NPCOL,
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_init
+( COMM, ORDER, NPROW, NPCOL, GRID )
+   MPI_Comm                         COMM;
+   const HPL_T_ORDER                ORDER;
+   const int                        NPROW;
+   const int                        NPCOL;
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_init creates a NPROW x NPCOL  process  grid using column- or
+ * row-major ordering from an initial collection of processes identified
+ * by an  MPI  communicator.  Successful  completion is indicated by the
+ * returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+ * implementation. The coordinates of processes that are not part of the
+ * grid are set to values outside of [0..NPROW) x [0..NPCOL).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         On entry,  COMM  is  the  MPI  communicator  identifying  the
+ *         initial  collection  of  processes out of which  the  grid is
+ *         formed.
+ *
+ * ORDER   (global input)                const HPL_T_ORDER
+ *         On entry, ORDER specifies how the processes should be ordered
+ *         in the grid as follows:
+ *            ORDER = HPL_ROW_MAJOR    row-major    ordering;
+ *            otherwise (HPL_COLUMN_MAJOR) column-major ordering;
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid to be created. NPROW must be at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid to be created. NPCOL must be at least one.
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information to be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hdim, hplerr=MPI_SUCCESS, ierr, ip2, k,
+                              mask, mycol, myrow, nprocs, rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size );
+/*
+ * Abort if illegal process grid (nprocs > size, or NPROW / NPCOL < 1)
+ */
+   nprocs = NPROW * NPCOL;
+   if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) )
+   { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); }
+/*
+ * Map rank to (myrow, mycol) using row- or column-major ordering
+ */
+   if( ORDER == HPL_ROW_MAJOR )
+   {
+      GRID->order = HPL_ROW_MAJOR;
+      myrow = rank / NPCOL; mycol = rank - myrow * NPCOL;
+   }
+   else
+   {
+      GRID->order = HPL_COLUMN_MAJOR;
+      mycol = rank / NPROW; myrow = rank - mycol * NPROW;
+   }
+   GRID->iam   = rank;  GRID->myrow = myrow; GRID->mycol  = mycol;
+   GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs;
+/*
+ * row_ip2   : largest power of two <= nprow;
+ * row_hdim  : log2( row_ip2 ), the row_ip2 procs hypercube dim;
+ * row_ip2m1 : largest power of two <= nprow-1 (1 if nprow is 1);
+ * row_mask  : 2 * row_ip2m1 - 1, the row_ip2m1 procs hypercube mask;
+ */
+   hdim = 0; ip2 = 1; k = NPROW;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->row_ip2 = ip2; GRID->row_hdim = hdim; 
+
+   mask = ip2 = 1;    k = NPROW - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->row_ip2m1 = ip2; GRID->row_mask = mask; 
+/*
+ * col_ip2   : largest power of two <= npcol;
+ * col_hdim  : log2( col_ip2 ), the col_ip2 procs hypercube dim;
+ * col_ip2m1 : largest power of two <= npcol-1 (1 if npcol is 1);
+ * col_mask  : 2 * col_ip2m1 - 1, the col_ip2m1 procs hypercube mask;
+ */
+   hdim = 0; ip2 = 1; k = NPCOL;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->col_ip2 = ip2; GRID->col_hdim = hdim; 
+
+   mask = ip2 = 1;    k = NPCOL - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->col_ip2m1 = ip2; GRID->col_mask = mask; 
+/*
+ * Build all_comm; ranks >= nprocs pass MPI_UNDEFINED and leave (row_comm
+ * and col_comm untouched) on MPI_COMM_NULL. Then split rows and columns.
+ */
+   ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ),
+                          rank, &(GRID->all_comm) );
+   if( GRID->all_comm == MPI_COMM_NULL ) return( ierr );
+
+   ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   return( hplerr );
+/*
+ * End of HPL_grid_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c
new file mode 100644
index 000000000..002aabe01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_max
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_max
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_max combines (max) two buffers element-wise: for each i in [0..N),
+ * INOUT[i] is replaced by Mmax( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies  the  length  of  the  buffers  to  be
+ *         combined. N must be at least zero.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  input-output  buffer  to be
+ *         combined.  On exit,  the  entries of this array  contain  the
+ *         combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects int (HPL_INT) or, otherwise, double.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * a = (const int *)(IN);
+      int             * b = (int *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+   else
+   {
+      const double    * a = (const double *)(IN);
+      double          * b = (double *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+/*
+ * End of HPL_max
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c
new file mode 100644
index 000000000..a99e5e58a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_min
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_min
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_min combines (min) two buffers element-wise: for each i in [0..N),
+ * INOUT[i] is replaced by Mmin( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies  the  length  of  the  buffers  to  be
+ *         combined. N must be at least zero.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  input-output  buffer  to be
+ *         combined.  On exit,  the  entries of this array  contain  the
+ *         combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects int (HPL_INT) or, otherwise, double.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * a = (const int *)(IN);
+      int             * b = (int *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] );
+   }
+   else
+   {
+      const double    * a = (const double *)(IN);
+      double          * b = (double *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] );
+   }
+/*
+ * End of HPL_min
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c
new file mode 100644
index 000000000..c80885b9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pnum
+(
+   const HPL_T_grid *               GRID,
+   const int                        MYROW,
+   const int                        MYCOL
+)
+#else
+int HPL_pnum
+( GRID, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   const int                        MYROW;
+   const int                        MYCOL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pnum determines  the  rank  of a  process  as a function  of  its
+ * coordinates in the grid. No range check is done on the coordinates.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information (order, nprow and npcol are read).
+ *
+ * MYROW   (local input)                 const int
+ *         On entry,  MYROW  specifies the row coordinate of the process
+ *         whose rank is to be determined. MYROW must be greater than or
+ *         equal to zero and less than NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry,  MYCOL  specifies  the  column  coordinate  of  the
+ *         process whose rank is to be determined. MYCOL must be greater
+ *         than or equal to zero and less than NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Linearize (MYROW, MYCOL); any order but HPL_ROW_MAJOR is column-major
+ */
+   if( GRID->order == HPL_ROW_MAJOR )
+      return( MYROW * GRID->npcol + MYCOL );
+   else
+      return( MYCOL * GRID->nprow + MYROW );
+/*
+ * End of HPL_pnum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c
new file mode 100644
index 000000000..417c21163
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_reduce
+( BUFFER, COUNT, DTYPE, OP, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_reduce performs a global reduce operation across all processes of
+ * a group. Note that BUFFER is also used as a work array: in processes
+ * other than the accumulating one its data may hold partial results.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+ *         exit,  and  in process of rank  ROOT  this array contains the
+ *         reduced data.  This  buffer  is also used as workspace during
+ *         the operation in the other processes of the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         If COUNT is not positive, the routine returns immediately.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry, DTYPE selects int (HPL_INT) or, otherwise, double.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         Local combine function, OP( COUNT, received, BUFFER, DTYPE ).
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank in COMM of the accumulating process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+   void                       * buffer = NULL;
+   int                        hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0,
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+/* ..
+ * .. Executable Statements (d = bit length of size-1 = number of rounds)
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( size  == 1 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   i = size - 1; while( i > 1 ) { i >>= 1; d++; }
+
+   if( DTYPE == HPL_INT )
+      buffer = (void *)( (int *)   malloc( (size_t)(COUNT) * 
+                                           sizeof( int    ) ) );
+   else
+      buffer = (void *)( (double *)malloc( (size_t)(COUNT) *
+                                           sizeof( double ) ) );
+
+   if( !( buffer ) )
+   { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); }
+
+   if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 )
+   {
+      do
+      {
+         mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                            MModAdd( ROOT, ip2, size ), tag, COMM,
+                            &status );
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         OP( COUNT, buffer, BUFFER, DTYPE );
+         ip2 <<= 1; d--;
+      } while( d );
+   }
+   else
+   {
+      do
+      {
+         if( ( mydist & mask ) == 0 )
+         {
+            partner = mydist ^ ip2;
+
+            if( mydist & ip2 )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                  partner, tag, COMM );
+            }
+            else if( partner < size )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr  = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                   partner, tag, COMM, &status );
+               OP( COUNT, buffer, BUFFER, DTYPE );
+            }
+            if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         }
+         mask ^= ip2; ip2 <<= 1; d--;
+      } while( d );
+   }
+   if( buffer ) free( buffer );
+
+   return( hplerr );
+/*
+ * End of HPL_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c
new file mode 100644
index 000000000..34cf87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_sum
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_sum
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_sum adds the buffer IN, entry by entry, into the buffer INOUT.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  is the number of entries held by each buffer.
+ *         When N is not positive, INOUT is left unchanged.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the read-only operand buffer.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points to the accumulation buffer.  On exit,
+ *         each of its N leading entries has been incremented by the
+ *         entry of IN found at the same position.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects the entry type:  HPL_INT means int,
+ *         any other value is handled as double.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        left;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      for( left = N; left > 0; left-- ) { *dst += *src; dst++; src++; }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      for( left = N; left > 0; left-- ) { *dst += *src; dst++; src++; }
+   }
+/*
+ * End of HPL_sum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c
new file mode 100644
index 000000000..757dad242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_disp
+(
+   HPL_T_panel * *                  PANEL
+)
+#else
+int HPL_pdpanel_disp
+( PANEL )
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_disp releases the panel resources  (HPL_pdpanel_free),  then
+ * frees the panel structure itself and resets *PANEL to NULL. It returns
+ * the code of HPL_pdpanel_free, or MPI_SUCCESS when *PANEL is NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry,  PANEL  points  to  the  address  of the panel data
+ *         structure to be deallocated.  On exit, *PANEL is NULL.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        mpierr = MPI_SUCCESS;
+/*
+ * HPL_pdpanel_free dereferences the panel, so it must only be reached
+ * when there is a panel to dispose of: test *PANEL before, not after.
+ */
+   if( *PANEL != NULL )
+   {
+      mpierr = HPL_pdpanel_free( *PANEL );
+      free( *PANEL ); *PANEL = NULL;
+   }
+   return( mpierr );
+/*
+ * End of HPL_pdpanel_disp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c
new file mode 100644
index 000000000..38b5b0d97
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c
@@ -0,0 +1,104 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_free
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_pdpanel_free
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_free  stores the error code of the panel factorization in
+ * the matrix descriptor (unless one is already set), frees WORK and IWORK
+ * and resets them, and the pointers carved out of WORK, to NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points  to  the  panel data  structure from
+ *         which the resources should be deallocated.
+ *
+ * ---------------------------------------------------------------------
+ */
+   if( PANEL->DINFO && PANEL->pmat->info == 0 )
+      PANEL->pmat->info = *(PANEL->DINFO);
+#ifdef HPL_CALL_VSIPL
+/* Release the blocks */
+   (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE );
+   (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE );
+   if( PANEL->grid->nprow > 1 )
+      (void) vsip_blockrelease_d( PANEL->Ublock,  VSIP_TRUE );
+/* Destroy blocks */
+   vsip_blockdestroy_d( PANEL->L1block );
+   vsip_blockdestroy_d( PANEL->L2block );
+   if( PANEL->grid->nprow > 1 )
+      vsip_blockdestroy_d( PANEL->Ublock );
+#endif
+
+/*
+ * Clear what is freed:  a repeated call must neither double-free nor
+ * read DINFO from released memory.  L2 may alias A and is left as is.
+ */
+   if( PANEL->WORK  ) { free( PANEL->WORK  ); PANEL->WORK  = NULL; }
+   if( PANEL->IWORK ) { free( PANEL->IWORK ); PANEL->IWORK = NULL; }
+   PANEL->L1 = NULL; PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL;
+
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_pdpanel_free
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c
new file mode 100644
index 000000000..9e35c7fb4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c
@@ -0,0 +1,348 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that:  force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_init
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdpanel_init
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_init initializes a panel data structure and allocates its
+ * WORK and IWORK workspaces; a failed allocation calls HPL_pabort.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry, JB specifies the number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information, which is filled in on exit.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   size_t                     dalign;
+   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
+                              ml2, mp, mycol, myrow, nb, npcol, nprow,
+                              nq, nu;
+/* ..
+ * .. Executable Statements ..
+ */
+   PANEL->grid    = GRID;                  /* ptr to the process grid */
+   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
+   PANEL->pmat    = A;                 /* ptr to the local array info */
+
+   myrow = GRID->myrow; mycol = GRID->mycol;
+   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
+
+   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
+                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
+   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
+   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
+                                         /* ptr to trailing part of A */
+   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
+/*
+ * Workspace pointers are initialized to NULL.
+ */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+/*
+ * Local lengths, indexes, process coordinates
+ */
+   PANEL->nb      = nb;               /* distribution blocking factor */
+   PANEL->jb      = JB;                                /* panel width */
+   PANEL->m       = M;      /* global # of rows of trailing part of A */
+   PANEL->n       = N;      /* global # of cols of trailing part of A */
+   PANEL->ia      = IA;     /* global row index of trailing part of A */
+   PANEL->ja      = JA;     /* global col index of trailing part of A */
+   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
+   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
+   PANEL->ii      = ii;      /* local row index of trailing part of A */
+   PANEL->jj      = jj;      /* local col index of trailing part of A */
+   PANEL->lda     = A->ld;            /* local leading dim of array A */
+   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
+   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
+   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
+/*
+ * Initialize  ldl2 and len to temporary dummy values;  both are set to
+ * their final values below.
+ */
+   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
+   PANEL->len     = 0;           /* length of the buffer to broadcast */
+/*
+ * Figure out the exact amount of workspace  needed by the factorization
+ * and the update - Allocate that space - Finish the panel data structu-
+ * re initialization.
+ *
+ * L1:    JB x JB in all processes
+ * DPIV:  JB      in all processes
+ * DINFO: 1       in all processes
+ *
+ * We make sure that those three arrays are contiguous in memory for the
+ * later panel broadcast.  We  also  choose  to put this amount of space
+ * right after  L2 (when it exists) so that one can receive a contiguous
+ * buffer.
+ */
+   dalign = ALGO->align * sizeof( double );
+
+   if( npcol == 1 )                             /* P x 1 process grid */
+   {                                     /* space for L1, DPIV, DINFO */
+      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
+      if( nprow > 1 )                                 /* space for U */
+      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * 
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure  -  Always re-use A in
+ * the only process column
+ */
+      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
+      PANEL->ldl2  = A->ld;
+      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->DPIV  = PANEL->L1    + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
+   }
+   else
+   {                                 /* space for L2, L1, DPIV, DINFO */
+      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
+      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
+#ifdef HPL_COPY_L
+      lwork = ALGO->align + PANEL->len;
+#else
+      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
+#endif
+      if( nprow > 1 )                                 /* space for U */
+      { 
+         nu = ( mycol == icurcol ? nq - JB : nq );
+         lwork += JB * Mmax( 0, nu );
+      }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure - Re-use A in the cur-
+ * rent process column when HPL_COPY_L is not defined.
+ */
+#ifdef HPL_COPY_L
+      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->ldl2  = Mmax( 1, ml2 );
+      PANEL->L1    = PANEL->L2 + ml2 * JB;
+#else
+      if( mycol == icurcol )
+      {
+         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
+         PANEL->ldl2 = A->ld;
+         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
+      }
+      else
+      {
+         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
+         PANEL->ldl2 = Mmax( 1, ml2 );
+         PANEL->L1   = PANEL->L2 + ml2 * JB;
+      } 
+#endif
+      PANEL->DPIV  = PANEL->L1   + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
+   }
+#ifdef HPL_CALL_VSIPL
+   PANEL->Ablock  = A->block;
+/*
+ * Create blocks and bind them to the data pointers
+ */
+   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
+                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
+   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
+                                      (vsip_length)(PANEL->ldl2*JB),
+                                      VSIP_MEM_NONE );
+   if( nprow > 1 )
+   { 
+      nu = ( mycol == icurcol ? nq - JB : nq );
+      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
+                                        (vsip_length)(JB * Mmax( 0, nu )),
+                                        VSIP_MEM_NONE );
+   }
+   else { PANEL->Ublock = A->block; }
+#endif
+/*
+ * If nprow is 1, we just allocate an array of JB integers for the swap.
+ * When nprow > 1, we allocate the space for the index arrays immediate-
+ * ly. The exact size of this array depends on the swapping routine that
+ * will be used, so we allocate the maximum:
+ *
+ *    IWORK[0] is of size at most 1      +
+ *    IPL      is of size at most 1      +
+ *    IPID     is of size at most 4 * JB +
+ *
+ *    For HPL_pdlaswp00:
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       llen     is of size at most NPROW  +
+ *       llen_sv  is of size at most NPROW.
+ *
+ *    For HPL_pdlaswp01:
+ *       ipA      is of size at most 1      +
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       iplen    is of size at most NPROW  + 1 +
+ *       ipmap    is of size at most NPROW  +
+ *       ipmapm1  is of size at most NPROW  +
+ *       permU    is of size at most JB     +
+ *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
+ *
+ * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
+ *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ *
+ * We use the first entry of this work array  to indicate  whether  the
+ * local  index arrays  have already been computed,  and if yes,  by
+ * which function:
+ *    IWORK[0] = -1: no index arrays have been computed so far;
+ *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
+ *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
+ * This avoids some redundant and useless computations.
+ */
+   if( nprow == 1 ) { lwork = JB; }
+   else             
+   {
+      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
+      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
+   }
+
+   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+
+   if( PANEL->IWORK == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
+                       /* Initialize the first entry of the workarray */
+   *(PANEL->IWORK) = -1;
+/*
+ * End of HPL_pdpanel_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c
new file mode 100644
index 000000000..1dbd8a18f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c
@@ -0,0 +1,152 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_new
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel * *                  PANEL
+)
+#else
+void HPL_pdpanel_new
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_new creates and initializes a panel data structure.
+ * It mallocs the structure and hands it to HPL_pdpanel_init for setup.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry,  JB  specifies the number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry,  PANEL  points to the location that  receives  the
+ *         address of the newly created panel  (*PANEL is set on exit).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * p = NULL;          /* the new panel */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Allocate the panel structure - call HPL_pabort if malloc fails
+ */
+   if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) )
+   {
+      HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" );
+   }
+/* Initialize the new panel, then return its address through PANEL */
+   HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p );
+   *PANEL = p;
+/*
+ * End of HPL_pdpanel_new
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c
new file mode 100644
index 000000000..7ad5a1a99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth: a power of 2 <= 32 equal to 1 << LOG2_DEPTH
+ */
+#ifndef HPL_LASWP00N_DEPTH
+#define    HPL_LASWP00N_DEPTH       32
+#define    HPL_LASWP00N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp00N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp00N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp00N performs a series of local row interchanges on a matrix
+ * A. One row interchange is initiated for rows 0 through M-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the number of rows of the array A to be
+ *         interchanged. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies  the number of columns of the array A.
+ *         N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an array of dimension (LDA,N) to which
+ *         the row interchanges will be  applied.  On exit, the permuted
+ *         matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry,  IPIV  is  an  array of size  M  that  contains the
+ *         pivoting information.  For k in [0..M),  IPIV[k] = l  implies
+ *         that local rows k and l are interchanged (no-op when l == k).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register double            r;           /* swap temporary */
+   double                     * a0, * a1;  /* cursors in rows i and ip */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP00N_LOG2_DEPTH );
+   int                        ip, nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of DEPTH, nr = N - nu leftovers */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
+                          << HPL_LASWP00N_LOG2_DEPTH ) );
+/* Unrolled part: DEPTH columns per pass, A then advances by LDA*DEPTH */
+   for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#if ( HPL_LASWP00N_DEPTH >  1 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  2 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  4 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  8 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH > 16 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+         }
+      }
+   }
+/* Clean-up: the nr columns left over (A now points at column nu) */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+            for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA )
+            { r = *a0; *a0 = *a1; *a1 = r; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c
new file mode 100644
index 000000000..786d1eff4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c
@@ -0,0 +1,209 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth: a power of 2 <= 32 equal to 1 << LOG2_DEPTH
+ */
+#ifndef HPL_LASWP01N_DEPTH
+#define    HPL_LASWP01N_DEPTH      32
+#define    HPL_LASWP01N_LOG2_DEPTH  5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01N copies  scattered rows  of  A  into itself  and into an
+ * array  U.  The row offsets in  A  of the source rows are specified by
+ * LINDXA.  The destinations of those rows  are specified by LINDXAU.  A
+ * nonnegative value of LINDXAU selects U as the destination array,  and
+ * a negative value selects A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). The rows
+ *         of A specified by LINDXA are copied into this array  U  at the
+ *         positions indicated by nonnegative values of LINDXAU.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied to.  This array also contains the local row offsets in
+ *         A  where  some of the rows of  A  should be moved to.  If the
+ *         value of LINDXAU[i] is nonnegative,  the row  LINDXA[i]  of A
+ *         is  copied into  U  at the position  LINDXAU[i];  otherwise,
+ *         the row  LINDXA[i]  of  A  is  moved   to   the   position
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;  /* source / destination cursors */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP01N_LOG2_DEPTH );
+   int                        lda1, nu, nr; /* lda1: ld of the destination */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of DEPTH, nr = N - nu leftovers */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) <<
+                            HPL_LASWP01N_LOG2_DEPTH ) );
+/* Unrolled part: DEPTH columns of each listed row are copied per pass */
+   for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; }
+         /* negate first: A - (size_t)(negative int) relies on wraparound */
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#if ( HPL_LASWP01N_DEPTH >  1 )
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  2 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  4 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  8 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH > 16 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+      }
+   }
+/* Clean-up: same copy for the nr columns left after the unrolled loop */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; }
+         for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp01N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c
new file mode 100644
index 000000000..429cfb6f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c
@@ -0,0 +1,252 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor, applied only if HPL_LASWP01T_DEPTH is unset.
+ * An override must also define HPL_LASWP01T_LOG2_DEPTH = log2( DEPTH ).
+ */
+#ifndef HPL_LASWP01T_DEPTH
+#define    HPL_LASWP01T_DEPTH       32
+#define    HPL_LASWP01T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01T copies  scattered rows  of  A  into itself  and into an
+ * array U.  The row offsets in  A  of the source rows  are specified by
+ * LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+ * non-negative value of LINDXAU[i] selects U as the destination array,
+ * and A otherwise. Rows of A are stored as columns in U.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M). The rows
+ *         of A specified by  LINDXA  are copied into  this array  U  at
+ *         the positions given by the non-negative values of LINDXAU. The
+ *         rows of A are stored as columns in U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of  U  where the rows of A should be
+ *         copied to. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A value of
+ *         LINDXAU[i] >= 0  indicates  that the row  LINDXA[i]  of  A is
+ *         copied into column LINDXAU[i] of U  (offset LINDXAU[i]*LDU);
+ *         otherwise the row LINDXA[i] of A is moved to the row offset
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  incA, incU: advance of A and U per unrolled chunk
+ */
+   double                     * a0, * a1;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH );
+   int                        nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) <<
+                            HPL_LASWP01T_LOG2_DEPTH ) );
+/* Unrolled part: HPL_LASWP01T_DEPTH entries of each row per outer iteration */
+   for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         /* LINDXAU[i] >= 0: chunk is stored contiguously in U; else in A */
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+
+            a1[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            a1[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA;
+            a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA;
+            a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA;
+            a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA;
+            a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA;
+            a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA;
+            a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA;
+            a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA;
+            a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA;
+            a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA;
+            a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA;
+            a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA;
+#endif
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            /* NOTE(review): LINDXAU[i] < 0 here; cast relies on wraparound */
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+         }
+      }
+   }
+/* Leftover part: the remaining nr entries of each row, one at a time */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; }
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp01T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c
new file mode 100644
index 000000000..45c2f5f1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c
@@ -0,0 +1,205 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor, applied only if HPL_LASWP02N_DEPTH is unset.
+ * An override must also define HPL_LASWP02N_LOG2_DEPTH = log2( DEPTH ).
+ */
+#ifndef HPL_LASWP02N_DEPTH
+#define    HPL_LASWP02N_DEPTH       32
+#define    HPL_LASWP02N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp02N
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         W0,
+   double *                         W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp02N
+( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         W0;
+   double *                         W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp02N packs scattered rows of an array  A  into workspace  W.
+ * The row offsets in A are specified by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         copied into W. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         copied into W. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be copied into W.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * W0      (local input/output)          double *
+ *         On exit,  W0  is  an array of size (M-1)*LDW+1.  For  i  in
+ *         [0..M),  W0[i*LDW]  holds  LINDXAU[i]  stored as a double: the
+ *         destination offset in U where column i of W should be copied.
+ *
+ * W       (local output)                double *
+ *         On entry, W  is an array of size (LDW,M). On exit, W contains
+ *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+ *         in W(:,i).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied into W.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M  that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the rows of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  A0 walks A by incA elements per unrolled chunk
+ */
+   const double               * A0 = A, * a0;
+   double                     * w0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP02N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* Store LINDXAU[i], converted to double, in W0[i*LDW] for each i in [0..M) */
+   for( i = 0; i < M; i++ ) 
+      *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]);
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) <<
+                          HPL_LASWP02N_LOG2_DEPTH ) );
+/* Unrolled part: HPL_LASWP02N_DEPTH entries of each row per outer iteration */
+   for( j = 0; j < nu;
+        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+
+         w0[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP02N_DEPTH >  1 )
+         w0[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  2 )
+         w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  4 )
+         w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA;
+         w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  8 )
+         w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA;
+         w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA;
+         w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA;
+         w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH > 16 )
+         w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA;
+         w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA;
+         w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA;
+         w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA;
+         w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA;
+         w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA;
+         w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA;
+         w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA;
+#endif
+      }
+   }
+/* Leftover part: the remaining nr entries of each row, one at a time */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp02N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03N.c
new file mode 100644
index 000000000..760732a8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03N.c
@@ -0,0 +1,194 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor, applied only if HPL_LASWP03N_DEPTH is unset.
+ * An override must also define HPL_LASWP03N_LOG2_DEPTH = log2( DEPTH ).
+ */
+#ifndef HPL_LASWP03N_DEPTH
+#define    HPL_LASWP03N_DEPTH       32
+#define    HPL_LASWP03N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03N
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03N
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03N copies columns of  W  into  rows  of an  array  U.  The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N).  Columns
+ *         of W are copied as rows within this array U at  the positions
+ *         specified in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the row of U at offset W0(i*LDW).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  incU: advance of U per unrolled chunk; w walks W
+ */
+   const double               * w = W, * w0;
+   double                     * u0;
+   const int                  incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP03N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) <<
+                          HPL_LASWP03N_LOG2_DEPTH ) );
+/* Unrolled part: HPL_LASWP03N_DEPTH entries of each W column per iteration */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         /* W0[i*LDW] is a double; the cast on u0 turns it into an offset */
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP03N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+/* Leftover part: the remaining nr entries of each W column, one at a time */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03T.c
new file mode 100644
index 000000000..fece692ce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03T.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP03T_DEPTH
+#define    HPL_LASWP03T_DEPTH       32
+#define    HPL_LASWP03T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03T
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03T
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03T scatters M columns of W into the array U.  Every column
+ * of W holds N contiguous entries and is written to the column of U whose
+ * index is read from W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the number of columns of W that are copied into
+ *         U. The routine returns immediately when M is not positive.
+ *
+ * N       (local input)                 const int
+ *         On entry, N is the number of contiguous entries copied for each
+ *         column. The routine returns immediately when N is not positive.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of leading dimension LDU.  On
+ *         exit, the first N entries of each column named by W0 are set.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * W0      (local input)                 const double *
+ *         On entry, W0 holds the destination indexes, stored as doubles
+ *         with stride LDW: for i in [0..M), W0(i*LDW) converted to an
+ *         integer is the column of U that receives column i of W.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W is an array of dimension (LDW,M) holding the data
+ *         to copy: entries W(0:N-1,i) go to column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * The copy proceeds in chunks of HPL_LASWP03T_DEPTH entries handled by an
+ * unrolled body; what is left after the last full chunk uses a plain loop.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * wchunk = W, * wc;
+   double                     * uc;
+   const int                  stepU = ( 1 << HPL_LASWP03T_LOG2_DEPTH );
+   int                        nbulk, ntail;
+   register int               c, k;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !( ( M > 0 ) && ( N > 0 ) ) ) return;
+
+   nbulk = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
+                  HPL_LASWP03T_LOG2_DEPTH );
+   ntail = N - nbulk;
+
+   for( k = 0; k < nbulk; k += HPL_LASWP03T_DEPTH )
+   {
+      for( c = 0; c < M; c++ )
+      {
+         uc = U + (size_t)( W0[(size_t)(c) * (size_t)(LDW)] ) * (size_t)(LDU);
+         wc = wchunk + (size_t)(c) * (size_t)(LDW);
+         uc[ 0] = wc[ 0];
+#if ( HPL_LASWP03T_DEPTH >  1 )
+         uc[ 1] = wc[ 1];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  2 )
+         uc[ 2] = wc[ 2]; uc[ 3] = wc[ 3];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  4 )
+         uc[ 4] = wc[ 4]; uc[ 5] = wc[ 5]; uc[ 6] = wc[ 6]; uc[ 7] = wc[ 7];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  8 )
+         uc[ 8] = wc[ 8]; uc[ 9] = wc[ 9]; uc[10] = wc[10]; uc[11] = wc[11];
+         uc[12] = wc[12]; uc[13] = wc[13]; uc[14] = wc[14]; uc[15] = wc[15];
+#endif
+#if ( HPL_LASWP03T_DEPTH > 16 )
+         uc[16] = wc[16]; uc[17] = wc[17]; uc[18] = wc[18]; uc[19] = wc[19];
+         uc[20] = wc[20]; uc[21] = wc[21]; uc[22] = wc[22]; uc[23] = wc[23];
+         uc[24] = wc[24]; uc[25] = wc[25]; uc[26] = wc[26]; uc[27] = wc[27];
+         uc[28] = wc[28]; uc[29] = wc[29]; uc[30] = wc[30]; uc[31] = wc[31];
+#endif
+      }
+      U += stepU; wchunk += HPL_LASWP03T_DEPTH;
+   }
+
+   if( ntail <= 0 ) return;
+
+   /* entries left over after the last full chunk */
+   for( c = 0; c < M; c++ )
+   {
+      uc = U + (size_t)( W0[(size_t)(c) * (size_t)(LDW)] ) * (size_t)(LDU);
+      wc = wchunk + (size_t)(c) * (size_t)(LDW);
+      for( k = 0; k < ntail; k++ ) { uc[k] = wc[k]; }
+   }
+/*
+ * End of HPL_dlaswp03T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04N.c
new file mode 100644
index 000000000..4f9c490a5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04N.c
@@ -0,0 +1,285 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04N_DEPTH
+#define    HPL_LASWP04N_DEPTH       32
+#define    HPL_LASWP04N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04N
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04N
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04N saves M0 rows of U into rows of A,  then overwrites those
+ * rows of U with columns of W.  Columns M0 .. M1-1 of W are additionally
+ * copied into the rows of U designated by W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 is the number of rows of U that are first copied
+ *         into A and then replaced by the leading M0 columns of W.  M0
+ *         must be at least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, columns M0 .. M1-1 of W are copied into rows of U;
+ *         no such copy takes place when M1 <= M0.
+ *
+ * N       (local input)                 const int
+ *         On entry, N is the number of entries moved for each row.  The
+ *         routine returns immediately when N is not positive.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). Its rows
+ *         LINDXAU(i) and W0(i*LDW) are overwritten with columns of W.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M1).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         row LINDXA(i) of A holds the former contents of row LINDXAU(i)
+ *         of U, for i in [0..M0).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry, W0 holds row indexes stored as doubles: for i in
+ *         [M0..M1), the entry W0(i*LDW) converted to an integer is the
+ *         row of U that receives column i of W.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W is an array of leading dimension LDW.  For i in
+ *         [0..M0), column W(:,i) replaces row LINDXAU(i) of U; for i in
+ *         [M0..M1), column W(:,i) is copied into row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M0 containing the
+ *         local row indexes of A into which rows of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU is an array of dimension M0 containing the
+ *         local row indexes of U that are copied into A and afterwards
+ *         replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * wchunk = W, * wc;
+   double                     * ar, * ur;
+   const int                  stepA = (int)( (unsigned int)(LDA) <<
+                                             HPL_LASWP04N_LOG2_DEPTH ),
+                              stepU = (int)( (unsigned int)(LDU) <<
+                                             HPL_LASWP04N_LOG2_DEPTH );
+   int                        nbulk, ntail;
+   register int               m, k;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N <= 0 ) || ( ( M0 <= 0 ) && ( M1 <= 0 ) ) ) return;
+
+   nbulk = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
+                  HPL_LASWP04N_LOG2_DEPTH );
+   ntail = N - nbulk;
+
+   for( k = 0; k < nbulk; k += HPL_LASWP04N_DEPTH )
+   {
+      for( m = 0; m < M0; m++ )
+      {
+         ar = A + (size_t)(LINDXA[m]);
+         ur = U + (size_t)(LINDXAU[m]);
+         wc = wchunk + (size_t)(m) * (size_t)(LDW);
+         *ar = *ur; ar += LDA; *ur = wc[ 0]; ur += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *ar = *ur; ar += LDA; *ur = wc[ 1]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *ar = *ur; ar += LDA; *ur = wc[ 2]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[ 3]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *ar = *ur; ar += LDA; *ur = wc[ 4]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[ 5]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[ 6]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[ 7]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *ar = *ur; ar += LDA; *ur = wc[ 8]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[ 9]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[10]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[11]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[12]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[13]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[14]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[15]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *ar = *ur; ar += LDA; *ur = wc[16]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[17]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[18]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[19]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[20]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[21]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[22]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[23]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[24]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[25]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[26]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[27]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[28]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[29]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[30]; ur += LDU;
+         *ar = *ur; ar += LDA; *ur = wc[31]; ur += LDU;
+#endif
+      }
+
+      for( m = M0; m < M1; m++ )
+      {
+         ur = U + (size_t)( W0[(size_t)(m) * (size_t)(LDW)] );
+         wc = wchunk + (size_t)(m) * (size_t)(LDW);
+
+         *ur = wc[ 0]; ur += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *ur = wc[ 1]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *ur = wc[ 2]; ur += LDU; *ur = wc[ 3]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *ur = wc[ 4]; ur += LDU; *ur = wc[ 5]; ur += LDU;
+         *ur = wc[ 6]; ur += LDU; *ur = wc[ 7]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *ur = wc[ 8]; ur += LDU; *ur = wc[ 9]; ur += LDU;
+         *ur = wc[10]; ur += LDU; *ur = wc[11]; ur += LDU;
+         *ur = wc[12]; ur += LDU; *ur = wc[13]; ur += LDU;
+         *ur = wc[14]; ur += LDU; *ur = wc[15]; ur += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *ur = wc[16]; ur += LDU; *ur = wc[17]; ur += LDU;
+         *ur = wc[18]; ur += LDU; *ur = wc[19]; ur += LDU;
+         *ur = wc[20]; ur += LDU; *ur = wc[21]; ur += LDU;
+         *ur = wc[22]; ur += LDU; *ur = wc[23]; ur += LDU;
+         *ur = wc[24]; ur += LDU; *ur = wc[25]; ur += LDU;
+         *ur = wc[26]; ur += LDU; *ur = wc[27]; ur += LDU;
+         *ur = wc[28]; ur += LDU; *ur = wc[29]; ur += LDU;
+         *ur = wc[30]; ur += LDU; *ur = wc[31]; ur += LDU;
+#endif
+      }
+      A += stepA; U += stepU; wchunk += HPL_LASWP04N_DEPTH;
+   }
+
+   if( ntail == 0 ) return;
+
+   for( m = 0; m < M0; m++ )
+   {
+      ar = A + (size_t)(LINDXA[m]);
+      ur = U + (size_t)(LINDXAU[m]);
+      wc = wchunk + (size_t)(m) * (size_t)(LDW);
+      for( k = 0; k < ntail; k++ )
+      { *ar = *ur; ar += LDA; *ur = wc[k]; ur += LDU; }
+   }
+
+   for( m = M0; m < M1; m++ )
+   {
+      ur = U + (size_t)( W0[(size_t)(m) * (size_t)(LDW)] );
+      wc = wchunk + (size_t)(m) * (size_t)(LDW);
+      for( k = 0; k < ntail; k++ ) { *ur = wc[k]; ur += LDU; }
+   }
+/*
+ * End of HPL_dlaswp04N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04T.c
new file mode 100644
index 000000000..9cbb4c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04T.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04T_DEPTH
+#define    HPL_LASWP04T_DEPTH       32
+#define    HPL_LASWP04T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04T
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04T
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
+ * columns of U with columns of W. In addition, columns M0 .. M1-1 of W are
+ * copied into the columns of U designated by W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of columns of U that are
+ *         copied into A and replaced by columns of W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, columns M0 .. M1-1 of W are copied into U;  no such
+ *         copy takes place when M1 <= M0.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that are
+ *         copied into rows of A. Nothing is done when N is not positive.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,*) holding
+ *         the columns that are copied into rows of A and overwritten.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         row LINDXA(i) of A holds the former contents of the column
+ *         LINDXAU(i) of U, for i in [0..M0).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry, W0 holds column indexes stored as doubles: for i in
+ *         [M0..M1), W0(i*LDW) converted to an integer is the column of
+ *         U that receives column i of W.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W is an array of leading dimension LDW.  For i in
+ *         [0..M0), W(:,i) replaces column LINDXAU(i) of U;  for i in
+ *         [M0..M1), W(:,i) is copied into column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which columns of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU is an array of dimension M0 containing the
+ *         local column indexes of U that are copied into A and replaced.
+ *
+ * Every element offset is formed in size_t arithmetic so that products
+ * such as LINDXAU(i)*LDU cannot overflow a 32-bit int on large arrays.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const size_t               incA = (size_t)(LDA) << HPL_LASWP04T_LOG2_DEPTH,
+                              incU = (size_t)(1) << HPL_LASWP04T_LOG2_DEPTH,
+                              ldu = (size_t)(LDU), ldw = (size_t)(LDW);
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
+                          HPL_LASWP04T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04T_DEPTH )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * ldw;
+         u0 = U + (size_t)(LINDXAU[i]) * ldu;
+         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
+         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
+         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
+         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
+         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
+         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
+         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
+         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
+         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
+         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
+         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
+         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
+         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
+         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
+         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
+         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
+         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
+         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
+         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
+         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
+         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
+         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
+         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
+         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
+         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
+         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
+#endif
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0 + (size_t)(i) * ldw)) * ldu;
+         w0 = w + (size_t)(i) * ldw;
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+
+   if( nr <= 0 ) return;
+   for( i = 0; i < M0; i++ )
+   {
+      a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * ldw;
+      u0 = U + (size_t)(LINDXAU[i]) * ldu;
+      for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
+   }
+   for( i = M0; i < M1; i++ )
+   {
+      u0 = U + (size_t)(*(W0 + (size_t)(i) * ldw)) * ldu;
+      w0 = w + (size_t)(i) * ldw;
+      for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+   }
+/*
+ * End of HPL_dlaswp04T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05N.c
new file mode 100644
index 000000000..3edcf91a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05N.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP05N_DEPTH
+#define    HPL_LASWP05N_DEPTH       32
+#define    HPL_LASWP05N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05N copies the rows of U listed in LINDXAU into the rows of
+ * A listed in LINDXA. It returns immediately when M <= 0 or N <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of U that should be
+ *         copied into A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points to an array of dimension  (LDU,N).  This
+ *         array contains the rows that are to be copied into A.  U is
+ *         only read by this routine.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local row indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables (incA, incU: LDA, LDU shifted left by LOG2_DEPTH) ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP05N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH;  nr = N - nu.    */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
+                            HPL_LASWP05N_LOG2_DEPTH ) );
+/* Unrolled part: j, A and U0 advance by DEPTH columns on every pass.  */
+   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+/* Copy one entry, then step a0 by LDA and u0 by LDU (next column).    */
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP05N_DEPTH >  1 )
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  2 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  4 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  8 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH > 16 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Remainder: A and U0 now address the first of the nr leftover columns. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c
new file mode 100644
index 000000000..0adaa102d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c
@@ -0,0 +1,196 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP05T_DEPTH
+#define    HPL_LASWP05T_DEPTH       32
+#define    HPL_LASWP05T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05T copies the columns of U listed in LINDXAU into the rows
+ * of A listed in LINDXA. It returns immediately when M <= 0 or N <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the number of columns of U to copy into A (M >= 0).
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that will
+ *         be copied into rows of A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.  U is only read by this routine.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables (incA: LDA << LOG2_DEPTH, incU: 1 << LOG2_DEPTH) ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH;  nr = N - nu.    */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
+                            HPL_LASWP05T_LOG2_DEPTH ) );
+/* Unrolled part: A moves by incA, U0 by incU entries on every pass.   */
+   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+/* u0[k] is read with unit stride; a0 steps by LDA after each store.   */
+         *a0 = u0[ 0]; a0 += LDA;
+#if ( HPL_LASWP05T_DEPTH >  1 )
+         *a0 = u0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  2 )
+         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  4 )
+         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  8 )
+         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
+         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
+         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
+         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH > 16 )
+         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
+         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
+         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
+         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
+         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
+         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
+         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
+         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
+#endif
+      }
+   }
+/* Remainder: A and U0 now address the first of the nr leftover entries. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c
new file mode 100644
index 000000000..a74bae75c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c
@@ -0,0 +1,206 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP06N_DEPTH
+#define    HPL_LASWP06N_DEPTH       32
+#define    HPL_LASWP06N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06N
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06N swaps row i of U (i = 0 .. M-1) with the row of A whose
+ * index is LINDXA[i]. It returns immediately when M <= 0 or N <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with rows of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with rows of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,N).  This
+ *         array contains the rows of U that are to be swapped with rows
+ *         of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables (r: swap temporary; incA, incU: LDA, LDU shifted) ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP06N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH;  nr = N - nu.    */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) <<
+                            HPL_LASWP06N_LOG2_DEPTH ) );
+/* Unrolled part: j, A and U0 advance by DEPTH columns on every pass.  */
+   for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+/* Exchange *a0 and *u0 through r, then step both to the next column.  */
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP06N_DEPTH >  1 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  2 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  4 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  8 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH > 16 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Remainder: A and U0 now address the first of the nr leftover columns. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { r = *a0; *a0 = *u0; *u0 = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c
new file mode 100644
index 000000000..fb53c2a31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c
@@ -0,0 +1,207 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor (DEPTH = 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP06T_DEPTH
+#define    HPL_LASWP06T_DEPTH       32
+#define    HPL_LASWP06T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06T
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06T swaps  columns  of  U  with  rows  of  A  at  positions
+ * indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with columns of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with columns of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  have exchanged
+ *         their contents with the corresponding columns of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns of  U  that are to be swapped with
+ *         rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) <<
+                            HPL_LASWP06T_LOG2_DEPTH ) );
+/* Unrolled part: each j step swaps a block of DEPTH columns of A. */
+   for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+/* Swap A(LINDXA[i],j+k) with U(j+k,i) for k = 0 .. DEPTH-1. */
+         r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA;
+#if ( HPL_LASWP06T_DEPTH >  1 )
+         r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  2 )
+         r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  4 )
+         r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  8 )
+         r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA;
+         r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA;
+         r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA;
+         r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA;
+         r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA;
+         r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA;
+         r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH > 16 )
+         r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA;
+         r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA;
+         r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA;
+         r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA;
+         r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA;
+         r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA;
+         r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA;
+         r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA;
+         r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA;
+         r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA;
+         r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA;
+         r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA;
+         r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA;
+         r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA;
+         r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA;
+         r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA;
+#endif
+      }
+   }
+/* Clean-up: A and U0 now address column nu; swap the last nr columns. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA )
+         { r = *a0; *a0 = u0[j]; u0[j] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c
new file mode 100644
index 000000000..7dbf934f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor (DEPTH = 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP10N_DEPTH
+#define    HPL_LASWP10N_DEPTH       32
+#define    HPL_LASWP10N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp10N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp10N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp10N performs a sequence  of  local column interchanges on a
+ * matrix A.  One column interchange is initiated  for columns 0 through
+ * N-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the array A. M
+ *         must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of the array A. N
+ *         must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an  array of  dimension (LDA,N).  This
+ *         array contains the columns onto which the interchanges should
+ *         be applied. On exit, A contains the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry, IPIV[j] (0 <= j < N) is the column swapped with j.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * a0, * a1;
+   const int                  incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH );
+   int                        jp, mr, mu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* mu = M rounded down to a multiple of 2^LOG2_DEPTH, mr = M - mu. */
+   mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH )
+                            << HPL_LASWP10N_LOG2_DEPTH ) );
+/* Swap column j with column IPIV[j]; offsets in size_t to avoid overflow. */
+   for( j = 0; j < N; j++ )
+   {
+      if( j != ( jp = IPIV[j] ) )
+      {
+         a0 = A + (size_t)(j ) * (size_t)(LDA);
+         a1 = A + (size_t)(jp) * (size_t)(LDA);
+         for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA )
+         {
+            r = *a0;    *a0    = *a1;    *a1    = r;
+#if ( HPL_LASWP10N_DEPTH >  1 )
+            r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  2 )
+            r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r;
+            r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  4 )
+            r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r;
+            r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r;
+            r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r;
+            r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  8 )
+            r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r;
+            r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r;
+            r = a0[10]; a0[10] = a1[10]; a1[10] = r;
+            r = a0[11]; a0[11] = a1[11]; a1[11] = r;
+            r = a0[12]; a0[12] = a1[12]; a1[12] = r;
+            r = a0[13]; a0[13] = a1[13]; a1[13] = r;
+            r = a0[14]; a0[14] = a1[14]; a1[14] = r;
+            r = a0[15]; a0[15] = a1[15]; a1[15] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH > 16 )
+            r = a0[16]; a0[16] = a1[16]; a1[16] = r;
+            r = a0[17]; a0[17] = a1[17]; a1[17] = r;
+            r = a0[18]; a0[18] = a1[18]; a1[18] = r;
+            r = a0[19]; a0[19] = a1[19]; a1[19] = r;
+            r = a0[20]; a0[20] = a1[20]; a1[20] = r;
+            r = a0[21]; a0[21] = a1[21]; a1[21] = r;
+            r = a0[22]; a0[22] = a1[22]; a1[22] = r;
+            r = a0[23]; a0[23] = a1[23]; a1[23] = r;
+            r = a0[24]; a0[24] = a1[24]; a1[24] = r;
+            r = a0[25]; a0[25] = a1[25]; a1[25] = r;
+            r = a0[26]; a0[26] = a1[26]; a1[26] = r;
+            r = a0[27]; a0[27] = a1[27]; a1[27] = r;
+            r = a0[28]; a0[28] = a1[28]; a1[28] = r;
+            r = a0[29]; a0[29] = a1[29]; a1[29] = r;
+            r = a0[30]; a0[30] = a1[30]; a1[30] = r;
+            r = a0[31]; a0[31] = a1[31]; a1[31] = r;
+#endif
+         }
+/* Clean-up: a0 and a1 now address row mu; swap the last mr rows. */
+         for( i = 0; i < mr; i++ )
+         { r = a0[i]; a0[i] = a1[i]; a1[i] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp10N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c
new file mode 100644
index 000000000..e1b5bbfac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2l
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2l
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2l computes and returns the local index of the matrix entry
+ * pointed to by the global index IG.  The returned local index  is the
+ * same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated,  in  which  case  this  routine returns IG in all
+ *         processes. Otherwise, the value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j; /* i = (IG-INB)/NB, j = i/NPROCS */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( IG );
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on 
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1, 
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+   j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e l = ( i+1 ) / NPROCS = j+1,  since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+   return( NB * (j - i) + 
+           ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) );
+/*
+ * End of HPL_indxg2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c
new file mode 100644
index 000000000..74662f9d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c
@@ -0,0 +1,176 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_indxg2lp
+(
+   int *                            IL,
+   int *                            PROC,
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+void HPL_indxg2lp
+( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
+   int *                            IL;
+   int *                            PROC;
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2lp computes the local index of a matrix entry pointed to by
+ * the global index IG as well as the process coordinate which possesses
+ * this entry. The local returned index is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (output)                      int *
+ *         On exit, IL specifies the local index corresponding to IG. IL
+ *         is at least zero.
+ *
+ * PROC    (output)                      int *
+ *         On exit,  PROC  is the  coordinate of the process  owning the
+ *         entry IG. If IG < INB, SRCPROC = -1 or NPROCS = 1, PROC = SRCPROC
+ *         (possibly -1); else PROC = MPosMod(SRCPROC+1+(IG-INB)/NB, NPROCS).
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated,  in  which  case  this  routine returns IG in all
+ *         processes. Otherwise, the value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j; /* i = (IG-INB)/NB, j = i/NPROCS */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      *IL   = IG;
+      *PROC = SRCPROC; /* stored unchanged, so -1 when SRCPROC == -1 */
+   }
+   else
+   {
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1,
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+      j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * IG  is in block  1 + ( IG - INB ) / NB.  Add this to SRCPROC and take
+ * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */
+      *PROC = SRCPROC + 1 + i;
+      *PROC = MPosMod( *PROC, NPROCS );
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e l = ( i+1 ) / NPROCS = j+1,  since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+      *IL = NB * (j - i) + 
+            ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG );
+   }
+/*
+ * End of HPL_indxg2lp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c
new file mode 100644
index 000000000..d0e75f516
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c
@@ -0,0 +1,128 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2p
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2p
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2p returns the coordinate of the process that possesses the
+ * entry of a matrix specified by the global index IG.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, SRCPROC is the coordinate of the process owning the
+ *         first row or column of the matrix (0 <= SRCPROC < NPROCS), or
+ *         -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        proc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG is in the first block, or the data is not distributed (SRCPROC is
+ * -1), or this grid dimension has one process: return SRCPROC as is.
+ */
+      return( SRCPROC );
+/*
+ * Otherwise,  IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC
+ * and reduce the sum modulo NPROCS (definition of the block-cyclic data
+ * distribution).
+ */
+   proc = SRCPROC + 1 + ( IG - INB ) / NB;
+   return( MPosMod( proc, NPROCS ) );
+/*
+ * End of HPL_indxg2p
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c
new file mode 100644
index 000000000..7f139425a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c
@@ -0,0 +1,164 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxl2g
+(
+   const int                        IL,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxl2g
+( IL, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        IL;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxl2g returns the global index of the matrix entry  pointed to
+ * by the local index IL of the process indicated by PROC.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (input)                       const int
+ *         On entry, IL specifies the local  index of the matrix  entry.
+ *         IL must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC  specifies the coordinate of the process whose
+ *         local array row or column is to be determined. PROC  must  be
+ *         at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, SRCPROC is the coordinate of the process owning the
+ *         first row or column of the matrix (0 <= SRCPROC < NPROCS), or
+ *         -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid: local and global indexes are equal.
+ */
+      return( IL );
+   }
+   else if( PROC == SRCPROC )
+   {
+/*
+ * If I am SRCPROC, my first block is of size INB
+ */
+      if( IL < INB )
+/*
+ * If  IL  belongs to the first block,  the local and global indexes are
+ * equal.
+ */
+         return ( IL );
+/*
+ * The number of entire local blocks  before  the one  IL  belongs to is
+ * ( IL - INB ) / NB + 1.  Each of the other  NPROCS-1  processes  thus
+ * holds NB*( ( IL-INB )/NB + 1 ) entries  that are globally before the
+ * global entry corresponding to IL.
+ */
+      return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL );
+   }
+   else if( PROC < SRCPROC )
+   {
+/*
+ * Otherwise, the process of coordinate  MOD(SRCPROC+1, NPROCS) owns the
+ * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro-
+ * cesses between this process and  PROC  not  included  when going from
+ * left to right on the process line  with  possible wrap around.  These
+ * IPROC  processes have one more NB block than the other processes, who
+ * own IL / NB blocks of size NB.
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB );
+   }
+   else
+   {
+/*
+ * Same reasoning as above with IPROC = PROC - SRCPROC - 1 (no wrap).
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1        )+IL+INB );
+   }
+/*
+ * End of HPL_indxl2g
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c
new file mode 100644
index 000000000..2580f2ad4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c
@@ -0,0 +1,382 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_infog2l
+(
+   int                              I,
+   int                              J,
+   const int                        IMB,
+   const int                        MB,
+   const int                        INB,
+   const int                        NB,
+   const int                        RSRC,
+   const int                        CSRC,
+   const int                        MYROW,
+   const int                        MYCOL,
+   const int                        NPROW,
+   const int                        NPCOL,
+   int *                            II,
+   int *                            JJ,
+   int *                            PROW,
+   int *                            PCOL
+)
+#else
+void HPL_infog2l
+( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
+   int                              I;
+   int                              J;
+   const int                        IMB;
+   const int                        MB;
+   const int                        INB;
+   const int                        NB;
+   const int                        RSRC;
+   const int                        CSRC;
+   const int                        MYROW;
+   const int                        MYCOL;
+   const int                        NPROW;
+   const int                        NPCOL;
+   int *                            II;
+   int *                            JJ;
+   int *                            PROW;
+   int *                            PCOL;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_infog2l computes the starting local index II, JJ corresponding to
+ * the submatrix starting globally at the entry pointed to by I, J. This
+ * routine returns the coordinates in the grid of the process owning the
+ * matrix entry of global indexes I, J, namely PROW and PCOL.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                int
+ *         On entry,  I  specifies  the  global  row index of the matrix
+ *         entry. I must be at least zero.
+ *
+ * J       (global input)                int
+ *         On entry,  J  specifies the global column index of the matrix
+ *         entry. J must be at least zero.
+ *
+ * IMB     (global input)                const int
+ *         On entry,  IMB  specifies  the size of the first row block of
+ *         the global matrix. IMB must be at least one.
+ *
+ * MB      (global input)                const int
+ *         On entry,  MB specifies the blocking factor used to partition
+ *         and  distribute the rows of the matrix A.  MB  must be larger
+ *         than one.
+ *
+ * INB     (global input)                const int
+ *         On entry, INB specifies the size of the first column block of
+ *         the global matrix. INB must be at least one.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the columns of the matrix A. NB must be larger
+ *         than one.
+ *
+ * RSRC    (global input)                const int
+ *         On entry,  RSRC  specifies  the row coordinate of the process
+ *         that possesses the first row block.  RSRC is -1 when the rows
+ *         are not distributed, else 0 <= RSRC < NPROW.
+ *
+ * CSRC    (global input)                const int
+ *         On entry, CSRC specifies the column coordinate of the process
+ *         that possesses the first column block.  CSRC is  -1  when the
+ *         columns are not distributed, else 0 <= CSRC < NPCOL.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * II      (local output)                int *
+ *         On exit, II  specifies the  local  starting  row index of the
+ *         submatrix. On exit, II is at least 0.
+ *
+ * JJ      (local output)                int *
+ *         On exit, JJ  specifies the local starting column index of the
+ *         submatrix. On exit, JJ is at least 0.
+ *
+ * PROW    (global output)               int *
+ *         On exit, PROW is the row coordinate of the process owning the
+ *         entry specified by the global index I.  PROW is at least zero
+ *         and less than NPROW.
+ *
+ * PCOL    (global output)               int *
+ *         On exit, PCOL  is the column coordinate of the process owning
+ *         the entry specified by the global index J.  PCOL  is at least
+ *         zero and less than NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int            ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
+/* ..
+ * .. Executable Statements ..
+ */
+   imb   = IMB;
+   *PROW = RSRC;
+
+   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
+   {
+/*
+ * The data is not distributed,  or there is just one process row in the
+ * grid.
+ */
+     *II = I;
+   }
+   else if( I < imb )
+   {
+/*
+ * I refers to an entry in the first block of rows
+ */
+     *II = ( MYROW == *PROW ? I : 0 );
+   }
+   else
+   {
+      mb   = MB;
+      rsrc = *PROW;
+/*
+ * The discussion goes as follows:  compute  my distance from the source
+ * process so that  within  this process coordinate system,  the  source
+ * process   is  the  process  such  that  mydist = 0,  or  equivalently
+ * MYROW == rsrc.
+ *
+ * Find  out  the global coordinate of the block I belongs to (nblocks),
+ * as well as the minimum local number of blocks that every process has.
+ *
+ * when mydist < nblocks-ilocblk*NPROCS,  I own ilocblk + 1 full blocks,
+ * when mydist > nblocks-ilocblk*NPROCS,  I own ilocblk     full blocks,
+ * when mydist = nblocks-ilocblk*NPROCS,  I own ilocblk     full blocks
+ * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */
+      if( MYROW == rsrc )
+      {
+/*
+ * I refers  to an entry  that is not in the first block, find out which
+ * process has it.
+ */
+         nblocks = ( I - imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Since  mydist = 0  and nblocks - ilocblk * NPROW >= 0, there are only
+ * three possible cases:
+ *
+ *   1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own
+ *      I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this
+ *      case  cannot  happen  when  ilocblk is zero, since nblocks is at
+ *      least one.
+ *
+ *   2) When  0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in
+ *      which  case  I  and  II  can  respectively  be  written as IMB + 
+ *      (nblocks-1)*MB + IL  and  IMB + (ilocblk-1) * MB + IL.  That  is
+ *      II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
+ *      when ilocblk is zero, since nblocks is at least one.
+ *
+ *   3) mydist = 0 < nblocks - ilocblk * NPROW,  the source process owns
+ *      ilocblk+1 full blocks,  and  therefore  II = IMB + ilocblk * MB.
+ *      Note that when ilocblk is zero, II is just IMB.
+ */
+         if( nblocks < NPROW )
+         {
+            *II = imb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            if( ilocblk * NPROW >= nblocks )
+            {
+               *II = ( ( MYROW == *PROW ) ?
+                       I   + ( ilocblk - nblocks ) * mb :
+                       imb + ( ilocblk - 1       ) * mb );
+            }
+            else
+            {
+               *II =  imb + ilocblk * mb;
+            }
+         }
+      }
+      else
+      {
+/*
+ * I is not in the first block:  I is decremented in place by IMB below
+ * (it is then relative to the second block); find out which process has it.
+ */
+         nblocks = ( I -= imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the  source process is the process such that
+ * mydist=0.
+ */
+         if( ( mydist  = MYROW - rsrc ) < 0 ) mydist += NPROW;
+/*
+ * When mydist <  nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
+ * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I,  I own ilocblk
+ * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
+ * blocks and I,  in which case I can be written as IMB + (nblocks-1)*MB
+ * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB.
+ */
+         if( nblocks < NPROW )
+         {
+            mydist -= nblocks;
+            *II     = ( ( mydist < 0 ) ? mb :
+                        ( ( MYROW == *PROW ) ?
+                          I + ( 1 - nblocks ) * mb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            mydist -= nblocks - ilocblk * NPROW;
+            *II     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
+                        ( ( MYROW == *PROW ) ?
+                          ( ilocblk - nblocks + 1 ) * mb + I :
+                          ilocblk * mb ) );
+         }
+      }
+   }
+/*
+ * Same computation for the columns (J, INB, NB, CSRC, MYCOL, NPCOL)
+ */
+   inb   = INB;
+   *PCOL = CSRC;
+
+   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
+   {
+      *JJ = J;
+   }
+   else if( J < inb )
+   {
+      *JJ = ( MYCOL == *PCOL ? J : 0 );
+   }
+   else
+   {
+      nb   = NB;
+      csrc = *PCOL;
+
+      if( MYCOL == csrc )
+      {
+         nblocks = ( J - inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            *JJ = inb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            if( ilocblk * NPCOL >= nblocks )
+            {
+               *JJ = ( ( MYCOL == *PCOL ) ?
+                       J   + ( ilocblk - nblocks ) * nb :
+                       inb + ( ilocblk - 1       ) * nb );
+            }
+            else
+            {
+               *JJ = inb + ilocblk * nb;
+            }
+         }
+      }
+      else
+      {
+         nblocks = ( J -= inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            mydist -= nblocks;
+            *JJ     = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
+                        J + ( 1 - nblocks )*nb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            mydist -= nblocks - ilocblk * NPCOL;
+            *JJ     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
+                        ( ( MYCOL == *PCOL ) ?
+                          ( ilocblk - nblocks + 1 ) * nb + J :
+                          ilocblk * nb ) );
+         }
+      }
+   }
+/*
+ * End of HPL_infog2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c
new file mode 100644
index 000000000..39cd736d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numroc
+(
+   const int                        N,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numroc
+( N, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numroc returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index 0;  the computation is delegated to  HPL_numrocI  with  I = 0.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) );
+/*
+ * End of HPL_numroc
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c
new file mode 100644
index 000000000..70f3497de
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c
@@ -0,0 +1,243 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numrocI
+(
+   const int                        N,
+   const int                        I,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numrocI
+( N, I, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        I;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numrocI returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index I.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * I       (input)                       const int
+ *         On entry, I  specifies the global index of the matrix  entry.
+ *         I must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ilocblk, inb, mydist, nblocks, srcproc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid.
+ */
+      return( N );
+/*
+ * Compute coordinate of process owning I and corresponding INB
+ */
+   srcproc = SRCPROC;
+
+   if( ( inb = INB - I ) <= 0 )
+   {
+/*
+ * I is not in the first block, find out which process has it and update
+ * the size of first block
+ */
+      srcproc += ( nblocks = (-inb) / NB + 1 ); 
+      srcproc -= ( srcproc / NPROCS ) * NPROCS;
+      inb     += nblocks * NB;
+   }
+/*
+ * Now  everything  is  just like  N, I=0, INB, NB, srcproc, NPROCS. The
+ * discussion goes as follows:  compute my distance from the source pro-
+ * cess  so that within this process coordinate system,  the source pro-
+ * cess is the process such that mydist = 0, or PROC == srcproc.
+ *
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries. Then remark that
+ *
+ * when  mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks,
+ * when  mydist > nblocks - ilocblk*NPROCS, I own ilocblk   full blocks,
+ * when  mydist = nblocks - ilocblk*NPROCS, either the last block is not
+ * full and I own it,  or the last block is full and I am the first pro-
+ * cess owning only ilocblk full blocks.
+ */
+   if( PROC == srcproc )
+   {
+/*
+ * I am the source process, i.e. I own I (mydist=0).  When N <= INB, the
+ * answer is simply N.
+ */
+      if( N <= inb ) return( N );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries.
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Since  mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only
+ * two possible cases:
+ *
+ *   1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di-
+ *      vides the global number of full blocks,  then the source process
+ *      srcproc owns one more block than the other processes;  and N can
+ *      be rewritten as N = INB + (nblocks-1) * NB + LNB  with  LNB >= 0
+ *      size of the last block. Similarly, the local value Np correspon-
+ *      ding to N can be written as  Np = INB + (ilocblk-1) * NB + LNB =
+ *      N + ( ilocblk-1 - (nblocks-1) )*NB.  Note  that this case cannot
+ *      happen when ilocblk is zero, since nblocks is at least one.
+ *
+ *   2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only
+ *      owns full blocks,  and  therefore Np = INB + ilocblk * NB.  Note
+ *      that when ilocblk is zero, Np is just INB.
+ */
+      if( nblocks < NPROCS ) return( inb );
+ 
+      ilocblk = nblocks / NPROCS;
+      return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB :
+              N + ( ilocblk - nblocks ) * NB );
+   }
+   else
+   {
+/*
+ * I am not the source process. When N <= INB, the answer is simply 0.
+ */
+      if( N <= inb ) return( 0 );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the source  process is the process such that
+ * mydist=0.
+ */
+      if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS;
+/*
+ * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk   full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist = nblocks - ilocblk*NPROCS,
+ * either the last block is not full and I own it, in which case
+ *    N = INB + (nblocks - 1)*NB + LNB with  LNB  the  size  of the last
+ *    block such that NB > LNB > 0;  the local value Np corresponding to
+ *    N is given by  Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB;
+ * or the  last  block  is  full  and I am the first process owning only
+ *    ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and
+ *    Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB.
+ */
+      if( nblocks < NPROCS )
+         return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 :
+                 N - inb + NB * ( 1 - nblocks ) ) );
+ 
+      ilocblk = nblocks / NPROCS;
+      mydist -= nblocks - ilocblk * NPROCS;
+      return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
+              ( ( mydist > 0 ) ? ilocblk * NB :
+                N - inb + NB * ( ilocblk - nblocks + 1 ) ) );
+   }
+/*
+ * End of HPL_numrocI
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c
new file mode 100644
index 000000000..268975fc1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c
@@ -0,0 +1,137 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pabort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pabort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pabort displays an error message on stderr and halts execution.
+ * The formatted message is truncated to fit the 128-byte local buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+/* Bounded formatting: a long message is truncated, never overruns cline */
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( stderr,
+                   "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+
+   MPI_Abort( MPI_COMM_WORLD, -1 );
+   exit( -1 );
+/*
+ * End of HPL_pabort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c
new file mode 100644
index 000000000..73cf649da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c
@@ -0,0 +1,143 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlamch
+(
+   MPI_Comm                         COMM,
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_pdlamch
+( COMM, CMACH )
+   MPI_Comm                         COMM;
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlamch  obtains from  HPL_dlamch  the machine-specific arithmetic
+ * constant selected by CMACH,  and for some selectors hands that local
+ * value to  HPL_all_reduce  together with the communicator  COMM  before
+ * returning it:
+ *
+ *    eps, sfmin, emin, rmin   are reduced with the HPL_max operator,
+ *    emax, rmax               are reduced with the HPL_min operator,
+ *    any other selector       is returned exactly as computed locally.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * CMACH   (global input)                const HPL_T_MACH
+ *         Selects the constant returned by HPL_pdlamch:
+ *            HPL_MACH_EPS    eps,   relative machine precision (default)
+ *            HPL_MACH_SFMIN  sfmin, safe minimum such that 1/sfmin does
+ *                                   not overflow
+ *            HPL_MACH_BASE   base,  base of the machine
+ *            HPL_MACH_PREC   prec,  eps*base
+ *            HPL_MACH_MLEN   t,     number of (base) digits in the mantissa
+ *            HPL_MACH_RND    rnd,   1.0 if rounding occurs in addition,
+ *                                   0.0 otherwise
+ *            HPL_MACH_EMIN   emin,  minimum exponent before (gradual)
+ *                                   underflow
+ *            HPL_MACH_RMIN   rmin,  underflow threshold, base**(emin-1)
+ *            HPL_MACH_EMAX   emax,  largest exponent before overflow
+ *            HPL_MACH_RMAX   rmax,  overflow threshold,
+ *                                   (base**emax)*(1-eps)
+ *
+ * Notes
+ * =====
+ *
+ * HPL_max and HPL_min are defined outside this file;  presumably they
+ * compute an element-wise maximum and minimum (to be confirmed there).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     value;
+/* ..
+ * .. Executable Statements ..
+ */
+   value = HPL_dlamch( CMACH );
+
+   if( ( CMACH == HPL_MACH_EPS  ) || ( CMACH == HPL_MACH_SFMIN ) ||
+       ( CMACH == HPL_MACH_EMIN ) || ( CMACH == HPL_MACH_RMIN  ) )
+   {
+/*
+ * These selectors are combined over COMM using HPL_max
+ */
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE, HPL_max,
+                             COMM );
+   }
+   else if( ( CMACH == HPL_MACH_EMAX ) || ( CMACH == HPL_MACH_RMAX ) )
+   {
+/*
+ * These selectors are combined over COMM using HPL_min
+ */
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE, HPL_min,
+                             COMM );
+   }
+
+/*
+ * Every other selector is returned without any communication call
+ */
+   return( value );
+/*
+ * End of HPL_pdlamch
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c
new file mode 100644
index 000000000..40bdcc36b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c
@@ -0,0 +1,242 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid *               GRID,
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid *               GRID;
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange returns  the value of the one norm,  or the infinity norm,
+ * or the element of largest absolute value of a distributed matrix A:
+ *
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,
+ *    norm1(A),        when NORM = HPL_NORM_1,
+ *    normI(A),        when NORM = HPL_NORM_I,
+ *
+ * where norm1 denotes the one norm of a matrix (maximum column sum) and
+ * normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+ * that max(abs(A(i,j))) is not a matrix norm.  When  min(M,N)  is zero,
+ * HPL_rzero is returned before any reduce or broadcast is issued.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         On entry,  NORM  specifies  the  value to be returned by this
+ *         function as described above.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+ *         that contains the local pieces of the distributed matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  (v0 is the running result, work a scratch array)
+ */
+   double                     s, v0=HPL_rzero, * work = NULL;
+   MPI_Comm                   Acomm, Ccomm, Rcomm;
+   int                        ii, jj, mp, mycol, myrow, npcol, nprow,
+                              nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Ccomm = GRID->col_comm;
+   Acomm = GRID->all_comm;
+
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( Mmin( M, N ) == 0 ) { return( v0 ); }
+   else if( NORM == HPL_NORM_A )
+   {
+/*
+ * max( abs( A ) ) of the local mp x nq piece, HPL_max-reduced on Acomm
+ */
+      if( ( nq > 0 ) && ( mp > 0 ) )
+      {
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { v0 = Mmax( v0, Mabs( *A ) ); A++; }
+            A += LDA - mp;                      /* next local column */
+         }
+      }
+      (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                         Acomm );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * Find norm_1( A ): work[jj] holds the abs sum of local column jj.
+ */
+      if( nq > 0 )
+      {
+         work = (double*)malloc( (size_t)(nq) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            s = HPL_rzero;
+            for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; }
+            work[jj] = s; A += LDA - mp;        /* next local column */
+         }
+/*
+ * Find sum of global matrix columns, store on row 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum,
+                            0, Ccomm );
+/*
+ * Find maximum sum of columns for 1-norm (only where myrow == 0)
+ */
+         if( myrow == 0 )
+         { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in row 0 -> process (0,0); v0 is still HPL_rzero if nq == 0
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                            Rcomm );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Find norm_inf( A ): work[ii] holds the abs sum of local row ii.
+ */
+      if( mp > 0 )
+      {
+         work = (double*)malloc( (size_t)(mp) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { work[ii] += Mabs( *A ); A++; }
+            A += LDA - mp;                      /* next local column */
+         }
+/*
+ * Find sum of global matrix rows, store on column 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum,
+                            0, Rcomm );
+/*
+ * Find maximum sum of rows for inf-norm (only where mycol == 0)
+ */
+         if( mycol == 0 )
+         { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in column 0 -> process (0,0); v0 is HPL_rzero if mp == 0
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max,
+                            0, Ccomm );
+   }
+/*
+ * Broadcast answer to every process in the grid (any other NORM: v0 = 0)
+ */
+   (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm );
+
+   return( v0 );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c
new file mode 100644
index 000000000..20f11129a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c
@@ -0,0 +1,236 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        IAROW,
+   const int                        IACOL,
+   const char *                     CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        IAROW;
+   const int                        IACOL;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints to standard error a distributed matrix A.  Local
+ * pieces of A are sent to process (0,0) and printed there. HPL_pabort
+ * is called if process (0,0) cannot allocate its NB-entry buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Acomm;
+   double                     * buf = NULL;
+   int                        h, i, ib, icurcol=IACOL, icurrow=IAROW,
+                              ii=0, j, jb, jj=0, mycol, myrow, npcol,
+                              nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm;
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {                        /* only (0,0) receives remote pieces in buf */
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+      if( buf == NULL )     /* buf goes to HPL_recv below: fail up front */
+      { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+   }
+
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i,
+                                   Acomm );
+                  HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW;
+      }
+      if( mycol == icurcol ) jj += jb;
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.  The formatted text is truncated
+ * so that it always fits the 128-byte local buffer cline.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..  (bounded format: never write past cline)
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c
new file mode 100644
index 000000000..644641412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c
@@ -0,0 +1,149 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax locates,  through HPL_idamax,  the maximum entry  of the
+ * local part of the current column and packs the result in WORK[0:3].
+ * WORK[0] is the selected entry of  A  exactly as stored (no absolute
+ * value is applied here), WORK[1] is the corresponding local row index,
+ * WORK[2] is the corresponding global row index,  and  WORK[3]  is the
+ * row coordinate of this process.  When N is less than 1, WORK[0:2] is
+ * set to HPL_rzero and WORK[3] to the total number of process rows.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of rows of the column
+ *         of A on which we operate.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is  a workarray of size at least 4.  On exit,
+ *         WORK[0] is the entry picked by HPL_idamax  (sign preserved),
+ *         WORK[1] is the index returned by HPL_idamax,  WORK[2]  is the
+ *         matching global row index,  and  WORK[3]  is the process row
+ *         coordinate (or the number of process rows when N < 1).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * col;
+   int                        iglob, iloc, lidx, nb, nprow, prow;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 )
+   {
+/*
+ * No local row of A:  report zeros and set the owner coordinate WORK[3]
+ * of this "ghost" row to nprow,  such that it  will never be used, even
+ * if there are only zeros in the current column of A.
+ */
+      WORK[0] = HPL_rzero; WORK[1] = HPL_rzero; WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+      return;
+   }
+   col   = Mptr( PANEL->A, II, JJ, PANEL->lda );
+   prow  = PANEL->grid->myrow;
+   nprow = PANEL->grid->nprow;
+   nb    = PANEL->nb;
+/*
+ * lidx = PANEL->ii + II + HPL_idamax offset is what Mindxl2g converts
+ */
+   iloc  = HPL_idamax( N, col, 1 );
+   lidx  = PANEL->ii + II + iloc;
+   Mindxl2g( iglob, lidx, nb, nb, prow, 0, nprow );
+/*
+ * WORK[0:3] := selected entry, local row, global row, process row
+ */
+   WORK[0] = col[iloc];         WORK[1] = (double)(iloc);
+   WORK[2] = (double)(iglob);   WORK[3] = (double)(prow);
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth and its log2 (must satisfy DEPTH == 2^LOG2)
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );   /* n0 rounded down */
+   nr    = n0 - nu;                   /* left over, done one by one */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, JJ, 0, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A,  II,     0, lda );
+               A2 = Mptr( A1,        ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A (held in Wmx) into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A,  II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * Set INFO to ia + JJ + 1, unless it was already set (non-zero).
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth and its log2 (must satisfy DEPTH == 2^LOG2)
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpT
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpT performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * transpose form.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );   /* n0 rounded down */
+   nr    = n0 - nu;                   /* left over, done one by one */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, 0, JJ, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II,     0, lda );
+               A2 = Mptr( A1,       ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
+                    L   += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
+                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
+                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
+                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
+                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
+                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
+                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
+                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
+                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
+                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
+                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
+                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
+                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
+                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
+                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
+                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
+                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
+                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
+                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
+                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
+                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
+                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
+                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
+                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
+                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
+                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
+                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
+#endif
+               }
+
+               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
+               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A (held in Wmx) into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
+                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
+                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
+                  L[10]=Wmx[10]; L[14]=Wmx[14];
+                  L[11]=Wmx[11]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=Wmx[16]; L[20]=Wmx[20];
+                  L[17]=Wmx[17]; L[21]=Wmx[21];
+                  L[18]=Wmx[18]; L[22]=Wmx[22];
+                  L[19]=Wmx[19]; L[23]=Wmx[23];
+                  L[24]=Wmx[24]; L[28]=Wmx[28];
+                  L[25]=Wmx[25]; L[29]=Wmx[29];
+                  L[26]=Wmx[26]; L[30]=Wmx[30];
+                  L[27]=Wmx[27]; L[31]=Wmx[31];
+#endif
+               }
+               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+            {
+               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda;
+               L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda;
+               L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda;
+               L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda;
+               L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda;
+               L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda;
+               L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda;
+               L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda;
+               L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda;
+               L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda;
+               L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda;
+               L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } 
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+         {
+            L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11];
+            L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19];
+            L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23];
+            L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27];
+            L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31];
+#endif
+         }
+         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+      {
+         L[ 0]=Wr0[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         L[ 1]=Wr0[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
+         L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21];
+         L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23];
+         L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29];
+         L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31];
+#endif
+      }
+      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
+/*
+ * Set INFO to ia + JJ + 1, unless it was already set (non-zero).
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c
new file mode 100644
index 000000000..1d99c6e14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c
@@ -0,0 +1,141 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdfact
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdfact
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdfact is the entry point for the recursive factorization of one
+ * 1-dimensional panel of columns.  It advances the panel descriptor by
+ * jb columns,  allocates a scratch buffer  and hands the panel over to
+ * the recursive routine  PANEL->algo->rffun  (RPFACT:  Crout, Left- or
+ * Right-looking).  NBMIN sets the number of columns at which recursion
+ * stops,  NDIV the number of subpanels per split (usually 2), and PFACT
+ * the non-recursive variant that is applied to at most  NBMIN  columns.
+ * Empirical tests seem to indicate that NBMIN = 4 or 8 works best.
+ *
+ * Bi-directional exchange performs the  swap::broadcast operations for
+ * one column of the panel at once, i.e. fewer but slightly larger mes-
+ * sages.  On P processes with bi-directional links,  the running time
+ * is approximately (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of the panel,  lat and bdwth the
+ * network latency and bandwidth for double precision real words,  and
+ * gam2-3  an estimate of the  Level 2 and Level 3 BLAS execution rate.
+ * On many modern machines this operation is latency bound  (cost about
+ * N0 * log_2(P) * lat).  Mono-directional links will double that cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.  On exit, n and nq are decreased and ja, jj
+ *         and A advanced by jb (nq, jj, A only in process column pcol).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   void                       * buf = NULL;
+   double                     * work;
+   size_t                     nalign, nwork;
+   int                        nb;
+/* ..
+ * .. Executable Statements ..
+ */
+   nb         = PANEL->jb;
+   PANEL->n  -= nb;
+   PANEL->ja += nb;
+   if( ( PANEL->grid->mycol != PANEL->pcol ) || ( nb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * Workspace of 2 * ( 4 + 2 * nb ) doubles,  plus  align  extra doubles;
+ * HPL_PTR presumably rounds buf up to that alignment (defined elsewhere)
+ */
+   nalign = (size_t)( PANEL->algo->align );
+   nwork  = (size_t)( ( 4 + ( (unsigned int)(nb) << 1 ) ) << 1 );
+   buf    = (void *)malloc( ( nalign + nwork ) * sizeof(double) );
+   if( buf == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
+/*
+ * Factor the panel - Update the panel pointers
+ */
+   work = (double *)HPL_PTR( buf, ( nalign * sizeof(double) ) );
+   PANEL->algo->rffun( PANEL, PANEL->mp, nb, 0, work );
+   if( buf != NULL ) { free( buf ); }
+   PANEL->A   = Mptr( PANEL->A, 0, nb, PANEL->lda );
+   PANEL->nq -= nb;
+   PANEL->jj += nb;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * End of HPL_pdfact
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c
new file mode 100644
index 000000000..b14452197
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c
@@ -0,0 +1,311 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmxswp
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_pdmxswp
+( PANEL, M, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmxswp swaps  and  broadcasts  the  absolute value max row using
+ * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by
+ *  
+ *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ *  
+ * where  lat and bdwth are the latency and bandwidth of the network for
+ * double precision real elements.  Communication  only  occurs  in  one
+ * process  column. Mono-directional links  will cause the communication
+ * cost to double.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of the matrix
+ *         column on which this function operates.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
+ *         routine to  initialize  the first four entries of this array.
+ *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+ *         Note that this is also the  JJth  row  (or column) of L1. The
+ *         remaining part is used as a temporary array.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax, tmp1;
+   double                     * A0, * Wmx, * Wwork;
+   HPL_T_grid                 * grid;
+   MPI_Comm                   comm;
+   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
+   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
+                              mydis_, myrow, n0, nprow, partner, rcnt,
+                              root, scnt, size_;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
+/*
+ * ip2  : the largest power of two less than or equal to nprow;
+ * hdim : dimension of the hypercube made of those ip2 processes;
+ * Np2  : nonzero when nprow != ip2, i.e. nprow is not a power of 2;
+ */
+   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
+   hdim    = (unsigned int)(grid->row_hdim);     n0  = PANEL->jb;
+   icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 );
+   mydist  = MModSub( myrow, icurrow, nprow );
+/*
+ * Set up pointers in workspace:  WORK and Wwork  point to the beginning
+ * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
+ * owning the local (before combine) and global (after combine) absolute
+ * value max. A0 points to the copy of the current row of the matrix.
+ */
+   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
+   Wwork = WORK + cnt0;
+/*
+ * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
+ * with max in current column). If I am the current process row, pack in
+ * addition the current row of A in A0[0:N0-1].  If I do not own any row
+ * of A, then zero out Wmx[0:N0-1].
+ */
+   if( M > 0 )
+   {
+      lda = PANEL->lda;
+      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
+                 Wmx, 1 );
+      if( myrow == icurrow )
+      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
+   }
+   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
+/*
+ * Combine the results (bi-directional exchange):  the process coordina-
+ * tes are relative to icurrow,  this allows to reduce the communication
+ * volume when nprow is not a power of 2.
+ *
+ * When nprow is not a power of 2:  proc[i-ip2] receives local data from
+ * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
+ * sends to proc[ip2] the current row of A  for later broadcast in procs
+ * [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         if( mydist == (int)(ip2) )
+            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         if( mydist == 0 )
+            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+/* Adopt the received candidate if larger in |.|, or equal with lower [3] */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
+      }
+   }
+
+   if( mydist < (int)(ip2) )
+   {
+/*
+ * power of 2 part of the processes collection: processes  [0..ip2)  are
+ * combining (binary exchange); proc[0] has two rows to send, but one to
+ * receive.  At every step  k  in [0..hdim) of the algorithm,  a process 
+ * pair exchanging 2 rows is such that  mydist >> k+1 is 0. Among  those
+ * processes the ones  that are sending one more row than  what they are
+ * receiving are such that mydist >> k is equal to 0.
+ */
+      k = 0; ipow = 1;
+ 
+      while( k < hdim )
+      {
+         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
+         {
+            if( ( (unsigned int)(mydist) >> k ) == 0 )
+            { scnt = cnt0; rcnt = cnt_; }
+            else
+            { scnt = cnt_; rcnt = cnt0; }
+         }
+         else { scnt = rcnt = cnt_; }
+ 
+         partner = (int)( (unsigned int)(mydist) ^ ipow );
+         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
+                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
+                          nprow ), comm );
+/* Same selection rule; when rcnt == cnt0 the received row copy ends in A0 */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         {
+            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
+                       WORK, 1 );
+         }
+         else if( rcnt == cnt0 )
+         { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); }
+ 
+         ipow <<= 1; k++;
+      }
+   }
+   else if( size_ > 1 )
+   {
+/*
+ * proc[ip2] broadcast current row of A to procs [ip2+1..nprow).
+ */
+      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
+      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+ 
+      root   = MModAdd( icurrow, (int)(ip2), nprow );
+      mydis_ = MModSub( myrow,   root,       nprow );
+ 
+      do
+      {
+         mask ^= ip2_;
+         if( ( mydis_ & mask ) == 0 )
+         {
+            partner = (int)(mydis_ ^ ip2_);
+            if( ( mydis_ & ip2_ ) != 0 )
+            {
+               (void) HPL_recv( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+            else if( partner < size_ )
+            {
+               (void) HPL_send( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+         }
+         ip2_ >>= 1;
+      } while( ip2_ > 0 );
+   }
+/*
+ * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
+ * sends the pivot row to proc[i]  along  with the first four entries of
+ * the WORK array.
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+   }
+/*
+ * Save the global pivot index (WORK[2]) in pivot array
+ */
+   (PANEL->DPIV)[JJ] = WORK[2];
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+/*
+ * End of HPL_pdmxswp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c
new file mode 100644
index 000000000..4ea170b73
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in no-transpose form (i.e. just like the input
+ * matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr != 0 iff this process row is PANEL->prow; only then do ii, iip1, m, Mm1 advance */
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 ) /* kk = number of loop iterations already completed */
+      {
+         L1ptr = Mptr( L1, jj, jj+1, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1,  kk, Nm1 );
+         Xv1 = vsip_msubview_d( Xv0, jj,    ICOFF, 1,  kk  );
+         Yv1 = vsip_msubview_d( Xv0, jj,    jj+1,  1,  Nm1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 ); 
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
+                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
+                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,          jj+1,            kk+1,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      vsip_mdestroy_d( Yv1 );
+      vsip_mdestroy_d( Xv1 );
+      vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
+                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ),
+                 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c
new file mode 100644
index 000000000..50ed300aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c
@@ -0,0 +1,267 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main  loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj+1, jj, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, jj+1,  ICOFF, Nm1, kk );
+         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj,    kk,   1 );
+         Yv1 = vsip_msubview_d( Xv0, jj+1,  jj,    Nm1,  1 );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
+                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
+                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,           ICOFF,           1,   kk+1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF,
+                 n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;            /* advance to the next column */
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c
new file mode 100644
index 000000000..fa471198d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main  loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, 
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr,  1 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,        jj+1,              kk,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr,  1, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;                  /* advance to the next column */
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c
new file mode 100644
index 000000000..a6e1b67bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main  loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans,   HplUnit, kk,
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,         ICOFF,             1,   kk );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;                  /* advance to the next column */
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c
new file mode 100644
index 000000000..0a3b9a542
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c
@@ -0,0 +1,250 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlN factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr = 1 only in process row PANEL->prow; there updates skip row ii. */
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+#ifdef HPL_CALL_VSIPL
+      if( Nm1 > 1 )
+      {
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+      }
+#else
+      if( Nm1 > 1 )
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda );
+#endif
+/*
+ * Same thing as above but with worse data access on y (A += x * y^T)
+ *
+ *    if( Nm1 > 1 )
+ *       HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+ *                 Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ),
+ *                 lda );
+ */  
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry (no scaling is done when that entry, WORK[0], is zero)
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c
new file mode 100644
index 000000000..68c1afc02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlT factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt, * L1;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M,
+                              n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr = 1 only in process row PANEL->prow; there updates skip row ii. */
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+
+      if( Nm1 > 1 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); 
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ),
+                   lda );
+#endif
+      }
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry (no scaling is done when that entry, WORK[0], is zero)
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c
new file mode 100644
index 000000000..348d7ebe6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrN  recursively  factorizes  a panel of columns  using the
+ * recursive  Crout  variant of the usual one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  (N <= nbmin: hand off to algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrN( PANEL, m, jb, ioff, WORK );  /* recurse on jb columns */
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff,  ICOFF,   jb, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff,  ioff+jb, jb,  n );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj,  n );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n,
+                    jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, 
+                    Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );  /* next block: at most nb columns */
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c
new file mode 100644
index 000000000..a1ecfac2c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrT recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+ * The lower triangular N0-by-N0  upper block of the panel  is stored in
+ * transpose form.
+ *
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  makes it possible to get
+ * close to  Level 3 BLAS  performance  in the panel factorization. On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )     /* end of recursion */
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+/* In row PANEL->prow (curr != 0) Aptr starts at row ICOFF, else row 0 */
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * Lv1^T
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                   VSIP_MAT_TRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrT( PANEL, m, jb, ioff, WORK );
+/* n > 0: columns remain, update then solve the next n-by-jb L1 block */
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 );
+/*
+ * Create the matrix subviews: Av2 (n x jb) -= Av1 (n x jj) * Lv1
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff,  n, jb );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF,   ioff, jj, jb );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                      VSIP_MAT_NTRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb,
+                    jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                    Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c
new file mode 100644
index 000000000..4dbc13b44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllN recursively  factorizes  a panel  of columns using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  makes it possible to get
+ * close to  Level 3 BLAS  performance  in the panel factorization. On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )     /* end of recursion */
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+/* In row PANEL->prow (curr != 0) Aptr starts at row ICOFF, else row 0 */
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit,
+                 jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ),
+                 n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * Lv1 (jj x jb)
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m,  jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllN( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c
new file mode 100644
index 000000000..887caeb87
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllT recursively  factorizes  a panel of columns  using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+ * transpose form.
+ *
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  makes it possible to get
+ * close to  Level 3 BLAS  performance  in the panel factorization. On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )     /* end of recursion */
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+/* In row PANEL->prow (curr != 0) Aptr starts at row ICOFF, else row 0 */
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr,
+                 jj, 0, n0 ), n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * Lv1^T
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb,  jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av1 );
+      (void) vsip_mdestroy_d( Av2 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllT( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c
new file mode 100644
index 000000000..22f105cf4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlN recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel (recursive call) - Replicated solve - Local update
+ */
+      HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                 HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; } /* local update starts below these jb rows */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c
new file mode 100644
index 000000000..a77301b9b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlT recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel (recursive call) - Replicated solve - Local update
+ */
+      HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; } /* local update starts below these jb rows */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av1 is m-by-jb, Av2 is m-by-n, Lv1 is n-by-jb
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c
new file mode 100644
index 000000000..b917a6525
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_equil
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_TRANS             TRANS,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   int *                            IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1,
+   int *                            IWORK
+)
+#else
+void HPL_equil
+( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_TRANS             TRANS;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   int *                            IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_equil equilibrates the  local  pieces  of U, so that on exit from
+ * this function, pieces of U contained in every process row are of the
+ * same size. This phase makes the rolling phase optimal.  In addition,
+ * this  function probes  for  the  column panel L and forwards it when
+ * possible.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be equilibrated) information.
+ *
+ * TRANS   (global input)                const enum HPL_TRANS
+ *         On entry, TRANS specifies whether  U  is stored in transposed
+ *         or non-transposed form.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of rows or columns of  U. N
+ *         must be at least 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+ *         non-transposed form, and MAX(1,N) otherwise.
+ *
+ * IPLEN   (global input)                int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension NPROW+1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, ip, ipU, ipcur, iprow, iptgt, lastrow,
+                              left, npm1, nprow, ll, llU, llcur, lltgt,
+                              right, slen, smax, smin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; /* early exit when nprow <= 2 */
+/*
+ * If the current distribution of the pieces of U is already optimal for
+ * the rolling phase, then return immediately. The  optimal distribution
+ * is such that ip processes have smax items and the remaining processes
+ * only have smin items. Another way to check this is to verify that all
+ * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */
+   smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow;
+   ip   = slen - nprow * ( smin = slen / nprow );
+
+   iprow = 0;
+   do
+   {
+      ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++;
+   } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) );
+
+   if( iprow == nprow ) return;
+/*
+ * Now,  we are sure  the distribution of the pieces of U is not optimal
+ * with respect to the rolling phase,  thus  perform  equilibration.  Go
+ * through the list of processes:  Processes  that have rows that do not
+ * belong to them  with respect to the optimal mapping spread them  in a
+ * logarithmic fashion. To simplify a little bit the implementation, and
+ * mainly the packing, a source process row spreads its data to its left
+ * first, and then to its right.
+ */
+   IWORK[nprow] = slen;
+
+   for( iprow = 0; iprow < nprow; iprow++ )
+   {
+      llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] );
+      if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax;      }
+      else             { lltgt = smin; iptgt = iprow * smin + ip; }
+
+      left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU );
+/*
+ * If I have something to spread to either the left or the right
+ */
+      if( ( llU > 0 ) && ( left || right ) )
+      {        /* Figure out how much every other process should have */
+
+         ipcur = ipU; llcur = llU;
+
+         for( i = 0; i < nprow; i++ )
+         {
+            if( i < ip ) { lltgt = smax; iptgt = i * smax;      }
+            else         { lltgt = smin; iptgt = i * smin + ip; }
+            lastrow = iptgt + lltgt - 1;
+
+            if( ( lastrow >= ipcur ) && ( llcur > 0 ) )
+            { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; }
+            else { ll = 0; }
+
+            IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur;
+         }
+/*
+ * Equilibration phase
+ */
+         if( TRANS == HplNoTrans )
+         {
+            if( left  )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+         else
+         {
+            if( left  )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+      }
+   }
+/*
+ * Finally update  IPLEN  with the indexes corresponding to the new dis-
+ * tribution of U - IPLEN[nprow] remains unchanged.
+ */
+   for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip );
+/*
+ * End of HPL_equil
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c
new file mode 100644
index 000000000..0715159bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c
@@ -0,0 +1,185 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_logsort
+(
+   const int                        NPROCS,
+   const int                        ICURROC,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_logsort
+( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
+   const int                        NPROCS;
+   const int                        ICURROC;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_logsort computes an array  IPMAP  and  its inverse  IPMAPM1  that
+ * contain the logarithmic sorted processes id with respect to the local
+ * number of rows of  U  that they own. This is necessary to ensure that
+ * the logarithmic spreading of U is optimal in terms of number of steps
+ * and communication volume as well.  In other words, the largest pieces
+ * of U will be sent a minimal number of times.
+ *
+ * Arguments
+ * =========
+ *
+ * NPROCS  (global input)                const int
+ *         On entry, NPROCS  specifies the number of process rows in the
+ *         process grid. NPROCS is at least one.
+ *
+ * ICURROC (global input)                const int
+ *         On entry, ICURROC is the source process row.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROCS+1,  such that
+ *         IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+ *         that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+ *         rows of U  in the processes before process IPMAP[i] after the
+ *         sort,  with  the convention that  IPLEN[NPROCS] is  the total
+ *         number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+ *         IPLEN[i] is  the  number of rows of A that should be moved to
+ *         the process IPMAP[i].  IPLEN  is such that the number of rows
+ *         of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myroc] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROCS)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dist, i, ip, iplen_i, iplen_j, itmp, j, k;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the logarithmic distance of j from 0 (the number of bits set
+ * in j) and its maximum, dist.  IPMAPM1 is used as a work array here.
+ */
+   for( j = 0, dist = 0; j < NPROCS; j++ )
+   {
+      IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0;
+      do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); /* count set bits */
+      IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp;
+   }
+/*
+ * Rotate IPLEN[1..NPROCS] left by ICURROC places (one place per pass),
+ * so that IPLEN[1] is now what used to be IPLEN[ICURROC+1].  IPMAP was
+ * already set in the loop above (presumably IPMAP[0] is ICURROC).
+ */
+   for( j = 0; j < ICURROC; j++ )
+   {
+      for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i];
+      IPLEN[NPROCS] = itmp;
+   }
+/*
+ * logarithmic sort: larger pieces move to smaller logarithmic distances
+ */
+   for( k = 1; k <= dist; k++ )
+   {
+      for( j = 1; j < NPROCS; j++ )
+      {
+         if( IPMAPM1[j] == k )       /* slot j is at distance exactly k */
+         {
+            for( i = 2; i < NPROCS; i++ )
+            {
+               if( k < IPMAPM1[i] )  /* slot i is farther away than j   */
+               {
+                  iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1];
+
+                  if( iplen_j < iplen_i ) /* swap lengths and owners    */
+                  {
+                     IPLEN[j+1] = iplen_i;  IPLEN[i+1] = iplen_j;
+                     itmp       = IPMAP[j]; IPMAP[j]   = IPMAP[i];
+                     IPMAP[i]   = itmp;
+                  }
+               }
+            }
+         }
+      }
+   }
+/*
+ * Turn the lengths in IPLEN into running sums; IPMAPM1 = inverse IPMAP
+ */
+   IPLEN[0] = 0;
+
+   for( i = 0; i < NPROCS; i++ )
+   {
+      IPMAPM1[ IPMAP[i] ] = i;
+      IPLEN[i+1]         += IPLEN[i];
+   }
+/*
+ * End of HPL_logsort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c
new file mode 100644
index 000000000..ced74269e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with  or  without look-ahead.  The  lower  triangular  factor is left
+ * unpivoted and the pivots are not returned. The right hand side is the
+ * N+1 column of the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( A->n <= 0 ) return;      /* quick return; A->info is left untouched */
+
+   A->info = 0;                 /* cleared before the factorization starts */
+
+   if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) )
+   {
+      HPL_pdgesv0(  GRID, ALGO, A );  /* depth 0 or a single process column */
+   }
+   else
+   {
+      HPL_pdgesvK2( GRID, ALGO, A );  /* presumably the look-ahead variant  */
+   }
+/*
+ * Solve upper triangular system (skipped when A->info is non-zero)
+ */
+   if( A->info == 0 ) HPL_pdtrsv( GRID, A );
+/*
+ * End of HPL_pdgesv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c
new file mode 100644
index 000000000..d79b6fa55
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv0
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv0
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv0 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * without look-ahead. The lower triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, j, jb, n, nb, tag=MSGID_BEGIN_FACT,
+                              test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N = A->n ) <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+ 
+   HPL_pdupdate = ALGO->upfun; nb = A->nb;
+/*
+ * Allocate a panel list of length 1 - Allocate panel[0] resources
+ */
+   panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); }
+
+   HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag,
+                    &panel[0] );
+/*
+ * Loop over the columns of A, one panel of at most nb columns at a time
+ */
+   for( j = 0; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb ); /* n: columns left, jb: panel width */
+#ifdef HPL_PROGRESS_REPORT
+      /* process 0,0 only, not the first panel: 2/3*(N^3-n^3) flops so far */
+      if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Release panel resources - re-initialize panel data structure
+ */
+      (void) HPL_pdpanel_free( panel[0] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] );
+/*
+ * Factor and broadcast current panel - update
+ */
+      HPL_pdfact(               panel[0] );
+      (void) HPL_binit(         panel[0] );
+      do                               /* retry until HPL_SUCCESS is set */
+      { (void) HPL_bcast(       panel[0], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(         panel[0] );
+      HPL_pdupdate( NULL, NULL, panel[0], -1 ); /* NOTE(review): -1 presumably = all columns - confirm */
+/*
+ * Update message id for next factorization
+ */
+      tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Release panel resources and panel list
+ */
+   (void) HPL_pdpanel_disp( &panel[0] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesv0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c
new file mode 100644
index 000000000..ff1958cfc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c
@@ -0,0 +1,222 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+#ifdef STDC_HEADERS
+void HPL_pdgesvK1
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK1
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK1 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb; 
+
+   if( N <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
+/*
+ * Create the first depth panels; jstart = columns covered by them
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; } /* jj is never read here */
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel - use long topology for those
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do                               /* retry until HPL_SUCCESS is set */
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-1-k panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb ); /* n: columns left, jb: panel width */
+#ifdef HPL_PROGRESS_REPORT
+      /* process 0,0 only, not the first panel: 2/3*(N^3-n^3) flops so far */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Allocate current panel resources - Finish latest update - Factor and
+ * broadcast current panel
+ */
+      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
+ 
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(    panel[depth] );       /* factor current panel */
+      }
+      else { nn = 0; }          /* no early partial update in this column */
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Release oldest panel resources - circular shift of the panel pointers
+ * Go to the next process row and column -  update  the message ids  for
+ * broadcast
+ */
+      (void) HPL_pdpanel_disp( &panel[0] );
+      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+ 
+      if( mycol == icurcol ) { jj += jb; nq -= jb; } /* jj is never read here */
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); /* presumably the local share of column N - confirm */
+   for( k = 0; k < depth; k++ )
+   {
+      HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp( &panel[k] );
+   }
+ 
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c
new file mode 100644
index 000000000..dec506ab9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c
@@ -0,0 +1,231 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesvK2
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK2
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK2 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * p, * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate; 
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb;
+
+   if( N <= 0 ) return;             /* empty matrix: nothing to factor */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Create the last panel, panel[depth]
+ */
+   HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart,
+                    jstart, tag, &panel[depth] );
+   tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );    /* retry until HPL_SUCCESS is set */
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-k-1 panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Initialize current panel - Finish latest update, Factor and broadcast
+ * current panel
+ */
+      (void) HPL_pdpanel_free( panel[depth] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] );
+
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(       panel[depth] );    /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate( panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Circular shift of the panel pointers:
+ * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp;
+ *
+ * Go to next process row and column - update the message ids for broadcast
+ */
+      p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+      panel[depth] = p;
+
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp(  &panel[k] );
+   }
+   (void) HPL_pdpanel_disp( &panel[depth] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK2
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c
new file mode 100644
index 000000000..b4433e1be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c
@@ -0,0 +1,432 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;
+/*
+ * Allocate space for temporary W (ldW * jb doubles + alignment padding)
+ */
+   vptr = (void*)malloc( 
+      ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts  lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of the algorithm, it is necessary to update the list 
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c
new file mode 100644
index 000000000..7a9764c09
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c
@@ -0,0 +1,433 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;   /* n data entries + 1 leading index entry per W column */
+/*
+ * Allocate space for temporary W (ldW * jb)
+ */
+   vptr = (void*)malloc( ( (size_t)(align) + 
+                           ((size_t)(jb) * (size_t)(ldW))) * 
+                           sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counter parts lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of  the algorithm, it is necessary to update the list
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );   /* 1 iff nprow != ip2 */
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c
new file mode 100644
index 000000000..31f219840
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;  /* -1: not yet read from PANEL->algo */
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether equilibration should be performed or not
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel). lindxA and lindxAU
+ * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1
+ * are of size nprow,  permU is of length jb, and  this function needs a 
+ * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): 
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01N was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
+                     0, LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
+                 ipmap, ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp00N( jb, n, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c
new file mode 100644
index 000000000..0c4de2669
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1; /* -1: PANEL->algo->equil not read yet */
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  n /* passed right after U in every call below */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do (taken before the timer call)
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide once (equil is static) whether equilibration is to be performed
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Carve the index arrays out of PANEL->IWORK, in this order: iflag (1),
+ * ipl (1), ipID (4*jb), ipA (1), lindxA (2*jb), lindxAU (2*jb),  iplen
+ * (nprow+1), ipmap (nprow), ipmapm1 (nprow), permU (jb),  then iwork, a
+ * workspace of size max( 2 * jb (plindx1), nprow+1(equil)). In total: 
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01T was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];      /* k is reused here as an index into iplen */
+      HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0,
+                     iplen[k], LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration (skipped when PANEL->algo->equil was read as zero)
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap,
+                 ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp10N( n, jb, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c
new file mode 100644
index 000000000..d2135130a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c
@@ -0,0 +1,296 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtrsv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_pmat *                     AMAT
+)
+#else
+void HPL_pdtrsv
+( GRID, AMAT )
+   HPL_T_grid *                     GRID;
+   HPL_T_pmat *                     AMAT;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtrsv solves an upper triangular system of linear equations.
+ *  
+ * The rhs is the last column of the N by N+1 matrix A. The solve starts
+ * in the process  column owning the  Nth  column of A, so the rhs b may
+ * need to be moved one process column to the left at the beginning. The
+ * routine therefore needs  a column  vector in every process column but
+ * the one owning  b. The result is  replicated in all process rows, and
+ * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ *  
+ * The algorithm uses decreasing one-ring broadcast in process rows  and
+ * columns  implemented  in terms of  synchronous communication point to
+ * point primitives.  The  lookahead of depth 1 is used to minimize  the
+ * critical path. This entire operation is essentially ``latency'' bound
+ * and an estimate of its running time is given by:
+ *  
+ *    (move rhs) lat + N / ( P bdwth ) +            
+ *    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+ *               gam2 N^2 / ( P Q ),                
+ *  
+ * where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+ * There are  N / NB  diagonal blocks. One must exchange  2  messages of
+ * length NB to compute the next  NB  entries of the vector solution, as
+ * well as performing a total of N^2 floating point operations.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * AMAT    (local input/output)          HPL_T_pmat *
+ *         On entry,  AMAT  points  to the data structure containing the
+ *         local array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Ccomm, Rcomm;
+   double                     * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL,
+                              * XR=NULL, * Xd=NULL, * Xdprev=NULL,
+                              * W=NULL;
+   int                        Alcol, Alrow, Anpprev, Anp, Anq, Bcol,
+                              Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid,
+                              Wfr=0, colprev, kb, kbprev, lda, mycol,
+                              myrow, n, n1, n1p, n1pprev=0, nb, npcol,
+                              nprow, rowprev, tmp1, tmp2;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( n = AMAT->n ) <= 0 ) return; /* quick return, timer not yet toggled */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+   nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X;
+
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV;
+   Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1;
+   GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 );
+/*
+ * Move the rhs in the process column owning the last column of A.
+ */
+   Mnumroc( Anp, n, nb, nb, myrow, 0, nprow );
+   Mnumroc( Anq, n, nb, nb, mycol, 0, npcol );
+
+   tmp1  = ( n - 1 ) / nb;                  /* index of the last nb-block */
+   Alrow = tmp1 - ( tmp1 / nprow ) * nprow; /* i.e. tmp1 modulo nprow     */
+   Alcol = tmp1 - ( tmp1 / npcol ) * npcol; /* i.e. tmp1 modulo npcol     */
+   kb    = n    - tmp1 * nb;                /* size of that last block    */
+
+   Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda );
+   Mindxg2p( n, nb, nb, Bcol, 0, npcol );
+
+   if( ( Anp > 0 ) && ( Alcol != Bcol ) )
+   {
+      if( mycol == Bcol  )
+      { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); }
+      else if( mycol == Alcol )
+      { (void) HPL_recv( XC, Anp, Bcol,  Rmsgid, Rcomm ); }
+   }
+   Rmsgid = ( Rmsgid + 2 >
+              MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 );
+   if( mycol != Alcol )
+   { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; }
+/*
+ * Set up lookahead
+ */
+   n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb );
+   if( Anp > 0 )
+   {
+      W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) );
+      if( W == NULL )
+      { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); }
+      Wfr = 1;                        /* W is released before returning */
+   }
+
+   Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda );
+   tmp1    = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+   if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); }
+   if( mycol == Alcol )
+   {
+      Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq );
+      if( myrow == Alrow )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, Xd, 1 );
+      }
+   }
+
+   rowprev = Alrow; Alrow = MModSub1( Alrow, nprow );
+   colprev = Alcol; Alcol = MModSub1( Alcol, npcol );
+   kbprev  = kb; n -= kb;
+   tmp1    = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+/*
+ * Start the operations: one iteration per remaining block of size nb
+ */
+   while( n > 0 )
+   {
+      if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; }
+      if( myrow == Alrow ) { Anp -= kb; }
+/*
+ * Broadcast  (decreasing-ring)  of  previous solution block in previous
+ * process column,  compute  partial update of current block and send it
+ * to current process column.
+ */
+      if( mycol == colprev )
+      {
+/*
+ * Send previous solution block in process row above
+ */
+         if( myrow == rowprev )
+         {
+            if( GridIsNot1xQ )
+               (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                                Cmsgid, Ccomm );
+         }
+         else
+         {
+            (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+         } 
+/*
+ * Compute partial update of previous solution block and send it to cur-
+ * rent column
+ */
+         if( n1pprev > 0 )
+         {
+            tmp1 = Anpprev - n1pprev;
+            HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev,
+                       -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone,
+                       XC+tmp1, 1 );
+            if( GridIsNotPx1 )
+               (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm );
+         }
+/*
+ * Finish  the (decreasing-ring) broadcast of the solution block in pre-
+ * vious process column
+ */
+         if( ( myrow != rowprev ) &&
+             ( myrow != MModAdd1( rowprev, nprow ) ) )
+            (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+      }
+      else if( mycol == Alcol )
+      {
+/*
+ * Current  column  receives  and accumulates partial update of previous
+ * solution block
+ */
+         if( n1pprev > 0 )
+         {
+            (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm );
+            HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 );
+         }
+      }
+/*
+ * Solve current diagonal block and copy the result into XR
+ */
+      if( ( mycol == Alcol ) && ( myrow == Alrow ) )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 );
+      }
+/*
+ * Finish previous update (rows not covered by the n1pprev-row part above)
+ */
+      if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
+         HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone,
+                    Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 );
+/*
+ * Save info of current step and update info for the next step
+ */
+      if( mycol == Alcol ) { Xdprev   = Xd; Aprev = Aptr; }
+      if( myrow == Alrow ) { Anpprev -= kb; }
+      rowprev = Alrow; colprev = Alcol;
+      n1pprev = n1p;   kbprev  = kb; n -= kb;
+      Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol );
+      tmp1  = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+      MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+      Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? 
+                 MSGID_BEGIN_PTRSV   : Rmsgid+2 );
+      Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ?
+                 MSGID_BEGIN_PTRSV+1 : Cmsgid+2 );
+   }
+/*
+ * Replicate last solution block
+ */
+   if( mycol == colprev )
+      (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev,
+                            Ccomm );
+
+   if( Wfr  ) free( W  );
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+/*
+ * End of HPL_pdtrsv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c
new file mode 100644
index 000000000..7e31ddcd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c
@@ -0,0 +1,442 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c
new file mode 100644
index 000000000..faa3ef207
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNT broadcasts (forwards) the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing submatrix
+ * using the panel PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. When PBCST is
+ *         NULL on entry, IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position (capped at PANEL->nq). NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Initial broadcast probe; its outcome (test) drives the loops below
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update at most nb columns at a time: row swaps, dtrsm, then dgemm
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Swap algorithm selection (cached in statics) - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */ 
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c
new file mode 100644
index 000000000..a16aa26a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. When PBCST is
+ *         NULL on entry, IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c
new file mode 100644
index 000000000..81e6cc4b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTT broadcasts - forwards the panel PBCST and, at the same
+ * time, applies the row interchanges and updates part of the trailing
+ * submatrix (using the panel PANEL).
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. Otherwise (PBCST
+ *         is NULL), IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position (capped at PANEL->nq); NN < 0 selects PANEL->nq.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n   /* leading dimension used with Uptr */
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * Nothing to update: call HPL_bcast until *IFLAG is HPL_SUCCESS.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update at most nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe when PBCST is not NULL (should always
+ * be HPL_SUCCESS, the panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c
new file mode 100644
index 000000000..bf7cc4503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/*
+ * HPL_perm  --  merge two index arrays into an in-place swap sequence
+ * ====================================================================
+ *
+ * Goal: realise  U[LINDXAU[i]] := U[LINDXA[i]]  for every i in [0..N)
+ * as an in-place permutation.  The routine proceeds in two stages:
+ *
+ *   (1) LINDXA is inverted (through IWORK) and composed with LINDXAU;
+ *       the composed index array is stored back into LINDXA.
+ *   (2) LINDXAU is overwritten with a sequence of swap partners, so
+ *       that afterwards the permutation is obtained by executing, for
+ *       i = 0, 1, ..., N-1 in that order, swap( U[i], U[LINDXAU[i]] ).
+ *
+ * Arguments
+ * ---------
+ *
+ * N        (global input)            const int
+ *          Length of LINDXA, LINDXAU and IWORK; at least zero.
+ *
+ * LINDXA   (global input/output)     int *
+ *          Entry: the N source indexes.
+ *          Exit : the combined (composed) index array.
+ *
+ * LINDXAU  (global input/output)     int *
+ *          Entry: the N target indexes.
+ *          Exit : the swap sequence, to be applied in increasing i.
+ *
+ * IWORK    (workspace)               int *
+ *          Scratch array of dimension N.
+ *
+ * Note: the two linear searches of stage 2 carry no upper bound; they
+ * stop only once the value looked for has been found.
+ * --------------------------------------------------------------------
+ */
+   int                        dst, pos, slot, tmp;
+/*
+ * Stage 1 - invert LINDXA into IWORK, compose with LINDXAU, reset IWORK
+ */
+   for( pos = 0; pos < N; pos++ ) IWORK[LINDXA[pos]] = pos;
+   for( pos = 0; pos < N; pos++ )
+   {
+      LINDXA[pos] = LINDXAU[IWORK[pos]];
+      IWORK[pos]  = pos;
+   }
+/*
+ * Stage 2 - derive the swap partner of every destination slot.
+ */
+   for( dst = 0; dst < N; dst++ )
+   {
+      /* first position of the combined array that maps to dst */
+      for( pos = 0; LINDXA[pos] != dst; pos++ ) { ; }
+      /* current location of that position inside IWORK */
+      for( slot = 0; IWORK[slot] != pos; slot++ ) { ; }
+      /* exchange IWORK[dst] and IWORK[slot], then record the partner */
+      tmp          = IWORK[dst];
+      IWORK[dst]   = IWORK[slot];
+      IWORK[slot]  = tmp;
+      LINDXAU[dst] = slot;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array  IPID  that contains the source and final
+ * destination  of  matrix rows  resulting  from  the  application  of N
+ * interchanges  as computed by the  LU  factorization  with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long,  since  there are N rows to swap and
+ * broadcast. The length of this array  must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing  IPID  goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *  
+ * For every row src IA + i with i in [0..N) to be swapped with row  dst
+ * such that dst is given by DPIV[i]:
+ *  
+ * Is row  src  the destination  of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *     Yes:  update  this destination  with dst.  For  example,  if  the
+ * pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+ * was thought so far ...
+ *     No :  add  the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *  
+ * Is row  dst  different  from src the destination of a previous row of
+ * the current block, i.e., is there k odd such that IPID(k) is equal to
+ * dst ?
+ *     Yes:  update  IPID(k) with src.  For example,  if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+ * fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *     No : add  the  pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *  
+ * Note that when src is equal to dst, the pair (dst,src)  should not be
+ * added to  IPID  in  order  to avoid duplicated entries in this array.
+ * During  the construction of the array  IPID,  we  make  sure that the
+ * first 2*N entries are such that IPID(k) with k odd is equal to IA+k/2.
+ * For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information (DPIV, jb and ia are read here).
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in  IPID.  K is at
+ *         least 2*N, and at most 4*N, where N is PANEL->jb.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N.  On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting  from  the  application of the  N  interchanges  as
+ *         specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst  = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;
+
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K;     off = 2; IPID[lst] = src; }
+         else        { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {
+         fndd = 0;
+         do
+         {
+            if     ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off  = 2; }
+         else        {                 IPID[fnds] = dst; off  = 0; }
+         if( !fndd ) { lst = *K+off;   IPID[lst ] = dst; off += 2; }
+         else        { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Enforce IPID[2*i+1] == ia + i: move the pair at lst into slot 2*i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j  ]; IPID[j  ] = IPID[lst  ]; IPID[lst  ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.
+ *  
+ * On entry, the array  IPID  of length K is such that the row of global
+ * index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+ * Let  IA  be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index  IPID(2*k+1).  The question then, is to determine
+ * which rows should ultimately be part of U.
+ *  
+ * First, some rows of the process ICURROW  may be swapped locally.  One
+ * of these rows belongs to U, the other one belongs to my local piece of
+ * A.  The other  rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent  along,
+ * and  grabbed by the other processes  as we  progress in the  exchange
+ * phase.
+ *  
+ * So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N,  this row is locally swapped and should be copied into  U  at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is at least N,  then the row IPID(2*i)  should  be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *  
+ * If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to  A  remotely.  Since this  process will however send this array U,
+ * this row is  copied into  U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+ * IPID(2*k1+1)-IA.
+ *  
+ * It is thus  important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+ * doing so,  U  is formed, and the local copy  is performed in just one
+ * sweep.
+ *  
+ * Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU  contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *  
+ * Second, if I am not the current row process  ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed,  they  are swapped with one
+ * row  of  the  current  block  of rows,  and  the  main  factorization
+ * algorithm proceeds one row after each other.  The processes different
+ * from ICURROW,  should  exchange and accumulate  those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *  
+ * In processes different from  ICURROW,  the  initial packing algorithm
+ * proceeds as follows.  Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+ * and  this particular row of U should be first copied into my piece of
+ * A, at A(il,:),  where  il is the  local row  index  corresponding  to
+ * IPID(2*i). Now, initially, this row will be packed into workspace, say
+ * as the kth row of  that  work array.  The  following  algorithm  sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in  A  where  this
+ * row of U should be copied, i.e il.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is not in ICURROW,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *  
+ * Since we are at it, we also globally figure  out  how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to  figure it on  the fly  during the  bi-directional exchange phase.
+ * This information is kept in the array  LLEN  of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (local output)                int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+ *         contains how many rows every process has.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges.
+ */
+   myrow   = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow;        jb    = PANEL->jb;
+   nb      = PANEL->nb;          ia    = PANEL->ia;
+   iroff   = PANEL->ii;
+
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               LINDXAU[ip] = IPID[j-1] - ia;
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.  In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit,  IPA  specifies  the number of rows that the current
+ *         process row has that either belong to U  or should be swapped
+ *         with remote rows of A.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in  the  processes  before  process  IPMAP[i]  after the sort
+ *         with the convention that IPLEN[nprow]  is the total number of
+ *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0..NPROW)
+ *
+ * PERMU   (global output)               int *
+ *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+ *         contains  a sequence of permutations,  that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
+   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
+   iroff = PANEL->ii;          icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;
+ 
+   if( myrow == icurrow )
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+ 
+         if( srcrow == icurrow )
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+ 
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;
+ 
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+ 
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
+               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[ip] is the local index of the row of A that belongs in U
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[ipU] is the local (current) position  index in U
+ * PERMU[ipU] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutations
+ * that needs to be applied to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0; 
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx10
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_plindx10
+( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx10 computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+ * contain the logarithmic mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K  read as consecutive
+ *         pairs: for even i,  IPID[i] is a src and  IPID[i+1] the final
+ *         destination resulting from the application of the interchanges.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in the processes  before process IPMAP[i] after the sort, with
+ *         the convention that IPLEN[nprow] is the total number of rows.
+ *         In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+ *         rows of  A  that should be moved for each process.  IPLEN  is
+ *         such that the number of rows of the source process row can be
+ *         computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+ *         this  array are sorted  so  that  the quantities IPLEN[i+1] -
+ *         IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, i, ia, icurrow, jb, nb,
+                              nprow, src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+   nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb;
+   ia    = PANEL->ia;          icurrow = PANEL->prow;
+/*
+ * Compute  redundantly  the local number of rows  that each process has
+ * and that belong to U in IPLEN[1 .. nprow]  (row r counted in IPLEN[r+1])
+ */
+   for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0;
+/* src must be in row icurrow; a same-row dst counts only if dst-ia < jb */
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+      if( srcrow == icurrow )
+      {
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+         if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++;
+      }
+   }
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ * (the inverse of IPMAP)
+ */
+   HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * End of HPL_plindx10
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c
new file mode 100644
index 000000000..e68590a01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c
@@ -0,0 +1,225 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollN
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollN rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of  U.  N must be
+ *         at least zero.  The function returns immediately if N <= 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,IPLEN[NPROW]).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[i] is the absolute coordinate of the i-th sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1 is an array of dimension NPROW. This  array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype               type[2];
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR,
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 exchange steps with partners prev and next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+/* Parity of mydist+k picks the partner and the IPLEN segments exchanged */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il      = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+/* Post the non-blocking receive: N blocks of lengthR doubles, stride LDU */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE,
+                                      &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+      }
+/* Blocking send: N blocks of lengthS doubles, stride LDU; type freed after */
+      if( lengthS > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE,
+                                      &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_SEND] );
+      }
+/* Complete the pending receive, then release its datatype */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_RECV] );
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+/* ierr holds the first failure; guarded MPI calls after it were skipped */
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); }
+/*
+ * End of HPL_rollN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c
new file mode 100644
index 000000000..0160c9412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c
@@ -0,0 +1,259 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollT
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollT rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the local number of rows of  U.  N must
+ *         be at least zero.  The function returns immediately if N <= 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N); the active code assumes LDU == N.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[i] is the absolute coordinate of the i-th sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1 is an array of dimension NPROW. This  array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype               type[2];
+#endif
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR, 
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 exchange steps with partners prev and next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+/* Parity of mydist+k picks the partner and the IPLEN segments exchanged */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+/* Post the non-blocking receive first; it is completed after the send */
+      if( lengthR > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
+                                           &type[I_RECV] );
+            else
+               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
+                                       &type[I_RECV] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+#else
+/*
+ * LDU is taken to be N: receive lengthR*LDU contiguous doubles, no datatype
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
+                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
+#endif
+      }
+/* Blocking send of this step's segment to the same partner */
+      if( lengthS > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
+                                             &type[I_SEND] );
+            else
+               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
+                                         &type[I_SEND] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_SEND] );
+#else
+/*
+ * LDU is taken to be N: send lengthS*LDU contiguous doubles, no datatype
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
+                               MPI_DOUBLE, partner, Cmsgid, comm );
+#endif
+      }
+/* Complete the pending receive posted above */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#if 0
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_RECV] );
+#endif
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+/* ierr holds the first failure; guarded MPI calls after it were skipped */
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
+/*
+ * End of HPL_rollT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c
new file mode 100644
index 000000000..202611e7f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c
@@ -0,0 +1,303 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadN
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadN spreads the local array containing local pieces of U, so
+ * that on exit from this function, a piece of  U  is contained in every
+ * process row. The array IPLEN contains the number of rows of U,  that
+ * should be spread on any given process row. This function also probes
+ * for the presence of the column panel PBCST. In case of success, this
+ * panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+ * mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies  the  local number of columns of U. N
+ *         must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that  IPLEN[i]  is the number of rows of U located in
+ *         the processes before process IPMAP[i],  with  the  convention
+ *         that IPLEN[nprow] is the total number of rows. In other words
+ *         IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+ *         should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype              type;
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left: sorted positions 0..SRCDIST take part, others return
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1, mydist = steps from source */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];      /* assigned but not read in this branch */
+
+      do
+      {
+         mask ^= ip2;                           /* mask is now ip2 - 1 */
+/* Only processes whose distance mydist is a multiple of ip2 are active now */
+         if( ( mydist & mask ) == 0 )
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); 
+/* ibuf: IPLEN offset of the piece exchanged at this step, lbuf: its length */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )               /* bit ip2 set: receive */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )       /* send if partner exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+/* Halve the step ip2 and move il to the IPLEN index used at the next step */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1, mydist = steps from source */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;
+      lgth    = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U to the right - offset the IPLEN and IPMAP arrays by SRCDIST
+ */
+      do
+      {
+         mask ^= ip2;                           /* mask is now ip2 - 1 */
+/* Only processes whose distance mydist is a multiple of ip2 are active now */
+         if( ( mydist & mask ) == 0 )
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+/* k >= nprow: use lgth = IPLEN[SRCDIST+nprow] instead of IPLEN[SRCDIST+k] */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )               /* bit ip2 set: receive */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )       /* send if partner exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+/* Halve the step ip2 and move il to the IPLEN index used at the next step */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+/* Every MPI call above is skipped once ierr != MPI_SUCCESS: report it here */
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
+/*
+ * End of HPL_spreadN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c
new file mode 100644
index 000000000..1adf93507
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c
@@ -0,0 +1,372 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadT
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadT spreads  the local array containing local pieces of U, so
+ * that on exit from this function, a piece of  U  is contained in every
+ * process row.  The array  IPLEN  contains the number of columns of U,
+ * that should be spread on any given process row.  This function  also
+ * probes for the presence of  the column panel  PBCST.  If  available,
+ * this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+ * probing mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the local number of rows of U. N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N); active code assumes LDU equals N.
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that  IPLEN[i]  is the number of columns of U located
+ *         in the processes before process IPMAP[i], with the convention
+ *         that  IPLEN[nprow]  is the total number of columns.  In other
+ *         words IPLEN[i+1] - IPLEN[i] is the local number of columns of
+ *         U that should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype              type;
+#endif
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left: sorted positions 0..SRCDIST take part, others return
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1, mydist = steps from source */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];      /* assigned but not read in this branch */
+
+      do
+      {
+         mask ^= ip2;                           /* mask is now ip2 - 1 */
+/* Only processes whose distance mydist is a multiple of ip2 are active now */
+         if( ( mydist & mask ) == 0 )
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );
+/* ibuf: IPLEN offset of the piece exchanged at this step, lbuf: its length */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )               /* bit ip2 set: receive */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case LDU is N: the lbuf*N doubles are contiguous, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow )       /* send if partner exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case LDU is N: the lbuf*N doubles are contiguous, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+/* Halve the step ip2 and move il to the IPLEN index used at the next step */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1, mydist = steps from source */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;
+/*
+ * Spread to the right - offset the IPLEN and IPMAP arrays by SRCDIST
+ */
+      lgth = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U
+ */
+      do
+      {
+         mask ^= ip2;                           /* mask is now ip2 - 1 */
+/* Only processes whose distance mydist is a multiple of ip2 are active now */
+         if( ( mydist & mask ) == 0 )
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+/* k >= nprow: use lgth = IPLEN[SRCDIST+nprow] instead of IPLEN[SRCDIST+k] */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )               /* bit ip2 set: receive */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case LDU is N: the lbuf*N doubles are contiguous, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow )       /* send if partner exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case LDU is N: the lbuf*N doubles are contiguous, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+/* Halve the step ip2 and move il to the IPLEN index used at the next step */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+/* Every MPI call above is skipped once ierr != MPI_SUCCESS: report it here */
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
+/*
+ * End of HPL_spreadT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c
new file mode 100644
index 000000000..c14ef0fd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dmatgen
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_dmatgen
+( M, N, A, LDA, ISEED )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dmatgen generates (or regenerates) the M-by-N matrix A by filling
+ * it, one column after the other, with successive HPL_rand() values.
+ *
+ * The random sequence is set up first:  the seed and the HPL_MULT0/1,
+ * HPL_IADD0/1 constants are handed to HPL_xjumpm, whose three outputs
+ * are then installed through HPL_setran with modes 0, 1 and 2.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input)                       const int
+ *         Number of rows of A.  Nothing is done when M <= 0.
+ *
+ * N       (input)                       const int
+ *         Number of columns of A.  Nothing is done when N <= 0.
+ *
+ * A       (output)                      double *
+ *         Array of dimension (LDA,N).  On exit, the first M entries of
+ *         each column hold generated values; the remaining LDA-M ones
+ *         are skipped and left untouched.
+ *
+ * LDA     (input)                       const int
+ *         Leading dimension of A (stride between columns), >= max(1,M).
+ *
+ * ISEED   (input)                       const int
+ *         Seed; stored in the first entry of the 2-int seed given to
+ *         HPL_xjumpm, the second entry being set to zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        seed[2], mul[2], add[2];
+   int                        start[2], jmul[2], jadd[2];
+   int                        row, col, skip = LDA - M;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !( ( M > 0 ) && ( N > 0 ) ) ) return;
+/*
+ * Position the random sequence from the seed
+ */
+   seed[0] = ISEED;     seed[1] = 0;
+   mul [0] = HPL_MULT0; mul [1] = HPL_MULT1;
+   add [0] = HPL_IADD0; add [1] = HPL_IADD1;
+   HPL_xjumpm( 1, mul, add, seed, start, jmul, jadd );
+   HPL_setran( 0, start ); HPL_setran( 1, jmul ); HPL_setran( 2, jadd );
+/*
+ * Fill the M rows of each column, then hop over the LDA-M padding
+ */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ ) *(A++) = HPL_rand();
+      A += skip;
+   }
+/*
+ * End of HPL_dmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c
new file mode 100644
index 000000000..4d4dc4db5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_jumpit
+(
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM
+)
+#else
+void HPL_jumpit
+( MULT, IADD, IRANN, IRANM )
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_jumpit advances the random sequence in one step from X(n), held
+ * in IRANN, to X(m), returned in IRANM, by evaluating the affine map
+ * X(m) = A * X(n) + C  with  A  and  C  supplied in  MULT  and  IADD.
+ * The pair (A, C) depends on m and n;  see the function HPL_xjumpm in
+ * order to initialize it.  The result is finally handed to HPL_setran
+ * with mode 0 (irand = IRANM).
+ *
+ * Arguments
+ * =========
+ *
+ * MULT    (local input)                 int *
+ *         Array of dimension 2 holding the encoded constant A.
+ *
+ * IADD    (local input)                 int *
+ *         Array of dimension 2 holding the encoded constant C.
+ *
+ * IRANN   (local input)                 int *
+ *         Array of dimension 2 holding the encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         Array of dimension 2 receiving the encoding of X(m).
+ *
+ * All four arrays use the 2-int long-integer encoding that HPL_lmul
+ * and HPL_ladd operate on.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                          prod[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_lmul( IRANN, MULT, prod );           /* prod  = IRANN * MULT;  */
+   HPL_ladd( prod,  IADD, IRANM );          /* IRANM = prod  + IADD;  */
+
+   HPL_setran( 0, IRANM );                  /* irand = IRANM          */
+/*
+ * End of HPL_jumpit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c
new file mode 100644
index 000000000..0d4e4c08c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c
@@ -0,0 +1,126 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_ladd
+(
+   int *                            J,
+   int *                            K,
+   int *                            I
+)
+#else
+void HPL_ladd
+( J, K, I )
+   int *                            J;
+   int *                            K;
+   int *                            I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ladd computes I = J + K for two long positive integers.  I, J, K
+ * are encoded on 64 bits using an array of 2 integers: the 32 lower bits
+ * are stored in the first entry, the 32 higher bits in the second one.
+ * The sum is built from four 16-bit pieces, least significant first, the
+ * carry of each piece feeding the next; the last carry is discarded.
+ *
+ * J and K are read completely before I is written.
+ *
+ * Arguments
+ * =========
+ *
+ * J       (local input)                 int *
+ *         Integer array of dimension 2 holding the first operand.
+ *
+ * K       (local input)                 int *
+ *         Integer array of dimension 2 holding the second operand.
+ *
+ * I       (local output)                int *
+ *         Integer array of dimension 2 receiving the encoded sum.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   unsigned int        acc, carry;
+   unsigned int        j0 = J[0] & 65535, j1 = (unsigned)J[0] >> 16;
+   unsigned int        j2 = J[1] & 65535, j3 = (unsigned)J[1] >> 16;
+   unsigned int        k0 = K[0] & 65535, k1 = (unsigned)K[0] >> 16;
+   unsigned int        k2 = K[1] & 65535, k3 = (unsigned)K[1] >> 16;
+
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Piece p of the result is (J_p + K_p + carry_in) mod 2^16 and its
+ * carry_out is the same sum divided by 2^16:
+ *
+ *    I[0] = piece 0 | piece 1 << 16
+ *    I[1] = piece 2 | piece 3 << 16
+ */
+   acc   = j0 + k0;                         /* piece 0                */
+   carry = acc >> 16;
+   I[0]  = acc & 65535;
+   acc   = carry + j1 + k1;                 /* piece 1                */
+   carry = acc >> 16;
+   I[0] |= ( acc & 65535 ) << 16;
+   acc   = carry + j2 + k2;                 /* piece 2                */
+   carry = acc >> 16;
+   I[1]  = acc & 65535;
+   acc   = carry + j3 + k3;                 /* piece 3, carry dropped */
+   I[1] |= ( acc & 65535 ) << 16;
+/*
+ * End of HPL_ladd
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c
new file mode 100644
index 000000000..254b192f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_lmul
+(
+   int *                            K,
+   int *                            J,
+   int *                            I
+)
+#else
+void HPL_lmul
+( K, J, I )
+   int *                            K;
+   int *                            J;
+   int *                            I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_lmul computes I = K * J for two long integers and keeps only the
+ * low 64 bits of the product.  I, J and K are encoded on 64 bits using
+ * an array of 2 integers:  the 32 lower bits are stored in the first
+ * entry, the 32 higher bits in the second one.  The operands are split
+ * into 16-bit pieces and multiplied schoolbook fashion; the reductions
+ * modulo 2^16 are written inline as masks and shifts.
+ *
+ * K and J are read completely before I is written.
+ *
+ * Arguments
+ * =========
+ *
+ * K       (local input)                 int *
+ *         Integer array of dimension 2 holding the first factor.
+ *
+ * J       (local input)                 int *
+ *         Integer array of dimension 2 holding the second factor.
+ *
+ * I       (local output)                int *
+ *         Integer array of dimension 2 receiving the encoded product.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        col, row;
+   unsigned int               kk[4], jj[4], limb[4], lo, hi;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Column col of the product gathers every kk[row] * jj[col-row].  Each
+ * 16 x 16 bit product fits in 32 bits:  its low half stays in column
+ * col (lo), its high half is added to what column col+1 starts from
+ * (hi).  Whatever would land in column 4 and above is discarded.
+ */
+   kk[0] = K[0] & 65535; kk[1] = ( (unsigned)K[0] >> 16 ) & 65535;
+   kk[2] = K[1] & 65535; kk[3] = ( (unsigned)K[1] >> 16 ) & 65535;
+   jj[0] = J[0] & 65535; jj[1] = ( (unsigned)J[0] >> 16 ) & 65535;
+   jj[2] = J[1] & 65535; jj[3] = ( (unsigned)J[1] >> 16 ) & 65535;
+
+   hi = 0;
+   for( col = 0; col < 4; col++ )
+   {
+      lo = hi & 65535;
+      hi = ( hi >> 16 ) & 65535;
+      for( row = 0; row <= col; row++ )
+      {
+         lo  = kk[row] * jj[col-row] + ( lo & 65535 );
+         hi += ( lo >> 16 ) & 65535;
+      }
+      limb[col] = lo;
+   }
+
+   I[0] = (int)( ( ( limb[1] & 65535 ) << 16 ) | ( limb[0] & 65535 ) );
+   I[1] = (int)( ( ( limb[3] & 65535 ) << 16 ) | ( limb[2] & 65535 ) );
+/*
+ * End of HPL_lmul
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c
new file mode 100644
index 000000000..fe4e12f5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c
@@ -0,0 +1,94 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_rand( void )
+#else
+double HPL_rand()
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_rand generates  the next number  in the  random  sequence.  This
+ * function  ensures  that this number lies in the interval (-0.5, 0.5].
+ *
+ * The call HPL_setran( 3, j ) supplies the two integers j[0] and j[1]
+ * the value is built from;  presumably it also advances the generator
+ * state (irand) to X(n+1) = a * X(n) + c -- confirm in HPL_setran. Each
+ * integer is split into its low 16 bits and an unsigned high part, the
+ * two being recombined with HPL_POW16;  the words are then scaled by
+ * HPL_DIVFAC and HPL_HALF and the result is HPL_HALF minus that scaled
+ * value.  NOTE(review): the (-0.5, 0.5] range relies on the values of
+ * these constants in the HPL headers (not shown here) and on a 32-bit
+ * int -- confirm.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_setran( 3, j );
+/*
+ * return HPL_HALF minus the scaled value of the two words ( j[1], j[0] )
+ */
+   return( HPL_HALF -
+           (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF +
+           (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF );
+/*
+ * End of HPL_rand
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c
new file mode 100644
index 000000000..1a3ca73aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c
@@ -0,0 +1,115 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int       ias[2], ics[2], irand[2];
+
+#ifdef STDC_HEADERS
+void HPL_setran
+(
+   const int                        OPTION,
+   int *                            IRAN
+)
+#else
+void HPL_setran
+( OPTION, IRAN )
+   const int                        OPTION;
+   int *                            IRAN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_setran is the single access point to the file-local state of the
+ * random generator:  irand  (current element X(n)),  ias  (multiplier
+ * a)  and  ics  (increment c)  of the sequence  X(n+1) = a*X(n) + c.
+ * OPTION 0, 1 and 2 overwrite irand, ias and ics respectively with the
+ * two integers in IRAN.  OPTION 3 copies irand into IRAN and then steps
+ * irand to  irand * ias + ics  (HPL_lmul followed by HPL_ladd).  Any
+ * other value of OPTION leaves both the state and IRAN untouched.
+ *
+ * Arguments
+ * =========
+ *
+ * OPTION  (local input)                 const int
+ *         On entry, OPTION selects the operation: 0, 1, 2 store IRAN in
+ *         irand, ias, ics;  3 reads irand and advances the generator.
+ *
+ * IRAN    (local input/output)          int *
+ *         Array of dimension 2.  Read when OPTION is 0, 1 or 2, written
+ *         when OPTION is 3;  holds a two-integer encoded value.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  next: scratch for the product irand * ias
+ */
+   int                        next[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   switch( OPTION )
+   {
+      case 0:  irand[0] = IRAN[0]; irand[1] = IRAN[1]; break;
+      case 1:  ias  [0] = IRAN[0]; ias  [1] = IRAN[1]; break;
+      case 2:  ics  [0] = IRAN[0]; ics  [1] = IRAN[1]; break;
+      case 3:  IRAN[0] = irand[0]; IRAN[1] = irand[1];    /* hand out */
+               HPL_lmul( irand, ias, next ); HPL_ladd( next, ics, irand ); break;
+      default: break;
+   }
+/*
+ * End of HPL_setran
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c
new file mode 100644
index 000000000..ae70bbc16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c
@@ -0,0 +1,158 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_xjumpm
+(
+   const int                        JUMPM,
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM,
+   int *                            IAM,
+   int *                            ICM
+)
+#else
+void HPL_xjumpm
+( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM )
+   const int                        JUMPM;
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+   int *                            IAM;
+   int *                            ICM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_xjumpm builds the constants  A and C  such that  X(n+JUMPM) =
+ * A*X(n)+C,  by composing  JUMPM  times the elementary step described
+ * by MULT and IADD  (X(n+1) = MULT*X(n)+IADD),  and applies them to the
+ * value encoded in IRANN to produce IRANM.
+ *
+ * Arguments
+ * =========
+ *
+ * JUMPM   (local input)                 const int
+ *         On entry,  JUMPM  is the number of elementary steps to jump.
+ *         When JUMPM is less than or equal to zero,  nothing is
+ *         computed:  IRANM is simply set to IRANN (jump of size zero)
+ *         and IAM, ICM are left untouched.
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2 holding the encoded
+ *         multiplier  a  of the elementary step  X(n+1) = a*X(n) + c.
+ *         It is only read.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2 holding the encoded
+ *         increment  c  of the elementary step  X(n+1) = a*X(n) + c.
+ *         It is only read.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry, IRANN is an array of dimension 2 holding the
+ *         encoding of X(n).  It is only read.
+ *
+ * IRANM   (local output)                int *
+ *         Array of dimension 2.  On exit, it holds the encoding of
+ *         X(n+JUMPM) = A*X(n)+C  when JUMPM is greater than zero,  and
+ *         a copy of IRANN otherwise.
+ *
+ * IAM     (local output)                int *
+ *         Array of dimension 2.  On exit, when JUMPM is greater than
+ *         zero,  it holds the encoded constant  A  of the jump from
+ *         X(n) to X(n+JUMPM).  It is also used as the running product
+ *         while the jump is being composed,  so its previous content
+ *         is overwritten.  When JUMPM is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ICM     (local output)                int *
+ *         Array of dimension 2.  On exit, when JUMPM is greater than
+ *         zero,  it holds the encoded constant  C  of the jump from
+ *         X(n) to X(n+JUMPM).  It is also used as the running sum
+ *         while the jump is being composed,  so its previous content
+ *         is overwritten.  When JUMPM is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  acc: scratch product,  left: steps still to add
+ */
+   int                        acc[2], left;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( JUMPM <= 0 )
+   {                                        /* IRANM = IRANN          */
+      IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1];
+      return;
+   }
+/*
+ * Start from the one-step constants, then compose JUMPM-1 more steps
+ */
+   IAM[0] = MULT[0]; IAM[1] = MULT[1];      /* IAM   = MULT;          */
+   ICM[0] = IADD[0]; ICM[1] = IADD[1];      /* ICM   = IADD;          */
+   for( left = JUMPM - 1; left > 0; left-- )
+   {                                        /* IAM *= MULT; ICM = ICM * MULT + IADD */
+      HPL_lmul( IAM, MULT, acc ); IAM[0] = acc[0]; IAM[1] = acc[1];
+      HPL_lmul( ICM, MULT, acc ); HPL_ladd( IADD, acc, ICM );
+   }
+/* Apply the composed constants:  IRANM = IRANN * IAM + ICM */
+   HPL_lmul( IRANN, IAM, acc );
+   HPL_ladd( acc, ICM, IRANM );
+/*
+ * End of HPL_xjumpm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c
new file mode 100644
index 000000000..2d129c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmatgen
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_pdmatgen
+( GRID, M, N, NB, A, LDA, ISEED )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmatgen generates (or regenerates) a parallel random matrix A.
+ *  
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * A       (local output)                double *
+ *         On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+ *         On exit, this array contains the coefficients of the randomly
+ *         generated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ISEED   (global input)                const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (every int[2] below is one encoded value)
+ */
+   int                        iadd [2], ia1  [2], ia2  [2], ia3  [2],
+                              ia4  [2], ia5  [2], ib1  [2], ib2  [2],
+                              ib3  [2], ic1  [2], ic2  [2], ic3  [2],
+                              ic4  [2], ic5  [2], iran1[2], iran2[2],
+                              iran3[2], iran4[2], itmp1[2], itmp2[2],
+                              itmp3[2], jseed[2], mult [2];
+   int                        ib, iblk, ik, jb, jblk, jk, jump1, jump2,
+                              jump3, jump4, jump5, jump6, jump7, lmb,
+                              lnb, mblks, mp, mycol, myrow, nblks,
+                              npcol, nprow, nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+/* One-step multiplier / increment, and the seed as the starting value */
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+/*
+ * Generate an M by N matrix starting in process (0,0);  mp and nq are
+ * set by Mnumroc (presumably the local row / column counts - confirm)
+ */
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( ( mp <= 0 ) || ( nq <= 0 ) ) return;   /* nothing to fill here */
+/*
+ * Local number of blocks and size of the last one
+ */
+   mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB;
+   nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB;
+/*
+ * Compute multiplier/adder for various jumps in random sequence
+ */
+   jump1 = 1;  jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB;
+   jump5 = NB; jump6 = mycol;      jump7 = myrow * NB;
+/* Argument 5 receives the jumped value, arguments 6 and 7 the constants */
+   HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1,   ic1   ); /* 1 entry            */
+   HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2,   ic2   ); /* nprow*NB entries   */
+   HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3,   ic3   ); /* M entries          */
+   HPL_xjumpm( jump4, ia3,  ic3,  iran1, itmp1, ia4,   ic4   ); /* npcol*NB M-jumps   */
+   HPL_xjumpm( jump5, ia3,  ic3,  iran1, itmp1, ia5,   ic5   ); /* NB M-jumps         */
+   HPL_xjumpm( jump6, ia5,  ic5,  iran1, itmp3, itmp1, itmp2 ); /* mycol (NB*M)-jumps */
+   HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); /* + myrow*NB entries */
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Save value of first number in sequence:  ib1, ib2 and ib3 are the
+ * bases later jumped with (ia2,ic2), (ia3,ic3) and (ia4,ic4)
+ */
+   ib1[0] = iran1[0]; ib1[1] = iran1[1];
+   ib2[0] = iran1[0]; ib2[1] = iran1[1];
+   ib3[0] = iran1[0]; ib3[1] = iran1[1];
+/* A is filled one column at a time:  column blocks, columns, row blocks */
+   for( jblk = 0; jblk < nblks; jblk++ )
+   {
+      jb = ( jblk == nblks - 1 ? lnb : NB );   /* columns in this block */
+      for( jk = 0; jk < jb; jk++ )
+      {
+         for( iblk = 0; iblk < mblks; iblk++ )
+         {
+            ib = ( iblk == mblks - 1 ? lmb : NB );   /* rows in this block */
+            for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand();
+            HPL_jumpit( ia2, ic2, ib1, iran2 );   /* presumably: move to the next local row block - confirm */
+            ib1[0] = iran2[0]; ib1[1] = iran2[1];
+         }
+         A += LDA - mp;                           /* mp entries written; go to the next column of A */
+         HPL_jumpit( ia3, ic3, ib2, iran3 );      /* jump of M entries from this column's base */
+         ib1[0] = iran3[0]; ib1[1] = iran3[1];
+         ib2[0] = iran3[0]; ib2[1] = iran3[1];
+      }
+      HPL_jumpit( ia4, ic4, ib3, iran4 );         /* jump of npcol*NB columns from this block's base */
+      ib1[0] = iran4[0]; ib1[1] = iran4[1];
+      ib2[0] = iran4[0]; ib2[1] = iran4[1];
+      ib3[0] = iran4[0]; ib3[1] = iran4[1];
+   }
+/*
+ * End of HPL_pdmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c
new file mode 100644
index 000000000..5e4050f48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c
@@ -0,0 +1,293 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int main
+(
+   int                        ARGC,
+   char                       * * ARGV
+)
+#else
+int main( ARGC, ARGV )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        ARGC;
+/*
+ * .. Array Arguments ..
+ */
+   char                       * * ARGV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * main is the main driver program for testing the HPL routines.
+ * This  program is  driven  by  a short data file named  "HPL.dat".
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (*val arrays hold the values to sweep over,
+ * the matching n* / *s scalars their counts, as filled by HPL_pdinfo)
+ */
+   int                        nval  [HPL_MAX_PARAM],
+                              nbval [HPL_MAX_PARAM],
+                              pval  [HPL_MAX_PARAM],
+                              qval  [HPL_MAX_PARAM],
+                              nbmval[HPL_MAX_PARAM],
+                              ndvval[HPL_MAX_PARAM],
+                              ndhval[HPL_MAX_PARAM];
+
+   HPL_T_FACT                 pfaval[HPL_MAX_PARAM],
+                              rfaval[HPL_MAX_PARAM];
+
+   HPL_T_TOP                  topval[HPL_MAX_PARAM];
+
+   HPL_T_grid                 grid;
+   HPL_T_palg                 algo;
+   HPL_T_test                 test;
+   int                        L1notran, Unotran, align, equil, in, inb,
+                              inbm, indh, indv, ipfa, ipq, irfa, itop,
+                              mycol, myrow, ns, nbs, nbms, ndhs, ndvs,
+                              npcol, npfs, npqs, nprow, nrfs, ntps, 
+                              rank, size, tswap;
+   HPL_T_ORDER                pmapping;
+   HPL_T_FACT                 rpfa;
+   HPL_T_SWAP                 fswap;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Init( &ARGC, &ARGV );
+#ifdef HPL_CALL_VSIPL
+   vsip_init((void*)0);
+#endif
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Read and check validity of test parameters from input file
+ *
+ * HPL Version 1.0, Linpack benchmark input file
+ * Your message here
+ * HPL.out      output file name (if any)
+ * 6            device out (6=stdout,7=stderr,file)
+ * 4            # of problems sizes (N)
+ * 29 30 34 35  Ns
+ * 4            # of NBs
+ * 1 2 3 4      NBs
+ * 0            PMAP process mapping (0=Row-,1=Column-major)
+ * 3            # of process grids (P x Q)
+ * 2 1 4        Ps
+ * 2 4 1        Qs
+ * 16.0         threshold
+ * 3            # of panel fact
+ * 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+ * 2            # of recursive stopping criterium
+ * 2 4          NBMINs (>= 1)
+ * 1            # of panels in recursion
+ * 2            NDIVs
+ * 3            # of recursive panel fact.
+ * 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+ * 1            # of broadcast
+ * 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ * 1            # of lookahead depth
+ * 0            DEPTHs (>=0)
+ * 2            SWAP (0=bin-exch,1=long,2=mix)
+ * 4            swapping threshold
+ * 0            L1 in (0=transposed,1=no-transposed) form
+ * 0            U  in (0=transposed,1=no-transposed) form
+ * 1            Equilibration (0=no,1=yes)
+ * 8            memory alignment in double (> 0)
+ */
+   HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval,
+               &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval,
+               &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran,
+               &Unotran, &equil, &align );
+/*
+ * Loop over different process grids - Define process grid. Go to bottom
+ * of process grid loop if this case does not use my process.
+ */
+   for( ipq = 0; ipq < npqs; ipq++ )
+   {
+      (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq],
+                            &grid );
+      (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol );
+/* Not in this grid: jump past HPL_grid_exit to the end of the iteration */
+      if( ( myrow < 0 ) || ( myrow >= nprow ) ||
+          ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs;
+
+      for( in = 0; in < ns; in++ )
+      {                            /* Loop over various problem sizes */
+       for( inb = 0; inb < nbs; inb++ )
+       {                        /* Loop over various blocking factors */
+        for( indh = 0; indh < ndhs; indh++ )
+        {                       /* Loop over various lookahead depths */
+         for( itop = 0; itop < ntps; itop++ )
+         {                  /* Loop over various broadcast topologies */
+          for( irfa = 0; irfa < nrfs; irfa++ )
+          {             /* Loop over various recursive factorizations */
+           for( ipfa = 0; ipfa < npfs; ipfa++ )
+           {                /* Loop over various panel factorizations */
+            for( inbm = 0; inbm < nbms; inbm++ )
+            {        /* Loop over various recursive stopping criteria */
+             for( indv = 0; indv < ndvs; indv++ )
+             {          /* Loop over various # of panels in recursion */
+/*
+ * Set up the algorithm parameters
+ */
+              algo.btopo = topval[itop]; algo.depth = ndhval[indh];
+              algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv];
+
+              algo.pfact = rpfa = pfaval[ipfa];   /* rpfa: panel fact. for now */
+
+              if( L1notran != 0 )                 /* select the ...N routines */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrN;
+                 else                           algo.pffun = HPL_pdpanrlN;
+
+                 algo.rfact = rpfa = rfaval[irfa];   /* rpfa reused: recursive fact. */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrN;
+                 else                           algo.rffun = HPL_pdrpanrlN;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN;
+                 else               algo.upfun = HPL_pdupdateNT;
+              }
+              else                                /* select the ...T routines */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrT;
+                 else                           algo.pffun = HPL_pdpanrlT;
+
+                 algo.rfact = rpfa = rfaval[irfa];   /* rpfa reused: recursive fact. */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrT;
+                 else                           algo.rffun = HPL_pdrpanrlT;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN;
+                 else               algo.upfun = HPL_pdupdateTT;
+              }
+
+              algo.fswap = fswap; algo.fsthr = tswap;
+              algo.equil = equil; algo.align = align;
+
+              HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] );
+
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+      (void) HPL_grid_exit( &grid );
+label_end_of_npqs: ;
+   }
+/*
+ * Print ending messages, close output file, exit.  Only the process of
+ * rank 0 in MPI_COMM_WORLD writes the summary.
+ */
+   if( rank == 0 )
+   {
+      test.ktest = test.kpass + test.kfail + test.kskip;   /* total = passed + failed + skipped */
+#ifndef HPL_DETAILED_TIMING
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+#else
+      if( test.thrsh > HPL_rzero )
+         HPL_fprintf( test.outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+#endif
+
+      HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest,
+                   "tests with the following results:" );
+      if( test.thrsh > HPL_rzero )                /* pass/fail counts reported separately */
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed and passed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kfail,
+                      "tests completed and failed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+      else                                        /* kfail is not reported in this case */
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed without checking," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( test.outfp, "\nEnd of Tests.\n" );
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      if( ( test.outfp != stdout ) && ( test.outfp != stderr ) )
+         (void) fclose( test.outfp );             /* never close the standard streams */
+   }
+#ifdef HPL_CALL_VSIPL
+   vsip_finalize((void*)0);
+#endif
+   MPI_Finalize();
+   exit( 0 );
+/* exit() does not return, so the statement below is never reached */
+   return( 0 );
+/*
+ * End of main
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c
new file mode 100644
index 000000000..4ede45be6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c
@@ -0,0 +1,1182 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdinfo
+(
+   HPL_T_test *                     TEST,
+   int *                            NS,
+   int *                            N,
+   int *                            NBS,
+   int *                            NB,
+   HPL_T_ORDER *                    PMAPPIN,
+   int *                            NPQS,
+   int *                            P,
+   int *                            Q,
+   int *                            NPFS,
+   HPL_T_FACT *                     PF,
+   int *                            NBMS,
+   int *                            NBM,
+   int *                            NDVS,
+   int *                            NDV,
+   int *                            NRFS,
+   HPL_T_FACT *                     RF,
+   int *                            NTPS,
+   HPL_T_TOP *                      TP,
+   int *                            NDHS,
+   int *                            DH,
+   HPL_T_SWAP *                     FSWAP,
+   int *                            TSWAP,
+   int *                            L1NOTRAN,
+   int *                            UNOTRAN,
+   int *                            EQUIL,
+   int *                            ALIGN
+)
+#else
+void HPL_pdinfo
+( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
+   HPL_T_test *                     TEST;
+   int *                            NS;
+   int *                            N;
+   int *                            NBS;
+   int *                            NB;
+   HPL_T_ORDER *                    PMAPPIN;
+   int *                            NPQS;
+   int *                            P;
+   int *                            Q;
+   int *                            NPFS;
+   HPL_T_FACT *                     PF;
+   int *                            NBMS;
+   int *                            NBM;
+   int *                            NDVS;
+   int *                            NDV;
+   int *                            NRFS;
+   HPL_T_FACT *                     RF;
+   int *                            NTPS;
+   HPL_T_TOP *                      TP;
+   int *                            NDHS;
+   int *                            DH;
+   HPL_T_SWAP *                     FSWAP;
+   int *                            TSWAP;
+   int *                            L1NOTRAN;
+   int *                            UNOTRAN;
+   int *                            EQUIL;
+   int *                            ALIGN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdinfo reads  the  startup  information for the various tests and
+ * transmits it to all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global output)               HPL_T_test *
+ *         On entry, TEST  points to a testing data structure.  On exit,
+ *         the fields of this data structure are initialized as follows:
+ *         TEST->outfp  specifies the output file where the results will
+ *         be printed.  It is only defined and used by  the process 0 of
+ *         the grid.  TEST->thrsh specifies the  threshold value for the
+ *         test ratio.  TEST->epsil is the relative machine precision of
+ *         the distributed computer.  Finally  the test counters, kfail,
+ *         kpass, kskip, ktest are initialized to zero.
+ *
+ * NS      (global output)               int *
+ *         On exit,  NS  specifies the number of different problem sizes
+ *         to be tested. NS is less than or equal to HPL_MAX_PARAM.
+ *
+ * N       (global output)               int *
+ *         On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+ *         the first NS entries of this array contain the  problem sizes
+ *         to run the code with.
+ *
+ * NBS     (global output)               int *
+ *         On exit,  NBS  specifies the number of different distribution
+ *         blocking factors to be tested. NBS must be less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NB      (global output)               int *
+ *         On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NBS entries of this array contain the values of the
+ *         various distribution blocking factors, to run the code with.
+ *
+ * PMAPPIN (global output)               HPL_T_ORDER *
+ *         On exit,  PMAPPIN  specifies the process mapping onto the no-
+ *         des of the  MPI machine configuration.  PMAPPIN  defaults  to
+ *         row-major ordering.
+ *
+ * NPQS    (global output)               int *
+ *         On exit, NPQS  specifies the  number of different values that
+ *         can be used for P and Q, i.e., the number of process grids to
+ *         run  the  code with.  NPQS must be  less  than  or  equal  to
+ *         HPL_MAX_PARAM.
+ *
+ * P       (global output)               int *
+ *         On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of P,
+ *         the number of process rows of the  NPQS grids to run the code
+ *         with.
+ *
+ * Q       (global output)               int *
+ *         On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of Q,
+ *         the number of process columns of the  NPQS  grids to  run the
+ *         code with.
+ *
+ * NPFS    (global output)               int *
+ *         On exit, NPFS  specifies the  number of different values that
+ *         can be used for PF : the panel factorization algorithm to run
+ *         the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * PF      (global output)               HPL_T_FACT *
+ *         On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NPFS  entries  of this array  contain  the various
+ *         panel factorization algorithms to run the code with.
+ *
+ * NBMS    (global output)               int *
+ *         On exit,  NBMS  specifies  the  number  of  various recursive
+ *         stopping criteria  to be tested.  NBMS  must be  less than or
+ *         equal to HPL_MAX_PARAM.
+ *
+ * NBM     (global output)               int *
+ *         On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NBMS entries of this array contain the values
+ *         of the various recursive stopping criteria to be tested.
+ *
+ * NDVS    (global output)               int *
+ *         On exit,  NDVS  specifies  the number  of various numbers  of
+ *         panels in recursion to be tested.  NDVS is less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NDV     (global output)               int *
+ *         On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDVS entries of this array contain the values
+ *         of the various numbers of panels in recursion to be tested.
+ *
+ * NRFS    (global output)               int *
+ *         On exit, NRFS  specifies the  number of different values that
+ *         can be used for RF : the recursive factorization algorithm to
+ *         be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * RF      (global output)               HPL_T_FACT *
+ *         On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NRFS  entries  of  this array contain  the various
+ *         recursive factorization algorithms to run the code with.
+ *
+ * NTPS    (global output)               int *
+ *         On exit, NTPS  specifies the  number of different values that
+ *         can be used for the  broadcast topologies  to be tested. NTPS
+ *         is less than or equal to HPL_MAX_PARAM.
+ *
+ * TP      (global output)               HPL_T_TOP *
+ *         On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the  first NTPS  entries of this  array  contain  the various
+ *         broadcast (along rows) topologies to run the code with.
+ *
+ * NDHS    (global output)               int *
+ *         On exit, NDHS  specifies the  number of different values that
+ *         can be used for the  lookahead depths to be  tested.  NDHS is
+ *         less than or equal to HPL_MAX_PARAM.
+ *
+ * DH      (global output)               int *
+ *         On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDHS entries of this array contain the values
+ *         of lookahead depths to run the code with.  Such a value is ei-
+ *         ther 0 (no lookahead) or greater than zero.
+ *
+ * FSWAP   (global output)               HPL_T_SWAP *
+ *         On exit, FSWAP specifies the swapping algorithm to be used in
+ *         all tests.
+ *
+ * TSWAP   (global output)               int *
+ *         On exit,  TSWAP  specifies the swapping threshold as a number
+ *         of columns when the mixed swapping algorithm was chosen.
+ *
+ * L1NOTRAN (global output)              int *
+ *         On exit, L1NOTRAN specifies whether the upper triangle of the
+ *         panels of columns  should  be stored  in  no-transposed  form
+ *         (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+ *
+ * UNOTRAN (global output)               int *
+ *         On exit, UNOTRAN  specifies whether the panels of rows should
+ *         be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+ *         form (UNOTRAN=0) during their broadcast.
+ *
+ * EQUIL   (global output)               int *
+ *         On exit,  EQUIL  specifies  whether  equilibration during the
+ *         swap-broadcast  of  the  panel of rows  should  be  performed
+ *         (EQUIL=1) or not (EQUIL=0).
+ *
+ * ALIGN   (global output)               int *
+ *         On exit,  ALIGN  specifies the alignment  of  the dynamically
+ *         allocated buffers in double precision words. ALIGN is greater
+ *         than zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   char                       file[HPL_LINE_MAX], line[HPL_LINE_MAX],
+                              auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
+   FILE                       * infp;
+   int                        * iwork = NULL;
+   char                       * lineptr;
+   int                        error=0, fid, i, j, lwork, maxp, nprocs,
+                              rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Initialize the TEST data structure with default values
+ */
+   TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
+   TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
+/*
+ * Process 0 reads the input data, broadcasts to other processes and
+ * writes needed information to TEST->outfp.
+ */
+   if( rank == 0 )
+   {
+/*
+ * Open file and skip data file header
+ */
+      if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
+      { 
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "cannot open file HPL.dat" );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) fgets( auth, HPL_LINE_MAX - 2, infp );
+/*
+ * Read name and unit number for summary output file
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", file );
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num  );
+      fid = atoi( num );
+      if     ( fid == 6 ) TEST->outfp = stdout;
+      else if( fid == 7 ) TEST->outfp = stderr;
+      else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
+                    file );
+         error = 1; goto label_error;
+      }
+/*
+ * Read and check the parameter values for the tests.
+ *
+ * Problem size (>=0) (N)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); 
+      (void) sscanf( line, "%s", num ); *NS = atoi( num );
+      if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
+                    "Number of values of N is less than 1 or greater than",
+                    HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( N[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of N less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Block size (>=1) (NB)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBS = atoi( num );
+      if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NB is less than 1 or",
+                    "greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NB[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", 
+                       "Value of NB less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Process grids, mapping, (>=1) (P, Q)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num );
+      *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
+      if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of grids is less",
+                    "than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( P[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of P less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( Q[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of Q less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Check for enough processes in machine configuration
+ */
+      maxp = 0;
+      for( i = 0; i < *NPQS; i++ )
+      { nprocs   = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
+      if( maxp > size )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Need at least %d processes for these tests", maxp );
+         error = 1; goto label_error;
+      }
+/*
+ * Checking threshold value (TEST->thrsh)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
+/*
+ * Panel factorization algorithm (PF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
+      if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "number of values of PFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) PF[ i ] = HPL_CROUT;
+         else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
+         else              PF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Recursive stopping criterion (>=1) (NBM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
+      if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NBMIN",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBMS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NBM[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NBMIN less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Number of panels in recursion (>=2) (NDV)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
+      if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NDIV",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDVS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NDV[ i ] = atoi( num ) ) < 2 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NDIV less than 2" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Recursive panel factorization (RF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
+      if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of RFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NRFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) RF[ i ] = HPL_CROUT;
+         else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
+         else              RF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Broadcast topology (TP) (0=1rg, 1=1rM, 2=2rg, 3=2rM, 4=Lng, 5=LnM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
+      if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of BCAST",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NTPS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) TP[ i ] = HPL_1RING;
+         else if( j == 1 ) TP[ i ] = HPL_1RING_M;
+         else if( j == 2 ) TP[ i ] = HPL_2RING;
+         else if( j == 3 ) TP[ i ] = HPL_2RING_M;
+         else if( j == 4 ) TP[ i ] = HPL_BLONG;
+         else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
+         else              TP[ i ] = HPL_1RING_M;
+      }
+/*
+ * Lookahead depth (>=0) (NDH)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
+      if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of DEPTH",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDHS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num );
+         lineptr += strlen( num ) + 1;
+         if( ( DH[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of DEPTH less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Swapping algorithm (0,1 or 2) (FSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); j = atoi( num );
+      if(      j == 0 ) *FSWAP = HPL_SWAP00;
+      else if( j == 1 ) *FSWAP = HPL_SWAP01;
+      else if( j == 2 ) *FSWAP = HPL_SW_MIX;
+      else              *FSWAP = HPL_SWAP01;
+/*
+ * Swapping threshold (>=0) (TSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
+      if( *TSWAP <= 0 ) *TSWAP = 0;
+/*
+ * L1 in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
+      if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; 
+/*
+ * U  in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
+      if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
+/*
+ * Equilibration (0=no, 1=yes)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
+      if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
+/*
+ * Memory alignment in double precision words (> 0) (ALIGN)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
+      if( *ALIGN <= 0 ) *ALIGN = 4;
+/*
+ * Close input file
+ */
+label_error:
+      if (infp != NULL)
+      	(void) fclose( infp );
+   }
+   else { TEST->outfp = NULL; }
+/*
+ * Check for error on reading input file
+ */
+   (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
+                          MPI_COMM_WORLD );
+   if( error )
+   {
+      if( rank == 0 )
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Illegal input in file HPL.dat. Exiting ..." );
+      MPI_Finalize();
+#ifdef HPL_CALL_VSIPL
+      (void) vsip_finalize( NULL );
+#endif
+      exit( 1 );
+   }
+/*
+ * Compute and broadcast machine epsilon
+ */
+   TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
+/*
+ * Broadcast the threshold value read by process 0
+ */
+   (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
+                         MPI_COMM_WORLD );
+/*
+ * Broadcast array sizes
+ */
+   iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      iwork[ 0] = *NS;      iwork[ 1] = *NBS;
+      iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
+      iwork[ 3] = *NPQS;    iwork[ 4] = *NPFS;     iwork[ 5] = *NBMS;
+      iwork[ 6] = *NDVS;    iwork[ 7] = *NRFS;     iwork[ 8] = *NTPS;
+      iwork[ 9] = *NDHS;    iwork[10] = *TSWAP;    iwork[11] = *L1NOTRAN;
+      iwork[12] = *UNOTRAN; iwork[13] = *EQUIL;    iwork[14] = *ALIGN;
+   }
+   (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
+   if( rank != 0 )
+   {
+      *NS       = iwork[ 0]; *NBS   = iwork[ 1];
+      *PMAPPIN  = ( iwork[ 2] == 0 ?  HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
+      *NPQS     = iwork[ 3]; *NPFS  = iwork[ 4]; *NBMS     = iwork[ 5];
+      *NDVS     = iwork[ 6]; *NRFS  = iwork[ 7]; *NTPS     = iwork[ 8];
+      *NDHS     = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
+      *UNOTRAN  = iwork[12]; *EQUIL = iwork[13]; *ALIGN    = iwork[14];
+   }
+   if( iwork ) free( iwork );
+/*
+ * Pack information arrays and broadcast
+ */
+   lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + 
+           (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
+   
+   if (lwork < 0)
+	exit(EXIT_FAILURE); 
+
+
+   iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { iwork[j] = N [i]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { iwork[j] = NB[i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( PF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( RF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      TP[i] == HPL_1RING   ) iwork[j] = 0;
+         else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
+         else if( TP[i] == HPL_2RING   ) iwork[j] = 2;
+         else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
+         else if( TP[i] == HPL_BLONG   ) iwork[j] = 4;
+         else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
+
+      if(      *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
+      else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
+      else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
+      j++;
+   }
+   (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
+                         MPI_COMM_WORLD );
+   if ((rank != 0) && (iwork != NULL))
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { N [i] = iwork[j]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { NB[i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
+
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      iwork[j] == 0 ) TP[i] = HPL_1RING;
+         else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
+         else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
+         else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
+         else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
+         else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
+
+      if(      iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
+      else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
+      else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
+      j++;
+   
+      if( iwork ) free( iwork );
+   }
+/*
+ * regurgitate input
+ */
+   if( rank == 0 )
+   {
+      
+      if (TEST->outfp != NULL){
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "HPLinpack 2.3  --  High-Performance Linpack benchmark  --  ",
+          " December 2, 2018" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Written by A. Petitet and R. Clint Whaley,  ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Piotr Luszczek, ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Julien Langou, ",
+          "University of Colorado Denver");
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      HPL_fprintf( TEST->outfp, "\n%s\n",
+          "An explanation of the input/output parameters follows:" );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "T/V    : Wall time / encoded variant." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "N      : The order of the coefficient matrix A." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "NB     : The partitioning blocking factor." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "P      : The number of process rows." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "Q      : The number of process columns." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "Time   : Time in seconds to solve the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n\n",
+         "Gflops : Rate of execution for solving the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "The following parameter values will be used:" );
+/*
+ * Problem size
+ */
+      HPL_fprintf( TEST->outfp,       "\nN      :" );
+      for( i = 0; i < Mmin( 8, *NS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", N[i]  );
+      if( *NS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", N[i]  );
+         if( *NS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", N[i]  );
+         }
+      }
+/*
+ * Distribution blocking factor
+ */
+      HPL_fprintf( TEST->outfp,       "\nNB     :" );
+      for( i = 0; i < Mmin( 8, *NBS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NB[i] );
+      if( *NBS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NB[i] );
+         if( *NBS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
+         }
+      }
+/*
+ * Process mapping
+ */
+      HPL_fprintf( TEST->outfp,       "\nPMAP   :" );
+      if(      *PMAPPIN == HPL_ROW_MAJOR    )
+         HPL_fprintf( TEST->outfp, " Row-major process mapping" );
+      else if( *PMAPPIN == HPL_COLUMN_MAJOR )
+         HPL_fprintf( TEST->outfp, " Column-major process mapping" );
+/*
+ * Process grid
+ */
+      HPL_fprintf( TEST->outfp,       "\nP      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", P[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", P[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", P[i]  );
+         }
+      }
+      HPL_fprintf( TEST->outfp,       "\nQ      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", Q[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", Q[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", Q[i]  );
+         }
+      }
+/*
+ * Panel Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nPFACT  :" );
+      for( i = 0; i < Mmin( 8, *NPFS ); i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( PF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( PF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NPFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPFS ); i++ )
+         {
+            if(      PF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( PF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( PF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NPFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPFS; i++ )
+            {
+               if(      PF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( PF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( PF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Recursive stopping criterion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNBMIN  :" );
+      for( i = 0; i < Mmin( 8, *NBMS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NBM[i]  );
+      if( *NBMS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBMS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NBM[i]  );
+         if( *NBMS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBMS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NBM[i]  );
+         }
+      }
+/*
+ * Number of panels in recursion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNDIV   :" );
+      for( i = 0; i < Mmin( 8, *NDVS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NDV[i]  );
+      if( *NDVS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDVS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NDV[i]  );
+         if( *NDVS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDVS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NDV[i]  );
+         }
+      }
+/*
+ * Recursive Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nRFACT  :" );
+      for( i = 0; i < Mmin( 8, *NRFS ); i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( RF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( RF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NRFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NRFS ); i++ )
+         {
+            if(      RF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( RF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( RF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NRFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NRFS; i++ )
+            {
+               if(      RF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( RF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( RF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Broadcast topology
+ */
+      HPL_fprintf( TEST->outfp,       "\nBCAST  :" );
+      for( i = 0; i < Mmin( 8, *NTPS ); i++ )
+      {
+         if(      TP[i] == HPL_1RING   )
+            HPL_fprintf( TEST->outfp,       "   1ring " );
+         else if( TP[i] == HPL_1RING_M )
+            HPL_fprintf( TEST->outfp,       "  1ringM " );
+         else if( TP[i] == HPL_2RING   )
+            HPL_fprintf( TEST->outfp,       "   2ring " );
+         else if( TP[i] == HPL_2RING_M )
+            HPL_fprintf( TEST->outfp,       "  2ringM " );
+         else if( TP[i] == HPL_BLONG   )
+            HPL_fprintf( TEST->outfp,       "   Blong " );
+         else if( TP[i] == HPL_BLONG_M )
+            HPL_fprintf( TEST->outfp,       "  BlongM " );
+      }
+      if( *NTPS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NTPS ); i++ )
+         {
+            if(      TP[i] == HPL_1RING   )
+               HPL_fprintf( TEST->outfp,       "   1ring " );
+            else if( TP[i] == HPL_1RING_M )
+               HPL_fprintf( TEST->outfp,       "  1ringM " );
+            else if( TP[i] == HPL_2RING   )
+               HPL_fprintf( TEST->outfp,       "   2ring " );
+            else if( TP[i] == HPL_2RING_M )
+               HPL_fprintf( TEST->outfp,       "  2ringM " );
+            else if( TP[i] == HPL_BLONG   )
+               HPL_fprintf( TEST->outfp,       "   Blong " );
+            else if( TP[i] == HPL_BLONG_M )
+               HPL_fprintf( TEST->outfp,       "  BlongM " );
+         }
+         if( *NTPS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NTPS; i++ )
+            {
+               if(      TP[i] == HPL_1RING   )
+                  HPL_fprintf( TEST->outfp,       "   1ring " );
+               else if( TP[i] == HPL_1RING_M )
+                  HPL_fprintf( TEST->outfp,       "  1ringM " );
+               else if( TP[i] == HPL_2RING   )
+                  HPL_fprintf( TEST->outfp,       "   2ring " );
+               else if( TP[i] == HPL_2RING_M )
+                  HPL_fprintf( TEST->outfp,       "  2ringM " );
+               else if( TP[i] == HPL_BLONG   )
+                  HPL_fprintf( TEST->outfp,       "   Blong " );
+               else if( TP[i] == HPL_BLONG_M )
+                  HPL_fprintf( TEST->outfp,       "  BlongM " );
+            }
+         }
+      }
+/*
+ * Lookahead depths
+ */
+      HPL_fprintf( TEST->outfp,       "\nDEPTH  :" );
+      for( i = 0; i < Mmin( 8, *NDHS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", DH[i]  );
+      if( *NDHS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDHS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", DH[i]  );
+         if( *NDHS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDHS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", DH[i]  );
+         }
+      }
+/*
+ * Swapping algorithm
+ */
+      HPL_fprintf( TEST->outfp,       "\nSWAP   :" );
+      if(      *FSWAP == HPL_SWAP00 )
+         HPL_fprintf( TEST->outfp, " Binary-exchange" );
+      else if( *FSWAP == HPL_SWAP01 )
+         HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
+      else if( *FSWAP == HPL_SW_MIX )
+         HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
+/*
+ * L1 storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nL1     :" );
+      if(      *L1NOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * U  storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nU      :" );
+      if(      *UNOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * Equilibration
+ */
+      HPL_fprintf( TEST->outfp,       "\nEQUIL  :" );
+      if(      *EQUIL != 0 )
+         HPL_fprintf( TEST->outfp, " yes" );
+      else
+         HPL_fprintf( TEST->outfp, " no" );
+/*
+ * Alignment
+ */
+      HPL_fprintf( TEST->outfp,       "\nALIGN  : %d double precision words",
+                   *ALIGN );
+
+      HPL_fprintf( TEST->outfp, "\n\n" );
+/*
+ * For testing only
+ */
+      if( TEST->thrsh > HPL_rzero )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The matrix A is randomly generated for each test." );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The following scaled residual check will be computed:" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "      ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
+         HPL_fprintf( TEST->outfp, "%s %21.6e\n",
+            "- The relative machine precision (eps) is taken to be     ",
+            TEST->epsil );
+         HPL_fprintf( TEST->outfp, "%s   %11.1f\n\n",
+            "- Computational tests pass if scaled residuals are less than      ",
+            TEST->thrsh );
+       }
+     }
+   }
+/*
+ * End of HPL_pdinfo
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c
new file mode 100644
index 000000000..73a62a7ff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c
@@ -0,0 +1,438 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtest
+(
+   HPL_T_test *                     TEST,
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        N,
+   const int                        NB
+)
+#else
+void HPL_pdtest
+( TEST, GRID, ALGO, N, NB )
+   HPL_T_test *                     TEST;
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        N;
+   const int                        NB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtest performs  one  test  given a set of parameters such as the
+ * process grid, the  problem size, the distribution blocking factor ...
+ * This function generates  the data, calls  and times the linear system
+ * solver,  checks  the  accuracy  of the  obtained vector solution  and
+ * writes this information to the file pointed to by TEST->outfp.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global input)                HPL_T_test *
+ *         On entry,  TEST  points  to a testing data structure:  outfp
+ *         specifies the output file where the results will be printed.
+ *         It is only defined and used by the process  0  of the  grid.
+ *         thrsh  specifies  the  threshold  value  for the test ratio.
+ *         Concretely, a test is declared "PASSED"  if and only if the
+ *         following inequality is satisfied:
+ *         ||Ax-b||_oo / ( epsil *
+ *                         ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+ *                          N )  < thrsh.
+ *         epsil  is the  relative machine precision of the distributed
+ *         computer. Finally the test counters, kfail, kpass, kskip and
+ *         ktest are updated as follows:  if the test passes,  kpass is
+ *         incremented by one;  if the test fails, kfail is incremented
+ *         by one; if the test is skipped, kskip is incremented by one.
+ *         ktest is left unchanged.
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters to be used for this test.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the order of the coefficient matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   double                     HPL_w[HPL_TIMING_N];
+#endif
+   HPL_T_pmat                 mat;
+   double                     wtime[1];
+   int                        info[3];
+   double                     Anorm1, AnormI, Gflops, Xnorm1, XnormI,
+                              BnormI, resid0, resid1;
+   double                     * Bptr;
+   void                       * vptr = NULL;
+   static int                 first=1;
+   int                        ii, ip2, mycol, myrow, npcol, nprow, nq;
+   char                       ctop, cpfact, crfact;
+   time_t                     current_time_start, current_time_end;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mat.n  = N; mat.nb = NB; mat.info = 0;
+   mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow );
+   nq     = HPL_numroc( N, NB, NB, mycol, 0, npcol );
+   mat.nq = nq + 1;
+/*
+ * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is
+ * N by N+1.  One column is added in every process column for the solve.
+ * The  result  however  is stored in a 1 x N vector replicated in every
+ * process row. In every process, A is lda * (nq+1), x is 1 * nq and the
+ * workspace is mp. 
+ *
+ * Ensure that lda is a multiple of ALIGN and not a power of 2
+ */
+   mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align;
+   do
+   {
+      ii = ( mat.ld += ALGO->align ); ip2 = 1;
+      while( ii > 1 ) { ii >>= 1; ip2 <<= 1; }
+   }
+   while( mat.ld == ip2 );
+/*
+ * Allocate dynamic memory
+ */
+   vptr = (void*)malloc( ( (size_t)(ALGO->align) + 
+                           (size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
+                         sizeof(double) );
+   info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
+   (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
+                          GRID->all_comm );
+   if( info[0] != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest",
+                    "[%d,%d] %s", info[1], info[2],
+                    "Memory allocation failed for A, x and b. Skip." );
+      (TEST->kskip)++;
+      /* some processes might have succeeded with allocation */
+      if (vptr) free(vptr);
+      return;
+   }
+/*
+ * generate matrix and right-hand-side, [ A | b ] which is N by N+1.
+ */
+   mat.A  = (double *)HPL_PTR( vptr,
+                               ((size_t)(ALGO->align) * sizeof(double) ) );
+   mat.X  = Mptr( mat.A, 0, mat.nq, mat.ld );
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+#ifdef HPL_CALL_VSIPL
+   mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
+                                 (vsip_length)(mat.ld * mat.nq),
+                                 VSIP_MEM_NONE );
+#endif
+/*
+ * Solve linear system
+ */
+   HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm );
+   time( &current_time_start );
+   HPL_ptimer( 0 );
+   HPL_pdgesv( GRID, ALGO, &mat );
+   HPL_ptimer( 0 );
+   time( &current_time_end );
+#ifdef HPL_CALL_VSIPL
+   (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); 
+   vsip_blockdestroy_d( mat.block );
+#endif
+/*
+ * Gather max of all CPU and WALL clock timings and print timing results
+ */
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       1, 0, wtime );
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      if( first )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "T/V                N    NB     P     Q",
+                      "               Time                 Gflops" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         if( TEST->thrsh <= HPL_rzero ) first = 0;
+      }
+/*
+ * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve.
+ * Print WALL time
+ */
+      Gflops = ( ( (double)(N) /   1.0e+9 ) * 
+                 ( (double)(N) / wtime[0] ) ) * 
+                 ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) );
+
+      cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ?
+                   (char)('C') : (char)('R') ) );
+      crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? 
+                   (char)('C') : (char)('R') ) );
+
+      if(      ALGO->btopo == HPL_1RING   ) ctop = '0';
+      else if( ALGO->btopo == HPL_1RING_M ) ctop = '1';
+      else if( ALGO->btopo == HPL_2RING   ) ctop = '2';
+      else if( ALGO->btopo == HPL_2RING_M ) ctop = '3';
+      else if( ALGO->btopo == HPL_BLONG   ) ctop = '4';
+      else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5';
+
+      if( wtime[0] > HPL_rzero ) {
+         HPL_fprintf( TEST->outfp,
+             "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f    %19.4e\n",
+             ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ),
+             ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin,
+             N, NB, nprow, npcol, wtime[0], Gflops );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() end time   %s\n", ctime( &current_time_end ) );
+      }
+   }
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       HPL_TIMING_N, HPL_TIMING_BEG, HPL_w );
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V",
+                   "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" );
+/*
+ * Recursive panel factorization
+ */
+      if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time rfact . . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization
+ */
+      if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time pfact . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization (swap)
+ */
+      if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time mxswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] );
+/*
+ * Update
+ */
+      if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time update  . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] );
+/*
+ * Update (swap)
+ */
+      if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time laswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] );
+/*
+ * Upper triangular system solve
+ */
+      if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time up tr sv  . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] );
+
+      if( TEST->thrsh <= HPL_rzero )
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+   }
+#endif
+/*
+ * Quick return, if I am not interested in checking the computations
+ */
+   if( TEST->thrsh <= HPL_rzero )
+   { (TEST->kpass)++; if( vptr ) free( vptr ); return; }
+/*
+ * Check info returned by solve
+ */
+   if( mat.info != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", 
+                    "Error code returned by solve is", mat.info, "skip" );
+      (TEST->kskip)++;
+      if( vptr ) free( vptr ); return;
+   }
+/*
+ * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x,
+ * and norm inf of b - A x. Display residual checks.
+ */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+   Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
+   AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
+/*
+ * Because x is distributed in process rows, switch the norms
+ */
+   XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 );
+   Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 );
+/*
+ * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to
+ * find the max (in the col). Then (3) broadcast along the rows so that every
+ * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5]
+ * for the entries of B, it is very likely that BnormI (<=,~) 0.5.
+ */
+   Bptr = Mptr( mat.A, 0, nq, mat.ld );
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){
+      if( mat.mp > 0 )
+      {
+         BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI );
+      }
+      else
+      {
+         BnormI = HPL_rzero;
+      }
+      (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max,
+                             GRID->col_comm );
+   }
+   (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE,
+                          HPL_indxg2p( N, NB, NB, 0, npcol ),
+                          GRID->row_comm );
+/*
+ * If I own b, compute ( b - A x ) and ( - A x ) otherwise
+ */
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );
+   }
+   else if( nq > 0 )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 );
+   }
+   else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; }
+/*
+ * Reduce the distributed residual in process column 0
+ */
+   if( mat.mp > 0 )
+      (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0,
+                         GRID->row_comm );
+/*
+ * Compute || b - A x ||_oo
+ */
+   resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld );
+/*
+ * Computes and displays norms, residuals ...
+ */
+   if( N <= 0 )
+   {
+      resid1 = HPL_rzero;
+   }
+   else
+   {
+      resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) );
+   }
+
+   if( resid1 < TEST->thrsh ) (TEST->kpass)++;
+   else                       (TEST->kfail)++;
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n",
+         "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1,
+         " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) );
+
+      if(resid1 >= TEST->thrsh ) 
+      {
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||Ax-b||_oo  . . . . . . . . . . . . . . . . . = ", resid0 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_1  . . . . . . . . . . . . . . . . . . . = ", Anorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_1  . . . . . . . . . . . . . . . . . . . = ", Xnorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI );
+      }
+   }
+   if( vptr ) free( vptr );
+/*
+ * End of HPL_pdtest
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c
new file mode 100644
index 000000000..202416079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c
@@ -0,0 +1,358 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int         HPL_ptimer_disabled;  /* nonzero: HPL_ptimer calls ignored */
+static double      HPL_ptimer_cpusec   [HPL_NPTIMER]; /* accumulated cpu time  */
+static double      HPL_ptimer_cpustart [HPL_NPTIMER]; /* cpu time at start     */
+static double      HPL_ptimer_wallsec  [HPL_NPTIMER]; /* accumulated wall time */
+static double      HPL_ptimer_wallstart[HPL_NPTIMER]; /* wall time at start    */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * HPL_ptimer_boot zeroes the accumulated cpu and wall times of every
+ * timer, flags each timer as not started, and enables HPL_ptimer.
+ */
+/* .. Local Variables .. */
+   int                        timer;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( timer = 0; timer < HPL_NPTIMER; timer++ )
+   {
+      HPL_ptimer_wallsec  [timer] = HPL_rzero;
+      HPL_ptimer_cpusec   [timer] = HPL_rzero;
+      HPL_ptimer_wallstart[timer] = HPL_PTIMER_STARTFLAG;
+      HPL_ptimer_cpustart [timer] = HPL_PTIMER_STARTFLAG;
+   }
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer provides a  "stopwatch"  functionality  cpu/wall  timer in
+ * seconds.  Up to HPL_NPTIMER separate timers can be functioning at once.
+ * The first call starts the timer, and the second stops it. This routine
+ * can be  disabled  by calling  HPL_ptimer_disable(),  so that calls to
+ * the timer are ignored.  This feature can be used to make sure certain
+ * sections of code do not affect timings,  even  if  they call routines
+ * which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+ * the  timer  functionality.  One  can retrieve  the current value of a
+ * timer by calling
+ *  
+ * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I )
+ *  
+ * where  I  is the timer index in  [0..HPL_NPTIMER).  To initialize the
+ * timer functionality, one must have called  HPL_ptimer_boot() prior to
+ * any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_ptimer_disabled ) return;
+/*
+ * Timer already running: stop it, add the interval to count and return
+ */
+   if( HPL_ptimer_wallstart[I] != HPL_PTIMER_STARTFLAG )
+   {
+      HPL_ptimer_cpusec   [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I];
+      HPL_ptimer_wallsec  [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I];
+      HPL_ptimer_wallstart[I]  = HPL_PTIMER_STARTFLAG;
+      return;
+   }
+/*
+ * Timer has not been started: record the current wall and cpu times
+ */
+   HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+   HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+/*
+ * End of HPL_ptimer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable clears the disabled flag,  so that subsequent calls
+ * to HPL_ptimer are no longer ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable  raises the disabled flag,  so that  subsequent
+ * calls to HPL_ptimer are ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+/*
+ * End of HPL_ptimer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME          TMTYPE,
+   const int                  I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_PTIME          TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns the wall- or cpu- time accumulated so far
+ * in timer I, or HPL_PTIMER_ERROR if the requested clock is unavailable.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * accum;
+   double                     probe;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Select the clock and the accumulator array matching TMTYPE. The clock
+ * is sampled only to find out whether it is available on this machine.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      probe = HPL_ptimer_walltime();
+      accum = HPL_ptimer_wallsec;
+   }
+   else
+   {
+      probe = HPL_ptimer_cputime();
+      accum = HPL_ptimer_cpusec;
+   }
+
+   if( probe == HPL_PTIMER_ERROR ) return( HPL_PTIMER_ERROR );
+
+   return( accum[I] );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm                   COMM,
+   const HPL_T_PTIME_OP       OPE,
+   const HPL_T_PTIME          TMTYPE,
+   const int                  N,
+   const int                  IBEG,
+   double                     * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int                  IBEG, N;
+   const HPL_T_PTIME_OP       OPE;
+   const HPL_T_PTIME          TMTYPE;
+   MPI_Comm                   COMM;
+   double                     * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine  combines the timing information stored on a scope
+ * of processes into the user TIMES array.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)        MPI_Comm
+ *         The MPI communicator  identifying  the process  collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)              const HPL_T_PTIME_OP
+ *         On entry, OPE specifies what combine operation should be done
+ *         as follows:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME  get sum of times across processes.
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * N       (global input)              const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)              const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)             double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         this array contains the requested timing information.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * src;
+   double                     probe;
+   int                        i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer has been disabled for combine operation.  If  wall- or cpu-time
+ * are not available on this machine, fill in times with HPL_PTIMER_ERROR
+ * flag, restore HPL_ptimer_disabled and return without any reduction.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   { probe = HPL_ptimer_walltime(); src = HPL_ptimer_wallsec; }
+   else
+   { probe = HPL_ptimer_cputime (); src = HPL_ptimer_cpusec;  }
+
+   if( probe == HPL_PTIMER_ERROR )
+   {
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+      HPL_ptimer_disabled = tmpdis;  /* do not leave the timers off */
+      return;
+   }
+   for( i = 0; i < N; i++ ) TIMES[i] = src[IBEG+i];
+/*
+ * Combine all nodes information, restore HPL_ptimer_disabled, and return
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+
+   if(      OPE == HPL_AMAX_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   else if( OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME  )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+/* Variant 1: ANSI C clock(), measured relative to the first call */
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;
+/* t0 keeps the clock() value seen on the first call as the origin */
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;
+   d = (double)(t1) / cps;
+   return( d );
+}
+ 
+#elif defined( HPL_USE_TIMES )
+/* Variant 2: times(), user time converted from clock ticks to seconds */
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+/* the tick rate is looked up on the first call only and then cached */
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+/* only the user time field is reported, in seconds */
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+/* Default variant: getrusage( RUSAGE_SELF ), user time in seconds */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct rusage              ruse;
+ 
+   (void) getrusage( RUSAGE_SELF, &ruse );
+   return( (double)( ruse.ru_utime.tv_sec  ) +
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/ 
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+ 
+#if defined( HPL_USE_GETTIMEOFDAY )
+/* Variant 1: gettimeofday(), measured relative to the first call */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   static long                sec0 = 0, usec0;
+   struct timeval             now;
+/* the clock is read exactly once per call */
+   (void) gettimeofday( &now, NULL );
+   if( sec0 == 0 )
+   {
+/* first call: remember the origin, the elapsed time is zero */
+      sec0  = now.tv_sec;
+      usec0 = now.tv_usec;
+      return( HPL_rzero );
+   }
+
+   return( (double)( now.tv_sec - sec0 ) +
+           ( (double)( now.tv_usec - usec0 ) / 1000000.0 ) );
+}
+
+#else
+/* Default variant: the value of MPI_Wtime() is returned as is */
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   return( MPI_Wtime() );
+}
+ 
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int    HPL_timer_disabled;              /* nonzero: HPL_timer is a no-op  */
+static double HPL_timer_cpusec   [HPL_NTIMER]; /* cpu  time accumulated so far   */
+static double HPL_timer_cpustart [HPL_NTIMER]; /* cpu  reading at start, or flag */
+static double HPL_timer_wallsec  [HPL_NTIMER]; /* wall time accumulated so far   */
+static double HPL_timer_wallstart[HPL_NTIMER]; /* wall reading at start, or flag */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot  clears the cpu and wall totals of all HPL_NTIMER timers,
+ * flags every timer as not started, and switches the timing facility on.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        k;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( k = 0; k < HPL_NTIMER; k++ )
+   {
+      HPL_timer_wallsec  [k] = HPL_rzero;
+      HPL_timer_cpusec   [k] = HPL_rzero;
+      HPL_timer_wallstart[k] = HPL_TIMER_STARTFLAG;
+      HPL_timer_cpustart [k] = HPL_TIMER_STARTFLAG;
+   }
+   HPL_timer_disabled = 0;
+/* End of HPL_timer_boot */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer  toggles timer  I  between its stopped and running states.
+ * A call made while the timer is stopped  records the current wall and
+ * cpu readings as the start of a new interval.  A call made  while the
+ * timer is running adds the cpu and wall time elapsed since that start
+ * to the totals kept for timer I, and flags the timer as stopped.  The
+ * totals can be read back with
+ *  
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *  
+ * Calls are ignored  while the facility is switched off,  that is after
+ * HPL_timer_disable() and until HPL_timer_enable() or HPL_timer_boot().
+ * HPL_timer_boot()  must have been called  before the first use so that
+ * every timer starts out in the stopped state.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to start/stop.  I is used as
+ *         an index into arrays of  HPL_NTIMER  entries and is not range-
+ *         checked here.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !HPL_timer_disabled )
+   {
+      if( HPL_timer_wallstart[I] != HPL_TIMER_STARTFLAG )
+      {
+/* Timer I is running: close the interval and flag the timer as stopped. */
+         HPL_timer_cpusec   [I] += HPL_timer_cputime () - HPL_timer_cpustart [I];
+         HPL_timer_wallsec  [I] += HPL_timer_walltime() - HPL_timer_wallstart[I];
+         HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG;
+      }
+      else
+      {
+/* Timer I is stopped: record where the new interval begins. */
+         HPL_timer_wallstart[I] = HPL_timer_walltime();
+         HPL_timer_cpustart [I] = HPL_timer_cputime ();
+      }
+   }
+/*
+ * End of HPL_timer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable  switches the timing facility on:  from now on, each
+ * call to HPL_timer starts or stops the timer it designates.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable switches the timing facility off: from now on, each
+ * call to HPL_timer returns at once without touching any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire  reports the wall or cpu time,  in seconds,  summed
+ * over the completed start/stop intervals of timer I.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_TIME
+ *         On entry, TMTYPE selects which total is returned:
+ *            = HPL_WALL_TIME : the accumulated wall clock time,
+ *            otherwise       : the accumulated cpu time.
+ *
+ * I       (global input)              const int
+ *         On entry,  I  is the index of the timer to query.  It is not
+ *         range-checked.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     total;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * The clock of the requested kind is sampled first.  When that reading
+ * compares equal to HPL_TIMER_ERROR, HPL_TIMER_ERROR is returned in place
+ * of the total accumulated in timer I.  The sample itself is otherwise
+ * discarded.
+ */
+   if( TMTYPE != HPL_WALL_TIME )
+   {
+/* Any TMTYPE other than HPL_WALL_TIME selects the cpu time. */
+      total = ( HPL_timer_cputime()  != HPL_TIMER_ERROR ?
+                HPL_timer_cpusec [I] : HPL_TIMER_ERROR );
+   }
+   else
+   {
+/* Wall clock time was requested. */
+      total = ( HPL_timer_walltime() != HPL_TIMER_ERROR ?
+                HPL_timer_wallsec[I] : HPL_TIMER_ERROR );
+   }
+   return( total );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_cputime returns the cpu time.  If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* Processor time, in seconds, consumed since the reference reading. */
+   static clock_t             origin = 0;
+   static double              ticks_per_sec = CLOCKS_PER_SEC;
+   clock_t                    used;
+
+/* The reference is (re)read from clock() for as long as it equals 0. */
+   if( origin == 0 ) origin = clock();
+   used = clock() - origin;
+   return( (double)(used) / ticks_per_sec );
+}
+
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* User cpu time of the calling process, from times(), in seconds. */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+/* Clock ticks per second, fetched from sysconf() while still unset. */
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE )  */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{  /* ru_utime of RUSAGE_SELF: whole seconds plus microseconds, as a double. */
+   struct rusage              usage;
+   (void) getrusage( RUSAGE_SELF, &usage );
+   return( ( (double)( usage.ru_utime.tv_usec ) / 1000000.0 ) +
+           (double)( usage.ru_utime.tv_sec ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+/* Elapsed wall-clock seconds, measured from the first call onwards. */
+   static long                base_sec = 0, base_usec;
+   struct timeval             tv;
+
+   (void) gettimeofday( &tv, NULL );
+   if( base_sec == 0 )
+   {
+/* First call: remember this instant and report HPL_rzero. */
+      base_sec  = tv.tv_sec;
+      base_usec = tv.tv_usec;
+      return( HPL_rzero );
+   }
+   return( (double)( tv.tv_sec - base_sec ) +
+           ( (double)( tv.tv_usec - base_usec ) / 1000000.0 ) );
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
new file mode 100644
index 000000000..056fd81ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Commands for changing directory, copying, linking, making directories, removing and touching files.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH is reused below to form BINdir, LIBdir and one of the HPL include paths.
+ARCH         = FreeBSD_PIV_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPLlib names the HPL archive in LIBdir; no trailing blank, which would become part of the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# This configuration: MPICH under /usr/local/mpich, linked through its .a archives.
+MPdir        = /usr/local/mpich
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# This configuration: the cblas and atlas archives from LAdir; LAinc is left empty.
+LAdir        = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty in this configuration; HPL_OPTS below is set to -DHPL_CALL_CBLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list assembled from the variables defined above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# This configuration selects the cblas interface and none of the other options.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS gathers the F77/C interface defines, the options and the include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries the HPL definitions only; CCFLAGS adds the optimization flags to them.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# The link step reuses the C compilation flags.
+LINKER       = /usr/bin/f77
+LINKFLAGS    = $(CCFLAGS)
+# Archiver, its flags, and ranlib.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = /usr/bin/ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS
new file mode 100644
index 000000000..af3f5da5f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS
@@ -0,0 +1,179 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Utility commands, referenced through variables.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Keep ARCH equal to this file's Make.<arch> suffix (names bin/, lib/ dirs).
+ARCH         = HPUX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir; binaries and libraries are per-ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Full path of the HPL library archive.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPIinc tells the C compiler where to find the MPI header files; MPIlib
+# is defined to be the name of the MPI library to be used. The variables
+# MPIdir and MPIplat are only used for defining MPIinc and MPIlib.
+#
+MPIdir = ${HOME}/local/mpi
+MPIplat = ${MPIdir}/hpux/ch_p4
+# Header search flags and the libmpich.a archive, derived from the above.
+MPIinc = -I${MPIdir}/include -I${MPIplat}/include
+MPIlib = ${MPIplat}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - BLAS library -------------------------------------------------------
+# ----------------------------------------------------------------------
+# Absolute path of the BLAS archive.
+BLASlib = /usr/lib/pa1.1/libblas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate 
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a 
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form: 
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses 
+#                       Cray  fcd  (fortran  character  descriptor)  for 
+#                       interoperation. 
+#
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_LIBS includes MPIlib so the MPI library defined above is actually used.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc)
+HPL_LIBS     = $(HPLlib) $(BLASlib) $(MPIlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS F77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional -DHPL_* switches listed above is enabled)
+# ----------------------------------------------------------------------
+# All include flags and preprocessor definitions, in one variable.
+HPL_DEFS = ${HPL_INCLUDES} ${F2CDEFS} ${HPL_OPTS}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries HPL_DEFS only; CCFLAGS adds further compiler switches.
+CC = cc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4
+#
+# The Fortran linker is needed on some platforms to resolve the Fortran
+# internals used by the BLAS library; here the C compiler does the link.
+#
+LINKER = cc
+LINKFLAGS = -Aa
+# Archive tools; RANLIB is set to echo rather than a ranlib binary.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS
new file mode 100644
index 000000000..984236be2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL = /bin/sh
+# Utility commands, referenced through variables.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH also names the bin/ and lib/ subdirectories defined further down.
+ARCH = I860_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir; binaries and libraries are per-ARCH.
+TOPdir = ${HOME}/hpl
+INCdir = ${TOPdir}/include
+BINdir = ${TOPdir}/bin/${ARCH}
+LIBdir = ${TOPdir}/lib/${ARCH}
+# Full path of the HPL library archive.
+HPLlib = ${LIBdir}/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir =
+MPinc =
+MPlib = -lmpi
+# (MPdir and MPinc are empty; only the -lmpi link flag is supplied)
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc: where the C compiler finds the Linear Algebra library headers.
+# LAlib: the name of the library to be used.
+# LAdir: only used for defining LAinc and LAlib.
+# (LAdir and LAinc are empty; only the -lkmath link flag is supplied)
+LAdir =
+LAinc =
+LAlib = -lkmath
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+# (the three options marked [default] in the list above)
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES gathers the -I flags; HPL_LIBS gathers the library list.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional -DHPL_* switches listed above is enabled)
+# ----------------------------------------------------------------------
+# All preprocessor definitions and include flags, in one variable.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT omits the -O4 optimization level that CCFLAGS adds.
+CC = cc
+CCNOOPT = ${HPL_DEFS} -nx
+CCFLAGS = ${HPL_DEFS} -O4 -nx
+# Linking goes through f77 and reuses the compile flags.
+LINKER = f77
+LINKFLAGS = ${CCFLAGS}
+# Archive tools; RANLIB is set to echo rather than a ranlib binary.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS
new file mode 100644
index 000000000..d78bcf09f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS
@@ -0,0 +1,181 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL = /bin/sh
+# Utility commands, referenced through variables.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH also names the bin/ and lib/ subdirectories defined further down.
+ARCH = IRIX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir; binaries and libraries are per-ARCH.
+TOPdir = ${HOME}/hpl
+INCdir = ${TOPdir}/include
+BINdir = ${TOPdir}/bin/${ARCH}
+LIBdir = ${TOPdir}/lib/${ARCH}
+# Full path of the HPL library archive.
+HPLlib = ${LIBdir}/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir = ${HOME}/local/mpi
+MPinc = -I${MPdir}/include -I${MPdir}/IRIX64/ch_p4/include
+MPlib = ${MPdir}/IRIX64/ch_p4/lib/libmpich.a
+# (headers and the libmpich.a archive are both located under MPdir)
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc: where the C compiler finds the Linear Algebra library headers.
+# LAlib: the name of the library to be used.
+# LAdir: only used for defining LAinc and LAlib.
+# (LAdir and LAinc are empty; only the -lblas link flag is supplied)
+LAdir =
+LAinc =
+LAlib = -lblas
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int
+# (the three options marked [default] in the list above)
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES gathers the -I flags; HPL_LIBS gathers the library list.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional -DHPL_* switches listed above is enabled)
+# ----------------------------------------------------------------------
+# All preprocessor definitions and include flags, in one variable.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# Both flag sets pass -64; CCFLAGS adds the optimization switches.
+CC = cc
+CCNOOPT = ${HPL_DEFS} -64
+CCFLAGS = ${HPL_DEFS} -O3 -64 -OPT:Olimit=15000 \
+          -TARG:platform=IP30 -LNO:blocking=OFF -LOPT:alias=typed
+# Linking goes through cc and reuses the compile flags.
+LINKER = cc
+LINKFLAGS = ${CCFLAGS}
+# Archive tools; RANLIB is set to echo rather than a ranlib binary.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
new file mode 100644
index 000000000..624306902
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Basic file commands; RM is an absolute path and always forces (-f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Platform tag: names the per-platform include, bin and lib directories.
+ARCH         = Linux_ATHLON_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the root of the HPL tree; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): the HPLlib value ends in a blank, which make retains.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI taken from /usr/local/mpi; libmpich.a is named by its full path.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Links libcblas.a and libatlas.a from LAdir; no extra include path.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Empty: this file uses the CBLAS interface (see HPL_OPTS), not F77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags (HPL generic and per-ARCH, then LA, MPI) and link inputs.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the Fortran 77 BLAS interface,
+#    *) not display detailed timing information.
+# Selects the CBLAS interface instead of the default listed above.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# Defines plus include flags; CCNOOPT and CCFLAGS both begin with this.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT carries no optimization flags, CCFLAGS adds them.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# The link step uses the same driver and the same flags as compilation.
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = $(CCFLAGS)
+# Archive tool; RANLIB is set to echo, so no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
new file mode 100644
index 000000000..07985f781
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Basic file commands; RM is an absolute path and always forces (-f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Platform tag: names the per-platform include, bin and lib directories.
+ARCH         = Linux_ATHLON_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the root of the HPL tree; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): the HPLlib value ends in a blank, which make retains.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI taken from /usr/local/mpi; libmpich.a is named by its full path.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be 
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Links libf77blas.a and libatlas.a from LAdir; no extra include path.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# f2c-style names (-DAdd__), INTEGER is a C int, Sun-style strings.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags (HPL generic and per-ARCH, then LA, MPI) and link inputs.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Defines plus include flags; CCNOOPT and CCFLAGS both begin with this.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT carries no optimization flags, CCFLAGS adds them.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Link with g77, presumably for the Fortran runtime; flags as for CC.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# Archive tool; RANLIB is set to echo, so no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
new file mode 100644
index 000000000..ddf3fb4b6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Basic file commands; RM is an absolute path and always forces (-f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Platform tag: names the per-platform include, bin and lib directories.
+ARCH         = Linux_ATHLON_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the root of the HPL tree; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): the HPLlib value ends in a blank, which make retains.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI taken from /usr/local/mpi; libmpich.a is named by its full path.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# VSIPL install: headers from include/, library lib/libvsip_c.a.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Empty: this file uses the VSIPL interface (see HPL_OPTS), not F77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags (HPL generic and per-ARCH, then LA, MPI) and link inputs.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the Fortran 77 BLAS interface,
+#    *) not display detailed timing information.
+# Selects the VSIPL library instead of the default listed above.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# Defines plus include flags; CCNOOPT and CCFLAGS both begin with this.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT carries no optimization flags, CCFLAGS adds them.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# The link step uses the same driver and the same flags as compilation.
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = $(CCFLAGS)
+# Archive tool; RANLIB is set to echo, so no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64
new file mode 100644
index 000000000..47661c25d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64
@@ -0,0 +1,193 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run every recipe line.
+SHELL        = /bin/sh
+# File-system helper commands, referenced through these variables.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; it selects the per-arch bin/, lib/ and include/ dirs.
+ARCH         = Linux_Intel64
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path is derived from TOPdir, so relocating the tree changes one line.
+TOPdir       = ${HOME}/hpl
+INCdir       = ${TOPdir}/include
+BINdir       = ${TOPdir}/bin/${ARCH}
+LIBdir       = ${TOPdir}/lib/${ARCH}
+# Archive holding the HPL library objects.
+HPLlib       = ${LIBdir}/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# MKL defaults (only if LAinc/LAlib are unset); MKLROOT is the MKL dir itself.
+LAdir        = $(MKLROOT)
+ifndef  LAinc
+LAinc        = $(LAdir)/include
+endif
+ifndef  LAlib
+LAlib        = -L$(LAdir)/lib/intel64 \
+               -Wl,--start-group \
+               $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
+               $(LAdir)/lib/intel64/libmkl_intel_thread.a \
+               $(LAdir)/lib/intel64/libmkl_core.a \
+               -Wl,--end-group -lpthread -ldl
+endif
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# One option per category above. NOTE(review): -DAdd__ is listed for f2c; confirm.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# MPinc/MPlib are commented out above, so they add nothing unless set elsewhere.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} -I${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Detailed timers on; HPL_PROGRESS_REPORT is not described in the list above.
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+# Combined definitions and include paths; CCNOOPT and CCFLAGS start from this.
+HPL_DEFS     = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# Current icc rejects the legacy -openmp spelling, so OMP_DEFS uses -qopenmp.
+CC       = mpiicc
+CCNOOPT  = $(HPL_DEFS)
+OMP_DEFS = -qopenmp
+CCFLAGS  = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall
+# NOTE(review): CCFLAGS passes both -w and -Wall; confirm which one is meant.
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
+# RANLIB is echo, so no real ranlib program is configured here.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS
new file mode 100644
index 000000000..535a0e214
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run every recipe line.
+SHELL        = /bin/sh
+# -f / -p keep the link and directory steps from failing on existing paths.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; it selects the per-arch bin/, lib/ and include/ dirs.
+ARCH         = Linux_PII_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path is derived from TOPdir, so relocating the tree changes one line.
+TOPdir       = ${HOME}/hpl
+INCdir       = ${TOPdir}/include
+BINdir       = ${TOPdir}/bin/${ARCH}
+LIBdir       = ${TOPdir}/lib/${ARCH}
+# Archive holding the HPL library objects.
+HPLlib       = ${LIBdir}/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI install root; the header directory and libmpich.a are taken from under it.
+MPdir        = /usr/local/mpi
+MPinc        = -I${MPdir}/include
+MPlib        = ${MPdir}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# CBLAS and ATLAS archives under LAdir; no include directory is set (LAinc empty).
+LAdir        = ${HOME}/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = ${LAdir}/libcblas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Empty: no F77/C interface flags are set (HPL_OPTS below selects cblas).
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# LAinc is empty here, so only the HPL and MPI header directories are searched.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Selects the cblas interface instead of the default Fortran 77 BLAS one.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# Combined definitions and include paths; CCNOOPT and CCFLAGS start from this.
+HPL_DEFS     = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT carries only the definitions, CCFLAGS adds optimisation.
+CC           = /usr/bin/gcc
+CCNOOPT      = ${HPL_DEFS}
+CCFLAGS      = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops
+#
+# The link step goes through the Fortran driver so that the Fortran
+# internals used in the BLAS library are found.
+# NOTE(review): g77 is absent from GCC 4 and later; confirm it exists here.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = ${CCFLAGS}
+# RANLIB is echo, so no real ranlib program is configured here.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
new file mode 100644
index 000000000..31fc9ea74
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run every recipe line.
+SHELL        = /bin/sh
+# -f / -p keep the link and directory steps from failing on existing paths.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; it selects the per-arch bin/, lib/ and include/ dirs.
+ARCH         = Linux_PII_CBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path is derived from TOPdir, so relocating the tree changes one line.
+TOPdir       = ${HOME}/hpl
+INCdir       = ${TOPdir}/include
+BINdir       = ${TOPdir}/bin/${ARCH}
+LIBdir       = ${TOPdir}/lib/${ARCH}
+# Archive holding the HPL library objects.
+HPLlib       = ${LIBdir}/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty: CC/LINKER below are mpicc/mpif77, which presumably supply MPI.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# CBLAS and ATLAS archives under LAdir; no include directory is set (LAinc empty).
+LAdir        = ${HOME}/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = ${LAdir}/libcblas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Empty: no F77/C interface flags are set (HPL_OPTS below selects cblas).
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# LAinc and MPinc are empty here, so only the two HPL header dirs are searched.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Selects the cblas interface instead of the default Fortran 77 BLAS one.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# Combined definitions and include paths; CCNOOPT and CCFLAGS start from this.
+HPL_DEFS     = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# MPI compiler wrapper; CCNOOPT carries only the definitions, CCFLAGS adds -O3.
+CC           = mpicc
+CCNOOPT      = ${HPL_DEFS}
+CCFLAGS      = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# The link step goes through the Fortran driver so that the Fortran
+# internals used in the BLAS library are found.
+#
+LINKER       = mpif77
+LINKFLAGS    = ${CCFLAGS}
+# RANLIB is echo, so no real ranlib program is configured here.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS
new file mode 100644
index 000000000..5ed9aac12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Command names for basic file operations (cd, cp, ln -s, mkdir, rm, touch).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH is appended to the bin, lib and include paths defined below.
+ARCH         = Linux_PII_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; the bin and lib paths are specific to ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (value kept free of trailing blanks).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# The include path and libmpich.a both come from the MPdir tree.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No extra include path; libf77blas.a and libatlas.a come from LAdir.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: f2c name space, INTEGER as C int, Sun-style strings.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I flags (HPL, linear algebra, MPI headers) and the library list.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No option is set here, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# F77/C interface flags, compile-time options and include flags combined.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only HPL_DEFS; CCFLAGS adds optimization and warnings.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the linker is g77, and LINKFLAGS reuses CCFLAGS unchanged.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# Archiver and its flags; RANLIB is set to echo, not a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
new file mode 100644
index 000000000..a2416396c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Command names for basic file operations (cd, cp, ln -s, mkdir, rm, touch).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH is appended to the bin, lib and include paths defined below.
+ARCH         = Linux_PII_FBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; the bin and lib paths are specific to ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (value kept free of trailing blanks).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty; presumably mpicc/mpif77 supply the MPI paths - TODO confirm.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No extra include path; libf77blas.a and libatlas.a come from LAdir.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: one suffixed underscore, INTEGER as C int, Sun strings.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I flags (HPL, linear algebra, MPI headers) and the library list.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No option is set here, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# F77/C interface flags, compile-time options and include flags combined.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only HPL_DEFS; CCFLAGS adds optimization and warnings.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the linker is mpif77, and LINKFLAGS reuses CCFLAGS unchanged.
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# Archiver and its flags; RANLIB is set to echo, not a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL
new file mode 100644
index 000000000..0f690a1b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Command names for basic file operations (cd, cp, ln -s, mkdir, rm, touch).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH is appended to the bin, lib and include paths defined below.
+ARCH         = Linux_PII_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; the bin and lib paths are specific to ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (value kept free of trailing blanks).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# The include path and libmpich.a both come from the MPdir tree.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# The include path and libvsip_c.a both come from the LAdir tree.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty; this file sets -DHPL_CALL_VSIPL in HPL_OPTS below.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I flags (HPL, linear algebra, MPI headers) and the library list.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Selected here: call the vsip library (-DHPL_CALL_VSIPL).
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# F77/C interface flags, compile-time options and include flags combined.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only HPL_DEFS; CCFLAGS adds optimization and warnings.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the linker is g77, and LINKFLAGS reuses CCFLAGS unchanged.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# Archiver and its flags; RANLIB is set to echo, not a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
new file mode 100644
index 000000000..fee265e46
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# File utilities; NOTE(review): MKDIR has no -p, LN_S no -f (cf. MacOSX).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Linux_PII_VSIPL_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin and lib directories are per-ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after libhpl.a: make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty; presumably mpicc/mpif77 (CC, LINKER) supply MPI -- confirm.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAdir is a site-specific absolute path; adjust to the local VSIPL tree.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: HPL_OPTS below selects VSIPL, not a Fortran 77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search flags (HPL, then LA and MP) and the list of libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# This configuration selects the vsip library interface.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# Combined definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only $(HPL_DEFS); CCFLAGS adds optimization and warnings.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# The link step reuses the complete set of compile flags.
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is echo: whatever invokes $(RANLIB) only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate
new file mode 100644
index 000000000..d1ce69b64
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# File utilities; here MKDIR passes -p and LN_S passes -f.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = MacOSX_Accelerate
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin and lib directories are per-ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after libhpl.a: make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# The three settings below are commented-out examples; all stay undefined.
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No paths are set: LAlib names the Accelerate framework directly.
+LAdir        =
+LAinc        =
+LAlib        = -framework Accelerate
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Uses the option marked [default] in each of the three categories above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# LAinc/MPinc are appended so that header paths set above take effect.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Non-default: detailed timers on; HPL_PROGRESS_REPORT is not listed above.
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+# Combined definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only $(HPL_DEFS); CCFLAGS adds -O3.
+CC       = mpicc-openmpi-mp
+CCNOOPT  = $(HPL_DEFS)
+CCFLAGS  = $(HPL_DEFS) -O3
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the C compiler command itself is used as the linker.
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+# ARFLAGS is "cr" and RANLIB is the ranlib program (not echo).
+ARCHIVER     = ar
+ARFLAGS      = cr
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS
new file mode 100644
index 000000000..628f2c152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# File utilities; NOTE(review): MKDIR has no -p, LN_S no -f (cf. MacOSX).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWR2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin and lib directories are per-ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after libhpl.a: make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty; presumably mpcc_r/mpxlf_r (CC, LINKER) supply MPI -- confirm.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Library given by name only (-l); no directory or include path is set.
+LAdir        =
+LAinc        =
+LAlib        = -lesslp2
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# -DNoChange (see category 1 above) plus the [default] int/string options.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search flags (HPL, then LA and MP) and the list of libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty: none of the options listed above is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Combined definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only $(HPL_DEFS); CCFLAGS adds -O3 and the pwr2 flags.
+CC           = mpcc_r
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1
+# LINKFLAGS puts -bmaxdata:0x70000000 in front of the compile flags.
+LINKER       = mpxlf_r
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# RANLIB is echo: whatever invokes $(RANLIB) only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS
new file mode 100644
index 000000000..bba468803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for: change dir, copy, symlink, make dir, delete, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and is used in the per-arch paths below.
+ARCH         = PWR3_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path derives from TOPdir, which is set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive, located in the per-arch library directory.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# NOTE(review): all three are empty - presumably MPI comes from the toolchain; confirm.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only a link flag is set here: -lessl (presumably IBM ESSL - confirm).
+LAdir        =
+LAinc        =
+LAlib        = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: NoChange naming, int for F77 INTEGER, Sun-style strings.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path and link line, built from the directory, LA and MP variables.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# HPL_OPTS is left empty, so none of the optional switches is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only the defines; CCFLAGS adds -O3 and the pwr3 tuning.
+CC           = /usr/vac/bin/xlc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt
+# Linking goes through /usr/bin/xlf and re-uses CCFLAGS after -bmaxdata.
+LINKER       = /usr/bin/xlf
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# Archiver settings; RANLIB is echo, i.e. no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS
new file mode 100644
index 000000000..2a0fb2ec6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for: change dir, copy, symlink, make dir, delete, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and is used in the per-arch paths below.
+ARCH         = PWRPC_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path derives from TOPdir, which is set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive, located in the per-arch library directory.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: headers and the libmpich.a archive are taken from /usr/local/mpi.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only a link flag is set here: -lessl (presumably IBM ESSL - confirm).
+LAdir        =
+LAinc        =
+LAlib        = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: NoChange naming, int for F77 INTEGER, Sun-style strings.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path and link line, built from the directory, LA and MP variables.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# HPL_OPTS is left empty, so none of the optional switches is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only the defines; CCFLAGS adds -O3 and ppc/604 tuning.
+CC           = mpcc_r
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1
+# Linking goes through mpxlf_r and re-uses CCFLAGS after -bmaxdata.
+LINKER       = mpxlf_r
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# Archiver settings; RANLIB is echo, i.e. no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
new file mode 100644
index 000000000..1ade2d8aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for: change dir, copy, symlink, make dir, delete, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and is used in the per-arch paths below.
+ARCH         = SUN4SOL2-g_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every path derives from TOPdir, which is set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive, located in the per-arch library directory.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: libmpich.a from the solaris/ch_p4 tree, plus -lsocket and -lnsl.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only a flag is set: -xlic_lib=sunperf (presumably Sun Perf. Library - confirm).
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: Add_ naming, int for F77 INTEGER, Sun-style strings.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path and link line, built from the directory, LA and MP variables.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# HPL_OPTS is left empty, so none of the optional switches is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# Debug build: CCFLAGS adds only -g to the defines, no optimization flag.
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+# The link command is f77 wrapped by purify -best-effort; no link flags.
+LINKER       = purify -best-effort f77
+LINKFLAGS    =
+# Archiver settings; RANLIB is echo, i.e. no real ranlib program is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
new file mode 100644
index 000000000..1cbb371fd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = SUN4SOL2-g_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = $(HOME)/local/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is  passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty on purpose: this setup calls VSIPL, not a Fortran 77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+#
+LINKER       = purify -best-effort cc
+LINKFLAGS    =
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
new file mode 100644
index 000000000..a1d5d6315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = SUN4SOL2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is  passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa 
+#
+LINKER       = f77
+LINKFLAGS    = -dalign -native -xarch=v8plusa -xO5
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS
new file mode 100644
index 000000000..fe12cae9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS
@@ -0,0 +1,187 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = T3E_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        =
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is  passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \
+               -DCRAY_BLAS -DHPL_USE_TIMES
+#
+# When UpCase is defined,  CRAY_BLAS redefines the BLAS routines used in
+# HPL to be prefixed with an S. In the Cray programming environment, the
+# default INTEGER and REAL size is 64 bits.  This  is  reflected  in the
+# Cray Scientific Library as well,  so SGEMM is the 64-bit matrix multi-
+# ply.
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3
+#
+LINKER       = f77
+LINKFLAGS    = -O3,unroll2,pipeline2
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS
new file mode 100644
index 000000000..3d8062061
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Tru64_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include
+MPlib        = $(MPdir)/alpha/ch_p4/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is  passed  for each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -arch host -tune host -std -O5
+#
+LINKER       = f77
+LINKFLAGS    = -nofor_main -O5 -arch host -tune host
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan
new file mode 100644
index 000000000..f9550412c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Tru64_FBLAS_elan
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        = -lmpi -lelan
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is  passed  for each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -arch host -tune host -std -O5
+#
+LINKER       = f77
+LINKFLAGS    = -nofor_main -O5 -arch host -tune host
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in
new file mode 100644
index 000000000..8cbbd8242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = @SHELL@
+#
+CD           = @CD@
+CP           = @CP@
+LN_S         = @LN_S@
+MKDIR        = @MKDIR@
+RM           = @RM@
+TOUCH        = @TOUCH@
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = @ARCH@
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = @MPDIR@
+MPinc        = @MPINC@
+MPlib        = @MPLIB@
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be 
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = @LADIR@
+LAinc        = @LAINC@
+LAlib        = @LALIB@
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is  passed  for each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = @F2CDEFS@
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+# 
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = @CC@
+CCNOOPT      = $(HPL_DEFS) @CCNOOPT@
+CCFLAGS      = $(HPL_DEFS) @CCFLAGS@
+#
+LINKER       = @LINKER@
+LINKFLAGS    = @LINKFLAGS@
+#
+ARCHIVER     = @ARCHIVER@
+ARFLAGS      = @ARFLAGS@
+RANLIB       = @RANLIB@
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic
new file mode 100644
index 000000000..68cf74a3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+#
+# Configure script to create Make.UNKNOWN from  Make.UNKNOWN.in  for the
+# HPL distribution, so users without a real Unix system can have a gene-
+# ric  Make.UNKNOWN  to edit for their needs.  This script replaces each
+# @NAME@ placeholder of Make.UNKNOWN.in with a generic default  (program
+# names, commonly used option values) and writes Make.UNKNOWN.
+#
+########################################################################
+#
+sed -e 's%@SHELL@%/bin/sh%' \
+    -e 's%@CD@%cd%' \
+    -e 's%@CP@%cp%' \
+    -e 's%@LN_S@%ln -s%' \
+    -e 's%@MKDIR@%mkdir%' \
+    -e 's%@RM@%/bin/rm -f%' \
+    -e 's%@TOUCH@%touch%' \
+    -e 's%@ARCH@%UNKNOWN%' \
+    -e 's%@CC@%mpicc%' \
+    -e 's%@CCNOOPT@%%' \
+    -e 's%@CCFLAGS@%%' \
+    -e 's%@LINKER@%mpif77%' \
+    -e 's%@LINKFLAGS@%%' \
+    -e 's%@ARCHIVER@%ar%' \
+    -e 's%@ARFLAGS@%r%' \
+    -e 's%@RANLIB@%echo%' \
+    -e 's%@MPDIR@%%' \
+    -e 's%@MPINC@%%' \
+    -e 's%@MPLIB@%%' \
+    -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \
+    -e 's%@LADIR@%%' \
+    -e 's%@LAINC@%%' \
+    -e 's%@LALIB@%-lblas%' \
+    Make.UNKNOWN.in > Make.UNKNOWN
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am
new file mode 100644
index 000000000..2e6d3d454
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am
@@ -0,0 +1,42 @@
+# Search the include/ directory located next to $(top_srcdir).
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+
+# Static library produced from this directory.
+lib_LIBRARIES = libhpl.a
+
+# Sources compiled into libhpl.a, grouped by subdirectory (auxil, blas, comm,
+# grid, panel, pauxil, pfact, pgesv).  Every file is listed exactly once: a
+# repeated entry adds no code to the library and only duplicates the
+# corresponding object in the generated object list.
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
+blas/HPL_dgemv.c blas/HPL_daxpy.c blas/HPL_dcopy.c blas/HPL_dgemm.c \
+blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
+comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
+grid/HPL_min.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c pauxil/HPL_pabort.c \
+pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \
+pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \
+pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c \
+pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in
new file mode 100644
index 000000000..139ecbad0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in
@@ -0,0 +1,1355 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LIBRARIES = $(lib_LIBRARIES)
+AR = ar
+ARFLAGS = cru
+AM_V_AR = $(am__v_AR_@AM_V@)
+am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
+am__v_AR_0 = @echo "  AR      " $@;
+am__v_AR_1 = 
+libhpl_a_AR = $(AR) $(ARFLAGS)
+libhpl_a_LIBADD =
+am__dirstamp = $(am__leading_dot)dirstamp
+# Objects that make up libhpl.a: one per unique source file, grouped by
+# subdirectory (auxil, blas, comm, grid, panel, pauxil, pfact, pgesv).
+# Each object is listed exactly once.
+am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) auxil/HPL_fprintf.$(OBJEXT) \
+	auxil/HPL_dlacpy.$(OBJEXT) auxil/HPL_dlamch.$(OBJEXT) \
+	blas/HPL_dscal.$(OBJEXT) \
+	blas/HPL_dtrsm.$(OBJEXT) \
+	blas/HPL_dtrsv.$(OBJEXT) \
+	blas/HPL_idamax.$(OBJEXT) \
+	blas/HPL_dgemv.$(OBJEXT) \
+	blas/HPL_daxpy.$(OBJEXT) \
+	blas/HPL_dcopy.$(OBJEXT) \
+	blas/HPL_dgemm.$(OBJEXT) \
+	blas/HPL_dger.$(OBJEXT) \
+	comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \
+	comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \
+	comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \
+	comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \
+	comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \
+	comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \
+	comm/HPL_packL.$(OBJEXT) \
+	grid/HPL_reduce.$(OBJEXT) \
+	grid/HPL_sum.$(OBJEXT) \
+	grid/HPL_grid_info.$(OBJEXT) \
+	grid/HPL_grid_init.$(OBJEXT) \
+	grid/HPL_all_reduce.$(OBJEXT) \
+	grid/HPL_broadcast.$(OBJEXT) \
+	grid/HPL_grid_exit.$(OBJEXT) \
+	grid/HPL_max.$(OBJEXT) \
+	grid/HPL_min.$(OBJEXT) \
+	grid/HPL_barrier.$(OBJEXT) \
+	panel/HPL_pdpanel_disp.$(OBJEXT) \
+	panel/HPL_pdpanel_free.$(OBJEXT) \
+	panel/HPL_pdpanel_init.$(OBJEXT) \
+	panel/HPL_pdpanel_new.$(OBJEXT) \
+	pauxil/HPL_pdlamch.$(OBJEXT) pauxil/HPL_pdlange.$(OBJEXT) \
+	pauxil/HPL_pabort.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \
+	pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \
+	pauxil/HPL_dlaswp00N.$(OBJEXT) pauxil/HPL_dlaswp01N.$(OBJEXT) \
+	pauxil/HPL_dlaswp01T.$(OBJEXT) pauxil/HPL_dlaswp02N.$(OBJEXT) \
+	pauxil/HPL_dlaswp03N.$(OBJEXT) pauxil/HPL_dlaswp03T.$(OBJEXT) \
+	pauxil/HPL_dlaswp04N.$(OBJEXT) pauxil/HPL_dlaswp04T.$(OBJEXT) \
+	pauxil/HPL_dlaswp05N.$(OBJEXT) pauxil/HPL_dlaswp05T.$(OBJEXT) \
+	pauxil/HPL_dlaswp06N.$(OBJEXT) pauxil/HPL_dlaswp06T.$(OBJEXT) \
+	pauxil/HPL_infog2l.$(OBJEXT) pauxil/HPL_dlaswp10N.$(OBJEXT) \
+	pauxil/HPL_pwarn.$(OBJEXT) \
+	pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \
+	pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \
+	pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \
+	pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \
+	pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \
+	pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \
+	pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \
+	pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocswpN.$(OBJEXT) \
+	pfact/HPL_dlocswpT.$(OBJEXT) \
+	pgesv/HPL_pdgesv0.$(OBJEXT) pgesv/HPL_pdgesv.$(OBJEXT) \
+	pgesv/HPL_pdgesvK1.$(OBJEXT) pgesv/HPL_pdgesvK2.$(OBJEXT) \
+	pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \
+	pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \
+	pgesv/HPL_equil.$(OBJEXT) pgesv/HPL_pipid.$(OBJEXT) \
+	pgesv/HPL_plindx0.$(OBJEXT) pgesv/HPL_plindx10.$(OBJEXT) \
+	pgesv/HPL_plindx1.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \
+	pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \
+	pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \
+	pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT)
+libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \
+	auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \
+	auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \
+	blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \
+	blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \
+	blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \
+	blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \
+	comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \
+	comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \
+	comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \
+	comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \
+	comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \
+	comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \
+	comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \
+	grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \
+	grid/$(DEPDIR)/HPL_grid_exit.Po \
+	grid/$(DEPDIR)/HPL_grid_info.Po \
+	grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \
+	grid/$(DEPDIR)/HPL_min.Po grid/$(DEPDIR)/HPL_reduce.Po \
+	grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_free.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_init.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_new.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \
+	pauxil/$(DEPDIR)/HPL_indxg2p.Po \
+	pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \
+	pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \
+	pauxil/$(DEPDIR)/HPL_pdlamch.Po \
+	pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \
+	pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \
+	pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \
+	pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \
+	pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \
+	pgesv/$(DEPDIR)/HPL_pdtrsv.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \
+	pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \
+	pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \
+	pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \
+	pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(libhpl_a_SOURCES)
+DIST_SOURCES = $(libhpl_a_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+# Search the include/ directory located next to $(top_srcdir).
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+# Static library produced from this directory.
+lib_LIBRARIES = libhpl.a
+# Sources compiled into libhpl.a, grouped by subdirectory (auxil, blas, comm,
+# grid, panel, pauxil, pfact, pgesv).  Every file is listed exactly once: a
+# repeated entry adds no code to the library and only duplicates the
+# corresponding object in the generated object list.
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
+blas/HPL_dgemv.c blas/HPL_daxpy.c blas/HPL_dcopy.c blas/HPL_dgemm.c \
+blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
+comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
+grid/HPL_min.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c pauxil/HPL_pabort.c \
+pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \
+pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \
+pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c \
+pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libLIBRARIES: $(lib_LIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; }
+	@$(POST_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  if test -f $$p; then \
+	    $(am__strip_dir) \
+	    echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \
+	    ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \
+	  else :; fi; \
+	done
+
+uninstall-libLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir)
+
+clean-libLIBRARIES:
+	-test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
+auxil/$(am__dirstamp):
+	@$(MKDIR_P) auxil
+	@: > auxil/$(am__dirstamp)
+auxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) auxil/$(DEPDIR)
+	@: > auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlatcpy.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_fprintf.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlacpy.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlamch.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+blas/$(am__dirstamp):
+	@$(MKDIR_P) blas
+	@: > blas/$(am__dirstamp)
+blas/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) blas/$(DEPDIR)
+	@: > blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dscal.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dtrsm.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dtrsv.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_idamax.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dgemv.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_daxpy.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dcopy.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dgemm.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dger.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+comm/$(am__dirstamp):
+	@$(MKDIR_P) comm
+	@: > comm/$(am__dirstamp)
+comm/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) comm/$(DEPDIR)
+	@: > comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_sdrv.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_send.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_recv.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_bcast.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_binit.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_2rinM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_2ring.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_blonM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_packL.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+grid/$(am__dirstamp):
+	@$(MKDIR_P) grid
+	@: > grid/$(am__dirstamp)
+grid/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) grid/$(DEPDIR)
+	@: > grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_reduce.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_sum.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_info.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_init.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_all_reduce.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_broadcast.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_exit.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_max.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_min.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_barrier.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+panel/$(am__dirstamp):
+	@$(MKDIR_P) panel
+	@: > panel/$(am__dirstamp)
+panel/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) panel/$(DEPDIR)
+	@: > panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_disp.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_free.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_init.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_new.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+pauxil/$(am__dirstamp):
+	@$(MKDIR_P) pauxil
+	@: > pauxil/$(am__dirstamp)
+pauxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pauxil/$(DEPDIR)
+	@: > pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pdlamch.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pdlange.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_indxg2p.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_numroc.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_numrocI.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp00N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp01N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp01T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp02N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp03N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp03T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp04N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp04T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp05N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp05T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp06N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp06T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_infog2l.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp10N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pwarn.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pfact/$(am__dirstamp):
+	@$(MKDIR_P) pfact
+	@: > pfact/$(am__dirstamp)
+pfact/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pfact/$(DEPDIR)
+	@: > pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanllN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanllT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpancrN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpancrT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanllN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanllT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdmxswp.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdfact.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocmax.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpancrT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpancrN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocswpN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocswpT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pabort.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pgesv/$(am__dirstamp):
+	@$(MKDIR_P) pgesv
+	@: > pgesv/$(am__dirstamp)
+pgesv/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pgesv/$(DEPDIR)
+	@: > pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesv0.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesv.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesvK1.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesvK2.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateNN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateNT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateTN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateTT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_equil.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pipid.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx0.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx10.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx1.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_rollN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_rollT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_spreadN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_spreadT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp00N.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp00T.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp01N.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp01T.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdtrsv.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_logsort.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_perm.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+
+libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) $(EXTRA_libhpl_a_DEPENDENCIES) 
+	$(AM_V_at)-rm -f libhpl.a
+	$(AM_V_AR)$(libhpl_a_AR) libhpl.a $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD)
+	$(AM_V_at)$(RANLIB) libhpl.a
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f auxil/*.$(OBJEXT)
+	-rm -f blas/*.$(OBJEXT)
+	-rm -f comm/*.$(OBJEXT)
+	-rm -f grid/*.$(OBJEXT)
+	-rm -f panel/*.$(OBJEXT)
+	-rm -f pauxil/*.$(OBJEXT)
+	-rm -f pfact/*.$(OBJEXT)
+	-rm -f pgesv/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f auxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f auxil/$(am__dirstamp)
+	-rm -f blas/$(DEPDIR)/$(am__dirstamp)
+	-rm -f blas/$(am__dirstamp)
+	-rm -f comm/$(DEPDIR)/$(am__dirstamp)
+	-rm -f comm/$(am__dirstamp)
+	-rm -f grid/$(DEPDIR)/$(am__dirstamp)
+	-rm -f grid/$(am__dirstamp)
+	-rm -f panel/$(DEPDIR)/$(am__dirstamp)
+	-rm -f panel/$(am__dirstamp)
+	-rm -f pauxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pauxil/$(am__dirstamp)
+	-rm -f pfact/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pfact/$(am__dirstamp)
+	-rm -f pgesv/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pgesv/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am \
+	install-libLIBRARIES install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am uninstall-libLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c
new file mode 100644
index 000000000..bf0c5e727
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c
@@ -0,0 +1,129 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_abort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_abort( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_abort displays an error message on stderr and halts execution.
+ *
+ * The user message is formatted into a fixed-size local buffer; output
+ * that does not fit is truncated rather than written past the end of
+ * the buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+/*
+ * Bounded formatting:  a long message is cut to fit cline  (and is still
+ * null-terminated) instead of overflowing the stack buffer.
+ */
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR in function", SRNAME, cline );
+   else
+      HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * NOTE(review): the process exits with status 0  even though this is an
+ * error path.  Kept as is because callers or launch scripts  may depend
+ * on it - confirm before changing.
+ */
+   exit( 0 );
+/*
+ * End of HPL_abort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c
new file mode 100644
index 000000000..ec71180eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c
@@ -0,0 +1,343 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LACPY_M_DEPTH
+ * #define    HPL_LACPY_M_DEPTH       32
+ * #define    HPL_LACPY_LOG2_M_DEPTH   5
+ * #endif
+ * #ifndef HPL_LACPY_N_DEPTH
+ * #define    HPL_LACPY_N_DEPTH        4
+ * #define    HPL_LACPY_LOG2_N_DEPTH   2
+ * #endif
+ */
+/*
+ * Defaults used when the build does not supply its own depths.  Each
+ * LOG2 macro is only defined here together with its depth, so a build
+ * that overrides a depth must also define the matching LOG2 value (the
+ * code below shifts by it).
+ */
+#ifndef HPL_LACPY_M_DEPTH
+#define    HPL_LACPY_M_DEPTH        4
+#define    HPL_LACPY_LOG2_M_DEPTH   2
+#endif
+#ifndef HPL_LACPY_N_DEPTH
+#define    HPL_LACPY_N_DEPTH        2
+#define    HPL_LACPY_LOG2_N_DEPTH   1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlacpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlacpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlacpy copies an array A into an array B.
+ *
+ * When HPL_LACPY_USE_COPY is defined, each column is copied with one
+ * call to HPL_dcopy.  Otherwise the copy is done by hand-unrolled loops
+ * processing HPL_LACPY_N_DEPTH columns and HPL_LACPY_M_DEPTH rows at a
+ * time, followed by clean-up loops for the left-over rows and columns.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of rows of the arrays A and
+ *         B. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies  the number of columns of the arrays A
+ *         and B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LACPY_USE_COPY
+/*
+ * Column cursors walked by the HPL_dcopy loop below;  they have to be
+ * declared in this branch too, otherwise the loop does not compile.
+ */
+   const double               * A0 = A;
+   double                     * B0 = B;
+   register int               j;
+#else
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+   const double               * A0 = A,              * A1 = A +     LDA;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+   const double               * A0 = A,              * A1 = A +     LDA,
+                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+/*
+ * incA / incB  move a cursor from the end of a copied column  to the top
+ * of the column HPL_LACPY_N_DEPTH further on;  incA0 / incB0 do the same
+ * for a single-column step in the clean-up loop.
+ */
+   const int                  incA = ( (unsigned int)(LDA) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incA0 = (unsigned int)(LDA) - M,
+                              incB0 = (unsigned int)(LDB) - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LACPY_USE_COPY
+   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 );
+#else
+/*
+ * mu (resp. nu) is M (resp. N) rounded down to a multiple of the row
+ * (resp. column) unrolling depth.
+ */
+   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
+                                      HPL_LACPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
+                                      HPL_LACPY_LOG2_N_DEPTH );
+/*
+ * Main loop: HPL_LACPY_N_DEPTH columns at a time
+ */
+   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 0] = A0[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
+#endif
+
+#if ( HPL_LACPY_M_DEPTH >  1 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 1] = A0[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
+         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
+         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
+         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
+         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
+         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
+         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
+         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
+         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
+         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
+         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
+         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
+         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
+         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
+         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
+         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
+#endif
+
+#endif
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
+         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
+#endif
+      }
+/*
+ * Left-over rows of the current group of columns
+ */
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         *B0 = *A0; B0++; A0++;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
+#endif
+      }
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
+#endif
+   }
+/*
+ * Left-over columns, one at a time;  A0 / B0 already point to column nu
+ */
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
+           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
+      {
+         B0[ 0] = A0[ 0];
+#if ( HPL_LACPY_M_DEPTH >  1 )
+         B0[ 1] = A0[ 1];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#endif
+      }
+      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlacpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlamch.c
new file mode 100644
index 000000000..c685f0d5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlamch.c
@@ -0,0 +1,876 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static function prototypes
+ * ---------------------------------------------------------------------
+ */
+/*
+ * File-local helpers.  HPL_dlamch calls HPL_dlamc2 and HPL_dipow;
+ * HPL_dlamc1 determines BETA, T, RND and IEEE1, and uses HPL_dlamc3 to
+ * force intermediate values to be stored.
+ */
+static void     HPL_dlamc1
+STDC_ARGS(
+(  int *,           int *,           int *,           int * ) );
+static void     HPL_dlamc2
+STDC_ARGS(
+(  int *,           int *,           int *,           double *,
+   int *,           double *,        int *,           double * ) );
+static double   HPL_dlamc3
+STDC_ARGS(
+(  const double,    const double ) );
+static void     HPL_dlamc4
+STDC_ARGS(
+(  int *,           const double,    const int ) );
+static void     HPL_dlamc5
+STDC_ARGS(
+(  const int,       const int,       const int,       const int,
+   int *,           double * ) );
+static double   HPL_dipow
+STDC_ARGS(
+(  const double,    const int ) );
+
+#ifdef STDC_HEADERS
+double HPL_dlamch( const HPL_T_MACH CMACH )
+#else
+double HPL_dlamch( CMACH )
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamch returns one machine-specific  floating-point constant.  The
+ * whole set of constants is computed on the first call only and kept in
+ * static storage; later calls simply select the requested one.
+ *
+ * Notes
+ * =====
+ *
+ * Manual translation of the Fortran 77 LAPACK auxiliary function dlamch.f
+ * (version 2.0 -- 1992),  itself based on the function ENVRON by Malcolm
+ * with suggestions by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * CMACH   (local input)                 const HPL_T_MACH
+ *         Selects the value to be returned:
+ *            HPL_MACH_EPS    eps,   relative machine precision (default)
+ *            HPL_MACH_SFMIN  sfmin, safe minimum (1/sfmin does not
+ *                                   overflow)
+ *            HPL_MACH_BASE   base,  base of the machine
+ *            HPL_MACH_PREC   prec,  eps*base
+ *            HPL_MACH_MLEN   t,     number of (base) digits in the
+ *                                   mantissa
+ *            HPL_MACH_RND    rnd,   1.0 if rounding occurs in addition,
+ *                                   0.0 otherwise
+ *            HPL_MACH_EMIN   emin,  minimum exponent before (gradual)
+ *                                   underflow
+ *            HPL_MACH_RMIN   rmin,  underflow threshold
+ *            HPL_MACH_EMAX   emax,  largest exponent before overflow
+ *            HPL_MACH_RMAX   rmax,  overflow threshold
+ *         Any other value yields eps.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   static int                 first = 1;
+   static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
+                              rmax, prec;
+   double                     value;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+/*
+ * One-time evaluation of the machine parameters
+ */
+      int                     radix = 0, ndigits = 0, rounds = 0,
+                              expmin = 0, expmax = 0;
+      double                  tiny;
+
+      first = 0;
+      HPL_dlamc2( &radix, &ndigits, &rounds, &eps, &expmin, &rmin,
+                  &expmax, &rmax );
+      base = (double)(radix);
+      t    = (double)(ndigits);
+
+      if( rounds == 0 )
+      {
+         rnd = HPL_rzero;
+         eps = HPL_dipow( base, 1 - ndigits );
+      }
+      else
+      {
+         rnd = HPL_rone;
+         eps = HPL_dipow( base, 1 - ndigits ) / HPL_rtwo;
+      }
+
+      prec  = eps * base;
+      emin  = (double)(expmin);
+      emax  = (double)(expmax);
+      sfmin = rmin;
+      tiny  = HPL_rone / rmax;
+/*
+ * Take 1/rmax plus a bit when it is not below rmin,  so that rounding
+ * cannot cause an overflow when computing 1/sfmin.
+ */
+      if( tiny >= sfmin ) sfmin = tiny * ( HPL_rone + eps );
+   }
+/*
+ * Select the requested constant; eps is the fall-back
+ */
+   if(      CMACH == HPL_MACH_EPS   ) value = eps;
+   else if( CMACH == HPL_MACH_SFMIN ) value = sfmin;
+   else if( CMACH == HPL_MACH_BASE  ) value = base;
+   else if( CMACH == HPL_MACH_PREC  ) value = prec;
+   else if( CMACH == HPL_MACH_MLEN  ) value = t;
+   else if( CMACH == HPL_MACH_RND   ) value = rnd;
+   else if( CMACH == HPL_MACH_EMIN  ) value = emin;
+   else if( CMACH == HPL_MACH_RMIN  ) value = rmin;
+   else if( CMACH == HPL_MACH_EMAX  ) value = emax;
+   else if( CMACH == HPL_MACH_RMAX  ) value = rmax;
+   else                               value = eps;
+
+   return( value );
+/*
+ * End of HPL_dlamch
+ */
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc1
+(
+   int                        * BETA,
+   int                        * T,
+   int                        * RND,
+   int                        * IEEE1
+)
+#else
+static void HPL_dlamc1
+( BETA, T, RND, IEEE1 )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * IEEE1, * RND, * T;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
+ * and IEEE1; they are probed on the first call and cached in statics.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition.  This may not be a  reliable guide to the
+ *         way in which the machine performs its arithmetic.
+ *
+ * IEEE1   (local output)              int *
+ *         Specifies  whether  rounding  appears  to be done in the IEEE
+ *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables (the statics cache the results of the probe) ..
+ */
+   double                     a, b, c, f, one, qtr, savec, t1, t2;
+   static int                 first=1, lbeta, lieee1, lrnd, lt;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; one = HPL_rone;
+/*
+ * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
+ * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
+ * that relevant values are stored and not held in registers, or are not
+ * affected by optimizers.
+ *
+ * Compute  a = 2.0**m  with the  smallest  positive integer m such that
+ * fl( a + 1.0 ) == a.
+ */
+      a = HPL_rone; c = HPL_rone;
+      do
+      { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
+      while( c == HPL_rone );
+/*
+ * Now compute b = 2.0**m with the smallest positive integer m such that
+ * fl( a + b ) > a.
+ */
+      b = HPL_rone; c = HPL_dlamc3( a, b );
+      while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
+/*
+ * Now compute the base.  a and c  are  neighbouring floating point num-
+ * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
+ * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
+ * BETA and not (BETA-1).
+ */
+      qtr = one / 4.0; savec = c;
+      c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
+/*
+ * Now  determine  whether  rounding or chopping occurs, by adding a bit
+ * less than BETA/2 and a bit more than BETA/2 to a.
+ */
+      b = (double)(lbeta);
+      f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( c == a ) { lrnd = 1; } else { lrnd = 0; }
+      f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
+/*
+ * Try  and decide whether rounding is done in the  IEEE  round to nea-
+ * rest style.  b/2 is half a unit in the last place of the two numbers
+ * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
+ * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
+ * to savec should change savec.
+ */
+      t1 = HPL_dlamc3( b / HPL_rtwo, a );
+      t2 = HPL_dlamc3( b / HPL_rtwo, savec );
+      if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
+      else                                                  lieee1 = 0;
+/*
+ * Now find the mantissa, T. It should be the integer part of log to the
+ * base BETA of a, however it is safer to determine T by powering. So we
+ * find  T  as the smallest  positive integer  t  for which the computed
+ * fl( fl( beta**t + 1.0 ) - beta**t ) is no longer equal to 1.0.
+ */
+      lt = 0; a = HPL_rone; c = HPL_rone;
+
+      do
+      {
+         lt++; a *= (double)(lbeta);
+         c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
+      } while( c == HPL_rone );
+   }
+/* Copy the cached values to the outputs (this is done on every call). */
+   *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc2
+(
+   int                        * BETA, 
+   int                        * T,
+   int                        * RND,
+   double                     * EPS,
+   int                        * EMIN,
+   double                     * RMIN,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * EMAX, * EMIN, * RND, * T;
+   double                     * EPS, * RMAX, * RMIN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc2  determines the machine  parameters specified in its argu-
+ * ment list; they are computed on the first call and cached in statics.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
+ * based on a function PARANOIA  by  W. Kahan of the University of Cali-
+ * fornia at Berkeley for the computation of the  relative machine epsi-
+ * lon eps.
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition. This may not be a reliable  guide to  the
+ *         way in which the machine performs its arithmetic.
+ *
+ * EPS     (local output)              double *
+ *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
+ *         where fl denotes the computed value.
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before (gradual) underflow occurs.
+ *
+ * RMIN    (local output)              double *
+ *         The smallest  normalized  number  for  the  machine, given by
+ *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
+ *         of BETA.
+ *
+ * EMAX    (local output)              int *
+ *         The maximum exponent before overflow occurs.
+ *
+ * RMAX    (local output)              double *
+ *         The  largest  positive  number  for  the  machine,  given  by
+ *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
+ *         value of BETA.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables (statics, lrnd included, persist across calls) ..
+ */
+   static double              leps, lrmax, lrmin;
+   double                     a, b, c, half, one, rbase, sixth, small,
+                              third, two, zero;
+   static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
+                              lrnd=0, lt=0;
+   int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
+                              ngnmin=0, ngpmin=0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
+/*
+ * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
+ * T, RND, EPS, EMIN and RMIN.
+ *
+ * Throughout this routine we use the function HPL_dlamc3 to ensure that
+ * relevant values are stored and not held in registers,  or are not af-
+ * fected by optimizers.
+ *
+ * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
+ */
+      HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
+/*
+ * Start to find eps.
+ */
+      b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
+/*
+ * Try some tricks to see whether or not this is the correct  EPS.
+ */
+      b     = two / 3.0; 
+      half  = one / HPL_rtwo;
+      sixth = HPL_dlamc3( b, -half );
+      third = HPL_dlamc3( sixth, sixth );
+      b     = HPL_dlamc3( third, -half );
+      b     = HPL_dlamc3( b, sixth );
+      b     = Mabs( b ); if( b < leps ) b = leps;
+
+      leps = HPL_rone;
+
+      while( ( leps > b ) && ( b > zero ) )
+      {
+         leps = b;
+         c = HPL_dlamc3( half * leps,
+                         HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
+         c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
+         c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
+      }
+      if( a < leps ) leps = a;
+/*
+ * Computation of EPS complete.
+ *
+ * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
+ * dividing a by BETA until (gradual) underflow occurs. This is detected
+ * when we cannot recover the previous a.
+ */
+      rbase = one / (double)(lbeta); small = one;
+      for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
+      a = HPL_dlamc3( one, small );
+      HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
+      HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
+
+      ieee = 0;
+
+      if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
+      {
+         if( ngpmin == gpmin )
+         {
+/*
+ * Non twos-complement machines, no gradual underflow; e.g., VAX
+ */
+            lemin = ngpmin;
+         }
+         else if( ( gpmin-ngpmin ) == 3 )
+         {
+/*
+ * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
+ * dard followers
+ */
+            lemin = ngpmin - 1 + lt; ieee = 1;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, gpmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
+      {
+         if( Mabs( ngpmin-ngnmin ) == 1 )
+         {
+/*
+ * Twos-complement machines, no gradual underflow; e.g., CYBER 205
+ */
+            lemin = Mmax( ngpmin, ngnmin );
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
+      {
+         if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
+         {
+/*
+ * Twos-complement machines with gradual underflow; no known machine
+ */
+            lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else
+      {
+/*
+ * A guess; no known machine
+ */
+         lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
+         lemin = Mmin( lemin, gnmin ); iwarn = 1;
+      }
+/*
+ * Comment out this if block if EMIN is ok
+ */
+      if( iwarn != 0 )
+      {
+         first = 1;   /* not cached: probe (and warn) again on next call */
+         HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
+"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
+"If, after inspection, the value EMIN looks acceptable, please comment ",
+"out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
+"otherwise supply EMIN explicitly." );
+      }
+/*
+ * Assume IEEE arithmetic if we found denormalised  numbers above, or if
+ * arithmetic seems to round in the  IEEE style,  determined  in routine
+ * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
+ * ever, faulty machines may have one or the other.
+ */
+      if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
+      else                                   ieee = 0;
+/*
+ * Compute  RMIN by successive division by  BETA. We could compute  RMIN
+ * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
+ * tation.
+ */
+      lrmin = HPL_rone;
+      for( i = 0; i < 1 - lemin; i++ )
+         lrmin = HPL_dlamc3( lrmin*rbase, zero );
+/*
+ * Finally, call HPL_dlamc5 to compute emax and rmax.
+ */
+      HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
+   }
+   *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
+   *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dlamc3( const double A, const double B )
+#else
+static double HPL_dlamc3( A, B )
+/*
+ * .. Scalar Arguments ..
+ */
+   const double               A, B;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc3  returns  A + B.  The sum goes through a volatile local so
+ * that it is really stored and reloaded:  callers  rely on seeing  each
+ * addition as a separate, stored double even when this gets inlined.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc3.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * A, B    (local input)               double
+ *         The values a and b.
+ *
+ * ---------------------------------------------------------------------
+ */
+   volatile double            sum;  /* volatile: store cannot be elided */
+
+   sum = A + B;
+   return( sum );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc4
+(
+   int                        * EMIN,
+   const double               START,
+   const int                  BASE
+)
+#else
+static void HPL_dlamc4( EMIN, START, BASE )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * EMIN;
+   const int                  BASE;
+   const double               START;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc4 is a service function for HPL_dlamc2; it probes for EMIN.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc4.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before  (gradual) underflow, computed by
+ *         setting A = START and dividing  by  BASE until the previous A
+ *         can not be recovered.
+ *
+ * START   (local input)               double
+ *         The starting point for determining EMIN.
+ *
+ * BASE    (local input)               int
+ *         The base of the machine.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   a     = START; one = HPL_rone; rbase = one / (double)(BASE);
+   zero  = HPL_rzero;
+   *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
+/* Keep dividing by BASE while a is recovered by all four checks below. */
+   do
+   {
+      (*EMIN)--; a = b1;
+      b1 = HPL_dlamc3( a /  BASE,  zero );   /* a/BASE by division       */
+      c1 = HPL_dlamc3( b1 *  BASE, zero );   /* ... multiplied back      */
+      d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; /* summed back */
+      b2 = HPL_dlamc3( a * rbase,  zero );   /* a/BASE via reciprocal    */
+      c2 = HPL_dlamc3( b2 / rbase, zero );   /* ... divided back         */
+      d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; /* summed back */
+   } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc5
+(
+   const int                  BETA,
+   const int                  P, 
+   const int                  EMIN,
+   const int                  IEEE,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  BETA, EMIN, IEEE, P; 
+   int                        * EMAX;
+   double                     * RMAX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
+ * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+ * approximately to a power of 2.  It will fail  on machines where  this
+ * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
+ * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
+ * too large (i.e. too close to zero), probably with overflow.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc5.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local input)               int
+ *         The base of floating-point arithmetic.
+ *
+ * P       (local input)               int
+ *         The number of base BETA digits in the mantissa of a floating-
+ *         point value.
+ *
+ * EMIN    (local input)               int
+ *         The minimum exponent before (gradual) underflow.
+ *
+ * IEEE    (local input)               int
+ *         A logical flag specifying whether or not  the arithmetic sys-
+ *         tem is thought to comply with the IEEE standard.
+ *
+ * EMAX    (local output)              int *
+ *         The largest exponent before overflow.
+ *
+ * RMAX    (local output)              double *
+ *         The largest machine floating-point number.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     oldy=HPL_rzero, recbas, y, z;
+   int                        exbits=1, expsum, i, lexp=1, nbits, next,
+                              uexp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
+ * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
+ * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
+ * required number RMAX).  In the loop below,  next  always  holds  twice
+ * lexp, that is the next power of 2 to be compared against -EMIN.
+ */
+   while( ( next = (int)( (unsigned int)(lexp) << 1 ) ) <= ( -EMIN ) )
+   { lexp = next; exbits++; }
+
+   if( lexp == -EMIN ) { uexp = lexp; } else { uexp = next; exbits++; }
+/*
+ * Now -lexp is less than or equal to EMIN, and -uexp is greater than or
+ * equal to EMIN. exbits is the number of bits needed to store the expo-
+ * nent.
+ */
+   if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
+   { expsum = (int)( (unsigned int)(lexp) << 1 ); }
+   else
+   { expsum = (int)( (unsigned int)(uexp) << 1 ); }
+/*
+ * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
+ */
+   *EMAX = expsum + EMIN - 1;
+/*
+ * nbits  is  the total number of bits needed to store a  floating-point
+ * number.
+ */
+   nbits = 1 + exbits + P;
+
+   if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
+   {
+/*
+ * Either there are an odd number of bits used to store a floating-point
+ * number, which is unlikely, or some bits are not used in the represen-
+ * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
+ * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
+ * which is perhaps the most likely. We have to assume the last alterna-
+ * tive.  If this is true,  then we need to reduce  EMAX  by one because
+ * there must be some way of representing zero  in an  implicit-bit sys-
+ * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
+ */
+      (*EMAX)--;
+   }
+
+   if( IEEE != 0 )
+   {
+/*
+ * Assume we are on an IEEE  machine which reserves one exponent for in-
+ * finity and NaN.
+ */
+      (*EMAX)--;
+   }
+/*
+ * Now create RMAX, the largest machine number, which should be equal to
+ * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
+ * careful that the result is less than 1.0.
+ */
+   recbas = HPL_rone / (double)(BETA);
+   z      = (double)(BETA) - HPL_rone;
+   y      = HPL_rzero;
+
+   for( i = 0; i < P; i++ )
+   { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
+
+   if( y >= HPL_rone ) y = oldy;
+/*
+ * Now multiply by BETA**EMAX to get RMAX.
+ */
+   for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
+
+   *RMAX = y;
+/*
+ * End of HPL_dlamc5
+ */
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dipow
+(
+   const double               X,
+   const int                  N
+)
+#else
+static double HPL_dipow( X, N )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  N;
+   const double               X;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dipow  raises the real scalar  X  to the integer power  N  by re-
+ * peated multiplication.  A negative  N  multiplies by  1/X instead. If
+ * X is zero, zero is returned whatever the value of N.
+ *
+ * Arguments
+ * =========
+ *
+ * X       (local input)               const double
+ *         The base x.
+ *
+ * N       (local input)               const int
+ *         The exponent n; it may be negative.
+ *
+ * ---------------------------------------------------------------------
+ */
+   double                     base, result = HPL_rone;
+   int                        count;
+
+   if( X == HPL_rzero ) return( HPL_rzero );
+/*
+ * Fold the sign of N into the factor: a negative power uses 1/X.
+ */
+   if( N >= 0 ) { count = N;  base = X;            }
+   else         { count = -N; base = HPL_rone / X; }
+   while( count > 0 ) { result *= base; count--; }
+   return( result );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c
new file mode 100644
index 000000000..82f118b6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_dlange
+(
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_dlange
+( NORM, M, N, A, LDA )
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlange computes one of three magnitudes of the  M-by-N matrix  A,
+ * selected by NORM:
+ *
+ *    HPL_NORM_A   max(abs(A(i,j))), the largest absolute entry,
+ *    HPL_NORM_1   norm1(A), the largest column sum of absolute values,
+ *    HPL_NORM_I   normI(A), the largest row    sum of absolute values.
+ *
+ * The first quantity is not a matrix norm.  Zero is returned when  M or
+ * N is not positive,  and also when  NORM  is none of the three  values
+ * listed above.
+ *
+ * Arguments
+ * =========
+ *
+ * NORM    (local input)                 const HPL_T_NORM
+ *         Selects the quantity to be returned, as listed above.
+ *
+ * M       (local input)                 const int
+ *         Number of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         Array of dimension (LDA,N) holding the matrix A  column after
+ *         column.  It is only read.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of A,  i.e. the distance between the  first
+ *         entries of consecutive columns. LDA must be at least max(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * col = A;
+   double                     t, v0 = HPL_rzero, * work = NULL;
+   int                        i, imax, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest absolute entry, scanning one column (col) at a time.
+ */
+      for( j = 0; j < N; j++, col += LDA )
+      {
+         for( i = 0; i < M; i++ )
+         { t = Mabs( col[i] ); v0 = Mmax( v0, t ); }
+      }
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: work[j] receives the absolute sum of column j.
+ */
+      work = (double *)malloc( sizeof( double ) * (size_t)(N) );
+      if( work == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( j = 0; j < N; j++, col += LDA )
+         {
+            for( i = 0, t = HPL_rzero; i < M; i++ ) t += Mabs( col[i] );
+            work[j] = t;
+         }
+/*
+ * The result is the largest of the column sums.
+ */
+         imax = HPL_idamax( N, work, 1 );
+         v0   = Mabs( work[imax] );
+         free( work );
+      }
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: work[i] accumulates the absolute sum of row i.
+ */
+      work = (double *)malloc( sizeof( double ) * (size_t)(M) );
+      if( work == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( i = 0; i < M; i++ ) work[i] = HPL_rzero;
+
+         for( j = 0; j < N; j++, col += LDA )
+         {
+            for( i = 0; i < M; i++ ) work[i] += Mabs( col[i] );
+         }
+/*
+ * The result is the largest of the row sums.
+ */
+         imax = HPL_idamax( M, work, 1 );
+         v0   = Mabs( work[imax] );
+         free( work );
+      }
+   }
+
+   return( v0 );
+/*
+ * End of HPL_dlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c
new file mode 100644
index 000000000..f29df3cd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c
@@ -0,0 +1,130 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt  writes every entry of the  M-by-N matrix  A  to standard
+ * error, one entry per line and column after column.  Each line has the
+ * form  CMATNM(row,column)=value,  where the printed indices are offset
+ * by IA and JA respectively.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         Number of rows of A to print.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of A to print.
+ *
+ * A       (local input)                 double *
+ *         Array of dimension (LDA,N); it is only read.
+ *
+ * IA      (local input)                 const int
+ *         Row index printed for the first row of A.
+ *
+ * JA      (local input)                 const int
+ *         Column index printed for the first column of A.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A.
+ *
+ * CMATNM  (local input)                 const char *
+ *         Name printed in front of every entry.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     value;
+   int                        col, row;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         value = *( Mptr( A, row, col, LDA ) );
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM,
+                      IA + row, JA + col, value );
+      }
+   }
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LATCPY_M_DEPTH
+ * #define    HPL_LATCPY_M_DEPTH      32
+ * #define    HPL_LATCPY_LOG2_M_DEPTH  5
+ * #endif
+ * #ifndef HPL_LATCPY_N_DEPTH
+ * #define    HPL_LATCPY_N_DEPTH       4
+ * #define    HPL_LATCPY_LOG2_N_DEPTH  2
+ * #endif
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH       4
+#define    HPL_LATCPY_LOG2_M_DEPTH  2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH       2
+#define    HPL_LATCPY_LOG2_N_DEPTH  1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B.
+ * If HPL_LATCPY_USE_COPY is defined, each row of A goes through HPL_dcopy.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of  rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the number of  rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B += LDB ) HPL_dcopy( M, A+j, LDA, B, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_fprintf is a wrapper around fprintf flushing the output stream.
+ * Output that does not fit the 256-byte staging buffer is truncated.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[256];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+
+   (void) fprintf( STREAM, "%s", cline );
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_warn displays an error message.
+ * A message that does not fit the 128-byte staging buffer is truncated.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o
new file mode 100644
index 000000000..25b7e6696
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o
new file mode 100644
index 000000000..6703f341f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o
new file mode 100644
index 000000000..ef5c411f0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o
new file mode 100644
index 000000000..8fb657669
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o
new file mode 100644
index 000000000..29c5f89f7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o
new file mode 100644
index 000000000..fea336857
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o
new file mode 100644
index 000000000..00861c5a9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o
new file mode 100644
index 000000000..e4944e00f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Toolchain variables used below are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# Headers that every object in this directory is rebuilt against.
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+#
+## Object files ########################################################
+# au0 objects are compiled with $(CCFLAGS); HPL_dlamch.o (au1) uses $(CCNOOPT).
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+#
+## Targets #############################################################
+.PHONY  : all lib clean
+all     : lib
+#
+lib     : lib.grd
+# lib.grd is a stamp file, touched once the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit compile rule per object; the sources live in the parent directory.
+HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlacpy.c
+HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlatcpy.c
+HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_fprintf.c
+HPL_warn.o             : ../HPL_warn.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_warn.c
+HPL_abort.o            : ../HPL_abort.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_abort.c
+HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaprnt.c
+HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlange.c
+HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT)  ../HPL_dlamch.c
+#
+# ######################################################################
+# Removes the objects and the lib.grd stamp from this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y (y := y + alpha*x).
+ * Backend chosen at compile time: HPL_CALL_CBLAS, _VSIPL (plain C) or _FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+   /* Nothing to do for an empty vector or a zero scale factor. */
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* Main loop: four elements per iteration, up to element nu. */
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+ 
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+ 
+            X  += incX4;
+            Y  += incY4;
+ 
+         } while( X != StX );
+      }
+      /* Remainder loop: the last N - nu (fewer than four) elements. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         y0  = (*Y);
+ 
+         *Y  = y0 + alpha * x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ * Backend chosen at compile time: HPL_CALL_CBLAS, _VSIPL (plain C) or _FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+   /* Nothing to copy for an empty vector. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* Main loop: eight elements per iteration, up to element nu. */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+ 
+            X  += incX8;
+            Y  += incY8;
+ 
+         } while( X != StX );
+      }
+      /* Remainder loop: the last N - nu (fewer than eight) elements. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         *Y  = x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dcopy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+/* Adds ALPHA * A * B to C, one column of C at a time, after HPL_dscal( BETA ). */
+   register double            bl;
+   int                        col, row, inner, a_off, b_off, c_off;
+   for( col = 0; col < N; col++ )
+   {
+      b_off = col * LDB; c_off = col * LDC;
+      HPL_dscal( M, BETA, C + c_off, 1 );
+      for( inner = 0; inner < K; inner++ )
+      {
+         a_off = inner * LDA; bl = ALPHA * B[b_off + inner];
+         for( row = 0; row < M; row++ ) C[c_off + row] += A[a_off + row] * bl;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+/* Adds ALPHA * A * B^T to C, one column of C at a time, after HPL_dscal( BETA ). */
+   register double            bt;
+   int                        col, row, inner, a_off, c_off;
+   for( col = 0; col < N; col++ )
+   {
+      c_off = col * LDC;
+      HPL_dscal( M, BETA, C + c_off, 1 );
+      for( inner = 0; inner < K; inner++ )
+      {
+         a_off = inner * LDA; bt = ALPHA * B[col + inner * LDB];
+         for( row = 0; row < M; row++ ) C[c_off + row] += A[a_off + row] * bt;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+/* C := ALPHA * A^T * B + BETA * C; each entry of C is one dot product. */
+   register double            dot;
+   int                        col, row, p, a_off, b_off, c_idx;
+   for( col = 0; col < N; col++ )
+   {
+      b_off = col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         a_off = row * LDA; c_idx = col * LDC + row; dot = HPL_rzero;
+         for( p = 0; p < K; p++ ) dot += A[a_off + p] * B[b_off + p];
+         /* BETA == 0 overwrites C instead of scaling it. */
+         C[c_idx]  = ( BETA == HPL_rzero ? HPL_rzero : C[c_idx] * BETA );
+         C[c_idx] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+/* C := ALPHA * A^T * B^T + BETA * C; each entry of C is one dot product. */
+   register double            dot;
+   int                        col, row, p, a_off, c_idx;
+
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         a_off = row * LDA; c_idx = col * LDC + row; dot = HPL_rzero;
+         for( p = 0; p < K; p++ ) dot += A[a_off + p] * B[col + p * LDB];
+         /* BETA == 0 overwrites C instead of scaling it. */
+         C[c_idx]  = ( BETA == HPL_rzero ? HPL_rzero : C[c_idx] * BETA );
+         C[c_idx] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemm0
+(
+   const enum HPL_TRANS       TRANSA,
+   const enum HPL_TRANS       TRANSB,
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+                        BETA, C, LDC )
+   const enum HPL_TRANS       TRANSA, TRANSB;
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   int                        i, j;
+   /* C := ALPHA * op(A) * op(B) + BETA * C; return early if C cannot change. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
+         ( BETA == HPL_rone ) ) ) return;
+   /* ALPHA == 0: only BETA * C is left; BETA == 0 clears C without reading it. */
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ ) for( i = 0; i < M; i++ )
+         C[i+j*LDC] = ( BETA == HPL_rzero ? HPL_rzero : BETA * C[i+j*LDC] );
+      return;
+   }
+   /* Any TRANSA / TRANSB value other than HplNoTrans selects the transposed kernel. */
+   if( TRANSB == HplNoTrans )
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+   else
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANSA,
+   const enum HPL_TRANS             TRANSB,
+   const int                        M,
+   const int                        N,
+   const int                        K,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   B,
+   const int                        LDB,
+   const double                     BETA,
+   double *                         C,
+   const int                        LDC
+)
+#else
+void HPL_dgemm
+( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANSA;
+   const enum HPL_TRANS             TRANSB;
+   const int                        M;
+   const int                        N;
+   const int                        K;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   B;
+   const int                        LDB;
+   const double                     BETA;
+   double *                         C;
+   const int                        LDC;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemm performs one of the matrix-matrix operations
+ *  
+ *     C := alpha * op( A ) * op( B ) + beta * C
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+ * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANSA  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSA  specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSA==HplNoTrans    : op( A ) = A,                     
+ *            TRANSA==HplTrans      : op( A ) = A^T,                   
+ *            TRANSA==HplConjTrans  : op( A ) = A^T.                   
+ *
+ * TRANSB  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSB  specifies the form of  op(B)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSB==HplNoTrans    : op( B ) = B,                     
+ *            TRANSB==HplTrans      : op( B ) = B^T,                   
+ *            TRANSB==HplConjTrans  : op( B ) = B^T.                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the  number  of rows  of the  matrix
+ *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the number  of columns of the matrix
+ *         op(B)  and  the number of columns of the matrix  C. N must be
+ *         at least zero.
+ *
+ * K       (local input)                 const int
+ *         On entry,  K  specifies  the  number of columns of the matrix
+ *         op(A) and the number of rows of the matrix op(B).  K  must be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero  then the elements of the matrices A and B
+ *         need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
+ *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+ *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+ *         the array  A must contain the matrix A, otherwise the leading
+ *         k  by  m  part of the array  A  must  contain the  matrix  A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA  specifies the first dimension of A as declared
+ *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+ *         LDA must be at least max(1,m), otherwise LDA must be at least
+ *         max(1,k).
+ *
+ * B       (local input)                 const double *
+ *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
+ *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+ *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+ *         the array  B must contain the matrix B, otherwise the leading
+ *         n  by  k  part of the array  B  must  contain  the matrix  B.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB  specifies the first dimension of B as declared
+ *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+ *         LDB must be at least max(1,k), otherwise LDB must be at least
+ *         max(1,n).
+ *
+ * BETA    (local input)                 const double
+ *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+ *         supplied  as  zero  then  the  elements of the matrix C  need
+ *         not be set on input.
+ *
+ * C       (local input/output)          double *
+ *         On entry,  C  is an array of dimension (LDC,n). Before entry,
+ *         the  leading m by n part  of  the  array  C  must contain the
+ *         matrix C,  except when beta is zero, in which case C need not
+ *         be set on entry. On exit, the array  C  is overwritten by the
+ *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+ *
+ * LDC     (local input)                 const int
+ *         On entry, LDC  specifies the first dimension of C as declared
+ *         in  the   calling  (sub)  program.   LDC  must  be  at  least
+ *         max(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K);
+   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
+                  C, LDC );
+   }
+   else
+   {
+      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
+                  C, LDC );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
+                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77K                 K
+#define F77lda               LDA
+#define F77ldb               LDB
+#define F77ldc               LDC
+#endif
+   char                      ctransa, ctransb;
+
+   if(      TRANSA == HplNoTrans ) ctransa = 'N';
+   else if( TRANSA == HplTrans   ) ctransa = 'T';
+   else                            ctransa = 'C';
+ 
+   if(      TRANSB == HplNoTrans ) ctransb = 'N';
+   else if( TRANSB == HplTrans   ) ctransb = 'T';
+   else                            ctransb = 'C';
+
+   if( ORDER == HplColumnMajor )
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+   }
+   else
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dgemm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c
new file mode 100644
index 000000000..6366c5a48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c
@@ -0,0 +1,326 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * X,
+   const int                  INCX,
+   const double               BETA,
+   double                     * Y,
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{
+/* HPL_dgemv0 computes  y := alpha * op( A ) * x + beta * y,  where  the
+ * M by N matrix A has entry (i,j) at A[i+j*LDA];  y has M entries when
+ * TRANS == HplNoTrans and N entries otherwise (x has the other count). */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/* Quick returns:  when alpha is zero only  y := beta * y  remains, and
+ * it must cover all of y and nothing more:  M entries for op( A ) = A,
+ * N entries for op( A ) = A^T. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
+   if( ALPHA == HPL_rzero )
+   { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; }
+ 
+   if( TRANS == HplNoTrans )
+   {  /* y := beta * y,  then  y += ( alpha * x[j] ) * ( column j of A ) */
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else
+   {  /* y[j] := beta * y[j] + alpha * ( column j of A )^T * x */
+      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
+         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   X;
+   const int                        INCX;
+   const double                     BETA;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dgemv performs one of the matrix-vector operations
+ *
+ *     y := alpha * op( A ) * x + beta * y,
+ *
+ *  where op( X ) is one of
+ *
+ *     op( X ) = X   or   op( X ) = X^T.
+ *
+ * alpha and beta are scalars,  A is an m by n matrix,  x has lx and y has
+ * ly entries, with (lx,ly) = (n,m) if TRANS==HplNoTrans, else (m,n).
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:
+ *            ORDER = HplRowMajor,
+ *            ORDER = HplColumnMajor.
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies the  operation to be performed as
+ *         follows:
+ *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+ *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  A and X  need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( lx - 1 ) * abs( INCX ) )  that contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * BETA    (local input)                 const double
+ *         On entry, BETA  specifies the scalar beta.    When  BETA   is
+ *         supplied as zero then  Y  need not be set on input.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( ly - 1 ) * abs( INCY ) )  that contains the vector y.
+ *         Before entry with BETA non-zero, the incremented array Y must
+ *         contain the vector  y.  On exit,  Y  is  overwritten  by  the
+ *         updated vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+   else  /* row-major A read by columns is A^T: swap M, N and flip TRANS */
+   {
+      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif /* IONE is the trailing by-value argument of the Sun-style calls */
+#ifdef StringStructVal
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftran;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else /* no F77_INTEGER copies: the F77* names alias the int arguments */
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   char                      ctran;
+
+   if( ORDER == HplColumnMajor )
+   {
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+   else  /* row-major: pass N, M in swapped order and the opposite TRANS */
+   {
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+
+#endif
+/*
+ * End of HPL_dgemv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c
new file mode 100644
index 000000000..5ea702778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dger
+
+#ifdef STDC_HEADERS
+void HPL_dger
+(
+   const enum HPL_ORDER             ORDER,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY,
+   double *                         A,
+   const int                        LDA
+)
+#else
+void HPL_dger
+( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+   const enum HPL_ORDER             ORDER;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+   double *                         A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dger applies the rank-one update  A := A + alpha * x * y^T  to the
+ * m by n matrix A; alpha is a scalar, x has m entries and y n entries.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         Storage format of the operands: HplRowMajor or HplColumnMajor.
+ *
+ * M       (local input)                 const int
+ *         Number of rows of the matrix A.  M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of the matrix A.  N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         The scalar alpha. When it is zero, X and Y need not be set.
+ *
+ * X       (local input)                 const double *
+ *         Incremented array  of  dimension  at  least
+ *         ( 1 + ( m - 1 ) * abs( INCX ) )  that contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         Increment for the elements of X.  INCX must not be zero.
+ *
+ * Y       (local input)                 double *
+ *         Incremented array  of  dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that contains the vector y.
+ *
+ * INCY    (local input)                 const int
+ *         Increment for the elements of Y.  INCY must not be zero.
+ *
+ * A       (local input/output)          double *
+ *         Array of size at least LDA * n.  Its leading m by n part holds
+ *         the matrix on entry and the updated matrix on exit.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of A as declared by the caller, >= MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA );
+#endif
+#ifdef HPL_CALL_VSIPL
+   const double              * lead, * fast;
+   register double           scale;
+   int                       nlead, nfast, inclead, incfast;
+   int                       kl, kf, plead, pfast, start, pos;
+
+   if( ( ALPHA == HPL_rzero ) || ( M == 0 ) || ( N == 0 ) ) return;
+/*
+ * Entries of A adjacent in memory share a column  (column-major)  or a
+ * row (row-major):  "fast" is the vector indexed along such a run, and
+ * "lead" the vector indexed across the runs, which start LDA apart.
+ */
+   if( ORDER == HplColumnMajor )
+   {
+      lead = Y; nlead = N; inclead = INCY;
+      fast = X; nfast = M; incfast = INCX;
+   }
+   else
+   {
+      lead = X; nlead = M; inclead = INCX;
+      fast = Y; nfast = N; incfast = INCY;
+   }
+
+   start = 0; plead = 0;
+   for( kl = 0; kl < nlead; kl++ )
+   {
+      scale = ALPHA * lead[plead];
+      pos = start; pfast = 0;
+      for( kf = 0; kf < nfast; kf++ )
+      {
+         A[pos] += fast[pfast] * scale;
+         pos++; pfast += incfast;
+      }
+      start += LDA; plead += inclead;
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M;
+   const F77_INTEGER         F77N    = N;
+   const F77_INTEGER         F77lda  = LDA;
+   const F77_INTEGER         F77incx = INCX;
+   const F77_INTEGER         F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+/*
+ * For row-major storage the call swaps the roles of (M,x) and (N,y).
+ */
+   if( ORDER != HplColumnMajor )
+   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
+   else
+   {  F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); }
+#endif
+/*
+ * End of HPL_dger
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c
new file mode 100644
index 000000000..7e041991f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dscal
+
+#ifdef STDC_HEADERS
+void HPL_dscal
+(
+   const int                        N,
+   const double                     ALPHA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dscal
+( N, ALPHA, X, INCX )
+   const int                        N;
+   const double                     ALPHA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dscal scales the vector x by alpha.
+ * Each  HPL_CALL_*  macro that is defined compiles in its own section.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are  scaled
+ *         by the scalar alpha.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dscal( N, ALPHA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   register const double     alpha = ALPHA;
+   const double              * StX;
+   register int              i;
+   int                       nu;   /* N rounded down to a multiple of 8 */
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+   /* Nothing to do for N <= 0 or when alpha equals HPL_rone. */
+   if( ( N > 0 ) && ( alpha != HPL_rone ) )
+   {
+      if( alpha == HPL_rzero )
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = (double *)X + nu * INCX;
+            /* Store HPL_rzero into eight entries per pass; X is never read. */
+            do
+            {
+               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
+               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
+               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
+               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8;
+
+            } while( X != StX );
+         }
+         /* Leftover N - nu entries, one at a time. */
+         for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; }
+      }
+      else
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = X + nu * INCX;
+            /* Load, scale and store eight entries per pass until X reaches StX. */
+            do
+            {
+               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
+               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;
+ 
+               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
+               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;
+ 
+               X  += incX8;
+ 
+            } while( X != StX );
+         }
+         /* Leftover N - nu entries, one at a time. */
+         for( i = N - nu; i != 0; i-- )
+         { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   /* F77dscal is handed the address of each scalar, hence the local copies. */
+   F77dscal( &F77N, &alpha, X, &F77incx );
+#endif
+/*
+ * End of HPL_dscal
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c
new file mode 100644
index 000000000..eb1b8e08d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c
@@ -0,0 +1,157 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dswap
+
+#ifdef STDC_HEADERS
+void HPL_dswap
+(
+   const int                        N,
+   double *                         X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dswap
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   double *                         X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dswap swaps the vectors x and y.
+ * Each  HPL_CALL_*  macro that is defined compiles in its own section.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are updated
+ *         with the entries of the incremented array Y.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dswap( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   double                    * StX;
+   register int              i;
+   int                       nu;   /* N rounded down to a multiple of 4 */
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+   /* Nothing to do for N <= 0. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+         /* Exchange four pairs per pass; the loop stops when X reaches StX. */
+         do
+         {
+            x0 = (*X);      y0 = (*Y);      x1 = X[INCX ];  y1 = Y[INCY ];
+            x2 = X[incX2];  y2 = Y[incY2];  x3 = X[incX3];  y3 = Y[incY3];
+            *Y        = x0; *X        = y0; Y[INCY ]  = x1; X[INCX ]  = y1;
+            Y[incY2]  = x2; X[incX2]  = y2; Y[incY3]  = x3; X[incX3]  = y3;
+            X += incX4; Y += incY4;
+ 
+         } while( X != StX );
+      }
+      /* Leftover N - nu pairs, one at a time. */
+      for( i = N - nu; i != 0; i-- )
+      { x0  = (*X); y0  = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dswap( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dswap
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c
new file mode 100644
index 000000000..a336a7d29
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c
@@ -0,0 +1,977 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, piv, row;
+   /* Per column of B: scale by ALPHA, then eliminate top-down, dividing by A's diagonal. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( piv = 0; piv < M; piv++ )
+      {
+         const int            aoff = piv * LDA;
+         B[boff+piv] /= A[aoff+piv];
+         for( row = piv+1; row < M; row++ ) B[boff+row] -= B[boff+piv] * A[aoff+row];
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, piv, row;
+   /* Per column of B: scale by ALPHA, then eliminate top-down; A's diagonal is not read. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( piv = 0; piv < M; piv++ )
+      {
+         const int            aoff = piv * LDA;
+         for( row = piv+1; row < M; row++ ) B[boff+row] -= B[boff+piv] * A[aoff+row];
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        col, row, t;
+   /* Per column of B, rows last to first: subtract the entries below, then divide. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = M-1; row >= 0; row-- )
+      {
+         const int            aoff = row * LDA;
+         acc = ALPHA * B[boff+row];
+         for( t = row+1; t < M; t++ )
+         { acc -= A[aoff+t] * B[boff+t]; }
+         acc /= A[aoff+row];
+         B[boff+row] = acc;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        col, row, t;
+   /* Per column of B, rows last to first: subtract the entries below; no division. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = M-1; row >= 0; row-- )
+      {
+         const int            aoff = row * LDA;
+         acc = ALPHA * B[boff+row];
+         for( t = row+1; t < M; t++ )
+         { acc -= A[aoff+t] * B[boff+t]; }
+         B[boff+row] = acc;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, piv, row;
+   /* Per column of B: scale by ALPHA, then eliminate bottom-up, dividing by A's diagonal. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( piv = M-1; piv >= 0; piv-- )
+      {
+         const int            aoff = piv * LDA;
+         B[boff+piv] /= A[aoff+piv];
+         for( row = 0; row < piv; row++ )
+         { B[boff+row] -= B[boff+piv] * A[aoff+row]; }
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, piv, row;
+   /* Per column of B: scale by ALPHA, then eliminate bottom-up; A's diagonal is not read. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( piv = M-1; piv >= 0; piv-- )
+      {
+         const int            aoff = piv * LDA;
+         for( row = 0; row < piv; row++ )
+         { B[boff+row] -= B[boff+piv] * A[aoff+row]; }
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        col, row, t;
+   /* Per column of B, rows first to last: subtract the entries above, then divide. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         acc = ALPHA * B[boff+row];
+         for( t = 0; t < row; t++ ) { acc -= A[t+row*LDA] * B[boff+t]; }
+         acc /= A[row+row*LDA];
+         B[boff+row] = acc;
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        col, row, t;
+   /* Per column of B, rows first to last: subtract the entries above; no division. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               boff = col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         acc = ALPHA * B[boff+row];
+         for( t = 0; t < row; t++ ) { acc -= A[t+row*LDA] * B[boff+t]; }
+         B[boff+row] = acc;
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, row, src;
+   /* Columns of B last to first: scale, subtract the later columns, divide by A's diagonal. */
+   for( col = N-1; col >= 0; col-- )
+   {
+      const int               aoff = col * LDA, boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( src = col+1; src < N; src++ )
+      {
+         const int            soff = src * LDB;
+         for( row = 0; row < M; row++ )
+         { B[boff+row] -= A[aoff+src] * B[soff+row]; }
+      }
+      for( row = 0; row < M; row++ ) B[boff+row] /= A[aoff+col];
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, row, src;
+   /* Columns of B last to first: scale, subtract the later columns; A's diagonal is not read. */
+   for( col = N-1; col >= 0; col-- )
+   {
+      const int               aoff = col * LDA, boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( src = col+1; src < N; src++ )
+      {
+         const int            soff = src * LDB;
+         for( row = 0; row < M; row++ )
+         { B[boff+row] -= A[aoff+src] * B[soff+row]; }
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            coef;
+   int                        dst, piv, row;
+   /* Columns of B first to last: divide, push into the later columns, scale by ALPHA last. */
+   for( piv = 0; piv < N; piv++ )
+   {
+      const int               aoff = piv * LDA, poff = piv * LDB;
+      for( row = 0; row < M; row++ ) B[poff+row] /= A[aoff+piv];
+      for( dst = piv+1; dst < N; dst++ )
+      {
+         const int            doff = dst * LDB;
+         coef = A[aoff+dst];
+         for( row = 0; row < M; row++ ) B[doff+row] -= coef * B[poff+row];
+      }
+      for( row = 0; row < M; row++ ) B[poff+row] *= ALPHA;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            coef;
+   int                        dst, piv, row;
+   /* Columns of B first to last: push into the later columns, scale by ALPHA last; no division. */
+   for( piv = 0; piv < N; piv++ )
+   {
+      const int               aoff = piv * LDA, poff = piv * LDB;
+      for( dst = piv+1; dst < N; dst++ )
+      {
+         const int            doff = dst * LDB;
+         coef = A[aoff+dst];
+         for( row = 0; row < M; row++ ) B[doff+row] -= coef * B[poff+row];
+      }
+      for( row = 0; row < M; row++ ) B[poff+row] *= ALPHA;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, row, src;
+   /* Columns of B first to last: scale, subtract the earlier columns, divide by A's diagonal. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               aoff = col * LDA, boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( src = 0; src < col; src++ )
+      {
+         for( row = 0; row < M; row++ ) B[boff+row] -= A[aoff+src] * B[src*LDB+row];
+      }
+      for( row = 0; row < M; row++ ) B[boff+row] /= A[aoff+col];
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        col, row, src;
+   /* Columns of B first to last: scale, subtract the earlier columns; A's diagonal is not read. */
+   for( col = 0; col < N; col++ )
+   {
+      const int               aoff = col * LDA, boff = col * LDB;
+      for( row = 0; row < M; row++ ) B[boff+row] *= ALPHA;
+      for( src = 0; src < col; src++ )
+      {
+         for( row = 0; row < M; row++ ) B[boff+row] -= A[aoff+src] * B[src*LDB+row];
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Kernel HPL_dtrsm0 picks for right side, upper, transpose, non-unit A. */
+   register double            t0;
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
+   /* Solves X * A^T = ALPHA * B in place, from the last column of B back. */
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {  /* jak = k*LDA and jbk = k*LDB: starts of column k in A and in B */
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; }
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )
+      {  /* remove A(j,k) times column k from every earlier column j < k */
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }
+      }  /* ALPHA is applied only after column k was used, still unscaled */
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Kernel HPL_dtrsm0 picks for right side, upper, transpose, unit A. */
+   register double            t0;
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
+   /* Solves X * A^T = ALPHA * B in place; the diagonal of A is never read. */
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {  /* jak = k*LDA and jbk = k*LDB: starts of column k in A and in B */
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )
+      {  /* remove A(j,k) times column k from every earlier column j < k */
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }
+      }  /* ALPHA is applied only after column k was used, still unscaled */
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsm0
+(
+   const enum HPL_SIDE        SIDE,
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_SIDE        SIDE;
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Column-major dtrsm driver: trivial cases first, then kernel dispatch. */
+   int                        i, j;
+   /* An empty B ( M == 0 or N == 0 ) leaves nothing to solve. */
+   if( ( M == 0 ) || ( N == 0 ) ) return;
+   /* ALPHA == 0: B is overwritten with zeros and A is never read. */
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ )
+      {  for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; }
+      return;
+   }
+   /* Otherwise call the kernel named after SIDE, UPLO, TRANS and DIAG. */
+   if( SIDE == HplLeft )
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else  /* every TRANS other than HplNoTrans uses a transpose kernel */
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else  /* every UPLO other than HplUpper uses a lower kernel */
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+   else  /* every SIDE other than HplLeft uses a right-side kernel */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_SIDE              SIDE,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dtrsm
+( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_SIDE              SIDE;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsm solves one of the matrix equations
+ *  
+ *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ *  
+ * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+ * non-unit, upper or lower triangular matrix and op(A) is one of
+ *  
+ *    op( A ) = A   or   op( A ) = A^T.
+ *  
+ * The matrix X is overwritten on B.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * SIDE    (local input)                 const enum HPL_SIDE
+ *         On entry, SIDE  specifies  whether  op(A) appears on the left
+ *         or right of X as follows:
+ *            SIDE==HplLeft    op( A ) * X = alpha * B,
+ *            SIDE==HplRight   X * op( A ) = alpha * B.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry, TRANS  specifies the  form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:                      
+ *            TRANS==HplNoTrans     : op( A ) = A,                     
+ *            TRANS==HplTrans       : op( A ) = A^T,                   
+ *            TRANS==HplConjTrans   : op( A ) = A^T.                   
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the  matrix B.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix B.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero then the elements of the matrix B need not
+ *         be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+ *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+ *         k by k upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower on  entry,
+ *         the  leading k by k lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+ *
+ * B       (local input/output)          double *
+ *         On entry,  B  points  to an array of size equal to or greater
+ *         than LDB * n.  Before entry, the leading  m by n  part of the
+ *         array B must contain the matrix B, except when ALPHA is zero,
+ *         in which case B need not be set on entry.  On exit, the array
+ *         B is overwritten by the m by n solution matrix.
+ *
+ * LDB     (local input)                 const int
+ *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
+ *         declared  in  the  calling  (sub) program.  LDB  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+   }
+   else  /* row-major: flip SIDE and UPLO, swap M and N, keep TRANS */
+   {
+      HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
+                  ( UPLO == HplLower ? HplUpper : HplLower ),
+                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef StringSunStyle
+#if defined( HPL_USE_F77_INTEGER_DEF )
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif /* IONE is passed by value after the regular F77dtrsm arguments */
+#ifdef StringStructVal
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,
+                             F77lda = LDA, F77ldb = LDB;
+#else
+#define  F77M                M
+#define  F77N                N
+#define  F77lda              LDA
+#define  F77ldb              LDB
+#endif
+   char                      cside, cuplo, ctran, cdiag;
+   /* Translate the enum arguments into the one-letter Fortran option codes. */
+   if(      TRANS == HplNoTrans ) ctran = 'N';
+   else if( TRANS == HplTrans   ) ctran = 'T';
+   else                           ctran = 'C';
+   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
+
+   if( ORDER == HplColumnMajor )
+   {
+      cside = ( SIDE == HplRight ? 'R' : 'L' );
+      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+   else  /* row-major: opposite side and triangle, N and M swapped below */
+   {
+      cside = ( SIDE == HplRight ? 'L' : 'R' );
+      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dtrsm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c
new file mode 100644
index 000000000..99e84f073
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c
@@ -0,0 +1,520 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for lower, no-transpose, non-unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
+   register double            t0;
+   /* Solves A * x = b in X; jaj steps by LDA+1, i.e. along A's diagonal. */
+   for( j = 0, jaj = 0, jx  = 0; j < N; j++, jaj += ldap1, jx += INCX )
+   {  /* finish x(j), then subtract x(j) * A(i,j) from the entries i > j */
+      X[jx] /= A[jaj]; t0 = X[jx];
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for lower, no-transpose, unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
+   register double            t0;
+   /* Solves A * x = b in X; A[jaj] (the diagonal) is never read here. */
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX )
+   {  /* x(j) is already final: subtract x(j) * A(i,j) from entries i > j */
+      t0 = X[jx];
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for lower, transpose, non-unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
+   register double            t0;
+   /* Solves A^T * x = b in X, from the last entry back; jaj = j*(LDA+1). */
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {  /* t0 = x(j) - sum over i > j of A(i,j) * x(i), then / A(j,j) */
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; }
+      t0 /= A[jaj]; X[jx] = t0;
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for lower, transpose, unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;
+   register double            t0;
+   /* Solves A^T * x = b in X, last entry first; A's diagonal is not read. */
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {  /* x(j) = x(j) - sum over i > j of A(i,j) * x(i), no division */
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; }
+      X[jx] = t0;
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for upper, no-transpose, non-unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+   /* Solves A * x = b in X, last entry first; jaj = j*LDA is column j of A. */
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {  /* finish x(j) using A(j,j), then update the entries i < j */
+      X[jx] /= A[j+jaj]; t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { X[ix] -= t0 * A[iaij]; }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for upper, no-transpose, unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+   /* Solves A * x = b in X, last entry first; A's diagonal is not read. */
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {  /* x(j) is already final: subtract x(j) * A(i,j) from entries i < j */
+      t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { X[ix] -= t0 * A[iaij]; }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for upper, transpose, non-unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+   /* Solves A^T * x = b in X, first entry first; jaj = j*LDA. */
+   for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+   {  /* t0 = x(j) - sum over i < j of A(i,j) * x(i) */
+      t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { t0 -= A[iaij] * X[ix]; }
+      t0 /= A[iaij]; X[jx] = t0;  /* loop left iaij == j+jaj, i.e. A(j,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Kernel HPL_dtrsv0 picks for upper, transpose, unit diagonal A. */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+   /* Solves A^T * x = b in X, first entry first; A's diagonal is not read. */
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+   {  /* x(j) = x(j) - sum over i < j of A(i,j) * x(i), no division */
+      t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { t0 -= A[iaij] * X[ix]; }
+      X[jx] = t0;
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsv0
+(
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+) 
+#else
+static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Column-major dtrsv driver: returns for N == 0, else dispatches. */
+   if( N == 0 ) return;
+   /* Call the kernel named after UPLO, TRANS and DIAG (8 combinations). */
+   if( UPLO == HplUpper )
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUNU( N,    A, LDA, X, INCX ); }
+      }
+      else  /* every TRANS other than HplNoTrans uses a transpose kernel */
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+   else  /* every UPLO other than HplUpper uses a lower kernel */
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLNU( N,    A, LDA, X, INCX ); }
+      }
+      else
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dtrsv
+( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsv solves one of the systems of equations
+ *  
+ *     A * x = b,   or   A^T * x = b,
+ *  
+ * where b and x are n-element vectors and  A  is an n by n non-unit, or
+ * unit, upper or lower triangular matrix.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies  the equations  to  be  solved as
+ *         follows:
+ *            TRANS==HplNoTrans     A   * x = b,
+ *            TRANS==HplTrans       A^T * x = b.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the order of the matrix A. N must be at
+ *         least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+ *         n by n upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower  on entry,
+ *         the  leading n by n lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,n).
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         Before entry,  the  incremented array  X  must contain  the n
+ *         element right-hand side vector b. On exit,  X  is overwritten
+ *         with the solution vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.  NOTE(review): the HPL_CALL_VSIPL path
+ *         indexes X from X[0] with stride INCX; confirm INCX > 0 there.
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+   }
+   else  /* row-major: flip both UPLO and TRANS, keep DIAG */
+   {
+      HPL_dtrsv0( ( UPLO  == HplUpper   ? HplLower : HplUpper   ),
+                  ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  DIAG, N, A, LDA, X, INCX );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif /* IONE is passed by value after the regular F77dtrsv arguments */
+#ifdef StringStructVal
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF 
+   const F77_INTEGER         F77N = N, F77lda = LDA, F77incx = INCX;
+#else
+#define F77N              N
+#define F77lda            LDA
+#define F77incx           INCX
+#endif
+   char                      cuplo, ctran, cdiag;
+   /* One-letter option codes; row-major flips both the triangle and TRANS. */
+   if( ORDER == HplColumnMajor )
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+   }
+   else
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+   }
+   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
+
+#ifdef StringSunStyle
+   F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
+             IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+   ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+   fuplo = HPL_C2F_CHAR( cuplo );
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructVal
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructPtr
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
+#endif
+
+#endif
+/*
+ * End of HPL_dtrsv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c
new file mode 100644
index 000000000..5ceabdf25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_idamax
+
+#ifdef STDC_HEADERS
+int HPL_idamax
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX
+)
+#else
+int HPL_idamax
+( N, X, INCX )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_idamax returns  the index in an n-vector  x  of the first element
+ * having maximum absolute value.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS   /* variant 1: hand the search to cblas_idamax */
+   return( (int)(cblas_idamax( N, X, INCX )) );
+#endif
+#ifdef HPL_CALL_VSIPL   /* variant 2: C loop processing 8 elements a pass */
+   register double           absxi, smax = HPL_rzero, x0, x1, x2, x3,
+                             x4, x5, x6, x7;
+   const double              * StX;
+   register int              imax = 0, i = 0, j;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* nu is N rounded down to a multiple of 8; StX is where the loop stops */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+/* i counts elements from 0; strict '>' keeps the first of equal maxima */
+            absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+ 
+            X    += incX8;
+ 
+         } while( X != StX );
+      }
+/* Clean-up loop for the N - nu elements left over by the pass above */
+      for( j = N - nu; j != 0; j-- )
+      {
+         x0    = (*X);
+         absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+         i    += 1;
+         X    += INCX;
+      }
+   }
+   return( imax );
+#endif
+#ifdef HPL_CALL_FBLAS   /* variant 3: hand the search to F77idamax */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   int                       imax = 0;
+/* One is subtracted from the F77idamax result; imax stays 0 if N <= 0 */
+   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
+   return( imax );
+#endif
+/*
+ * End of HPL_idamax
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o
new file mode 100644
index 000000000..ff89e13c3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o
new file mode 100644
index 000000000..d0bc0e6e6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o
new file mode 100644
index 000000000..12e87044c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o
new file mode 100644
index 000000000..a9b801898
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o
new file mode 100644
index 000000000..255cfa4b2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o
new file mode 100644
index 000000000..4cb4cd8c9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o
new file mode 100644
index 000000000..339a5635f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o
new file mode 100644
index 000000000..2930120c9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o
new file mode 100644
index 000000000..b765e7be6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to set CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, etc.
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+#
+## Object files ########################################################
+#
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+#
+## Targets #############################################################
+# all, lib and clean name actions rather than files: keep them phony.
+all     : lib 
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp file touched once the objects have been archived.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c
new file mode 100644
index 000000000..dd03b79b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+ 
+#ifdef STDC_HEADERS
+int HPL_binit_1rinM( HPL_T_panel * PANEL )
+#else
+int HPL_binit_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_1rinM returns HPL_SUCCESS without doing anything when PANEL
+ * is NULL or when PANEL->grid->npcol is at most one.  Otherwise:
+ *
+ * - without HPL_USE_MPI_DATATYPE,  HPL_copyL( PANEL )  is called  and
+ *   HPL_SUCCESS is returned;
+ * - with HPL_USE_MPI_DATATYPE,  HPL_copyL( PANEL )  is called only if
+ *   HPL_COPY_L is defined,  then HPL_packL is called and its result is
+ *   mapped to HPL_SUCCESS (MPI_SUCCESS) or HPL_FAILURE (anything else).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   HPL_T_panel *
+ *         On entry, PANEL points to the panel data structure or is NULL.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+
+#ifndef HPL_USE_MPI_DATATYPE
+/* No MPI data type in use: the panel is always copied. */
+   HPL_copyL( PANEL );
+   return( HPL_SUCCESS );
+#else
+#ifdef HPL_COPY_L
+   HPL_copyL( PANEL );
+#endif
+/* The result of HPL_packL is compared against MPI_SUCCESS. */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+      return( HPL_FAILURE );
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message arguments: entry 0 of the buffers / counts / dtypes of PANEL */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message arguments: PANEL->L2 sent as PANEL->len items of MPI_DOUBLE */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1rinM( HPL_T_panel * PANEL, int * IFLAG )
+#else
+int HPL_bcast_1rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * HPL_bcast_1rinM makes one attempt at moving the panel message over
+ * PANEL->grid->row_comm.  The root (PANEL->pcol) sends it to its next
+ * neighbor and, if there are more than two processes, to the one after
+ * that.  Every other process probes its partner: when nothing is pending
+ * HPL_KEEP_TESTING is reported;  otherwise the message is received and,
+ * unless this process sits right after or right before the root, sent
+ * on to the next process.
+ *
+ * The outcome (HPL_SUCCESS, HPL_KEEP_TESTING or HPL_FAILURE) is stored
+ * in *IFLAG and is also the return value.
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, root, size;
+/* Nothing to do without a panel or when npcol is at most one. */
+   if( PANEL == NULL )
+   {
+      *IFLAG = HPL_SUCCESS;
+      return( HPL_SUCCESS );
+   }
+   size = PANEL->grid->npcol;
+   if( size <= 1 )
+   {
+      *IFLAG = HPL_SUCCESS;
+      return( HPL_SUCCESS );
+   }
+   comm  = PANEL->grid->row_comm;
+   rank  = PANEL->grid->mycol;
+   root  = PANEL->pcol;
+   msgid = PANEL->msgid;
+   next  = MModAdd1( rank, size );
+
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( size > 2 ) && ( ierr == MPI_SUCCESS ) )
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE,
+                          MModAdd1( next, size ), msgid, comm );
+   }
+   else
+   {
+/* With size > 2, the process two steps after the root reads from root. */
+      prev    = MModSub1( rank, size );
+      partner = prev;
+      if( ( size > 2 ) && ( MModSub1( prev, size ) == root ) )
+         partner = root;
+
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ( ierr == MPI_SUCCESS ) && ( go == 0 ) )
+      {
+         *IFLAG = HPL_KEEP_TESTING;
+         return( *IFLAG );
+      }
+      if( ierr == MPI_SUCCESS )
+      {
+         ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                          comm, &PANEL->status[0] );
+         if( ( ierr == MPI_SUCCESS ) &&
+             ( prev != root ) && ( next != root ) )
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                             comm );
+      }
+   }
+/* Any MPI error that occurred above is reported as HPL_FAILURE. */
+   *IFLAG = ( ierr == MPI_SUCCESS ) ? HPL_SUCCESS : HPL_FAILURE;
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1rinM( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_bwait_1rinM returns HPL_SUCCESS without doing anything when PANEL
+ * is NULL or when PANEL->grid->npcol is at most one.  Otherwise,  when
+ * HPL_USE_MPI_DATATYPE is defined,  the MPI data type  PANEL->dtypes[0]
+ * is freed and  HPL_FAILURE  is returned if  MPI_Type_free  fails.  In
+ * every other case HPL_SUCCESS is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   HPL_T_panel *
+ *         On entry, PANEL points to the panel data structure or is NULL.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+#ifdef HPL_USE_MPI_DATATYPE
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+      return( HPL_FAILURE );
+#endif
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c
new file mode 100644
index 000000000..dd5eb2d12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_1ring( HPL_T_panel * PANEL )
+#else
+int HPL_binit_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_1ring returns HPL_SUCCESS without doing anything when PANEL
+ * is NULL or when PANEL->grid->npcol is at most one.  Otherwise:
+ *
+ * - without HPL_USE_MPI_DATATYPE,  HPL_copyL( PANEL )  is called  and
+ *   HPL_SUCCESS is returned;
+ * - with HPL_USE_MPI_DATATYPE,  HPL_copyL( PANEL )  is called only if
+ *   HPL_COPY_L is defined,  then HPL_packL is called and its result is
+ *   mapped to HPL_SUCCESS (MPI_SUCCESS) or HPL_FAILURE (anything else).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   HPL_T_panel *
+ *         On entry, PANEL points to the panel data structure or is NULL.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+
+#ifndef HPL_USE_MPI_DATATYPE
+/* No MPI data type in use: the panel is always copied. */
+   HPL_copyL( PANEL );
+   return( HPL_SUCCESS );
+#else
+#ifdef HPL_COPY_L
+   HPL_copyL( PANEL );
+#endif
+/* The result of HPL_packL is compared against MPI_SUCCESS. */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+      return( HPL_FAILURE );
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message arguments: entry 0 of the buffers / counts / dtypes of PANEL */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* Message arguments: PANEL->L2 sent as PANEL->len items of MPI_DOUBLE */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1ring( HPL_T_panel * PANEL, int * IFLAG )
+#else
+int HPL_bcast_1ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * HPL_bcast_1ring makes one attempt at moving the panel message over
+ * PANEL->grid->row_comm.  The root (PANEL->pcol) sends it to its next
+ * neighbor.  Every other process probes its predecessor: when nothing
+ * is pending  HPL_KEEP_TESTING  is reported;  otherwise the message is
+ * received and, unless the next process is the root, sent on to it.
+ *
+ * The outcome (HPL_SUCCESS, HPL_KEEP_TESTING or HPL_FAILURE) is stored
+ * in *IFLAG and is also the return value.
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, prev, rank, root,
+                              size;
+/* Nothing to do without a panel or when npcol is at most one. */
+   if( PANEL == NULL )
+   {
+      *IFLAG = HPL_SUCCESS;
+      return( HPL_SUCCESS );
+   }
+   size = PANEL->grid->npcol;
+   if( size <= 1 )
+   {
+      *IFLAG = HPL_SUCCESS;
+      return( HPL_SUCCESS );
+   }
+
+   comm  = PANEL->grid->row_comm;
+   rank  = PANEL->grid->mycol;
+   root  = PANEL->pcol;
+   msgid = PANEL->msgid;
+
+   if( rank == root )
+   {
+      next = MModAdd1( rank, size );
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+   }
+   else
+   {
+/* Not the root: look for a message coming from the previous process. */
+      prev = MModSub1( rank, size );
+      ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ( ierr == MPI_SUCCESS ) && ( go == 0 ) )
+      {
+         *IFLAG = HPL_KEEP_TESTING;
+         return( *IFLAG );
+      }
+      if( ierr == MPI_SUCCESS )
+      {
+         ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
+                          comm, &PANEL->status[0] );
+         next = MModAdd1( rank, size );
+         if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
+                             msgid, comm );
+      }
+   }
+/* Any MPI error that occurred above is reported as HPL_FAILURE. */
+   *IFLAG = ( ierr == MPI_SUCCESS ) ? HPL_SUCCESS : HPL_FAILURE;
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1ring( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_bwait_1ring returns HPL_SUCCESS without doing anything when PANEL
+ * is NULL or when PANEL->grid->npcol is at most one.  Otherwise,  when
+ * HPL_USE_MPI_DATATYPE is defined,  the MPI data type  PANEL->dtypes[0]
+ * is freed and  HPL_FAILURE  is returned if  MPI_Type_free  fails.  In
+ * every other case HPL_SUCCESS is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   HPL_T_panel *
+ *         On entry, PANEL points to the panel data structure or is NULL.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+#ifdef HPL_USE_MPI_DATATYPE
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+      return( HPL_FAILURE );
+#endif
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c
new file mode 100644
index 000000000..56581ea0d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c
@@ -0,0 +1,236 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L     /* and also wants to avoid the copy of L ...  */
+#define HPL_COPY_L   /* well, sorry, cannot do that:  force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_2rinM  prepares  PANEL  for the  2rinM  broadcast variant.
+ * HPL_SUCCESS is returned at once  when there is no panel or when the
+ * process grid has at most one column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * The panel is copied into a contiguous buffer,  except when MPI data
+ * types are used and HPL_COPY_L is not defined.
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type;  a status other than
+ * MPI_SUCCESS coming back from HPL_packL is reported as HPL_FAILURE.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message = entry 0 of the panel's buffers / counts / dtypes arrays.  */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message = PANEL->len items of type MPI_DOUBLE located at PANEL->L2. */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/* One step of the 2rinM panel broadcast.  On return,  *IFLAG  (which is
+ * also the return value) is HPL_SUCCESS, HPL_FAILURE or HPL_KEEP_TESTING.
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, roo2, root, size;
+/* ..
+ * .. Nothing to do without a panel or with a single process column ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  the root process sends to its two right neighbors and to
+ * the mid-process roo2.  If I am not the root process,  I probe for the
+ * message and,  if it is there,  receive it;  unless I am the last pro-
+ * cess of either ring, I then forward it to the next.  Otherwise, the
+ * caller is informed that the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2: root advanced by (size+1)/2 through MModAdd, the mid-process. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+/* With more than two columns, also feed next+1 and the mid-process.  */
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         if( MModAdd1( next, size ) != roo2 )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE,
+                             MModAdd1( next, size ), msgid, comm );
+         }
+/* Send to roo2 only if no earlier send has failed.                   */
+         if( ierr == MPI_SUCCESS )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                             comm );
+         }
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( prev == root ) || ( rank == roo2 ) ||
+          ( MModSub1( prev,  size )  == root ) ) partner = root;
+      else                                       partner = prev;
+/* I receive from root if root is prev or prev's prev, or if I am roo2. */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+/* go != 0 means a matching message is pending and can be received.   */
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) && ( prev != root ) &&
+                ( next != roo2        ) && ( next != root ) )
+            { /* next is neither root nor fed directly by root */
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Completion step of the 2rinM broadcast variant.  Nothing is done when
+ * there is no panel or when the process grid has at most one column.
+ */
+   int                        status = HPL_SUCCESS;
+
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the MPI data type kept in PANEL->dtypes[0];  an MPI error is
+ * reported to the caller as HPL_FAILURE.
+ */
+      if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+      {
+         status = HPL_FAILURE;
+      }
+#endif
+   }
+
+   return( status );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c
new file mode 100644
index 000000000..f0e6e2647
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L     /* and also wants to avoid the copy of L ...  */
+#define HPL_COPY_L   /* well, sorry, cannot do that:  force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_2ring  prepares  PANEL  for the  2ring  broadcast variant.
+ * HPL_SUCCESS is returned at once  when there is no panel or when the
+ * process grid has at most one column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * The panel is copied into a contiguous buffer,  except when MPI data
+ * types are used and HPL_COPY_L is not defined.
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type;  a status other than
+ * MPI_SUCCESS coming back from HPL_packL is reported as HPL_FAILURE.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message = entry 0 of the panel's buffers / counts / dtypes arrays.  */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message = PANEL->len items of type MPI_DOUBLE located at PANEL->L2. */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/* One step of the 2ring panel broadcast.  On return,  *IFLAG  (which is
+ * also the return value) is HPL_SUCCESS, HPL_FAILURE or HPL_KEEP_TESTING.
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, rank,
+                              roo2, root, size;
+/* ..
+ * .. Nothing to do without a panel or with a single process column ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  the root process sends to its right neighbor  and to the
+ * mid-process roo2.  If I am not the root process,  I probe for the
+ * message and,  if it is there,  receive it;  unless I am the last pro-
+ * cess of either ring, I then forward it to the next.  Otherwise, the
+ * caller is informed that the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2: root advanced by (size+1)/2 through MModAdd, the mid-process. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                          comm );
+      }
+   }
+   else
+   {
+      partner = MModSub1( rank, size );
+      if( ( partner == root ) || ( rank == roo2 ) ) partner = root;
+/* I receive from root if I am roo2,  else from MModSub1( rank, size ). */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+/* go != 0 means a matching message is pending and can be received.   */
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( next != roo2 ) && ( next != root ) )
+            { /* next is neither root nor the mid-process fed by root */
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Completion step of the 2ring broadcast variant.  Nothing is done when
+ * there is no panel or when the process grid has at most one column.
+ */
+   int                        status = HPL_SUCCESS;
+
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the MPI data type kept in PANEL->dtypes[0];  an MPI error is
+ * reported to the caller as HPL_FAILURE.
+ */
+      if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+      {
+         status = HPL_FAILURE;
+      }
+#endif
+   }
+
+   return( status );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c
new file mode 100644
index 000000000..100161152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bcast
+(
+   HPL_T_panel *                    PANEL,
+   int *                            IFLAG
+)
+#else
+int HPL_bcast
+( PANEL, IFLAG )
+   HPL_T_panel *                    PANEL;
+   int *                            IFLAG;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bcast broadcasts  the  current  panel.  Successful  completion is
+ * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+ * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+ * not completed, in which case this function should be called again.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * IFLAG   (output)                      int *
+ *         On exit,  IFLAG  indicates  whether  or not the broadcast has
+ *         occurred.  It is also set when the topology is unknown.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the selected virtual broadcast topology.  An unknown one
+ * does no communication, but IFLAG is still set for the caller.
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break;
+      case HPL_1RING   : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break;
+      case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break;
+      case HPL_2RING   : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break;
+      case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
+      case HPL_BLONG   : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
+      default          : *IFLAG = HPL_SUCCESS; ierr = HPL_SUCCESS;
+   }
+/* Hand back the variant's status code (HPL_SUCCESS by default).       */
+   return( ierr );
+/*
+ * End of HPL_bcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c
new file mode 100644
index 000000000..3daf72b7d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_binit
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_binit
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_binit initializes  a  row  broadcast.  Successful  completion  is
+ * indicated by the returned error code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.  A NULL pointer is accepted and is a no-op.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..  (a NULL panel is a no-op, as in HPL_bcast)
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_binit_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_binit_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_binit_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_binit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c
new file mode 100644
index 000000000..5fa221937
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c
@@ -0,0 +1,445 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+/* Slot indices used with PANEL->buffers[], ->counts[] and ->dtypes[] */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blonM( HPL_T_panel * PANEL )
+#else
+int HPL_binit_blonM( PANEL )
+   HPL_T_panel * PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blonM is the initialization step of the blonM broadcast.  A
+ * NULL panel or a grid with at most one process column needs no work;
+ * otherwise HPL_copyL( PANEL ) is called unless HPL_USE_MPI_DATATYPE is
+ * defined without HPL_COPY_L.  HPL_SUCCESS is returned in every case.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer
+ */
+      HPL_copyL( PANEL );
+#endif
+   }
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* MPI datatype build: every message is described by the PANEL slots  */
+#define   _M_BUFF_S1        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S1       PANEL->counts[I_SEND]
+#define   _M_TYPE_S1        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_S2        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S2       PANEL->counts[I_SEND]
+#define   _M_TYPE_S2        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R1        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R1       PANEL->counts[I_RECV]
+#define   _M_TYPE_R1        PANEL->dtypes[I_RECV]
+
+#define   _M_BUFF_R2        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R2       PANEL->counts[I_RECV]
+#define   _M_TYPE_R2        PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+
+#else
+/* Otherwise each message is a run of MPI_DOUBLE taken from PANEL->L2  */
+#define   _M_BUFF_S1        (void *)(PANEL->L2)
+#define   _M_COUNT_S1       PANEL->len
+#define   _M_TYPE_S1        MPI_DOUBLE
+/* ibuf, lbuf, ibufS/R and lbufS/R must be in scope at the point of use */
+#define   _M_BUFF_S2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S2       lbuf
+#define   _M_TYPE_S2        MPI_DOUBLE
+ 
+#define   _M_BUFF_R1        (void *)(PANEL->L2)
+#define   _M_COUNT_R1       PANEL->len
+#define   _M_TYPE_R1        MPI_DOUBLE
+ 
+#define   _M_BUFF_R2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R2       lbuf
+#define   _M_TYPE_R2        MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blonM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blonM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcasts PANEL over PANEL->grid->row_comm from the process column
+ * PANEL->pcol.  The value returned is also stored in *IFLAG.
+ */
+/* .. Local Variables .. */
+   MPI_Comm                   comm;
+   int                        COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf,
+                              ibufR, ibufS, dummy=0, indx, ip2=1, k, l,
+                              lbuf, lbufR, lbufS, mask=1, msgid, mydist,
+                              mydist2, next, npm1, npm2, partner, prev,
+                              rank, root, size;
+/* .. Executable Statements .. */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  root process  sends to its right neighbor,  then spread
+ * the panel on the other npcol - 2 processes.  If  I  am  not the root 
+ * process, probe for message received.  If the message is there,  then
+ * receive it. If I am just after the root process, return.  Otherwise,
+ * keep spreading on those npcol - 2 processes.  Otherwise,  inform the
+ * caller that the panel has still not been received.
+ */
+   comm = PANEL->grid->row_comm; rank  = PANEL->grid->mycol;
+   root = PANEL->pcol;           msgid = PANEL->msgid;
+   prev = MModSub1( rank, size );
+ 
+   if( rank == root )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   HPL_packL( PANEL, 0, PANEL->len, I_SEND );
+#endif
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1,
+                             MModAdd1( rank, size ), msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+   else if( prev == root )
+   {
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.  With the probe disabled go keeps its initial value 1.
+ *
+ *    ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] );
+ */
+      if( ierr == MPI_SUCCESS )
+      {                                  /* if panel is here, proceed */
+         if( go != 0 )
+         {
+#ifdef HPL_USE_MPI_DATATYPE
+            ierr =      HPL_packL( PANEL, 0, PANEL->len, I_RECV );
+#endif
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1,
+                                  root, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+      }
+   }
+/*
+ * if I am just after the root, exit now. The message receive  completed
+ * successfully, this guy is done. If there are only 2 processes in each 
+ * row of processes, we are done as well.
+ */
+   if( ( prev == root ) || ( size == 2 ) )
+   {
+      *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+      return( *IFLAG );
+   }
+/*
+ * Otherwise, proceed with broadcast -  Spread  the panel across process
+ * columns
+ */
+   npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len;
+/* The loop leaves ip2 = largest power of 2 <= npm2, mask = 2*ip2 - 1 */
+   k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   if( rank == root ) mydist2 = ( mydist = 0 );
+   else   mydist2 = ( mydist  = MModSub( rank, root, size ) - 1 );
+
+   indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 );
+ 
+   do
+   {
+      mask ^= ip2;
+
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+
+         partner = mydist ^ ip2;
+
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+  
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                   msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < npm1 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size );
+   if( MModSub1( prev, size ) == root ) prev = root;
+   next = MModAdd1( rank, size );
+   if( rank == root ) next = MModAdd1( next, size );
+
+   for( k = 0; k < npm2; k++ )
+   {
+      l = ( k >> 1 );
+/*
+ * Who is sending to who and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages: post the send, receive, then wait on the send
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+ 
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+ 
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blonM( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_blonM( PANEL )
+   HPL_T_panel * PANEL;
+#endif
+{
+/*
+ * HPL_bwait_blonM has nothing to wait for:  every path  below  returns
+ * HPL_SUCCESS.  The NULL and single-column guards mirror those found in
+ * HPL_binit_blonM and HPL_bcast_blonM.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c
new file mode 100644
index 000000000..e57f11bcc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c
@@ -0,0 +1,363 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+/* Slot indices used with PANEL->buffers[], ->counts[] and ->dtypes[] */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blong( HPL_T_panel * PANEL )
+#else
+int HPL_binit_blong( PANEL )
+   HPL_T_panel * PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blong is the initialization step of the blong broadcast.  A
+ * NULL panel or a grid with at most one process column needs no work;
+ * otherwise HPL_copyL( PANEL ) is called unless HPL_USE_MPI_DATATYPE is
+ * defined without HPL_COPY_L.  HPL_SUCCESS is returned in every case.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer
+ */
+      HPL_copyL( PANEL );
+#endif
+   }
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* MPI datatype build: every message is described by the PANEL slots  */
+#define   _M_BUFF_S         PANEL->buffers[I_SEND]
+#define   _M_COUNT_S        PANEL->counts[I_SEND]
+#define   _M_TYPE_S         PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R         PANEL->buffers[I_RECV]
+#define   _M_COUNT_R        PANEL->counts[I_RECV]
+#define   _M_TYPE_R         PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+ 
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+ 
+#else
+/* Otherwise each message is a run of MPI_DOUBLE taken from PANEL->L2; */
+#define   _M_BUFF_S         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S        lbuf
+#define   _M_TYPE_S         MPI_DOUBLE
+/* ibuf, lbuf, ibufS/R and lbufS/R must be in scope at the point of use */
+#define   _M_BUFF_R         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R        lbuf
+#define   _M_TYPE_R         MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blong
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blong( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcasts PANEL over PANEL->grid->row_comm from the process column
+ * PANEL->pcol.  The value returned is also stored in *IFLAG.
+ */
+/* .. Local Variables .. */
+   MPI_Comm                   comm;
+   int                        COUNT, count, dummy=0, ierr=MPI_SUCCESS,
+                              ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf,
+                              lbufR, lbufS, mask, msgid, mydist, mydist2,
+                              next, npm1, partner, prev, rank, root, size;
+/* .. Executable Statements .. */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process,  test  for  message receive completion. If
+ * the message  is there,  then receive it,  and  keep  spreading  in  a
+ * blocking fashion this time.  Otherwise,  inform  the caller  that the
+ * panel has still not been received. 
+ */
+   comm    = PANEL->grid->row_comm;  rank  = PANEL->grid->mycol;
+   mask    = PANEL->grid->col_mask;  ip2   = PANEL->grid->col_ip2m1;
+   root    = PANEL->pcol;            msgid = PANEL->msgid;
+   COUNT   = PANEL->len;             npm1  = size - 1;
+   mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2;
+   count   = COUNT / size; count = Mmax( count, 1 );
+/*
+ * Spread the panel across process columns
+ */
+   do
+   {
+      mask ^= ip2;
+ 
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+ 
+         partner = mydist ^ ip2;
+ 
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on 
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                     msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( root, partner, size );
+ 
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+ 
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size ); next = MModAdd1( rank, size );
+
+   for( k = 0; k < npm1; k++ )
+   {
+      l = ( k >> 1 ); 
+/*
+ * Who is sending to who and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages: post the send, receive, then wait on the send
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blong( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_blong( PANEL )
+   HPL_T_panel * PANEL;
+#endif
+{
+/*
+ * HPL_bwait_blong has nothing to wait for:  every path  below  returns
+ * HPL_SUCCESS.  The NULL and single-column guards mirror those found in
+ * HPL_binit_blong and HPL_bcast_blong.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c
new file mode 100644
index 000000000..a2e0f4df8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bwait
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_bwait
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_bwait waits for the row broadcast of the current panel to finish
+ * by calling the wait routine of the topology in PANEL->algo->btopo.
+ * The error code of that routine is returned;  HPL_SUCCESS is returned
+ * when PANEL->grid->npcol <= 1  or when the topology is not one of the
+ * six handled below.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Immediate success when PANEL->grid->npcol <= 1
+ */
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the selected virtual broadcast topology
+ */
+   switch( PANEL->algo->btopo )
+   {
+      case HPL_1RING_M : return( HPL_bwait_1rinM( PANEL ) );
+      case HPL_1RING   : return( HPL_bwait_1ring( PANEL ) );
+      case HPL_2RING_M : return( HPL_bwait_2rinM( PANEL ) );
+      case HPL_2RING   : return( HPL_bwait_2ring( PANEL ) );
+      case HPL_BLONG_M : return( HPL_bwait_blonM( PANEL ) );
+      case HPL_BLONG   : return( HPL_bwait_blong( PANEL ) );
+      default          : break;
+   }
+/*
+ * Any other topology value: report success
+ */
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_bwait
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c
new file mode 100644
index 000000000..04f765a6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_copyL
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_copyL
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_copyL copies the panel of columns into the contiguous workspace
+ * PANEL->L2 for later broadcast.  Only process column PANEL->pcol copies;
+ * process row PANEL->prow leaves out the leading jb rows.
+ *
+ * The copy of this panel  into  a contiguous buffer  can be enforced by
+ * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        ioff, jb, lda, m;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Processes outside column PANEL->pcol have nothing to copy
+ */
+   if( PANEL->grid->mycol != PANEL->pcol ) return;
+
+   jb = PANEL->jb; lda = PANEL->lda; m = PANEL->mp; ioff = 0;
+/*
+ * In process row PANEL->prow, leave out the leading jb rows
+ */
+   if( PANEL->grid->myrow == PANEL->prow ) { ioff = jb; m -= jb; }
+/*
+ * Copy m rows by jb columns into PANEL->L2
+ */
+   HPL_dlacpy( m, jb, Mptr( PANEL->A, ioff, -jb, lda ), lda,
+               PANEL->L2, PANEL->ldl2 );
+/*
+ * End of HPL_copyL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c
new file mode 100644
index 000000000..8a70ef83d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c
@@ -0,0 +1,245 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_packL
+(
+   HPL_T_panel *                    PANEL,
+   const int                        INDEX,
+   const int                        LEN,
+   const int                        IBUF
+)
+#else
+int HPL_packL
+( PANEL, INDEX, LEN, IBUF )
+   HPL_T_panel *                    PANEL;
+   const int                        INDEX;
+   const int                        LEN;
+   const int                        IBUF;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_packL forms and commits in PANEL->dtypes[IBUF] the MPI data type
+ * for LEN entries of the panel to be broadcast,  starting at INDEX.  It
+ * returns the MPI error code (MPI_SUCCESS on successful completion).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * INDEX   (input)                       const int
+ *         On entry,  INDEX  points  to  the  first entry of the  packed
+ *         buffer being broadcast.
+ *
+ * LEN     (input)                       const int
+ *         On entry, LEN is the length of the packed buffer.
+ *
+ * IBUF    (input)                       const int
+ *         On entry, IBUF  specifies the panel buffer/count/type entries
+ *         that should be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+#ifndef HPL_COPY_L
+   MPI_Datatype               * type = NULL;
+   void                       * * * bufs = NULL;
+   double                     * A;
+   int                        * blen = NULL;
+   MPI_Aint                   * disp = NULL;
+   int                        curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1,
+                              jb, jbm, jbp1, lda, len, m, m1, nbufs;
+#else
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_COPY_L
+/*
+ * Panel + L1 + DPIV  have been copied into a contiguous buffer - Create
+ * and commit a contiguous data type
+ */
+   PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX);
+   PANEL->counts [IBUF] = 1;
+
+   ierr =      MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+   return( ierr );
+#else
+/*
+ * Panel is not contiguous (because of LDA and also L1 + DPIV) -  Create
+ * and commit a struct data type
+ */
+   jbp1 = ( jb = PANEL->jb ) + 1;
+/*
+ * Temporaries to create the type struct  (jb+1 entries each).
+ */
+   bufs = (void     * * *)malloc( jbp1 * sizeof( void * *     ) );
+   blen = (int          *)malloc( jbp1 * sizeof( int          ) );
+   disp = (MPI_Aint     *)malloc( jbp1 * sizeof( MPI_Aint     ) );
+   type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) );
+ 
+   if( ( bufs != NULL ) && ( blen != NULL ) &&
+       ( disp != NULL ) && ( type != NULL ) )
+   {
+      m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow );
+      if( curr != 0 ) m -= jb;
+ 
+      len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m;
+ 
+      if( ( m > 0 ) && ( ibuf < jbm ) )
+      {
+/*
+ * Retrieve proper pointers depending on process row and column
+ */
+         if( PANEL->grid->mycol == PANEL->pcol )
+         {
+            lda = PANEL->lda;
+            if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); }
+            else            { A = Mptr( PANEL->A,  0, -jb, lda ); }
+         }
+         else { lda = PANEL->ldl2; A = PANEL->L2; }
+/*
+ * Pack the first (partial) column of L  (from row i1 of column j1)
+ */
+         m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m );
+         m1 = Mmin( len, m1 );
+ 
+         bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda ));
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = m1;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+         nbufs++; len -= m1; j1++; ibuf += m1;
+/*
+ * Pack the remaining columns of L  (whole columns, or what is left of len)
+ */
+         while( ( len > 0 ) && ( j1 < jb ) )
+         {
+            m1 = Mmin( len, m );
+ 
+            bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda ));
+            type[nbufs] = MPI_DOUBLE;
+            blen[nbufs] = m1;
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+            nbufs++; len -= m1; j1++; ibuf += m1;
+         }
+      }
+/*
+ * Pack L1, DPIV, DINFO  (the len entries left, read from PANEL->L1)
+ */
+      if( len > 0 )
+      {                                            /* L1, DPIV, DINFO */
+         bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm);
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = len;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+         nbufs++;
+      }
+/* Displacements relative to bufs[0];  disp[0] = 0 is outside the loop */
+      for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0;
+ 
+      PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1;
+/*
+ * construct the struct type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_create_struct( nbufs, blen, disp, type,
+                                   &PANEL->dtypes[IBUF] );
+/*
+ * release temporaries
+ */
+      if( bufs ) free( bufs );
+      if( blen ) free( blen );
+      if( disp ) free( disp );
+      if( type ) free( type );
+/*
+ * commit the type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+      return( ierr );
+   }
+   else
+   {
+/*
+ * Memory allocation failed -> abort
+ */
+      HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" );
+      return( MPI_SUCCESS );    /* never executed (hopefully ...) */
+   }
+#endif
+#else
+          /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug
+             somewhere, so, just in case  and until I find it ... */
+   return( MPI_SUCCESS );   
+#endif
+/*
+ * End of HPL_packL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c
new file mode 100644
index 000000000..ff426891c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c
@@ -0,0 +1,142 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * MPI user-defined data types are never used by this routine, which only
+ * handles small contiguous messages: force the plain MPI_DOUBLE path.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_recv
+(
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              SRC,
+   int                              RTAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_recv
+( RBUF, RCOUNT, SRC, RTAG, COMM )
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              SRC;
+   int                              RTAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_recv  performs a blocking receive  of RCOUNT  double precision
+ * entries by calling MPI_Recv;  it exists mainly as a single place to
+ * experiment with / tune this operation.  HPL_SUCCESS is returned when
+ * the receive succeeds, HPL_FAILURE otherwise.  When RCOUNT is less than
+ * or equal to zero, the function returns HPL_SUCCESS immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * RBUF    (local output)                double *
+ *         RBUF is the starting address of the buffer receiving the
+ *         message.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  is the number of double precision entries
+ *         to receive into RBUF. Nothing is received when RCOUNT <= 0.
+ *
+ * SRC     (local input)                 int
+ *         On entry, SRC  is the rank of the sending process  in the
+ *         communication space defined by COMM.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG is the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        mpierr;
+   MPI_Status                 rstat;
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               rtype;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT <= 0 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+   mpierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &rtype );
+   if( mpierr == MPI_SUCCESS ) { mpierr = MPI_Type_commit( &rtype ); }
+   if( mpierr == MPI_SUCCESS )
+   {
+      mpierr = MPI_Recv( (void *)(RBUF), 1, rtype, SRC, RTAG, COMM, &rstat );
+   }
+   if( mpierr == MPI_SUCCESS ) { mpierr = MPI_Type_free( &rtype ); }
+#else
+   mpierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, COMM,
+                      &rstat );
+#endif
+   if( mpierr != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_recv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c
new file mode 100644
index 000000000..0b2363563
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c
@@ -0,0 +1,239 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * MPI user-defined data types are never used here (small contiguous
+ * messages):  the HPL_USE_MPI_DATATYPE branches below are compiled out.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_sdrv
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              STAG,
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              RTAG,
+   int                              PARTNER,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_sdrv
+( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              STAG;
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              RTAG;
+   int                              PARTNER;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sdrv exchanges a message with PARTNER:  a non-blocking receive is
+ * posted, the send is performed, then the receive is waited for (a hand
+ * coded MPI_Sendrecv left open to tuning).  Messages of length less than
+ * or equal to zero are neither sent nor received.  HPL_SUCCESS is returned
+ * on successful completion, HPL_FAILURE otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number  of double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG  specifies the message tag to be used for the
+ *         sending communication operation.
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  specifies the message tag to be used for the
+ *         receiving communication operation.
+ *
+ * PARTNER (local input)                 int
+ *         On entry,  PARTNER  specifies  the rank of the  collaborative
+ *         process in the communication space defined by COMM.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type[2];
+#endif
+   MPI_Request                request;
+   MPI_Status                 status;
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT > 0 )
+   {
+      if( SCOUNT > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER,
+                                RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER,
+                               STAG, COMM );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[1] );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                                PARTNER, RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE,
+                               PARTNER, STAG, COMM );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#endif
+      }
+      else
+      {
+/*
+ * Blocking receive
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG,
+                               COMM, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+         ierr =      MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                               PARTNER, RTAG, COMM, &status );
+#endif
+      }
+   }
+   else if( SCOUNT > 0 )
+   {
+/*
+ * Blocking send
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+      ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG,
+                          COMM );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &type[1] );
+#else
+      ierr =      MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER,
+                            STAG, COMM );
+#endif
+   }
+   else { ierr = MPI_SUCCESS; }
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_sdrv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c
new file mode 100644
index 000000000..9e9868594
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_send
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              DEST,
+   int                              STAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_send
+( SBUF, SCOUNT, DEST, STAG, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              DEST;
+   int                              STAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_send is a simple wrapper around  MPI_Send.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * HPL_SUCCESS;  any error reported by MPI is returned  as  HPL_FAILURE.
+ * In the case of messages of length less than or equal to zero, no  MPI
+ * routine is called and this function returns HPL_SUCCESS immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number of  double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * DEST    (local input)                 int
+ *         On entry, DEST specifies the rank of the receiving process in
+ *         the communication space defined by COMM.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+   int                        ferr;
+#endif
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( SCOUNT <= 0 ) return( HPL_SUCCESS );
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * This branch is compiled out because  HPL_USE_MPI_DATATYPE  is  always
+ * undefined at the top of this file.  Once the data type  was  created,
+ * it is always released,  even when the commit or the send failed;  the
+ * first error encountered is the one that is reported.
+ */
+   ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type );
+   if( ierr == MPI_SUCCESS )
+   {
+      ierr =   MPI_Type_commit( &type );
+      if( ierr == MPI_SUCCESS )
+         ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM );
+      ferr =   MPI_Type_free( &type );
+      if( ierr == MPI_SUCCESS ) ierr = ferr;
+   }
+#else
+   ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); 
+/*
+ * End of HPL_send
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o
new file mode 100644
index 000000000..3efb2c0bc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o
new file mode 100644
index 000000000..ae90bde67
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o
new file mode 100644
index 000000000..0d3a84021
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o
new file mode 100644
index 000000000..6994ef5fb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o
new file mode 100644
index 000000000..41728e2ef
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o
new file mode 100644
index 000000000..e9127505b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o
new file mode 100644
index 000000000..da8414559
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o
new file mode 100644
index 000000000..52b677450
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o
new file mode 100644
index 000000000..2f5b30aa7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o
new file mode 100644
index 000000000..7db34d0b4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o
new file mode 100644
index 000000000..4561f82f0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o
new file mode 100644
index 000000000..1cd54ade4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o
new file mode 100644
index 000000000..8d188a0ec
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o
new file mode 100644
index 000000000..6f242b1ed
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Header files listed as prerequisites of every object rule in this file.
+# INCdir is not assigned in this file (presumably it comes from Make.inc).
+INCdep           = \
+   $(INCdir)/hpl_misc.h \
+   $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#
+# Object files that the lib.grd rule archives into $(HPLlib), listed one
+# per line in their original order.
+HPL_comobj       = \
+   HPL_1ring.o \
+   HPL_1rinM.o \
+   HPL_2ring.o \
+   HPL_2rinM.o \
+   HPL_blong.o \
+   HPL_blonM.o \
+   HPL_packL.o \
+   HPL_copyL.o \
+   HPL_binit.o \
+   HPL_bcast.o \
+   HPL_bwait.o \
+   HPL_send.o \
+   HPL_recv.o \
+   HPL_sdrv.o
+#
+## Targets #############################################################
+#
+# "all" and "lib" name actions rather than files: declare them phony so a
+# stray file called "all" or "lib" in this directory cannot mask them.
+.PHONY  : all lib
+#
+all     : lib
+#
+lib     : lib.grd
+#
+# lib.grd is a stamp file: it is touched after the objects listed in
+# HPL_comobj have been added to $(HPLlib) and the archive has been indexed.
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#
+# Static pattern rule: every object listed in HPL_comobj is compiled from
+# the C source of the same name one directory up, and is rebuilt whenever
+# that source or one of the INCdep headers changes.  $< expands to the
+# first prerequisite, i.e. the matching ../<name>.c file.
+$(HPL_comobj) : %.o : ../%.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+# 
+# "clean" names an action, not a file: keep it phony so it always runs,
+# even if a file called "clean" exists in this directory.
+.PHONY           : clean
+# Removes every *.o and *.grd file found in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile
new file mode 100644
index 000000000..93f18ebb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile
@@ -0,0 +1,118 @@
+#    /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    Modifications Copyright (C) 2023 Intel Corporation
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */ 
+
+# /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    HPL - 2.3 - December 2, 2018                          
+#  *    Antoine P. Petitet                                                
+#  *    University of Tennessee, Knoxville                                
+#  *    Innovative Computing Laboratory                                 
+#  *    (C) Copyright 2000-2008 All Rights Reserved                       
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */ 
+
+# Builds the libdgemm shared library (libdgemm.so.1.0.1 plus the
+# libdgemm.so.1.0, libdgemm.so.1 and libdgemm.so symlinks) from the
+# objects listed in OBJS.
+#
+# Overridable from the environment or the command line:
+#   MPICC      MPI compiler wrapper used to compile and link
+#              (default: mpicc)
+#   CUDA_HOME  CUDA toolkit root; headers are searched in its include/
+#              directory and libraries in its lib64/ directory
+#              (default: /usr/local/cuda)
+MPICC     ?= mpicc
+CUDA_HOME ?= /usr/local/cuda
+
+# "all" and "clean" name actions, not files.
+.PHONY: all clean
+
+all: libdgemm.so.1.0.1
+
+OBJS = cuda_dgemm.o
+
+# Do not delete the object files if make is interrupted while building them.
+.PRECIOUS: $(OBJS)
+
+# NOTE(review): fermi_dgemm.o is not listed in OBJS, so nothing in this
+# file builds it; the dependency line is kept as it was - confirm whether
+# it is still needed.
+fermi_dgemm.o : fermi_dgemm.c fermi_dgemm.h
+
+DEFINES = -DMPI
+#DEFINES += -DUSE_FERMI_DGEMM
+#DEFINES += -DVERBOSE_PRINT
+#DEFINES += -DACML
+#DEFINES += -DGOTO
+
+# Compile one C++ source into a position-independent object ($< is the
+# .cpp file, $@ the .o file).
+%.o: %.cpp
+	$(MPICC) -O0 -c -fPIC $(DEFINES) $< -o $@ -I$(CUDA_HOME)/include
+
+# Link the shared library, then refresh the chain of version symlinks.
+libdgemm.so.1.0.1: $(OBJS)
+	$(MPICC) -O3 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -L$(CUDA_HOME)/lib64 -lcudart -lcuda -lcublas
+	ln -sf $@ libdgemm.so.1.0
+	ln -sf libdgemm.so.1.0 libdgemm.so.1
+	ln -sf libdgemm.so.1 libdgemm.so
+
+# CUBINS is not assigned anywhere in this file; it expands to nothing
+# unless it is supplied from outside.
+clean:
+	rm -f $(OBJS) $(CUBINS) libdgemm.so.1.0.1 libdgemm.so.1.0 libdgemm.so.1 libdgemm.so
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp
new file mode 100644
index 000000000..50b2c4b90
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp
@@ -0,0 +1,305 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+#define NUMBER_OF_STREAMS 4  // not referenced anywhere else in this file
+#define CHUNK_SIZE 512  // not referenced anywhere else in this file
+#define NN 64  // dpcpp_dgemm falls back to the host BLAS when N < NN
+#define NM 128  // dpcpp_dgemm falls back to the host BLAS when M < NM
+#define ERRCODE(e) (-(__LINE__ * 1000 + (e)))  // negative code built from the expanding line and e; unused in this file
+//#define DEVICE_DEBUG   (define to print whether each call ran on the CPU or the GPU)
+//#ifdef MPI
+//#include <mpi.h>
+//#endif
+
+// _GNU_SOURCE is defined ahead of every #include in this file.
+#define _GNU_SOURCE
+// CUDA_ERROR_CHECK switches on the checks inside __cudaSafeCall / __cudaCheckError.
+#define CUDA_ERROR_CHECK
+#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )  // checks err, tagging the call site
+#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )  // checks the last CUDA error, tagging the call site
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <array>
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/times.h>
+#include <sys/time.h>
+
+#include <iostream>
+#include <chrono> 
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cublas.h>
+#include "mkl.h"
+
+
+extern "C" {
+
+/*
+ * Terminates the process with a diagnostic when a CUDA runtime call did not
+ * return cudaSuccess.  Does nothing unless CUDA_ERROR_CHECK is defined.
+ */
+inline void __cudaSafeCall( cudaError err, const char *file, const int line )
+{
+#ifdef CUDA_ERROR_CHECK
+    if ( err != cudaSuccess )
+    {
+        const char *reason = cudaGetErrorString( err );
+        fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, reason );
+        exit( -1 );
+    }
+#endif
+}
+
+/*
+ * Reports the last recorded CUDA error, then synchronizes the device and
+ * reports an error returned by that synchronization; either one ends the
+ * process through exit( -1 ).  Does nothing unless CUDA_ERROR_CHECK is defined.
+ */
+inline void __cudaCheckError( const char *file, const int line )
+{
+#ifdef CUDA_ERROR_CHECK
+    const cudaError pending = cudaGetLastError();
+    if ( pending != cudaSuccess )
+    {
+        fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n",
+                 file, line, cudaGetErrorString( pending ) );
+        exit( -1 );
+    }
+
+    // Stricter check; the device-wide synchronization costs performance.
+    const cudaError synced = cudaDeviceSynchronize();
+    if ( synced != cudaSuccess )
+    {
+        fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
+                 file, line, cudaGetErrorString( synced ) );
+        exit( -1 );
+    }
+#endif
+}
+
+/* DGEMM wrapper defined further down in this file; declared here for C linkage. */
+void dpcpp_dgemm(
+    const int ORDER, const int TRANSA, const int TRANSB,
+    const int M, const int N, const int K,
+    const double ALPHA, const double *A, const int LDA,
+    const double *B, const int LDB, const double BETA,
+    double *C, const int LDC );
+
+/* DTRSM wrapper defined further down in this file; declared here for C linkage. */
+void dpcpp_dtrsm(
+    int HPL_ORDER,
+    int HPL_SIDE,
+    int HPL_UPLO,
+    int HPL_TRANS,
+    int HPL_DIAG,
+    const int,
+    const int,
+    const double,
+    const double *,
+    const int,
+    double *,
+    const int );
+}
+
+
+/* C := ALPHA*A*B + BETA*C, column-major, no transpose (ORDER/TRANSA/TRANSB are ignored).
+ * Small problems go to the host BLAS, the rest to cuBLAS; C is updated in place.
+ * A is M x K (LDA), B is K x N (LDB), C is M x N (LDC).  A CUDA runtime failure ends the process via CudaSafeCall. */
+void dpcpp_dgemm 
+(   const int ORDER,   const int TRANSA,    const int TRANSB,       
+    const int M,       const int N,         const int K,       
+    const double ALPHA,const double *A,     const int LDA,
+    const double *B,   const int LDB,       
+    const double BETA, double *C,         const int LDC)
+{
+    if ((M==0)||(K==0)||(N==0)){
+        return;   // empty problem
+    }
+
+    // Below these sizes the product is computed by the host BLAS instead of the GPU.
+    if ( (N) < NN || (M) < NM || (K) < 128){
+        #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on CPU" << std::endl;
+        #endif
+        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,  M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+        std::cout << "dgemm-Running on GPU" << std::endl;
+    #endif
+
+    // Byte counts are formed in size_t: as an int product, N * LDB (or N * LDC)
+    // overflows once an operand spans more than INT_MAX elements.
+    const size_t bytesA = (size_t)K * (size_t)LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * (size_t)LDB * sizeof(double);
+    const size_t bytesC = (size_t)N * (size_t)LDC * sizeof(double);
+    double *devPtrA, *devPtrB, *devPtrC;
+
+    CudaSafeCall(cudaMalloc((void **)&devPtrA, bytesA));
+    CudaSafeCall(cudaMemcpy(devPtrA, A, bytesA, cudaMemcpyHostToDevice));
+    CudaSafeCall(cudaMalloc((void **)&devPtrB, bytesB));
+    CudaSafeCall(cudaMemcpy(devPtrB, B, bytesB, cudaMemcpyHostToDevice));
+    CudaSafeCall(cudaMalloc((void **)&devPtrC, bytesC));
+    CudaSafeCall(cudaMemcpy(devPtrC, C, bytesC, cudaMemcpyHostToDevice));
+
+    cudaDeviceSynchronize();
+    cublasDgemm('N', 'N', M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC);
+    cudaDeviceSynchronize();
+    CudaSafeCall(cudaMemcpy(C, devPtrC, bytesC, cudaMemcpyDeviceToHost));
+    cudaDeviceSynchronize();
+    cudaFree(devPtrA);
+    cudaFree(devPtrB);
+    cudaFree(devPtrC);
+}
+  
+/* Solves A*X = ALPHA*B for X and overwrites B with it, where A is taken as a
+ * lower-triangular, unit-diagonal, non-transposed matrix applied from the left.
+ * ORDER, SIDE, UPLO, TRANS and DIAG are ignored: both paths hard-code those
+ * options.  A is M x M (leading dimension LDA), B is M x N (LDB).
+ * A CUDA runtime failure ends the process via CudaSafeCall. */
+void dpcpp_dtrsm
+(  const int ORDER,           const int SIDE,
+   const int UPLO,            const int TRANS,
+   const int DIAG,            const int M,       const int N,
+   const double ALPHA,    const double* A,  const int LDA,       double* B,
+   const int LDB)
+{
+    if ((M==0)||(N==0)){
+        return;   // empty problem
+    }
+
+    // These shapes are solved by the host BLAS instead of the GPU.
+    if ( (M) < 512 || (N) < 2*(M)){
+        #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on CPU" << std::endl;
+        #endif
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+        std::cout << "dtrsm-Running on GPU" << std::endl;
+    #endif
+
+    // Byte counts are formed in size_t: as an int product, N * LDB overflows
+    // once B spans more than INT_MAX elements.
+    const size_t bytesA = (size_t)M * (size_t)LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * (size_t)LDB * sizeof(double);
+    double *devPtrA, *devPtrB;
+
+    CudaSafeCall(cudaMalloc((void **)&devPtrA, bytesA));
+    CudaSafeCall(cudaMemcpy(devPtrA, A, bytesA, cudaMemcpyHostToDevice));
+
+    CudaSafeCall(cudaMalloc((void **)&devPtrB, bytesB));
+    CudaSafeCall(cudaMemcpy(devPtrB, B, bytesB, cudaMemcpyHostToDevice));
+    cudaDeviceSynchronize();
+
+    cublasDtrsm('L','L','N','U',M,N,ALPHA,devPtrA,LDA,devPtrB,LDB);
+
+    cudaDeviceSynchronize();
+    CudaSafeCall(cudaMemcpy(B, devPtrB, bytesB, cudaMemcpyDeviceToHost));
+
+    cudaDeviceSynchronize();
+    cudaFree(devPtrA);
+    cudaFree(devPtrB);
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h
new file mode 100644
index 000000000..aa3008f94
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h
@@ -0,0 +1,148 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+#define NUMBER_OF_STREAMS 2
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+#include <array>
+
+class DeviceManager;
+// One lazily created manager per MPI id; 'static' gives every including TU its own table.
+static DeviceManager *instance[2];
+
+// Holds the default SYCL device and NUMBER_OF_STREAMS queues on it; built only via getInstance().
+class DeviceManager{
+    cl::sycl::device *m_pDevice;
+    cl::sycl::queue queues[NUMBER_OF_STREAMS];
+
+    DeviceManager() : m_pDevice(nullptr) {
+        try{
+            m_pDevice = new cl::sycl::device(cl::sycl::default_selector());
+        }catch(...){
+            std::cout << "ERROR: failed to create sycl device.\n";
+            throw;   // do not continue and dereference an unset m_pDevice
+        }
+        // Prints every asynchronous SYCL exception handed to the queues' handler.
+        auto exception_handler = [] (cl::sycl::exception_list exceptions) {
+            for (std::exception_ptr const& e : exceptions) {
+                try {
+                    std::rethrow_exception(e);
+                } catch(cl::sycl::exception const& e) {
+                    std::cout << "Caught asynchronous SYCL exception during GEMM:\n"
+                        << e.what() << std::endl;
+                }
+            }
+        };
+        for (int i = 0; i < NUMBER_OF_STREAMS; ++i) {
+            queues[i] = cl::sycl::queue(*m_pDevice, exception_handler);
+        }
+    }
+    public:
+    // Returns the manager for mpi_id, creating it on first use; prints an error
+    // and returns nullptr when mpi_id does not index the instance table.
+    static DeviceManager* getInstance(int mpi_id){
+        const int slots = (int)(sizeof(instance) / sizeof(instance[0]));
+        if(mpi_id < 0 || mpi_id >= slots){
+            std::cout << "ERROR: no device slot for " << mpi_id << "\n";
+            return nullptr;
+        }
+        if(!instance[mpi_id]){
+            std::cout << "Creating device for " << mpi_id << "\n";
+            instance[mpi_id] = new DeviceManager();
+        }
+        return instance[mpi_id];
+    }
+    cl::sycl::device &getDevice(){ return *m_pDevice;}
+    cl::sycl::queue *getQueues(){ return queues;}
+    static void display_device_properties(cl::sycl::device const &dev);
+};
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o
new file mode 100644
index 000000000..52546727c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so
new file mode 120000
index 000000000..505c044bb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so
@@ -0,0 +1 @@
+libdgemm.so.1
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1
new file mode 120000
index 000000000..ab21c8005
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1
@@ -0,0 +1 @@
+libdgemm.so.1.0
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0
new file mode 120000
index 000000000..d08629732
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0
@@ -0,0 +1 @@
+libdgemm.so.1.0.1
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0.1 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0.1
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce combines the  BUFFER  contents of every process in the
+ * group and leaves the combined result on all of them.  The combination
+ * is done by  HPL_reduce  with root 0,  and  HPL_broadcast  with root 0
+ * then distributes the result to the whole group.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry, the local data to be combined.  On exit, the
+ *         combined data, identical on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         Number of entries in BUFFER; must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         Type of the operands held in BUFFER.
+ *
+ * OP      (global input)                const HPL_T_OP
+ *         The local combine function applied by the reduction.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         MPI communicator identifying the process collection.
+ *
+ * Returns
+ * =======
+ *
+ * The HPL_reduce error code when it is not MPI_SUCCESS, otherwise the
+ * value returned by HPL_broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+   const int                  status = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM );
+
+   return( ( status != MPI_SUCCESS ) ? status :
+           HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) );
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_barrier is the synchronization point of the process group: it is
+ * intended to return only once every group member has called it.  It
+ * is implemented as an HPL_broadcast of one integer with root 0.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * Returns
+ * =======
+ *
+ * The value returned by HPL_broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+   int                        token = 0;   /* broadcast payload */
+
+   return HPL_broadcast( (void *) &token, 1, HPL_INT, 0, COMM );
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_broadcast
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_broadcast
+( BUFFER, COUNT, DTYPE, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_broadcast sends BUFFER from the process with rank ROOT to all other
+ * processes of COMM, using point-to-point MPI_Send / MPI_Recv calls.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be broadcast. On
+ *         exit, this array contains the broadcast data and is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         If COUNT <= 0, the call returns MPI_SUCCESS immediately.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank in COMM of the source process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, 
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+   MPI_Status                 status;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+/* ip2 = largest power of two <= size-1,  mask = 2*ip2-1 (bits up to ip2). */
+   kk = size - 1;
+   while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   mydist = MModSub( rank, ROOT, size ); /* presumably (rank-ROOT) mod size */
+/* One step per bit of ip2, high to low; mask then holds the bits below ip2. */
+   do
+   {
+      mask ^= ip2;
+      if( ( mydist & mask ) == 0 ) /* only processes with no lower bit set */
+      {
+         partner = mydist ^ ip2;
+/* Bit ip2 set: receive from mydist-ip2; else send to mydist+ip2 if valid. */
+         if( mydist & ip2 )
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Recv(  BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                 partner, tag, COMM, &status );
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                partner, tag, COMM );
+         }
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      }
+      ip2 >>= 1;
+   } while( ip2 );
+/* hplerr is the last non-MPI_SUCCESS value observed in mpierr, if any. */
+   return( hplerr );
+/*
+ * End of HPL_broadcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c
new file mode 100644
index 000000000..f0d00b065
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_exit
+(
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_exit
+( GRID )
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_exit  frees  the row, column and all communicators  of GRID
+ * (unless all_comm is MPI_COMM_NULL) and resets the grid fields.  The
+ * last non-MPI_SUCCESS code from MPI_Comm_free, if any, is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid to be released.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr = MPI_SUCCESS, mpierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->all_comm != MPI_COMM_NULL )
+   {
+      mpierr = MPI_Comm_free( &(GRID->row_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      mpierr = MPI_Comm_free( &(GRID->col_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      mpierr = MPI_Comm_free( &(GRID->all_comm) );
+      if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+   }
+/* The resets below run whether or not the communicators were freed. */
+   GRID->order   = HPL_COLUMN_MAJOR;
+
+   GRID->iam     = GRID->myrow    = GRID->mycol     = -1;
+   GRID->nprow   = GRID->npcol    = GRID->nprocs    = -1;
+
+   GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1;
+   GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1;
+
+   return( hplerr );
+/*
+ * End of HPL_grid_exit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c
new file mode 100644
index 000000000..95c5a7315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_info
+(
+   const HPL_T_grid *               GRID,
+   int *                            NPROW,
+   int *                            NPCOL,
+   int *                            MYROW,
+   int *                            MYCOL
+)
+#else
+int HPL_grid_info
+( GRID, NPROW, NPCOL, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   int *                            NPROW;
+   int *                            NPCOL;
+   int *                            MYROW;
+   int *                            MYCOL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_info copies the grid shape  (nprow, npcol)  and the calling
+ * process coordinates (myrow, mycol) stored in GRID into the output
+ * arguments.  No MPI call is made;  MPI_SUCCESS is always returned and
+ * the pointers are dereferenced without any NULL check.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NPROW   (global output)               int *
+ *         On exit,   NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global output)               int *
+ *         On exit,   NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * MYROW   (global output)               int *
+ *         On exit,  MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (global output)               int *
+ *         On exit,  MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Executable Statements ..
+ */
+   *NPROW = GRID->nprow; *NPCOL = GRID->npcol;
+   *MYROW = GRID->myrow; *MYCOL = GRID->mycol;
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_grid_info
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c
new file mode 100644
index 000000000..52111ac52
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_init
+(
+   MPI_Comm                         COMM,
+   const HPL_T_ORDER                ORDER,
+   const int                        NPROW,
+   const int                        NPCOL,
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_init
+( COMM, ORDER, NPROW, NPCOL, GRID )
+   MPI_Comm                         COMM;
+   const HPL_T_ORDER                ORDER;
+   const int                        NPROW;
+   const int                        NPCOL;
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_init creates a NPROW x NPCOL  process  grid using column- or
+ * row-major ordering from an initial collection of processes identified
+ * by an  MPI  communicator.  Successful  completion is indicated by the
+ * returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+ * implementation. The coordinates of processes that are not part of the
+ * grid are set to values outside of [0..NPROW) x [0..NPCOL).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         On entry,  COMM  is  the  MPI  communicator  identifying  the
+ *         initial  collection  of  processes out of which  the  grid is
+ *         formed.
+ *
+ * ORDER   (global input)                const HPL_T_ORDER
+ *         On entry, ORDER specifies how the processes should be ordered
+ *         in the grid as follows:
+ *            ORDER = HPL_ROW_MAJOR    row-major    ordering;
+ *            any other value          column-major ordering;
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid to be created. NPROW must be at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid to be created. NPCOL must be at least one.
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information to be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hdim, hplerr=MPI_SUCCESS, ierr, ip2, k,
+                              mask, mycol, myrow, nprocs, rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size );
+/*
+ * Abort if illegal process grid: more grid slots than processes or a dim < 1
+ */
+   nprocs = NPROW * NPCOL;
+   if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) )
+   { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); }
+/*
+ * Row- or column-major ordering of the processes
+ */
+   if( ORDER == HPL_ROW_MAJOR )
+   {
+      GRID->order = HPL_ROW_MAJOR;
+      myrow = rank / NPCOL; mycol = rank - myrow * NPCOL;
+   }
+   else
+   {
+      GRID->order = HPL_COLUMN_MAJOR;
+      mycol = rank / NPROW; myrow = rank - mycol * NPROW;
+   }
+   GRID->iam   = rank;  GRID->myrow = myrow; GRID->mycol  = mycol;
+   GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs;
+/*
+ * row_ip2   : largest power of two <= nprow;
+ * row_hdim  : row_ip2 procs hypercube dim, i.e. log2( row_ip2 );
+ * row_ip2m1 : largest power of two <= nprow-1 (1 when nprow is 1);
+ * row_mask  : 2 * row_ip2m1 - 1, i.e. all bits up to row_ip2m1 set;
+ */
+   hdim = 0; ip2 = 1; k = NPROW;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->row_ip2 = ip2; GRID->row_hdim = hdim; 
+/* Same halving loop on nprow-1; mask gains one low bit per doubling of ip2. */
+   mask = ip2 = 1;    k = NPROW - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->row_ip2m1 = ip2; GRID->row_mask = mask; 
+/*
+ * col_ip2   : largest power of two <= npcol;
+ * col_hdim  : col_ip2 procs hypercube dim, i.e. log2( col_ip2 );
+ * col_ip2m1 : largest power of two <= npcol-1 (1 when npcol is 1);
+ * col_mask  : 2 * col_ip2m1 - 1, i.e. all bits up to col_ip2m1 set;
+ */
+   hdim = 0; ip2 = 1; k = NPCOL;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->col_ip2 = ip2; GRID->col_hdim = hdim; 
+/* Same halving loop on npcol-1; mask gains one low bit per doubling of ip2. */
+   mask = ip2 = 1;    k = NPCOL - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->col_ip2m1 = ip2; GRID->col_mask = mask; 
+/*
+ * All communicator: ranks >= nprocs pass MPI_UNDEFINED and leave early,
+ * the others then create the row- and column communicators.
+ */
+   ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ),
+                          rank, &(GRID->all_comm) );
+   if( GRID->all_comm == MPI_COMM_NULL ) return( ierr );
+/* Row communicator: split color is myrow, ordering key is mycol. */
+   ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+/* Column communicator: split color is mycol, ordering key is myrow. */
+   ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+/* Only the row/column splits feed hplerr; the first split's ierr does not. */
+   return( hplerr );
+/*
+ * End of HPL_grid_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c
new file mode 100644
index 000000000..002aabe01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_max
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_max
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_max combines (max) two buffers element-wise:
+ * INOUT[i] = Mmax( IN[i], INOUT[i] ) for i = 0 .. N-1.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies  the  length  of  the  buffers  to  be
+ *         combined. N must be at least zero.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  input-output  buffer  to be
+ *         combined.  On exit,  the  entries of this array contain   the
+ *         combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * a = (const int *)(IN);
+      int             * b = (int *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+   else /* every DTYPE other than HPL_INT is handled as double */
+   {
+      const double    * a = (const double *)(IN);
+      double          * b = (double *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+/*
+ * End of HPL_max
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c
new file mode 100644
index 000000000..a99e5e58a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_min
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_min
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_min replaces every entry of INOUT by the smaller of that entry
+ * and the entry of IN found at the same index (element-wise minimum).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N is the number of entries processed in each of
+ *         the two buffers. Nothing is done when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer that is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer.  On
+ *         exit, its first N entries hold the element-wise minima.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects the entry type of both buffers:
+ *         HPL_INT means int, any other value is treated as double.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register int               k;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      for( k = 0; k < N; k++ ) { dst[k] = Mmin( src[k], dst[k] ); }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      for( k = 0; k < N; k++ ) { dst[k] = Mmin( src[k], dst[k] ); }
+   }
+/*
+ * End of HPL_min
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c
new file mode 100644
index 000000000..c80885b9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pnum
+(
+   const HPL_T_grid *               GRID,
+   const int                        MYROW,
+   const int                        MYCOL
+)
+#else
+int HPL_pnum
+( GRID, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   const int                        MYROW;
+   const int                        MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pnum  maps the  grid coordinates  (MYROW, MYCOL)  of a process to
+ * its rank, following the ordering recorded in GRID.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points to the process grid data;  its order
+ *         field and, depending on it, nprow or npcol are read.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry,  MYROW  is the row coordinate of the process whose
+ *         rank is wanted.  It is expected to be at least zero and less
+ *         than NPROW; this is not verified here.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry,  MYCOL  is the column coordinate  of  the  process
+ *         whose rank is wanted.  It is expected to be at least zero and
+ *         less than NPCOL; this is not verified here.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Executable Statements ..
+ */
+   if( GRID->order != HPL_ROW_MAJOR )
+      return( MYROW + MYCOL * GRID->nprow );
+
+   return( MYCOL + MYROW * GRID->npcol );
+/*
+ * End of HPL_pnum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c
new file mode 100644
index 000000000..417c21163
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_reduce
+( BUFFER, COUNT, DTYPE, OP, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_reduce combines, using OP, the BUFFER contents of the processes of
+ * COMM  into  BUFFER  of the process  ROOT.  In the other processes, the
+ * original content of BUFFER may be overwritten with partial results.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+ *         exit, in the process of rank ROOT, it holds the reduced data.
+ *         In the other processes it is the target of  OP  for the data
+ *         received from partners, and the source of the data sent on.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         When COUNT is not positive, nothing is done.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         On entry, OP is the combine function: OP(COUNT,recv,BUFFER,DTYPE).
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank, in COMM, of the accumulating process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables ..
+ * hplerr: return value, d: rounds left, ip2: power of two of the round,
+ * mask: lower bits of mydist that must be zero to take part in a round */
+   MPI_Status                 status;
+   void                       * buffer = NULL;
+   int                        hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0,
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+/* .. Executable Statements ..
+ * Returns MPI_SUCCESS at once if COUNT <= 0 or COMM has a single process,
+ * otherwise hplerr: the last checked mpierr that was not MPI_SUCCESS. */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( size  == 1 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   i = size - 1; while( i > 1 ) { i >>= 1; d++; }
+
+   if( DTYPE == HPL_INT )
+      buffer = (void *)( (int *)   malloc( (size_t)(COUNT) * 
+                                           sizeof( int    ) ) );
+   else
+      buffer = (void *)( (double *)malloc( (size_t)(COUNT) *
+                                           sizeof( double ) ) );
+
+   if( !( buffer ) )
+   { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); }
+
+   if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 )
+   {
+      do
+      {
+         mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                            MModAdd( ROOT, ip2, size ), tag, COMM,
+                            &status );
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         OP( COUNT, buffer, BUFFER, DTYPE );
+         ip2 <<= 1; d--;
+      } while( d );
+   }
+   else
+   {
+      do
+      {
+         if( ( mydist & mask ) == 0 )
+         {
+            partner = mydist ^ ip2;
+
+            if( mydist & ip2 )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                  partner, tag, COMM );
+            }
+            else if( partner < size )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr  = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                   partner, tag, COMM, &status );
+               OP( COUNT, buffer, BUFFER, DTYPE );
+            }
+            if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         }
+         mask ^= ip2; ip2 <<= 1; d--;
+      } while( d );
+   }
+   if( buffer ) free( buffer );
+
+   return( hplerr );
+/*
+ * End of HPL_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c
new file mode 100644
index 000000000..34cf87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_sum
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_sum
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_sum adds every entry of IN to the entry of INOUT found at the
+ * same index and stores the result in INOUT (element-wise sum).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N is the number of entries processed in each of
+ *         the two buffers. Nothing is done when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer that is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer.  On
+ *         exit, its first N entries hold the element-wise sums.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects the entry type of both buffers:
+ *         HPL_INT means int, any other value is treated as double.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register int               k;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      for( k = 0; k < N; k++ ) { dst[k] = dst[k] + src[k]; }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      for( k = 0; k < N; k++ ) { dst[k] = dst[k] + src[k]; }
+   }
+/*
+ * End of HPL_sum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o
new file mode 100644
index 000000000..ac0f38d00
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o
new file mode 100644
index 000000000..b842da4f7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o
new file mode 100644
index 000000000..8a9fc0a30
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o
new file mode 100644
index 000000000..9be5641c9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o
new file mode 100644
index 000000000..2d6495818
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o
new file mode 100644
index 000000000..596e96b7c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o
new file mode 100644
index 000000000..0e92eb194
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o
new file mode 100644
index 000000000..8c64b221a
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o
new file mode 100644
index 000000000..8da27eae3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o
new file mode 100644
index 000000000..a758f26e7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o
new file mode 100644
index 000000000..e4fafa0e1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to define INCdir, HPLlib and the tool variables.
+# ######################################################################
+# Headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h
+#
+## Object files ########################################################
+# Members that the lib.grd rule archives into $(HPLlib).
+HPL_griobj       = \
+   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
+   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
+   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
+   HPL_max.o              HPL_sum.o
+#
+## Targets #############################################################
+# all, lib and clean are commands, not files: keep them always runnable.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp: it is touched once the archive has been updated.
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; sources live in the parent directory.
+HPL_grid_init.o        : ../HPL_grid_init.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_init.c
+HPL_pnum.o             : ../HPL_pnum.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pnum.c
+HPL_grid_info.o        : ../HPL_grid_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_info.c
+HPL_grid_exit.o        : ../HPL_grid_exit.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_exit.c
+HPL_broadcast.o        : ../HPL_broadcast.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_broadcast.c
+HPL_reduce.o           : ../HPL_reduce.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_reduce.c
+HPL_all_reduce.o       : ../HPL_all_reduce.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_all_reduce.c
+HPL_barrier.o          : ../HPL_barrier.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_barrier.c
+HPL_min.o              : ../HPL_min.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_min.c
+HPL_max.o              : ../HPL_max.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_max.c
+HPL_sum.o              : ../HPL_sum.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sum.c
+#
+# ######################################################################
+# Remove the objects and the lib.grd stamp; $(HPLlib) is not touched.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c
new file mode 100644
index 000000000..757dad242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_disp
+(
+   HPL_T_panel * *                  PANEL
+)
+#else
+int HPL_pdpanel_disp
+( PANEL )
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_disp releases the resources of the panel *PANEL by calling
+ * HPL_pdpanel_free,  frees the panel structure itself and  resets *PANEL
+ * to NULL. If *PANEL is already NULL nothing is released and MPI_SUCCESS
+ * is returned;  otherwise the value of HPL_pdpanel_free is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry,  PANEL  points  to  the  address  of the panel data
+ *         structure to be deallocated. On exit, *PANEL is NULL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+   int                        mpierr = MPI_SUCCESS;
+/*
+ * HPL_pdpanel_free dereferences its argument, so it must only be called
+ * on a non-NULL panel: test *PANEL before releasing anything.
+ */
+   if( *PANEL )
+   {
+      mpierr = HPL_pdpanel_free( *PANEL );
+      free( *PANEL );
+   }
+   *PANEL = NULL;
+
+   return( mpierr );
+/*
+ * End of HPL_pdpanel_disp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c
new file mode 100644
index 000000000..38b5b0d97
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c
@@ -0,0 +1,104 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_free
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_pdpanel_free
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_free stores the error code of the panel factorization, read
+ * from PANEL->DINFO,  in the matrix info field if that field is still 0,
+ * frees WORK and IWORK and resets the workspace pointers to NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points  to  the  panel data  structure from
+ *         which the resources should be deallocated.  Must not be NULL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+   if( PANEL->DINFO && ( PANEL->pmat->info == 0 ) )
+      PANEL->pmat->info = *(PANEL->DINFO);
+#ifdef HPL_CALL_VSIPL
+/* Release, then destroy, the L1, L2 and (if nprow > 1) U blocks */
+   (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE );
+   (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE );
+   if( PANEL->grid->nprow > 1 )
+      (void) vsip_blockrelease_d( PANEL->Ublock,  VSIP_TRUE );
+   vsip_blockdestroy_d( PANEL->L1block );
+   vsip_blockdestroy_d( PANEL->L2block );
+   if( PANEL->grid->nprow > 1 )
+      vsip_blockdestroy_d( PANEL->Ublock );
+#endif
+/*
+ * Free the buffers, then clear every workspace pointer so that none is
+ * left dangling and a repeated call cannot free the same memory twice.
+ */
+   if( PANEL->WORK  ) free( PANEL->WORK  );
+   if( PANEL->IWORK ) free( PANEL->IWORK );
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_pdpanel_free
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c
new file mode 100644
index 000000000..9e35c7fb4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c
@@ -0,0 +1,348 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L    /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_init
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdpanel_init
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_init initializes a panel data structure:  it records the
+ * grid, algorithm and matrix handles and allocates WORK and IWORK.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry,  JB  specifies the number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   size_t                     dalign;
+   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
+                              ml2, mp, mycol, myrow, nb, npcol, nprow,
+                              nq, nu;
+/* ..
+ * .. Executable Statements ..
+ */
+   PANEL->grid    = GRID;                  /* ptr to the process grid */
+   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
+   PANEL->pmat    = A;                 /* ptr to the local array info */
+
+   myrow = GRID->myrow; mycol = GRID->mycol;
+   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
+
+   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
+                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
+   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
+   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
+                                         /* ptr to trailing part of A */
+   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
+/*
+ * Workspace pointers are initialized to NULL.
+ */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+/*
+ * Local lengths, indexes process coordinates
+ */
+   PANEL->nb      = nb;               /* distribution blocking factor */
+   PANEL->jb      = JB;                                /* panel width */
+   PANEL->m       = M;      /* global # of rows of trailing part of A */
+   PANEL->n       = N;      /* global # of cols of trailing part of A */
+   PANEL->ia      = IA;     /* global row index of trailing part of A */
+   PANEL->ja      = JA;     /* global col index of trailing part of A */
+   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
+   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
+   PANEL->ii      = ii;      /* local row index of trailing part of A */
+   PANEL->jj      = jj;      /* local col index of trailing part of A */
+   PANEL->lda     = A->ld;            /* local leading dim of array A */
+   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
+   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
+   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
+/*
+ * Initialize  ldl2 and len to temporary dummy values;  both are set to
+ * their final values once the workspace layout is known below.
+ */
+   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
+   PANEL->len     = 0;           /* length of the buffer to broadcast */
+/*
+ * Figure out the exact amount of workspace  needed by the factorization
+ * and the update - Allocate that space - Finish the panel data structu-
+ * re initialization.
+ *
+ * L1:    JB x JB in all processes
+ * DPIV:  JB      in all processes
+ * DINFO: 1       in all processes
+ *
+ * We make sure that those three arrays are contiguous in memory for the
+ * later panel broadcast.  We  also  choose  to put this amount of space 
+ * right  after L2 (when it exists) so that one can receive a contiguous
+ * buffer.
+ */
+   dalign = ALGO->align * sizeof( double );
+
+   if( npcol == 1 )                             /* P x 1 process grid */
+   {                                     /* space for L1, DPIV, DINFO */
+      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
+      if( nprow > 1 )                                 /* space for U */
+      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * 
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure  -  Always re-use A in
+ * the only process column
+ */
+      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
+      PANEL->ldl2  = A->ld;
+      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->DPIV  = PANEL->L1    + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
+   }
+   else
+   {                                        /* space for L2, L1, DPIV */
+      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
+      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
+#ifdef HPL_COPY_L
+      lwork = ALGO->align + PANEL->len;
+#else
+      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
+#endif
+      if( nprow > 1 )                                 /* space for U */
+      { 
+         nu = ( mycol == icurcol ? nq - JB : nq );
+         lwork += JB * Mmax( 0, nu );
+      }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure - Re-use A in the cur-
+ * rent process column when HPL_COPY_L is not defined.
+ */
+#ifdef HPL_COPY_L
+      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->ldl2  = Mmax( 1, ml2 );
+      PANEL->L1    = PANEL->L2 + ml2 * JB;
+#else
+      if( mycol == icurcol )
+      {
+         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
+         PANEL->ldl2 = A->ld;
+         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
+      }
+      else
+      {
+         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
+         PANEL->ldl2 = Mmax( 1, ml2 );
+         PANEL->L1   = PANEL->L2 + ml2 * JB;
+      } 
+#endif
+      PANEL->DPIV  = PANEL->L1   + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
+   }
+#ifdef HPL_CALL_VSIPL
+   PANEL->Ablock  = A->block;
+/*
+ * Create blocks and bind them to the data pointers
+ */
+   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
+                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
+   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
+                                      (vsip_length)(PANEL->ldl2*JB),
+                                      VSIP_MEM_NONE );
+   if( nprow > 1 )
+   { 
+      nu = ( mycol == icurcol ? nq - JB : nq );
+      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
+                                        (vsip_length)(JB * Mmax( 0, nu )),
+                                        VSIP_MEM_NONE );
+   }
+   else { PANEL->Ublock = A->block; }
+#endif
+/*
+ * If nprow is 1, we just allocate an array of JB integers for the swap.
+ * When nprow > 1, we allocate the space for the index arrays immediate-
+ * ly. The exact size of this array depends on the swapping routine that
+ * will be used, so we allocate the maximum:
+ *
+ *    IWORK[0] is of size at most 1      +
+ *    IPL      is of size at most 1      +
+ *    IPID     is of size at most 4 * JB +
+ *
+ *    For HPL_pdlaswp00:
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       llen     is of size at most NPROW  +
+ *       llen_sv  is of size at most NPROW.
+ *
+ *    For HPL_pdlaswp01:
+ *       ipA      is of size at most 1      +
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       iplen    is of size at most NPROW  + 1 +
+ *       ipmap    is of size at most NPROW  +
+ *       ipmapm1  is of size at most NPROW  +
+ *       permU    is of size at most JB     +
+ *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
+ *
+ * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
+ *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ *
+ * We use the first entry of this work array  to indicate  whether  the
+ * local  index arrays  have already been computed,  and if yes,  by
+ * which function:
+ *    IWORK[0] = -1: no index arrays have been computed so far;
+ *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
+ *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
+ * This avoids some redundant and useless computations.
+ */
+   if( nprow == 1 ) { lwork = JB; }
+   else             
+   {
+      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
+      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
+   }
+
+   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+
+   if( PANEL->IWORK == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
+                       /* Initialize the first entry of the workarray */
+   *(PANEL->IWORK) = -1;
+/*
+ * End of HPL_pdpanel_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c
new file mode 100644
index 000000000..1dbd8a18f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c
@@ -0,0 +1,152 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_new
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel * *                  PANEL
+)
+#else
+void HPL_pdpanel_new
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_new allocates a panel data structure with malloc, fills
+ * it in by calling HPL_pdpanel_init and returns its address via PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry, GRID points to the process grid information.  It
+ *         is passed unchanged to HPL_pdpanel_init.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry, ALGO points to the algorithmic parameters.  It is
+ *         passed unchanged to HPL_pdpanel_init.
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N is the global number of columns of the panel and
+ *         trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry, JB is the number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the local array information.  It is
+ *         passed unchanged to HPL_pdpanel_init.
+ *
+ * IA      (global input)                const int
+ *         On entry, IA is the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local output)                HPL_T_panel * *
+ *         On entry, PANEL points to the location that receives the
+ *         address of the newly allocated, initialized panel structure.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * panel;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Get storage for the panel structure - abort when none is available
+ */
+   panel = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) );
+   if( panel == NULL )
+      HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" );
+
+/* Initialize the new panel, then hand its address back to the caller */
+   HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, panel );
+   *PANEL = panel;
+/*
+ * End of HPL_pdpanel_new
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o
new file mode 100644
index 000000000..22d8bd1b5
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o
new file mode 100644
index 000000000..ea345e7fc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o
new file mode 100644
index 000000000..2eca8470c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o
new file mode 100644
index 000000000..41f746d1f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object built here.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h  $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule adds to $(HPLlib).
+HPL_panobj       = \
+   HPL_pdpanel_new.o      HPL_pdpanel_init.o     HPL_pdpanel_disp.o     \
+   HPL_pdpanel_free.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files;  lib.grd is a stamp file.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# Archive the objects, run $(RANLIB) on the archive, refresh the stamp.
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the source of the same name in ..
+HPL_pdpanel_new.o      : ../HPL_pdpanel_new.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_new.c
+HPL_pdpanel_init.o     : ../HPL_pdpanel_init.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_init.c
+HPL_pdpanel_disp.o     : ../HPL_pdpanel_disp.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_disp.c
+HPL_pdpanel_free.o     : ../HPL_pdpanel_free.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_free.c
+#
+# ######################################################################
+# Delete the *.o and *.grd files of this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
new file mode 100644
index 000000000..7ad5a1a99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP00N_DEPTH
+#define    HPL_LASWP00N_DEPTH       32
+#define    HPL_LASWP00N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp00N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp00N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp00N applies M local row interchanges, in place, to the
+ * array A: for k = 0, 1, ..., M-1 in turn, rows k and IPIV[k] of A are
+ * exchanged over all N columns.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         Number of interchanges to apply, i.e. of entries of IPIV
+ *         that are read. The routine returns at once if M <= 0.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of A that take part in each interchange.
+ *         The routine returns at once if N <= 0.
+ *
+ * A       (local input/output)          double *
+ *         Array of dimension (LDA,N).  On entry, the matrix to be
+ *         permuted; on exit, the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of A: entry (i,j) is read and written at
+ *         A[i + j*LDA].
+ *
+ * IPIV    (local input)                 const int *
+ *         Array of size M holding the pivoting information.  For k
+ *         in [0..M), IPIV[k] = l means that local rows k and l are
+ *         exchanged; nothing is moved when IPIV[k] = k.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const int                  stride = (int)( (unsigned int)(LDA) <<
+                                              HPL_LASWP00N_LOG2_DEPTH );
+   double                     * rowi, * rowp;
+   register double            tmp;
+   register int               irow, jcol;
+   int                        nfull, ntail, piv;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M < 1 ) || ( N < 1 ) ) return;
+
+   nfull = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
+                  << HPL_LASWP00N_LOG2_DEPTH );
+   ntail = N - nfull;
+/* Whole blocks of columns first; the #if ladder unrolls the column loop */
+   for( jcol = 0; jcol < nfull; jcol += HPL_LASWP00N_DEPTH, A += stride )
+   {
+      for( irow = 0; irow < M; irow++ )
+      {
+         piv = IPIV[irow];
+         if( piv == irow ) continue;
+         rowi = A + irow; rowp = A + piv;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#if ( HPL_LASWP00N_DEPTH >  1 )
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  2 )
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  4 )
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  8 )
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH > 16 )
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+         tmp = *rowi; *rowi = *rowp; *rowp = tmp; rowi += LDA; rowp += LDA;
+#endif
+      }
+   }
+
+   if( ntail < 1 ) return;
+/*
+ * Columns left over after the last whole block: same interchanges,
+ * one column at a time.
+ */
+   for( irow = 0; irow < M; irow++ )
+   {
+      piv = IPIV[irow];
+      if( piv == irow ) continue;
+      rowi = A + irow; rowp = A + piv;
+      for( jcol = 0; jcol < ntail; jcol++, rowi += LDA, rowp += LDA )
+      { tmp = *rowi; *rowi = *rowp; *rowp = tmp; }
+   }
+/*
+ * End of HPL_dlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
new file mode 100644
index 000000000..786d1eff4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
@@ -0,0 +1,209 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP01N_DEPTH
+#define    HPL_LASWP01N_DEPTH      32
+#define    HPL_LASWP01N_LOG2_DEPTH  5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01N copies  scattered rows  of  A  into itself  and into an
+ * array  U.  The row offsets in  A  of the source rows are specified by
+ * LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+ * non-negative value of  LINDXAU  indicates that the destination is  U,
+ * and A otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions indicated by non-negative values of LINDXAU.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A negative
+ *         value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+ *         should be moved  at the position  -LINDXAU[i]  within A;  any
+ *         other value means that this row  should be copied into  U  at
+ *         the position LINDXAU[i].
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP01N_LOG2_DEPTH );
+   int                        lda1, nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) <<
+                            HPL_LASWP01N_LOG2_DEPTH ) );
+/* Whole blocks of columns; negative LINDXAU[i] is subtracted as an int */
+   for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - LINDXAU[i];           lda1 = LDA; }
+
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#if ( HPL_LASWP01N_DEPTH >  1 )
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  2 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  4 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  8 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH > 16 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+      }
+   }
+/* Columns left over after the last whole block, one at a time */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - LINDXAU[i];           lda1 = LDA; }
+         for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
new file mode 100644
index 000000000..429cfb6f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
@@ -0,0 +1,252 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor; overrides must set both macros with DEPTH == 1 << LOG2_DEPTH
+ */
+#ifndef HPL_LASWP01T_DEPTH
+#define    HPL_LASWP01T_DEPTH       32
+#define    HPL_LASWP01T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01T copies  scattered rows  of  A  into itself  and into an
+ * array U.  The row offsets in  A  of the source rows  are specified by
+ * LINDXA.  The  destination of those rows  is specified by  LINDXAU.  A
+ * non-negative value of LINDXAU[i] selects U as the destination,  and A
+ * otherwise. Rows of A are stored as columns in U.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions indicated by non-negative values of LINDXAU. The
+ *         rows of A are stored as columns in U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied to. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  LINDXAU[i]
+ *         >= 0  means that the row  LINDXA[i]  of A is copied into U at
+ *         the position LINDXAU[i]  (column LINDXAU[i] of U);  otherwise
+ *         the row  LINDXA[i]  of  A  is moved  to  the  row  offset
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (incA, incU: advance of A and U per unrolled chunk)
+ */
+   double                     * a0, * a1;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH );
+   int                        nu, nr; /* nu: columns done unrolled, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return; /* quick return, nothing to copy */
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) <<
+                            HPL_LASWP01T_LOG2_DEPTH ) );
+   /* Full chunks of HPL_LASWP01T_DEPTH columns of A, unrolled by hand */
+   for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); /* source row in current chunk */
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); /* column of U */
+
+            a1[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            a1[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA;
+            a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA;
+            a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA;
+            a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA;
+            a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA;
+            a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA;
+            a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA;
+            a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA;
+            a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA;
+            a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA;
+            a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA;
+            a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA;
+#endif
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]); /* NOTE(review): index < 0 here, relies on size_t wraparound */
+
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+         }
+      }
+   }
+   /* Remaining nr columns; A and U were already advanced past the nu part */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; }
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]); /* NOTE(review): same wraparound idiom as above */
+            for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp01T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
new file mode 100644
index 000000000..45c2f5f1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
@@ -0,0 +1,205 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor; overrides must set both macros with DEPTH == 1 << LOG2_DEPTH
+ */
+#ifndef HPL_LASWP02N_DEPTH
+#define    HPL_LASWP02N_DEPTH       32
+#define    HPL_LASWP02N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp02N
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         W0,
+   double *                         W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp02N
+( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         W0;
+   double *                         W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp02N packs scattered rows of an array  A  into workspace  W
+ * (row offsets given by LINDXA) and records LINDXAU[i] in W0(i*LDW).
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         copied into W. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         copied into W. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be copied into W.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * W0      (local input/output)          double *
+ *         On exit,  W0  is  an array of size (M-1)*LDW+1;  for  i  in
+ *         [0..M), W0(i*LDW) holds LINDXAU[i] stored as a double, i.e.
+ *         the destination offset in U of the column W(:,i).
+ *
+ * W       (local output)                double *
+ *         On entry, W  is an array of size (LDW,M). On exit, W contains
+ *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+ *         in W(:,i).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied into W.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M  that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the rows of W. They are only saved into W0 here.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (incA: advance of A0 per unrolled chunk)
+ */
+   const double               * A0 = A, * a0;
+   double                     * w0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP02N_LOG2_DEPTH );
+   int                        nr, nu; /* nu: columns done unrolled, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return; /* quick return, nothing to pack */
+   /* Save each destination index, converted to double, in W0(i*LDW) */
+   for( i = 0; i < M; i++ ) 
+      *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]);
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) <<
+                          HPL_LASWP02N_LOG2_DEPTH ) );
+   /* Full chunks of HPL_LASWP02N_DEPTH columns of A, unrolled by hand */
+   for( j = 0; j < nu;
+        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+
+         w0[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP02N_DEPTH >  1 )
+         w0[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  2 )
+         w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  4 )
+         w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA;
+         w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  8 )
+         w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA;
+         w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA;
+         w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA;
+         w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH > 16 )
+         w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA;
+         w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA;
+         w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA;
+         w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA;
+         w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA;
+         w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA;
+         w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA;
+         w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA;
+#endif
+      }
+   }
+   /* Remaining nr columns; A0 and W were already advanced past the nu part */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp02N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
new file mode 100644
index 000000000..760732a8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
@@ -0,0 +1,194 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor; overrides must set both macros with DEPTH == 1 << LOG2_DEPTH
+ */
+#ifndef HPL_LASWP03N_DEPTH
+#define    HPL_LASWP03N_DEPTH       32
+#define    HPL_LASWP03N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03N
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03N
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03N copies columns of  W  into  rows  of an  array  U.  The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N).  Columns
+ *         of W are copied as rows within this array U at  the positions
+ *         specified in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied. Each offset is stored as a double in W0(i*LDW).
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (incU: advance of U per unrolled chunk)
+ */
+   const double               * w = W, * w0;
+   double                     * u0;
+   const int                  incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP03N_LOG2_DEPTH );
+   int                        nr, nu; /* nu: entries done unrolled, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return; /* quick return, nothing to copy */
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) <<
+                          HPL_LASWP03N_LOG2_DEPTH ) );
+   /* Full chunks of HPL_LASWP03N_DEPTH entries per column of W, unrolled */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); /* double offset converted to size_t */
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP03N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+   /* Remaining nr entries; U and w were already advanced past the nu part */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
new file mode 100644
index 000000000..fece692ce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP03T_DEPTH
+#define    HPL_LASWP03T_DEPTH       32
+#define    HPL_LASWP03T_LOG2_DEPTH   5   /* DEPTH must be 1 << LOG2_DEPTH */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03T
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03T
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03T copies the first N entries of M columns of W into U. The
+ * i-th column is stored contiguously starting at U + W0(i*LDW) * LDU.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M).  Columns
+ *         of W are copied within the array U at the positions specified
+ *         in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M), the entries W(0:N-1,i)
+ *         are copied to the N entries starting at U + W0(i*LDW) * LDU.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (nu: part of N handled unrolled, nr: N - nu)
+ */
+   const double               * w = W, * w0; 
+   double                     * u0;
+   const int                  incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to copy */
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
+                          HPL_LASWP03T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu;                      /* full DEPTH-wide chunks */
+        j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP03T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP03T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+
+   if( nr > 0 )                             /* leftover nr = N - nu entries */
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
new file mode 100644
index 000000000..4f9c490a5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
@@ -0,0 +1,285 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04N_DEPTH
+#define    HPL_LASWP04N_DEPTH       32
+#define    HPL_LASWP04N_LOG2_DEPTH   5   /* DEPTH must be 1 << LOG2_DEPTH */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04N
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04N
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U
+ * with columns of W. In addition M1 - M0 columns of  W  are copied into
+ * rows of U.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of rows of U that should be
+ *         copied into  A  and replaced by columns of  W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies the number of columns of W that should
+ *         be copied into rows of U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points to  an array of dimension (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M1).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M1-1)*LDW+1. For i in
+ *         [M0..M1), W0(i*LDW) is the destination row offset in U of
+ *         the i-th column of W.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M1),  that contains the
+ *         data to be copied into U.  Column i in [0..M0) replaces row
+ *         LINDXAU(i) of U, column i in [M0..M1) goes to row W0(i*LDW).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which rows of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA/incU: step of A/U per unrolled chunk)
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) << 
+                                            HPL_LASWP04N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP04N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
+                          HPL_LASWP04N_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04N_DEPTH )
+   {
+      for( i =  0; i < M0; i++ )   /* U row -> A row, then W column -> U row */
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[22]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU;
+#endif
+      }
+
+      for( i = M0; i < M1; i++ )   /* W column -> U row at offset W0(i*LDW) */
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+
+   if( nr )                        /* leftover nr = N - nu columns */
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { *a0 = *u0; *u0 = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
new file mode 100644
index 000000000..9cbb4c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04T_DEPTH
+#define    HPL_LASWP04T_DEPTH       32
+#define    HPL_LASWP04T_LOG2_DEPTH   5   /* DEPTH must be 1 << LOG2_DEPTH */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04T
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04T
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
+ * columns of U with columns of W. In addition M1 - M0 columns of W  are
+ * copied into U.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of columns of U that should
+ *         be copied into A and replaced by columns of W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies  the number of columns of  W that will
+ *         be copied into U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the length of the columns of  U  that
+ *         will be copied into rows of A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M1-1)*LDW+1. For i in
+ *         [M0..M1), W0(i*LDW) is the destination column index in U of
+ *         the i-th column of W.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M1),  that contains the
+ *         data to be copied into U. Column i in [0..M0) replaces column
+ *         LINDXAU(i) of U, column i in [M0..M1) goes to column W0(i*LDW).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which columns of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the  local column indexes of  U  that should be copied into A
+ *         and replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA/incU: step of A/U per unrolled chunk)
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP04T_LOG2_DEPTH ),
+                              incU = (   1 << HPL_LASWP04T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
+                          HPL_LASWP04T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04T_DEPTH )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); /* avoid int wrap */
+         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
+         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
+         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
+         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
+         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
+         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
+         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
+         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
+         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
+         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
+         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
+         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
+         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
+         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
+         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
+         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
+         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
+         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
+         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
+         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
+         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
+         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
+         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
+         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
+         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
+         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
+#endif
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+
+   if( nr > 0 )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/* End of HPL_dlaswp04T */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
new file mode 100644
index 000000000..3edcf91a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor.  Only HPL_LASWP05N_DEPTH is tested here, so an
+ * external override is expected to supply HPL_LASWP05N_LOG2_DEPTH as well,
+ * with DEPTH == 1 << LOG2_DEPTH (the #if ladder below covers 1,2,4,8,16,32). */
+#ifndef HPL_LASWP05N_DEPTH
+#define    HPL_LASWP05N_DEPTH       32
+#define    HPL_LASWP05N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05N copies, for i = 0 .. M-1, the first N entries of row
+ * LINDXAU[i] of U into row LINDXA[i] of A.  U is only read.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of U that should be
+ *         copied into A. Nothing is done when M is less than one.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. Nothing is done when N is less than one.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points to an array of dimension  (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that are overwritten with rows of U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local row indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP05N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu (leftover) */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
+                            HPL_LASWP05N_LOG2_DEPTH ) );
+/* Unrolled part: each pass copies one depth-wide slice of every listed row */
+   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP05N_DEPTH >  1 )
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  2 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  4 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  8 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH > 16 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Clean-up: A and U0 now sit past the nu handled entries; copy the last nr */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
new file mode 100644
index 000000000..0adaa102d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
@@ -0,0 +1,196 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor.  Only HPL_LASWP05T_DEPTH is tested here, so an
+ * external override is expected to supply HPL_LASWP05T_LOG2_DEPTH as well,
+ * with DEPTH == 1 << LOG2_DEPTH (the #if ladder below covers 1,2,4,8,16,32). */
+#ifndef HPL_LASWP05T_DEPTH
+#define    HPL_LASWP05T_DEPTH       32
+#define    HPL_LASWP05T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05T copies, for i = 0 .. M-1, the first N entries of column
+ * LINDXAU[i] of U into row LINDXA[i] of A.  U is only read.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the number of columns of U to copy into A (no-op if M < 1).
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that will
+ *         be copied into rows of A. Nothing is done when N is less than one.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that are overwritten with columns of U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu (leftover) */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
+                            HPL_LASWP05T_LOG2_DEPTH ) );
+/* Unrolled part: each pass copies one depth-long slice of every listed column */
+   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+
+         *a0 = u0[ 0]; a0 += LDA;
+#if ( HPL_LASWP05T_DEPTH >  1 )
+         *a0 = u0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  2 )
+         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  4 )
+         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  8 )
+         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
+         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
+         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
+         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH > 16 )
+         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
+         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
+         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
+         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
+         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
+         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
+         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
+         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
+#endif
+      }
+   }
+/* Clean-up: A and U0 now sit past the nu handled entries; copy the last nr */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
new file mode 100644
index 000000000..a74bae75c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
@@ -0,0 +1,206 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Default unrolling factor.  Only HPL_LASWP06N_DEPTH is tested here, so an
+ * external override is expected to supply HPL_LASWP06N_LOG2_DEPTH as well,
+ * with DEPTH == 1 << LOG2_DEPTH (the #if ladder below covers 1,2,4,8,16,32). */
+#ifndef HPL_LASWP06N_DEPTH
+#define    HPL_LASWP06N_DEPTH       32
+#define    HPL_LASWP06N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06N
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06N swaps, for i = 0 .. M-1, the first N entries of row i
+ * of U with those of row LINDXA[i] of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with rows of U. Nothing is done when M is less than one.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with rows of U. Nothing is done when N is less than one.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,N).  This
+ *         array contains the rows of U that are to be swapped with rows
+ *         of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP06N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu (leftover) */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) <<
+                            HPL_LASWP06N_LOG2_DEPTH ) );
+/* Unrolled part: each pass swaps one depth-wide slice of every row pair */
+   for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP06N_DEPTH >  1 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  2 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  4 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  8 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH > 16 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Clean-up: A and U0 now sit past the nu handled entries; swap the last nr */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { r = *a0; *a0 = *u0; *u0 = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
new file mode 100644
index 000000000..fb53c2a31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
@@ -0,0 +1,207 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP06T_DEPTH
+#define    HPL_LASWP06T_DEPTH       32
+#define    HPL_LASWP06T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06T
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06T swaps  columns  of  U  with  rows  of  A  at  positions
+ * indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with columns of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with columns of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns of  U  that are to be swapped with
+ *         rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  ( incA is a size_t: LDA * DEPTH may exceed INT_MAX )
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const size_t               incA = (size_t)(LDA) <<
+                                     HPL_LASWP06T_LOG2_DEPTH;
+   const int                  incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) <<
+                            HPL_LASWP06T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+
+         r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA;
+#if ( HPL_LASWP06T_DEPTH >  1 )
+         r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  2 )
+         r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  4 )
+         r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  8 )
+         r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA;
+         r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA;
+         r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA;
+         r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA;
+         r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA;
+         r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA;
+         r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH > 16 )
+         r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA;
+         r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA;
+         r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA;
+         r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA;
+         r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA;
+         r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA;
+         r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA;
+         r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA;
+         r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA;
+         r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA;
+         r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA;
+         r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA;
+         r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA;
+         r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA;
+         r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA;
+         r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA;
+#endif
+      }
+   }
+
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA )
+         { r = *a0; *a0 = u0[j]; u0[j] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
new file mode 100644
index 000000000..7dbf934f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP10N_DEPTH
+#define    HPL_LASWP10N_DEPTH       32
+#define    HPL_LASWP10N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp10N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp10N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp10N performs a sequence  of  local column interchanges on a
+ * matrix A.  One column interchange is initiated  for columns 0 through
+ * N-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the array A. M
+ *         must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of the array A. N
+ *         must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an  array of  dimension (LDA,N).  This
+ *         array contains the columns onto which the interchanges should
+ *         be applied. On exit, A contains the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry, column j of A is swapped with column IPIV[j], j<N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * a0, * a1;
+   const int                  incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH );
+   int                        jp, mr, mu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..  ( offsets are size_t: j * LDA may exceed INT_MAX )
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH )
+                            << HPL_LASWP10N_LOG2_DEPTH ) );
+
+   for( j = 0; j < N; j++ )
+   {
+      if( j != ( jp = IPIV[j] ) )
+      {
+         a0 = A + (size_t)(j)  * (size_t)(LDA);
+         a1 = A + (size_t)(jp) * (size_t)(LDA);
+         for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA )
+         {
+            r = *a0;    *a0    = *a1;    *a1    = r;
+#if ( HPL_LASWP10N_DEPTH >  1 )
+            r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  2 )
+            r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r;
+            r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  4 )
+            r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r;
+            r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r;
+            r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r;
+            r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  8 )
+            r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r;
+            r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r;
+            r = a0[10]; a0[10] = a1[10]; a1[10] = r;
+            r = a0[11]; a0[11] = a1[11]; a1[11] = r;
+            r = a0[12]; a0[12] = a1[12]; a1[12] = r;
+            r = a0[13]; a0[13] = a1[13]; a1[13] = r;
+            r = a0[14]; a0[14] = a1[14]; a1[14] = r;
+            r = a0[15]; a0[15] = a1[15]; a1[15] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH > 16 )
+            r = a0[16]; a0[16] = a1[16]; a1[16] = r;
+            r = a0[17]; a0[17] = a1[17]; a1[17] = r;
+            r = a0[18]; a0[18] = a1[18]; a1[18] = r;
+            r = a0[19]; a0[19] = a1[19]; a1[19] = r;
+            r = a0[20]; a0[20] = a1[20]; a1[20] = r;
+            r = a0[21]; a0[21] = a1[21]; a1[21] = r;
+            r = a0[22]; a0[22] = a1[22]; a1[22] = r;
+            r = a0[23]; a0[23] = a1[23]; a1[23] = r;
+            r = a0[24]; a0[24] = a1[24]; a1[24] = r;
+            r = a0[25]; a0[25] = a1[25]; a1[25] = r;
+            r = a0[26]; a0[26] = a1[26]; a1[26] = r;
+            r = a0[27]; a0[27] = a1[27]; a1[27] = r;
+            r = a0[28]; a0[28] = a1[28]; a1[28] = r;
+            r = a0[29]; a0[29] = a1[29]; a1[29] = r;
+            r = a0[30]; a0[30] = a1[30]; a1[30] = r;
+            r = a0[31]; a0[31] = a1[31]; a1[31] = r;
+#endif
+         }
+
+         for( i = 0; i < mr; i++ )
+         { r = a0[i]; a0[i] = a1[i]; a1[i] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp10N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c
new file mode 100644
index 000000000..e1b5bbfac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2l
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2l
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2l  translates  the global index  IG  of a matrix entry into
+ * the local index of that entry in the process that owns it.  The value
+ * returned is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG is the global index of the matrix entry. IG must
+ *         be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry, INB is the size of the first block  of  the  global
+ *         matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry, NB is the blocking factor  used  to  partition  and
+ *         distribute the matrix. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, SRCPROC = -1  means that the data is replicated and
+ *         not distributed,  in which case  IG  is returned.  Any  other
+ *         value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  is the total number of  process  rows  or
+ *         columns over which the matrix is distributed. NPROCS must be
+ *         at least one.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        iblk, icyc, il;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * There is nothing to translate  when this dimension of the grid has a
+ * single process,  when the data is replicated,  or when IG lies in the
+ * first block: the local index is IG itself.
+ */
+   if( ( NPROCS == 1 ) || ( SRCPROC == -1 ) || ( IG < INB ) )
+      return( IG );
+/*
+ * Otherwise IG = INB - NB + ( l * NPROCS + p ) * NB + X with 0 <= X < NB
+ * and  0 <= p < NPROCS,  i.e.  IG  lies in block  (IG-INB+NB) / NB  =
+ * l*NPROCS + p,  where  p = 0  designates the process owning the first
+ * (partial) block of size INB.  Let
+ *    iblk = (IG-INB) / NB   and   icyc = iblk / NPROCS,
+ * so that iblk+1 = l*NPROCS + p.  When p = 0,  NPROCS divides iblk+1,
+ * icyc = l-1  and  iblk+1 = (icyc+1)*NPROCS.  When  1 <= p < NPROCS,
+ * NPROCS does not divide iblk+1, icyc = l and (icyc+1)*NPROCS > iblk+1.
+ * The test further below uses this to tell the two cases apart.
+ */
+   iblk = ( IG - INB ) / NB;
+   icyc = iblk / NPROCS;
+/*
+ * Both cases share the term ( icyc - iblk ) * NB; INB is subtracted only
+ * in the second one.
+ *
+ * When p = 0, l = icyc+1 and l*NPROCS = iblk+1, so the local index is
+ *    INB - NB + l*NB + X = IG + ( l - l*NPROCS ) * NB
+ *                        = IG + ( icyc - iblk ) * NB.
+ *
+ * When p >= 1, l = icyc and l*NPROCS + p = iblk+1, so the local index is
+ *    l*NB + X = IG - INB + ( ( l+1 ) - ( l*NPROCS + p ) ) * NB
+ *             = IG - INB + ( icyc - iblk ) * NB.
+ */
+   il = IG + NB * ( icyc - iblk );
+   if( ( iblk + 1 ) != ( icyc + 1 ) * NPROCS ) il -= INB;
+   return( il );
+/*
+ * End of HPL_indxg2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c
new file mode 100644
index 000000000..74662f9d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c
@@ -0,0 +1,176 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_indxg2lp
+(
+   int *                            IL,
+   int *                            PROC,
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+void HPL_indxg2lp
+( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
+   int *                            IL;
+   int *                            PROC;
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2lp  translates the global index  IG  of a matrix entry into
+ * its local index and the coordinate of the process that possesses  it.
+ * The local index returned is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (output)                      int *
+ *         On exit, IL is the local index corresponding to IG.  IL is at
+ *         least zero.
+ *
+ * PROC    (output)                      int *
+ *         On exit,  PROC  is the  coordinate of the process  owning the
+ *         entry of global index IG.  PROC is set to SRCPROC when IG lies
+ *         in the first block, when SRCPROC is -1 or when NPROCS is one.
+ *
+ * IG      (input)                       const int
+ *         On entry, IG is the global index of the matrix entry. IG must
+ *         be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry, INB is the size of the first block  of  the  global
+ *         matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry, NB is the blocking factor  used  to  partition  and
+ *         distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, SRCPROC = -1  means that the data is replicated and
+ *         not distributed: IL is then set to IG and PROC to SRCPROC. In
+ *         the other cases SRCPROC enters the computation of PROC.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  is the total number of  process  rows  or
+ *         columns over which the matrix is distributed. NPROCS must be
+ *         at least one.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        iblk, icyc, il, owner;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * There is nothing to translate  when this dimension of the grid has a
+ * single process,  when the data is replicated,  or when IG lies in the
+ * first block: the local index is IG and the process is SRCPROC.
+ */
+   if( ( NPROCS == 1 ) || ( SRCPROC == -1 ) || ( IG < INB ) )
+   {
+      *IL   = IG;
+      *PROC = SRCPROC;
+      return;
+   }
+/*
+ * Otherwise IG = INB - NB + ( l * NPROCS + p ) * NB + X with 0 <= X < NB
+ * and  0 <= p < NPROCS,  i.e.  IG  lies in block  (IG-INB+NB) / NB  =
+ * l*NPROCS + p,  where  p = 0  designates the process owning the first
+ * (partial) block of size INB.  Let
+ *    iblk = (IG-INB) / NB   and   icyc = iblk / NPROCS,
+ * so that iblk+1 = l*NPROCS + p.  When p = 0,  NPROCS divides iblk+1,
+ * icyc = l-1  and  iblk+1 = (icyc+1)*NPROCS.  When  1 <= p < NPROCS,
+ * NPROCS does not divide iblk+1, icyc = l and (icyc+1)*NPROCS > iblk+1.
+ * The test on iblk+1 further below uses this to tell the two cases
+ * apart.
+ */
+   iblk = ( IG - INB ) / NB;
+   icyc = iblk / NPROCS;
+/*
+ * IG is in block 1 + iblk.  Add this block number to SRCPROC and reduce
+ * the sum modulo NPROCS (block-cyclic data distribution).
+ */
+   owner = SRCPROC + 1 + iblk;
+   *PROC = MPosMod( owner, NPROCS );
+/*
+ * Both cases share the term ( icyc - iblk ) * NB; INB is subtracted only
+ * in the second one.
+ *
+ * When p = 0, l = icyc+1 and l*NPROCS = iblk+1, so the local index is
+ *    INB - NB + l*NB + X = IG + ( l - l*NPROCS ) * NB
+ *                        = IG + ( icyc - iblk ) * NB.
+ *
+ * When p >= 1, l = icyc and l*NPROCS + p = iblk+1, so the local index is
+ *    l*NB + X = IG - INB + ( ( l+1 ) - ( l*NPROCS + p ) ) * NB
+ *             = IG - INB + ( icyc - iblk ) * NB.
+ * The result is stored through IL once, after PROC has been set.
+ */
+   il = IG + NB * ( icyc - iblk );
+   if( ( iblk + 1 ) != ( icyc + 1 ) * NPROCS ) il -= INB;
+   *IL = il;
+/*
+ * End of HPL_indxg2lp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c
new file mode 100644
index 000000000..d0e75f516
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c
@@ -0,0 +1,128 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2p
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2p
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2p returns the coordinate of the process which possesses the
+ * entry of a matrix specified by the global index IG.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix, 0 <=
+ *         SRCPROC < NPROCS, or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        proc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid:  SRCPROC  is
+ * returned unchanged.
+ */
+      return( SRCPROC );
+/*
+ * Otherwise,  IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC
+ * and reduce modulo NPROCS  (definition of the block-cyclic data distri-
+ * bution).
+ */
+   proc = SRCPROC + 1 + ( IG - INB ) / NB;
+   return( MPosMod( proc, NPROCS ) );
+/*
+ * End of HPL_indxg2p
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c
new file mode 100644
index 000000000..7f139425a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c
@@ -0,0 +1,164 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxl2g
+(
+   const int                        IL,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxl2g
+( IL, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        IL;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxl2g returns the global index of the matrix entry  pointed to
+ * by the local index IL of the process indicated by PROC.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (input)                       const int
+ *         On entry, IL specifies the local  index of the matrix  entry.
+ *         IL must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC  specifies the coordinate of the process whose
+ *         local index IL is to be converted to a global index.  PROC must
+ *         be at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix, 0 <=
+ *         SRCPROC < NPROCS, or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid: local and global indexes are equal.
+ */
+      return( IL );
+   }
+   else if( PROC == SRCPROC )
+   {
+/*
+ * If I am SRCPROC, my first block is of size INB
+ */
+      if( IL < INB )
+/*
+ * If  IL  belongs to the first block,  the local and global indexes are
+ * equal.
+ */
+         return ( IL );
+/*
+ * The  number  of  entire  blocks  before  the  one  IL  belongs  to is
+ * ( IL - INB ) / NB + 1.  Each of the other NPROCS-1 processes thus owns
+ * NB*( ( IL-INB )/NB + 1 ) entries  that are globally before the global
+ * entry corresponding to IL.
+ */
+      return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL );
+   }
+   else if( PROC < SRCPROC )
+   {
+/*
+ * Otherwise, the process of coordinate  MOD(SRCPROC+1, NPROCS) owns the
+ * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro-
+ * cesses between this process and  PROC  not  included  when going from
+ * left to right on the process line  with  possible wrap around.  These
+ * IPROC  processes  have  one  more  NB  block than the other processes,
+ * which own IL / NB blocks of size NB.
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB );
+   }
+   else
+   {
+/*
+ * Same reasoning as above with IPROC = PROC - SRCPROC - 1 (no wrap).
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1        )+IL+INB );
+   }
+/*
+ * End of HPL_indxl2g
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c
new file mode 100644
index 000000000..2580f2ad4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c
@@ -0,0 +1,382 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_infog2l
+(
+   int                              I,
+   int                              J,
+   const int                        IMB,
+   const int                        MB,
+   const int                        INB,
+   const int                        NB,
+   const int                        RSRC,
+   const int                        CSRC,
+   const int                        MYROW,
+   const int                        MYCOL,
+   const int                        NPROW,
+   const int                        NPCOL,
+   int *                            II,
+   int *                            JJ,
+   int *                            PROW,
+   int *                            PCOL
+)
+#else
+void HPL_infog2l
+( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
+   int                              I;
+   int                              J;
+   const int                        IMB;
+   const int                        MB;
+   const int                        INB;
+   const int                        NB;
+   const int                        RSRC;
+   const int                        CSRC;
+   const int                        MYROW;
+   const int                        MYCOL;
+   const int                        NPROW;
+   const int                        NPCOL;
+   int *                            II;
+   int *                            JJ;
+   int *                            PROW;
+   int *                            PCOL;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_infog2l computes the starting local index II, JJ corresponding to
+ * the submatrix starting globally at the entry pointed to by I, J. This
+ * routine also returns, in PROW and PCOL,  the  grid coordinates of the
+ * process owning the matrix entry of global indexes I, J.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                int
+ *         On entry,  I  specifies  the  global  row index of the matrix
+ *         entry. I must be at least zero.
+ *
+ * J       (global input)                int
+ *         On entry,  J  specifies the global column index of the matrix
+ *         entry. J must be at least zero.
+ *
+ * IMB     (global input)                const int
+ *         On entry,  IMB  specifies  the size of the first row block of
+ *         the global matrix. IMB must be at least one.
+ *
+ * MB      (global input)                const int
+ *         On entry,  MB specifies the blocking factor used to partition
+ *         and  distribute the rows of the matrix A.  MB  must be larger
+ *         than one.
+ *
+ * INB     (global input)                const int
+ *         On entry, INB specifies the size of the first column block of
+ *         the global matrix. INB must be at least one.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the columns of the matrix A. NB must be larger
+ *         than one.
+ *
+ * RSRC    (global input)                const int
+ *         On entry,  RSRC  specifies  the row coordinate of the process
+ *         that possesses the first row block of the matrix.  RSRC is -1
+ *         (rows not distributed) or satisfies 0 <= RSRC < NPROW.
+ *
+ * CSRC    (global input)                const int
+ *         On entry, CSRC specifies the column coordinate of the process
+ *         that possesses the first column block of the matrix.  CSRC is
+ *         -1 (columns not distributed) or satisfies 0 <= CSRC < NPCOL.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * II      (local output)                int *
+ *         On exit, II  specifies the  local  starting  row index of the
+ *         submatrix. On exit, II is at least 0.
+ *
+ * JJ      (local output)                int *
+ *         On exit, JJ  specifies the local starting column index of the
+ *         submatrix. On exit, JJ is at least 0.
+ *
+ * PROW    (global output)               int *
+ *         On exit, PROW is the row coordinate of the process owning the
+ *         entry specified by the global index I.  PROW is at least zero
+ *         and less than NPROW (or equal to RSRC when RSRC is -1).
+ *
+ * PCOL    (global output)               int *
+ *         On exit, PCOL  is the column coordinate of the process owning
+ *         the entry specified by the global index J.  PCOL  is at least
+ *         zero and less than NPCOL (or equal to CSRC when CSRC is -1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int            ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
+/* ..
+ * .. Executable Statements ..
+ */
+   imb   = IMB;
+   *PROW = RSRC;
+
+   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
+   {
+/*
+ * The data is not distributed,  or there is just one process row in the
+ * grid: the local row index equals the global one.
+ */
+     *II = I;
+   }
+   else if( I < imb )
+   {
+/*
+ * I refers to an entry in the first block of rows, owned by RSRC
+ */
+     *II = ( MYROW == *PROW ? I : 0 );
+   }
+   else
+   {
+      mb   = MB;
+      rsrc = *PROW;
+/*
+ * The discussion goes as follows:  compute  my distance from the source
+ * process so that  within  this process coordinate system,  the  source
+ * process   is  the  process  such  that  mydist = 0,  or  equivalently
+ * MYROW == rsrc.
+ *
+ * Find  out  the global coordinate of the block I belongs to (nblocks),
+ * as well as the minimum local number of blocks every process has, i.e.
+ * ilocblk = nblocks / NPROW.
+ * when mydist < nblocks-ilocblk*NPROW,   I own ilocblk + 1 full blocks,
+ * when mydist > nblocks-ilocblk*NPROW,   I own ilocblk     full blocks,
+ * when mydist = nblocks-ilocblk*NPROW,   I own ilocblk     full blocks
+ * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */
+      if( MYROW == rsrc )
+      {
+/*
+ * I refers  to an entry  that is not in the first block, find out which
+ * process has it.
+ */
+         nblocks = ( I - imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Since  mydist = 0  and nblocks - ilocblk * NPROW >= 0, there are only
+ * three possible cases:
+ *
+ *   1) mydist = 0 = nblocks - ilocblk * NPROW  and  I  do  not  own  I,
+ *      in which case II = IMB + ( ilocblk - 1 ) * MB.  Note  that  this
+ *      case  cannot  happen  when  ilocblk is zero, since nblocks is at
+ *      least one.
+ *
+ *   2) mydist = 0 = nblocks - ilocblk * NPROW  and  I  own  I, in which
+ *      case  I  and  II   can   respectively   be   written   as  IMB +
+ *      (nblocks-1)*MB + IL  and  IMB + (ilocblk-1) * MB + IL.  That  is
+ *      II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
+ *      when ilocblk is zero, since nblocks is at least one.
+ *
+ *   3) mydist = 0 < nblocks - ilocblk * NPROW,  the source process owns
+ *      ilocblk+1 full blocks,  and  therefore  II = IMB + ilocblk * MB.
+ *      Note that when ilocblk is zero, II is just IMB.
+ */
+         if( nblocks < NPROW )
+         {
+            *II = imb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            if( ilocblk * NPROW >= nblocks )
+            {
+               *II = ( ( MYROW == *PROW ) ?
+                       I   + ( ilocblk - nblocks ) * mb :
+                       imb + ( ilocblk - 1       ) * mb );
+            }
+            else
+            {
+               *II =  imb + ilocblk * mb;
+            }
+         }
+      }
+      else
+      {
+/*
+ * I is not in the first block:  find out which process has it. Note that
+ * the variable I is decremented by IMB here and holds I - IMB below.
+ */
+         nblocks = ( I -= imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the  source process is the process such that
+ * mydist=0.
+ */
+         if( ( mydist  = MYROW - rsrc ) < 0 ) mydist += NPROW;
+/*
+ * When mydist <  nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
+ * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I,  I own ilocblk
+ * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
+ * blocks and I,  in which case I can be written as IMB + (nblocks-1)*MB
+ * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB.
+ */
+         if( nblocks < NPROW )
+         {
+            mydist -= nblocks;
+            *II     = ( ( mydist < 0 ) ? mb :
+                        ( ( MYROW == *PROW ) ?
+                          I + ( 1 - nblocks ) * mb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            mydist -= nblocks - ilocblk * NPROW;
+            *II     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
+                        ( ( MYROW == *PROW ) ?
+                          ( ilocblk - nblocks + 1 ) * mb + I :
+                          ilocblk * mb ) );
+         }
+      }
+   }
+/*
+ * Same computation for the columns (J, INB, NB, CSRC, MYCOL, NPCOL)
+ */
+   inb   = INB;
+   *PCOL = CSRC;
+
+   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
+   {
+      *JJ = J;
+   }
+   else if( J < inb )
+   {
+      *JJ = ( MYCOL == *PCOL ? J : 0 );
+   }
+   else
+   {
+      nb   = NB;
+      csrc = *PCOL;
+
+      if( MYCOL == csrc )
+      {
+         nblocks = ( J - inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            *JJ = inb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            if( ilocblk * NPCOL >= nblocks )
+            {
+               *JJ = ( ( MYCOL == *PCOL ) ?
+                       J   + ( ilocblk - nblocks ) * nb :
+                       inb + ( ilocblk - 1       ) * nb );
+            }
+            else
+            {
+               *JJ = inb + ilocblk * nb;
+            }
+         }
+      }
+      else
+      {
+         nblocks = ( J -= inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            mydist -= nblocks;
+            *JJ     = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
+                        J + ( 1 - nblocks )*nb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            mydist -= nblocks - ilocblk * NPCOL;
+            *JJ     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
+                        ( ( MYCOL == *PCOL ) ?
+                          ( ilocblk - nblocks + 1 ) * nb + J :
+                          ilocblk * nb ) );
+         }
+      }
+   }
+/*
+ * End of HPL_infog2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c
new file mode 100644
index 000000000..39cd736d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numroc
+(
+   const int                        N,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numroc
+( N, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numroc returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index 0;  it simply forwards to HPL_numrocI with I = 0.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) );
+/*
+ * End of HPL_numroc
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c
new file mode 100644
index 000000000..70f3497de
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c
@@ -0,0 +1,243 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numrocI
+(
+   const int                        N,
+   const int                        I,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numrocI
+( N, I, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        I;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numrocI returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index I.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * I       (input)                       const int
+ *         On entry, I  specifies the global index of the matrix  entry.
+ *         I must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be in [0,NPROCS), or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ilocblk, inb, mydist, nblocks, srcproc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid.
+ */
+      return( N );
+/*
+ * Compute coordinate of process owning I and corresponding INB
+ */
+   srcproc = SRCPROC;
+
+   if( ( inb = INB - I ) <= 0 )
+   {
+/*
+ * I is not in the first block, find out which process has it and update
+ * the size of first block
+ */
+      srcproc += ( nblocks = (-inb) / NB + 1 ); 
+      srcproc -= ( srcproc / NPROCS ) * NPROCS;
+      inb     += nblocks * NB;
+   }
+/*
+ * Now  everything  is  just like  N, I=0, INB, NB, srcproc, NPROCS. The
+ * discussion goes as follows:  compute my distance from the source pro-
+ * cess  so that within this process coordinate system,  the source pro-
+ * cess is the process such that mydist = 0, or PROC == srcproc.
+ *
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries. Then remark that
+ *
+ * when  mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks,
+ * when  mydist > nblocks - ilocblk*NPROCS, I own ilocblk   full blocks,
+ * when  mydist = nblocks - ilocblk*NPROCS, either the last block is not
+ * full and I own it,  or the last block is full and I am the first pro-
+ * cess owning only ilocblk full blocks.
+ */
+   if( PROC == srcproc )
+   {
+/*
+ * I am the source process, i.e. I own I (mydist=0).  When N <= INB, the
+ * answer is simply N.
+ */
+      if( N <= inb ) return( N );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries.
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Since  mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only
+ * two possible cases:
+ *
+ *   1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di-
+ *      vides the global number of full blocks,  then the source process
+ *      srcproc owns one more block than the other processes;  and N can
+ *      be rewritten as N = INB + (nblocks-1) * NB + LNB  with  LNB >= 0
+ *      size of the last block. Similarly, the local value Np correspon-
+ *      ding to N can be written as  Np = INB + (ilocblk-1) * NB + LNB =
+ *      N + ( ilocblk-1 - (nblocks-1) )*NB.  Note  that this case cannot
+ *      happen when ilocblk is zero, since nblocks is at least one.
+ *
+ *   2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only
+ *      owns full blocks,  and  therefore Np = INB + ilocblk * NB.  Note
+ *      that when ilocblk is zero, Np is just INB.
+ */
+      if( nblocks < NPROCS ) return( inb );
+ 
+      ilocblk = nblocks / NPROCS;
+      return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB :
+              N + ( ilocblk - nblocks ) * NB );
+   }
+   else
+   {
+/*
+ * I am not the source process. When N <= INB, the answer is simply 0.
+ */
+      if( N <= inb ) return( 0 );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the source  process is the process such that
+ * mydist=0.
+ */
+      if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS;
+/*
+ * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk   full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist = nblocks - ilocblk*NPROCS,
+ * either the last block is not full and I own it, in which case
+ *    N = INB + (nblocks - 1)*NB + LNB with  LNB  the  size  of the last
+ *    block such that NB > LNB > 0;  the local value Np corresponding to
+ *    N is given by  Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB;
+ * or the  last  block  is  full  and I am the first process owning only
+ *    ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and
+ *    Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB.
+ */
+      if( nblocks < NPROCS )
+         return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 :
+                 N - inb + NB * ( 1 - nblocks ) ) );
+ 
+      ilocblk = nblocks / NPROCS;
+      mydist -= nblocks - ilocblk * NPROCS;
+      return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
+              ( ( mydist > 0 ) ? ilocblk * NB :
+                N - inb + NB * ( ilocblk - nblocks + 1 ) ) );
+   }
+/*
+ * End of HPL_numrocI
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c
new file mode 100644
index 000000000..268975fc1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c
@@ -0,0 +1,137 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pabort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pabort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pabort displays an error message on stderr and halts execution.
+ * The message is truncated if it does not fit the local 128-byte buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred. When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( stderr,
+                   "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+
+   MPI_Abort( MPI_COMM_WORLD, -1 );
+   exit( -1 );
+/*
+ * End of HPL_pabort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c
new file mode 100644
index 000000000..73cf649da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c
@@ -0,0 +1,143 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlamch
+(
+   MPI_Comm                         COMM,
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_pdlamch
+( COMM, CMACH )
+   MPI_Comm                         COMM;
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlamch determines  machine-specific  arithmetic  constants  such  as
+ * the relative machine precision (eps), the safe minimum (sfmin) such that
+ * 1/sfmin does not overflow, the base of the machine (base), the precision
+ * (prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+ * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+ * exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+ * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the
+ * overflow threshold (rmax)  - (base**emax)*(1-eps).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * CMACH   (global input)                const HPL_T_MACH
+ *         Specifies the value to be returned by HPL_pdlamch            
+ *            = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+ *            = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+ *            = HPL_MACH_BASE,  HPL_pdlamch := base                     
+ *            = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+ *            = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+ *            = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+ *            = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+ *            = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+ *            = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+ *            = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+ *          
+ *         where                                                        
+ *          
+ *            eps   = relative machine precision,                       
+ *            sfmin = safe minimum,                                     
+ *            base  = base of the machine,                              
+ *            prec  = eps*base,                                         
+ *            t     = number of digits in the mantissa,                 
+ *            rnd   = 1.0 if rounding occurs in addition,               
+ *            emin  = minimum exponent before underflow,                
+ *            rmin  = underflow threshold,                              
+ *            emax  = largest exponent before overflow,                 
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     param;
+/* ..
+ * .. Executable Statements ..
+ */
+   param = HPL_dlamch( CMACH );       /* value computed by this process */
+
+   switch( CMACH )
+   {
+      case HPL_MACH_EPS   :
+      case HPL_MACH_SFMIN :
+      case HPL_MACH_EMIN  :
+      case HPL_MACH_RMIN  :
+         (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE,
+                                HPL_max, COMM );
+         break;       /* param was all-reduced over COMM using HPL_max */
+      case HPL_MACH_EMAX  :
+      case HPL_MACH_RMAX  :
+         (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE,
+                                HPL_min, COMM );
+         break;       /* param was all-reduced over COMM using HPL_min */
+      default             :
+         break;       /* other constants: value is returned unreduced  */
+   } 
+
+   return( param );
+/*
+ * End of HPL_pdlamch
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c
new file mode 100644
index 000000000..40bdcc36b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c
@@ -0,0 +1,242 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid *               GRID,
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid *               GRID;
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange computes one of three scalar measures of the distributed
+ * matrix A and broadcasts the result to every process of the grid.
+ * NORM selects the measure:
+ *
+ *    NORM = HPL_NORM_A :  max(abs(A(i,j))),
+ *    NORM = HPL_NORM_1 :  norm1(A), the maximum column sum,
+ *    NORM = HPL_NORM_I :  normI(A), the maximum row sum.
+ *
+ * max(abs(A(i,j))) is not a matrix norm.  Zero is returned as soon as
+ * M or N is zero.  For a NORM outside the three values above nothing is
+ * computed and the zero initial value is what gets broadcast.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         GRID points to the process grid description; its row_comm,
+ *         col_comm and all_comm communicators are used here.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         NORM selects which of the quantities listed above is
+ *         returned.
+ *
+ * M       (global input)                const int
+ *         M is the global number of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         N is the global number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         Blocking factor used to partition and distribute the matrix.
+ *         NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         A points to an array of dimension (LDA,LocQ(N)) holding the
+ *         local pieces of the distributed matrix.  It is only read.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A.  LDA must be at least
+ *         max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Local variables
+ */
+   MPI_Comm                   comm_all, comm_col, comm_row;
+   const double               * col;
+   double                     acc, vnorm = HPL_rzero, * sums = NULL;
+   int                        i, j, mloc, mycol, myrow, nloc, npcol, nprow;
+/*
+ * Executable statements
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   comm_all = GRID->all_comm;
+   comm_row = GRID->row_comm; comm_col = GRID->col_comm;
+
+   Mnumroc( mloc, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nloc, N, NB, NB, mycol, 0, npcol );
+/* Empty matrix: return zero before any reduction or broadcast. */
+   if( Mmin( M, N ) == 0 ) return( vnorm );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest |A(i,j)| over the local block, then over the whole grid.
+ */
+      if( ( mloc > 0 ) && ( nloc > 0 ) )
+      {
+         for( j = 0, col = A; j < nloc; j++, col += LDA )
+         {
+            for( i = 0; i < mloc; i++ )
+               vnorm = Mmax( vnorm, Mabs( col[i] ) );
+         }
+      }
+      (void) HPL_reduce( (void *)(&vnorm), 1, HPL_DOUBLE, HPL_max, 0,
+                         comm_all );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: local column sums are added down each process column.
+ */
+      if( nloc > 0 )
+      {
+         sums = (double*)malloc( (size_t)(nloc) * sizeof( double ) );
+         if( sums == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( j = 0, col = A; j < nloc; j++, col += LDA )
+         {
+            acc = HPL_rzero;
+            for( i = 0; i < mloc; i++ ) acc += Mabs( col[i] );
+            sums[j] = acc;
+         }
+/*
+ * The global column sums are stored on row 0 of the process grid.
+ */
+         (void) HPL_reduce( (void *)(sums), nloc, HPL_DOUBLE, HPL_sum,
+                            0, comm_col );
+/*
+ * Process row 0 keeps the largest of its column sums.
+ */
+         if( myrow == 0 )
+         { vnorm = sums[HPL_idamax( nloc, sums, 1 )]; vnorm = Mabs( vnorm ); }
+         free( sums );
+      }
+/*
+ * Maximum over process row 0, stored in process (0,0).
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&vnorm), 1, HPL_DOUBLE, HPL_max, 0,
+                            comm_row );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: local row sums are accumulated column by column and
+ * then added along each process row.
+ */
+      if( mloc > 0 )
+      {
+         sums = (double*)malloc( (size_t)(mloc) * sizeof( double ) );
+         if( sums == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( i = 0; i < mloc; i++ ) sums[i] = HPL_rzero;
+
+         for( j = 0, col = A; j < nloc; j++, col += LDA )
+         {
+            for( i = 0; i < mloc; i++ )
+               sums[i] += Mabs( col[i] );
+         }
+/*
+ * The global row sums are stored on column 0 of the process grid.
+ */
+         (void) HPL_reduce( (void *)(sums), mloc, HPL_DOUBLE, HPL_sum,
+                            0, comm_row );
+/*
+ * Process column 0 keeps the largest of its row sums.
+ */
+         if( mycol == 0 )
+         { vnorm = sums[HPL_idamax( mloc, sums, 1 )]; vnorm = Mabs( vnorm ); }
+         free( sums );
+      }
+/*
+ * Maximum over process column 0, stored in process (0,0).
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&vnorm), 1, HPL_DOUBLE, HPL_max,
+                            0, comm_col );
+   }
+/*
+ * Broadcast the result from rank 0 of all_comm to the whole grid.
+ */
+   (void) HPL_broadcast( (void *)(&vnorm), 1, HPL_DOUBLE, 0, comm_all );
+
+   return( vnorm );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
new file mode 100644
index 000000000..20f11129a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
@@ -0,0 +1,236 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        IAROW,
+   const int                        IACOL,
+   const char *                     CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        IAROW;
+   const int                        IACOL;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints to standard error a distributed matrix A.  Pieces
+ * not owned by process (0,0) are sent to it and printed there.
+ * HPL_pabort is called if (0,0) has no buffer to receive a piece into.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* .. Local Variables .. */
+   MPI_Comm                   Acomm;
+   double                     * buf = NULL;
+   int                        h, i, ib, icurcol=IACOL, icurrow=IAROW,
+                              ii=0, j, jb, jj=0, mycol, myrow, npcol,
+                              nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm;
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+/* buf stays NULL on every process other than (0,0). */
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+/* Walk global column j+h in row blocks of at most NB entries. */
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+               {
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               }
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+/* Check buf before HPL_recv writes ib entries into it. */
+                  if( buf == NULL )
+                  { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i,
+                                   Acomm );
+                  HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW;
+      }
+      if( mycol == icurcol ) jj += jb;
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.  The text built from FORM is
+ * truncated to what fits in the 128-byte local buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+/* vsnprintf never writes past cline; an over-long message is cut short. */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o
new file mode 100644
index 000000000..d84ffda98
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o
new file mode 100644
index 000000000..3108f50af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o
new file mode 100644
index 000000000..be595015b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o
new file mode 100644
index 000000000..93be20fff
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o
new file mode 100644
index 000000000..590fd4d1d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o
new file mode 100644
index 000000000..0a3bb8457
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o
new file mode 100644
index 000000000..087a0c1e5
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o
new file mode 100644
index 000000000..9dd386467
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o
new file mode 100644
index 000000000..06f5cfb2f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o
new file mode 100644
index 000000000..39f157054
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o
new file mode 100644
index 000000000..4eb581ad9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o
new file mode 100644
index 000000000..695696633
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o
new file mode 100644
index 000000000..9e61ce691
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o
new file mode 100644
index 000000000..e01b53375
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o
new file mode 100644
index 000000000..47d7464fe
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o
new file mode 100644
index 000000000..c7fde1dcc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o
new file mode 100644
index 000000000..3fc06a373
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o
new file mode 100644
index 000000000..d1e9791b3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o
new file mode 100644
index 000000000..5c9ee9fd6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o
new file mode 100644
index 000000000..89b5cfa00
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o
new file mode 100644
index 000000000..a59aca124
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o
new file mode 100644
index 000000000..c7731580e
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o
new file mode 100644
index 000000000..3dc00e880
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o
new file mode 100644
index 000000000..ede794de4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o
new file mode 100644
index 000000000..1313518bd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to provide the variables used but never set in this file (INCdir, CC, CCFLAGS, ARCHIVER, ...).
+# ######################################################################
+# Headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+# Objects handed to $(ARCHIVER) together with $(HPLlib) by the lib.grd rule.
+HPL_pauobj       = \
+   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
+   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
+   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
+   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
+   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
+   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
+   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
+   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+#
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib clean
+# lib.grd is a stamp file, touched after the archive step so that step reruns only when an object is newer.
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#
+HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_infog2l.o : ../HPL_infog2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numroc.o : ../HPL_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numrocI.o : ../HPL_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pwarn.o : ../HPL_pwarn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pabort.o : ../HPL_pabort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlange.o : ../HPL_pdlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+# Delete every *.o and *.grd file (including the lib.grd stamp) in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c
new file mode 100644
index 000000000..644641412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c
@@ -0,0 +1,149 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax  searches the local piece of the current panel column for
+ * its maximum entry and reports the outcome through WORK[0:3]:
+ *
+ *    WORK[0]  the selected entry itself, exactly as stored in A,
+ *    WORK[1]  its local row index, relative to the column start,
+ *    WORK[2]  the corresponding global row index,
+ *    WORK[3]  the row coordinate (myrow) of the calling process.
+ *
+ * When N is less than 1 there is nothing to search:  WORK[0:2] are set
+ * to zero and WORK[3] is set to the total number of process rows.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         Panel data structure;  the fields  A, lda, ii, nb  and  grid
+ *         (myrow, nprow) are read here.
+ *
+ * N       (local input)                 const int
+ *         Local number of rows of the column of A that is searched.
+ *
+ * II      (local input)                 const int
+ *         Row offset, within the panel, where that column starts.
+ *
+ * JJ      (local input)                 const int
+ *         Column offset, within the panel, of that column.
+ *
+ * WORK    (local workspace)             double *
+ *         Array of size at least 4, filled on exit as listed above.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * col;
+   int                        gidx, lidx, lrow, myrow, nb, nprow;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 )
+   {
+/*
+ * No local row of A:  set the owner coordinate  WORK[3]  to nprow so that
+ * this "ghost" row will never be used,  even if the current column of A
+ * holds only zeros.
+ */
+      WORK[0] = HPL_rzero;  WORK[1] = HPL_rzero;
+      WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+      return;
+   }
+/*
+ * Gather what is needed from the panel and point at the column start.
+ */
+   myrow = PANEL->grid->myrow;
+   nprow = PANEL->grid->nprow;
+   nb    = PANEL->nb;
+   col   = Mptr( PANEL->A, II, JJ, PANEL->lda );
+/*
+ * Index of the max within the column,  the local row it corresponds to,
+ * and from that the global row index.
+ */
+   lidx  = HPL_idamax( N, col, 1 );
+   lrow  = PANEL->ii + II + lidx;
+   Mindxl2g( gidx, lrow, nb, nb, myrow, 0, nprow );
+/*
+ * Publish value, local index, global index and owner coordinate.
+ */
+   WORK[0] = col[lidx];        WORK[1] = (double)(lidx);
+   WORK[2] = (double)(gidx);   WORK[3] = (double)(myrow);
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth and its base-2 logarithm (used below as a shift count); a build that overrides HPL_LOCSWP_DEPTH must define HPL_LOCSWP_LOG2_DEPTH as well.
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, JJ, 0, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A,  II,     0, lda );
+               A2 = Mptr( A1,        ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A,  II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * set INFO.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth and its log2; predefine both or neither.
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpT
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpT performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * transpose form.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH ); /* n0 floored to 2^LOG2 */
+   nr    = n0 - nu;            /* entries left for the clean-up loops */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, 0, JJ, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II,     0, lda );
+               A2 = Mptr( A1,       ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
+                    L   += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
+                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
+                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
+                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
+                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
+                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
+                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
+                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
+                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
+                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
+                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
+                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
+                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
+                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
+                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
+                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
+                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
+                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
+                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
+                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
+                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
+                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
+                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
+                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
+                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
+                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
+                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
+#endif
+               }
+
+               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
+               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
+                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
+                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
+                  L[10]=Wmx[10]; L[14]=Wmx[14];
+                  L[11]=Wmx[11]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=Wmx[16]; L[20]=Wmx[20];
+                  L[17]=Wmx[17]; L[21]=Wmx[21];
+                  L[18]=Wmx[18]; L[22]=Wmx[22];
+                  L[19]=Wmx[19]; L[23]=Wmx[23];
+                  L[24]=Wmx[24]; L[28]=Wmx[28];
+                  L[25]=Wmx[25]; L[29]=Wmx[29];
+                  L[26]=Wmx[26]; L[30]=Wmx[30];
+                  L[27]=Wmx[27]; L[31]=Wmx[31];
+#endif
+               }
+               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+            {
+               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda;
+               L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda;
+               L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda;
+               L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda;
+               L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda;
+               L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda;
+               L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda;
+               L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda;
+               L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda;
+               L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda;
+               L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda;
+               L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } 
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+         {
+            L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11];
+            L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19];
+            L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23];
+            L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27];
+            L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31];
+#endif
+         }
+         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+      {
+         L[ 0]=Wr0[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         L[ 1]=Wr0[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
+         L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21];
+         L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23];
+         L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29];
+         L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31];
+#endif
+      }
+      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
+/*
+ * Set INFO.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c
new file mode 100644
index 000000000..1d99c6e14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c
@@ -0,0 +1,141 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdfact
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdfact
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdfact is the entry point for the recursive factorization of a
+ * 1-dimensional panel of columns.  RPFACT is the function pointer that
+ * selects the recursive variant  (Crout, Left- or Right-looking).  The
+ * recursion stops according to  NBMIN,  a threshold on  the number  of
+ * columns in the panel,  while  NDIV  sets how many subpanels a panel
+ * is split into  (usually 2).  PFACT  is the function pointer for the
+ * non-recursive algorithm  (Crout, Left- or Right-looking as well) run
+ * on at most  NBMIN  columns.  Empirical tests  suggest that  NBMIN = 4
+ * or 8 gives the best results.
+ *
+ * The swap and broadcast operations for one panel column are done at
+ * once  with a  bi-directional exchange,  giving  fewer  but  slightly
+ * larger messages than usual.  With P processes, bi-directional links
+ * and N equal to N0, the running time is approximately:
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * Here M is the local number of panel rows,  lat and bdwth are network
+ * latency and bandwidth for double precision real words, and  gam2-3
+ * estimates the Level 2 and Level 3 BLAS execution rate.  Thanks to the
+ * recursion the panel factorization nearly reaches Level 3 BLAS speed.
+ * On many modern machines the operation is nonetheless latency bound,
+ * so that its cost is roughly  N0 * log_2(P) * lat;  mono-directional
+ * links double this communication cost.
+ *
+ * This routine itself updates the panel counters, returns early unless
+ * grid->mycol equals PANEL->pcol and jb > 0,  allocates the workspace,
+ * calls PANEL->algo->rffun on its aligned start and frees it again.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   void                       * rawbuf = NULL;
+   double                     * work;
+   size_t                     nwords;
+   int                        nalign, ncols;
+/* .. Executable Statements .. */
+   ncols = PANEL->jb; PANEL->n -= ncols; PANEL->ja += ncols;
+   if( !( ( PANEL->grid->mycol == PANEL->pcol ) && ( ncols > 0 ) ) )
+      return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+   nalign = PANEL->algo->align;
+   nwords = (size_t)(nalign) +
+            (size_t)( ( 4 + ( (unsigned int)(ncols) << 1 ) ) << 1 );
+   rawbuf = (void *)malloc( nwords * sizeof(double) );
+   if( rawbuf == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
+/*
+ * Run the recursive factorization on the aligned workspace, then advance
+ * the panel pointers past the columns just factored.
+ */
+   work = (double *)HPL_PTR( rawbuf, ( (size_t)(nalign) * sizeof(double) ) );
+   PANEL->algo->rffun( PANEL, PANEL->mp, ncols, 0, work );
+   if( rawbuf != NULL ) free( rawbuf );
+   PANEL->A   = Mptr( PANEL->A, 0, ncols, PANEL->lda );
+   PANEL->jj += ncols; PANEL->nq -= ncols;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * End of HPL_pdfact
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c
new file mode 100644
index 000000000..b14452197
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c
@@ -0,0 +1,311 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmxswp
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_pdmxswp
+( PANEL, M, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmxswp swaps  and  broadcasts  the  absolute value max row using
+ * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by
+ *  
+ *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ *  
+ * where  lat and bdwth are the latency and bandwidth of the network for
+ * double precision real elements.  Communication  only  occurs  in  one
+ * process  column. Mono-directional links  will cause the communication
+ * cost to double.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of the matrix
+ *         column on which this function operates.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
+ *         routine to  initialize  the first four entries of this array.
+ *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+ *         Note that this is also the  JJth  row  (or column) of L1. The
+ *         remaining part is used as a temporary array.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax, tmp1;
+   double                     * A0, * Wmx, * Wwork;
+   HPL_T_grid                 * grid;
+   MPI_Comm                   comm;
+   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
+   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
+                              mydis_, myrow, n0, nprow, partner, rcnt,
+                              root, scnt, size_;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
+/*
+ * ip2  : the largest power of two less than or equal to nprow;
+ * hdim : dimension of the hypercube made of those ip2 processes;
+ * Np2  : logical flag, nonzero when nprow is not a power of 2;
+ */
+   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
+   hdim    = (unsigned int)(grid->row_hdim);     n0  = PANEL->jb;
+   icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 );
+   mydist  = MModSub( myrow, icurrow, nprow );
+/*
+ * Set up pointers in workspace:  WORK and Wwork  point to the beginning
+ * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
+ * owning the local (before combine) and global (after combine) absolute
+ * value max. A0 points to the copy of the current row of the matrix.
+ */
+   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
+   Wwork = WORK + cnt0;
+/*
+ * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
+ * with max in current column). If I am the current process row, pack in
+ * addition the current row of A in A0[0:N0-1].  If I do not own any row
+ * of A, then zero out Wmx[0:N0-1].
+ */
+   if( M > 0 )
+   {
+      lda = PANEL->lda;
+      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
+                 Wmx, 1 );
+      if( myrow == icurrow )
+      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
+   }
+   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
+/*
+ * Combine the results (bi-directional exchange):  the process coordina-
+ * tes are relative to  icurrow,  which  helps reduce the  communication
+ * volume when nprow is not a power of 2.
+ *
+ * When nprow is not a power of 2:  proc[i-ip2] receives local data from
+ * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
+ * sends to proc[ip2] the current row of A  for later broadcast in procs
+ * [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         if( mydist == (int)(ip2) )
+            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         if( mydist == 0 )
+            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+/* Larger |[0]| wins; on a tie, the buffer with the smaller [3] wins.  */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
+      }
+   }
+
+   if( mydist < (int)(ip2) )
+   {
+/*
+ * power of 2 part of the processes collection: processes  [0..ip2)  are
+ * combining (binary exchange); proc[0] has two rows to send, but one to
+ * receive.  At every step  k  in [0..hdim) of the algorithm,  a process 
+ * pair exchanging 2 rows is such that mydist >> k+1 is 0.  Among  those
+ * processes the ones  that are sending one more row than  what they are
+ * receiving are such that mydist >> k is equal to 0.
+ */
+      k = 0; ipow = 1;
+ 
+      while( k < hdim )
+      {
+         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
+         {
+            if( ( (unsigned int)(mydist) >> k ) == 0 )
+            { scnt = cnt0; rcnt = cnt_; }
+            else
+            { scnt = cnt_; rcnt = cnt0; }
+         }
+         else { scnt = rcnt = cnt_; }
+ 
+         partner = (int)( (unsigned int)(mydist) ^ ipow );
+         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
+                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
+                          nprow ), comm );
+/* Same rule; if WORK is kept, still take the trailing n0 entries in A0. */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         {
+            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
+                       WORK, 1 );
+         }
+         else if( rcnt == cnt0 )
+         { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); }
+ 
+         ipow <<= 1; k++;
+      }
+   }
+   else if( size_ > 1 )
+   {
+/*
+ * proc[ip2] broadcasts the current row of A to procs [ip2+1..nprow).
+ */
+      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
+      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+ 
+      root   = MModAdd( icurrow, (int)(ip2), nprow );
+      mydis_ = MModSub( myrow,   root,       nprow );
+ 
+      do
+      {
+         mask ^= ip2_;
+         if( ( mydis_ & mask ) == 0 )
+         {
+            partner = (int)(mydis_ ^ ip2_);
+            if( ( mydis_ & ip2_ ) != 0 )
+            {
+               (void) HPL_recv( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+            else if( partner < size_ )
+            {
+               (void) HPL_send( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+         }
+         ip2_ >>= 1;
+      } while( ip2_ > 0 );
+   }
+/*
+ * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
+ * sends the pivot row to proc[i]  along  with the first four entries of
+ * the WORK array.
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+   }
+/*
+ * Save the global pivot index in pivot array
+ */
+   (PANEL->DPIV)[JJ] = WORK[2];
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+/*
+ * End of HPL_pdmxswp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c
new file mode 100644
index 000000000..4ea170b73
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in no-transpose form (i.e. just like the input
+ * matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of the main loop  is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj, jj+1, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1,  kk, Nm1 );
+         Xv1 = vsip_msubview_d( Xv0, jj,    ICOFF, 1,  kk  );
+         Yv1 = vsip_msubview_d( Xv0, jj,    jj+1,  1,  Nm1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 ); 
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
+                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
+                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,          jj+1,            kk+1,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      vsip_mdestroy_d( Yv1 );
+      vsip_mdestroy_d( Xv1 );
+      vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
+                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ),
+                 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c
new file mode 100644
index 000000000..50ed300aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c
@@ -0,0 +1,267 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of the main loop  is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj+1, jj, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, jj+1,  ICOFF, Nm1, kk );
+         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj,    kk,   1 );
+         Yv1 = vsip_msubview_d( Xv0, jj+1,  jj,    Nm1,  1 );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
+                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
+                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,           ICOFF,           1,   kk+1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF,
+                 n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c
new file mode 100644
index 000000000..fa471198d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column.  This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr is nonzero when this process row (grid->myrow) equals PANEL->prow */
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/* kk = number of columns ICOFF..jj; order-kk HplLower/HplNoTrans/HplUnit dtrsv on L1ptr */
+      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, 
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr,  1 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,        jj+1,              kk,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr,  1, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c
new file mode 100644
index 000000000..a6e1b67bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column.  This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr is nonzero when this process row (grid->myrow) equals PANEL->prow */
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/* kk = number of columns ICOFF..jj; order-kk HplUpper/HplTrans/HplUnit dtrsv on L1ptr, stride n0 */
+      L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans,   HplUnit, kk,
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,         ICOFF,             1,   kk );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c
new file mode 100644
index 000000000..0a3b9a542
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c
@@ -0,0 +1,250 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlN factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed allows one to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main  loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )   /* the last column is handled after the loop */
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+#ifdef HPL_CALL_VSIPL
+      if( Nm1 > 1 )
+      {
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+      }
+#else
+      if( Nm1 > 1 )
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda );
+#endif
+/*
+ * Same thing as above but with worse data access on y (A += x * y^T)
+ *
+ *    if( Nm1 > 1 )
+ *       HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+ *                 Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ),
+ *                 lda );
+ */  
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c
new file mode 100644
index 000000000..68c1afc02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlT factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed allows one to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main  loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt, * L1;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M,
+                              n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )   /* the last column is handled after the loop */
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+
+      if( Nm1 > 1 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); 
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ),
+                   lda );
+#endif
+      }
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c
new file mode 100644
index 000000000..348d7ebe6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrN recursively  factorizes  a panel of columns  using  the
+ * recursive  Crout  variant of the usual one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed allows one to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )   /* recursion base case */
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrN( PANEL, m, jb, ioff, WORK );   /* recursive call */
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff,  ICOFF,   jb, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff,  ioff+jb, jb,  n );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj,  n );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n,
+                    jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, 
+                    Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c
new file mode 100644
index 000000000..a1ecfac2c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrT recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+ * The lower triangular N0-by-N0  upper block of the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  (N <= NBMIN: hand off to pffun and return)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN  (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews (row offset into A depends on curr)
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                   VSIP_MAT_TRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrT( PANEL, m, jb, ioff, WORK );
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 );
+/*
+ * Create the matrix subviews (all three are taken from the L1 view)
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff,  n, jb );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF,   ioff, jj, jb );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                      VSIP_MAT_NTRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb,
+                    jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                    Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c
new file mode 100644
index 000000000..4dbc13b44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllN recursively  factorizes  a panel  of columns using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  (N <= NBMIN: hand off to pffun and return)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN  (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit,
+                 jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ),
+                 n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av1 is m x jj, Lv1 is jj x jb, Av2 is m x jb
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m,  jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllN( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c
new file mode 100644
index 000000000..887caeb87
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllT recursively  factorizes  a panel of columns  using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  (N <= NBMIN: hand off to pffun and return)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN  (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr,
+                 jj, 0, n0 ), n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av1 is m x jj, Lv1 is jb x jj, Av2 is m x jb
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb,  jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av1 );
+      (void) vsip_mdestroy_d( Av2 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllT( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
new file mode 100644
index 000000000..22f105cf4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlN recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  indeed  makes it possible
+ * to almost achieve Level 3 BLAS performance in the panel factorization.
+ * On a large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  Base case: when N <= NBMIN, hand the
+ * panel to PANEL->algo->pffun and return without recursing. */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                 HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av1 is m x jb, Lv1 is jb x n, Av2 is m x n
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
new file mode 100644
index 000000000..a77301b9b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlT recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  indeed  makes it possible
+ * to almost achieve Level 3 BLAS performance in the panel factorization.
+ * On a large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  Base case: when N <= NBMIN, hand the
+ * panel to PANEL->algo->pffun and return without recursing. */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av1 is m x jb, Lv1 is n x jb, Av2 is m x n
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o
new file mode 100644
index 000000000..80c7da494
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o
new file mode 100644
index 000000000..6402eb6b7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o
new file mode 100644
index 000000000..d1d72ab3d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o
new file mode 100644
index 000000000..defc0a050
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o
new file mode 100644
index 000000000..ff0ce4cec
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o
new file mode 100644
index 000000000..2ed4cbf13
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o
new file mode 100644
index 000000000..f461a1bca
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o
new file mode 100644
index 000000000..1f5cd25a8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o
new file mode 100644
index 000000000..d2422b8ed
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o
new file mode 100644
index 000000000..21641a08b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o
new file mode 100644
index 000000000..4b5c0fbad
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o
new file mode 100644
index 000000000..e74bf6712
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o
new file mode 100644
index 000000000..c6fc53453
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o
new file mode 100644
index 000000000..9581736c6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o
new file mode 100644
index 000000000..83de419e2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o
new file mode 100644
index 000000000..1d1f5c17a
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o
new file mode 100644
index 000000000..cda3fd920
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# (Make.inc presumably sets INCdir, HPLlib, CC, CCFLAGS, ARCHIVER, ...)
+# ######################################################################
+# Headers listed as prerequisites of every object file below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+# Objects archived into $(HPLlib) by the lib.grd rule.
+HPL_pfaobj       = \
+   HPL_dlocmax.o          HPL_dlocswpN.o         HPL_dlocswpT.o         \
+   HPL_pdmxswp.o          HPL_pdpancrN.o         HPL_pdpancrT.o         \
+   HPL_pdpanllN.o         HPL_pdpanllT.o         HPL_pdpanrlN.o         \
+   HPL_pdpanrlT.o         HPL_pdrpanllN.o        HPL_pdrpanllT.o        \
+   HPL_pdrpancrN.o        HPL_pdrpancrT.o        HPL_pdrpanrlN.o        \
+   HPL_pdrpanrlT.o        HPL_pdfact.o
+#
+## Targets #############################################################
+.PHONY           : all lib clean
+all              : lib
+# all, lib and clean name actions, not files, hence the .PHONY above.
+lib              : lib.grd
+# lib.grd is a stamp file, touched once the archive has been updated.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object, compiling the matching source in "..".
+HPL_dlocmax.o          : ../HPL_dlocmax.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocmax.c
+HPL_dlocswpN.o         : ../HPL_dlocswpN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpN.c
+HPL_dlocswpT.o         : ../HPL_dlocswpT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpT.c
+HPL_pdmxswp.o          : ../HPL_pdmxswp.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmxswp.c
+HPL_pdpancrN.o         : ../HPL_pdpancrN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrN.c
+HPL_pdpancrT.o         : ../HPL_pdpancrT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrT.c
+HPL_pdpanllN.o         : ../HPL_pdpanllN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllN.c
+HPL_pdpanllT.o         : ../HPL_pdpanllT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllT.c
+HPL_pdpanrlN.o         : ../HPL_pdpanrlN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlN.c
+HPL_pdpanrlT.o         : ../HPL_pdpanrlT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlT.c
+HPL_pdrpanllN.o        : ../HPL_pdrpanllN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllN.c
+HPL_pdrpanllT.o        : ../HPL_pdrpanllT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllT.c
+HPL_pdrpancrN.o        : ../HPL_pdrpancrN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrN.c
+HPL_pdrpancrT.o        : ../HPL_pdrpancrT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrT.c
+HPL_pdrpanrlN.o        : ../HPL_pdrpanrlN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlN.c
+HPL_pdrpanrlT.o        : ../HPL_pdrpanrlT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlT.c
+HPL_pdfact.o           : ../HPL_pdfact.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdfact.c
+#
+# ######################################################################
+# Remove the objects and stamp files (*.o, *.grd) from this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c
new file mode 100644
index 000000000..b917a6525
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_equil
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_TRANS             TRANS,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   int *                            IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1,
+   int *                            IWORK
+)
+#else
+void HPL_equil
+( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_TRANS             TRANS;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   int *                            IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_equil equilibrates  the  local  pieces  of U, so that on exit to
+ * this function, pieces of U contained in every process row are of the
+ * same size. This phase makes the rolling phase optimal.  In addition,
+ * this  function probes  for  the  column panel L and forwards it when
+ * possible.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be equilibrated) information.
+ *
+ * TRANS   (global input)                const enum HPL_TRANS
+ *         On entry, TRANS specifies whether  U  is stored in transposed
+ *         or non-transposed form.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of rows or columns of  U. N
+ *         must be at least 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+ *         non-transposed form, and MAX(1,N) otherwise.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROW+1, such that
+ *         IPLEN[i+1] - IPLEN[i] is the number of rows of U in process
+ *         IPMAP[i]. IPLEN[0..NPROW-1] is updated if U is equilibrated.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension NPROW+1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, ip, ipU, ipcur, iprow, iptgt, lastrow,
+                              left, npm1, nprow, ll, llU, llcur, lltgt,
+                              right, slen, smax, smin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; /* nprow <= 2 */
+/*
+ * If the current distribution of the pieces of U is already optimal for
+ * the rolling phase, then return immediately.  The optimal distribution
+ * is such that ip processes have smax items and the remaining processes
+ * only have smin items (smin = slen / nprow, ip = slen - nprow * smin).
+ * This is checked by verifying IPLEN[i+1] - IPLEN[i] is smin or smax.
+ */
+   smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow;
+   ip   = slen - nprow * ( smin = slen / nprow );
+
+   iprow = 0;
+   do
+   {
+      ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++;
+   } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) );
+
+   if( iprow == nprow ) return;
+/*
+ * Now,  we are sure  the distribution of the pieces of U is not optimal
+ * with respect to the rolling phase,  thus  perform  equilibration.  Go
+ * through the list of processes:  Processes  that have rows that do not
+ * belong to them  with respect to the optimal mapping spread them  in a
+ * logarithmic fashion. To simplify a little bit the implementation, and
+ * mainly the packing, a source process row spreads its data to its left
+ * first, and then to its right.
+ */
+   IWORK[nprow] = slen;
+
+   for( iprow = 0; iprow < nprow; iprow++ )
+   {
+      llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] );
+      if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax;      }
+      else             { lltgt = smin; iptgt = iprow * smin + ip; }
+
+      left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU );
+/*
+ * If I have something to spread to either the left or the right
+ */
+      if( ( llU > 0 ) && ( left || right ) )
+      {        /* Figure out how much every other process should have */
+
+         ipcur = ipU; llcur = llU;
+
+         for( i = 0; i < nprow; i++ )
+         {
+            if( i < ip ) { lltgt = smax; iptgt = i * smax;      }
+            else         { lltgt = smin; iptgt = i * smin + ip; }
+            lastrow = iptgt + lltgt - 1;
+
+            if( ( lastrow >= ipcur ) && ( llcur > 0 ) )
+            { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; }
+            else { ll = 0; }
+
+            IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur;
+         }
+/*
+ * Equilibration phase
+ */
+         if( TRANS == HplNoTrans )
+         {
+            if( left  )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+         else
+         {
+            if( left  )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+      }
+   }
+/*
+ * Finally update  IPLEN  with the indexes corresponding to the new dis-
+ * tribution of U - IPLEN[nprow] remained unchanged.
+ */
+   for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip );
+/*
+ * End of HPL_equil
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c
new file mode 100644
index 000000000..0715159bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c
@@ -0,0 +1,185 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_logsort
+(
+   const int                        NPROCS,
+   const int                        ICURROC,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_logsort
+( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
+   const int                        NPROCS;
+   const int                        ICURROC;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_logsort builds the process map  IPMAP  and its inverse  IPMAPM1,
+ * ordering the processes logarithmically with respect to the number of
+ * rows of  U  each of them owns.  This ordering is what makes the loga-
+ * rithmic spreading of  U  optimal, both in number of steps and in com-
+ * munication volume:  the largest pieces of  U  are sent  the  smallest
+ * number of times.
+ *
+ * Arguments
+ * =========
+ *
+ * NPROCS  (global input)                const int
+ *         Number of process rows in the process grid, at least one.
+ *
+ * ICURROC (global input)                const int
+ *         The source process row.
+ *
+ * IPLEN   (global input/output)         int *
+ *         Array of dimension NPROCS+1.  On entry,  IPLEN[0]  is 0  and
+ *         IPLEN[i] is the number of rows of U owned by process i-1. On
+ *         exit, IPLEN holds running totals: IPLEN[i] is  the number of
+ *         rows of U in the processes that precede process IPMAP[i] in
+ *         the sorted order, and IPLEN[NPROCS] is  the total  number of
+ *         rows of the panel.  Hence  IPLEN[i+1] - IPLEN[i]  is the row
+ *         count attached to process IPMAP[i];  the source process row
+ *         comes first (IPLEN[1] - IPLEN[0]) and the remaining  entries
+ *         are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         Array of dimension  NPROCS.  On exit, it contains  the loga-
+ *         rithmic mapping of the processes:  IPMAP[i] is the process
+ *         coordinate found at sorted position i.
+ *
+ * IPMAPM1 (global output)               int *
+ *         Array of dimension NPROCS, used as workspace during the sort.
+ *         On exit,  it is the inverse of IPMAP:  IPMAPM1[ IPMAP[i] ] = i
+ *         for all i in [0.. NPROCS).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        cnt, len_o, len_p, lvl, maxlvl, other,
+                              pos, rem, shift, tmp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Initialize IPMAP so that IPMAP[0] is ICURROC.  IPMAPM1 is a workarray
+ * here: IPMAPM1[pos] receives the number of bits set in pos (the loga-
+ * rithmic distance between process pos and process 0), and  maxlvl  the
+ * largest such distance.
+ */
+   maxlvl = 0;
+   for( pos = 0; pos < NPROCS; pos++ )
+   {
+      IPMAP[pos] = MModAdd( pos, ICURROC, NPROCS );
+
+      cnt = 0;
+      for( rem = pos; rem != 0; rem >>= 1 ) { if( rem & 1 ) cnt++; }
+
+      IPMAPM1[pos] = cnt;
+      if( maxlvl < cnt ) maxlvl = cnt;
+   }
+/*
+ * Rotate IPLEN[1..NPROCS] left by ICURROC places, one place per pass, so
+ * that IPLEN[1] is now what used to be IPLEN[ICURROC+1].
+ */
+   for( shift = 0; shift < ICURROC; shift++ )
+   {
+      tmp = IPLEN[1];
+      for( pos = 1; pos < NPROCS; pos++ ) IPLEN[pos] = IPLEN[pos+1];
+      IPLEN[NPROCS] = tmp;
+   }
+/*
+ * Logarithmic sort:  for every position pos at distance lvl,  exchange
+ * its entry with any entry at a larger distance that has more rows.
+ */
+   for( lvl = 1; lvl <= maxlvl; lvl++ )
+   {
+      for( pos = 1; pos < NPROCS; pos++ )
+      {
+         if( IPMAPM1[pos] != lvl ) continue;
+
+         for( other = 2; other < NPROCS; other++ )
+         {
+            if( IPMAPM1[other] <= lvl ) continue;
+
+            len_o = IPLEN[other+1]; len_p = IPLEN[pos+1];
+            if( len_p >= len_o ) continue;
+
+            IPLEN[pos+1] = len_o;      IPLEN[other+1] = len_p;
+            tmp          = IPMAP[pos]; IPMAP[pos]     = IPMAP[other];
+            IPMAP[other] = tmp;
+         }
+      }
+   }
+/*
+ * Turn IPLEN into running totals and fill IPMAPM1 (the inverse of IPMAP)
+ */
+   IPLEN[0] = 0;
+
+   for( pos = 0; pos < NPROCS; pos++ )
+   {
+      IPMAPM1[ IPMAP[pos] ] = pos;
+      IPLEN[pos+1]         += IPLEN[pos];
+   }
+/*
+ * End of HPL_logsort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv.c
new file mode 100644
index 000000000..ced74269e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv  drives  the  solve:  it runs  the  LU factorization with
+ * row partial pivoting ("right looking" variant, with or without look-
+ * ahead) and then the upper triangular solve. Nothing is done when A->n
+ * is not positive; the triangular solve is skipped when A->info is non-
+ * zero after the factorization.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         Process grid information; GRID->npcol selects the variant.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         Algorithmic parameters; ALGO->depth selects the variant.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         Local array information; A->info is reset to 0 before the
+ *         factorization is started.
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( A->n <= 0 ) return;
+
+   A->info = 0;
+/*
+ * HPL_pdgesvK2 is used only when depth != 0 and npcol != 1
+ */
+   if( ( ALGO->depth != 0 ) && ( GRID->npcol != 1 ) )
+   {
+      HPL_pdgesvK2( GRID, ALGO, A );
+   }
+   else
+   {
+      HPL_pdgesv0(  GRID, ALGO, A );
+   }
+/*
+ * Solve upper triangular system, only if A->info is still 0
+ */
+   if( A->info != 0 ) return;
+   HPL_pdtrsv( GRID, A );
+/*
+ * End of HPL_pdgesv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c
new file mode 100644
index 000000000..d79b6fa55
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv0
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv0
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * without look-ahead. The lower triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, j, jb, n, nb, tag=MSGID_BEGIN_FACT,
+                              test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N = A->n ) <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+ 
+   HPL_pdupdate = ALGO->upfun; nb = A->nb;
+/*
+ * Allocate a panel list of length 1 - Allocate panel[0] resources
+ */
+   panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); }
+
+   HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag,
+                    &panel[0] );
+/*
+ * Loop over the columns of A
+ */
+   for( j = 0; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Release panel resources - re-initialize panel data structure
+ */
+      (void) HPL_pdpanel_free( panel[0] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] );
+/*
+ * Factor and broadcast current panel - update
+ */
+      HPL_pdfact(               panel[0] );
+      (void) HPL_binit(         panel[0] );
+      do
+      { (void) HPL_bcast(       panel[0], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(         panel[0] );
+      HPL_pdupdate( NULL, NULL, panel[0], -1 );
+/*
+ * Update message id for next factorization
+ */
+      tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Release panel resources and panel list
+ */
+   (void) HPL_pdpanel_disp( &panel[0] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesv0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
new file mode 100644
index 000000000..ff1958cfc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
@@ -0,0 +1,222 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+#ifdef STDC_HEADERS
+void HPL_pdgesvK1
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK1
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb; 
+
+   if( N <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel - use long topology for those
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-1-k panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Allocate current panel resources - Finish latest update - Factor and
+ * broadcast current panel
+ */
+      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
+ 
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(    panel[depth] );       /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Release latest panel resources - circular  of the panel pointers
+ * Go to the next process row and column -  update  the message ids  for
+ * broadcast
+ */
+      (void) HPL_pdpanel_disp( &panel[0] );
+      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+ 
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp( &panel[k] );
+   }
+ 
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
new file mode 100644
index 000000000..dec506ab9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
@@ -0,0 +1,231 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesvK2
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK2
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * p, * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate; 
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb;
+
+   if( N <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Create last depth+1 panel
+ */
+   HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart,
+                    jstart, tag, &panel[depth] );
+   tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-k-1 panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Initialize current panel - Finish latest update, Factor and broadcast
+ * current panel
+ */
+      (void) HPL_pdpanel_free( panel[depth] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] );
+
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(       panel[depth] );    /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate( panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Circular  of the panel pointers:
+ * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp;
+ *
+ * Go to next process row and column - update the message ids for broadcast
+ */
+      p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+      panel[depth] = p;
+
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp(  &panel[k] );
+   }
+   (void) HPL_pdpanel_disp( &panel[depth] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK2
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
new file mode 100644
index 000000000..b4433e1be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
@@ -0,0 +1,432 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  jb      /* leading dimension passed with U */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do (taken before the timer starts)
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;   /* n entries + 1 leading slot for the offset in U */
+/*
+ * Allocate space for temporary W: ldW * jb doubles plus align of slack
+ */
+   vptr = (void*)malloc( 
+      ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts  lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of the algorithm, it is necessary to update the list 
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen - row icurrow+i absorbs the count of row icurrow+i+ip2
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );         /* releases the W workspace buffer */
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
new file mode 100644
index 000000000..7a9764c09
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
@@ -0,0 +1,433 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  n       /* leading dimension passed with U */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do (taken before the timer starts)
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;   /* n entries + 1 leading slot for the offset in U */
+/*
+ * Allocate space for temporary W: ldW * jb doubles plus align of slack
+ */
+   vptr = (void*)malloc( ( (size_t)(align) + 
+                           ((size_t)(jb) * (size_t)(ldW))) * 
+                           sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts  lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else         /* note: packing into W goes through the 02N routine */
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of the algorithm, it is necessary to update the list 
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen - row icurrow+i absorbs the count of row icurrow+i+ip2
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );         /* releases the W workspace buffer */
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
new file mode 100644
index 000000000..31f219840
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;  /* -1: not yet read from PANEL */
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  jb  /* passed along with U in every call */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do (taken before the timer starts)
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether equilibration is performed (read once, then cached)
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  The index arrays
+ * are carved out of PANEL->IWORK in order:  iflag(1), ipl(1), ipID(4*jb),
+ * ipA(1), lindxA(2*jb), lindxAU(2*jb), iplen(nprow+1), ipmap(nprow),
+ * ipmapm1(nprow), permU(jb), iwork(max(2*jb (plindx1), nprow+1 (equil))):
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01N was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;                  /* already 1 here; value is unchanged */
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];      /* k is reused: now an index into iplen */
+      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
+                     0, LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
+                 ipmap, ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp00N( jb, n, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
new file mode 100644
index 000000000..0c4de2669
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;  /* -1: not yet read from PANEL */
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  n   /* passed along with U in every call */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do (taken before the timer starts)
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether equilibration is performed (read once, then cached)
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  The index arrays
+ * are carved out of PANEL->IWORK in order:  iflag(1), ipl(1), ipID(4*jb),
+ * ipA(1), lindxA(2*jb), lindxAU(2*jb), iplen(nprow+1), ipmap(nprow),
+ * ipmapm1(nprow), permU(jb), iwork(max(2*jb (plindx1), nprow+1 (equil))):
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01T was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;                  /* already 1 here; value is unchanged */
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];      /* k is reused: now an index into iplen */
+      HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0,
+                     iplen[k], LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap,
+                 ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp10N( n, jb, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c
new file mode 100644
index 000000000..d2135130a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c
@@ -0,0 +1,296 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtrsv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_pmat *                     AMAT
+)
+#else
+void HPL_pdtrsv
+( GRID, AMAT )
+   HPL_T_grid *                     GRID;
+   HPL_T_pmat *                     AMAT;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtrsv solves an upper triangular system of linear equations.
+ *  
+ * The rhs is the last column of the N by N+1 matrix A. The solve starts
+ * in the process  column owning the  Nth  column of A, so the rhs b may
+ * need to be moved one process column to the left at the beginning. The
+ * routine therefore needs  a column  vector in every process column but
+ * the one owning  b. The result is  replicated in all process rows, and
+ * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ *  
+ * The algorithm uses decreasing one-ring broadcast in process rows  and
+ * columns  implemented  in terms of  synchronous communication point to
+ * point primitives.  The  lookahead of depth 1 is used to minimize  the
+ * critical path. This entire operation is essentially ``latency'' bound
+ * and an estimate of its running time is given by:
+ *  
+ *    (move rhs) lat + N / ( P bdwth ) +            
+ *    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+ *               gam2 N^2 / ( P Q ),                
+ *  
+ * where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+ * There are  N / NB  diagonal blocks. One must exchange  2  messages of
+ * length NB to compute the next  NB  entries of the vector solution, as
+ * well as performing a total of N^2 floating point operations.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * AMAT    (local input/output)          HPL_T_pmat *
+ *         On entry,  AMAT  points  to the data structure containing the
+ *         local array information.  The solution is written to AMAT->X.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Ccomm, Rcomm;
+   double                     * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL,
+                              * XR=NULL, * Xd=NULL, * Xdprev=NULL,
+                              * W=NULL;
+   int                        Alcol, Alrow, Anpprev, Anp, Anq, Bcol,
+                              Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid,
+                              Wfr=0, colprev, kb, kbprev, lda, mycol,
+                              myrow, n, n1, n1p, n1pprev=0, nb, npcol,
+                              nprow, rowprev, tmp1, tmp2;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( n = AMAT->n ) <= 0 ) return; /* quick return, ahead of the timer so the HPL_ptimer calls stay paired */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+   nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X;
+
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV;
+   Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1;
+   GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 );
+/*
+ * Move the rhs in the process column owning the last column of A.
+ */
+   Mnumroc( Anp, n, nb, nb, myrow, 0, nprow );
+   Mnumroc( Anq, n, nb, nb, mycol, 0, npcol );
+
+   tmp1  = ( n - 1 ) / nb;
+   Alrow = tmp1 - ( tmp1 / nprow ) * nprow;
+   Alcol = tmp1 - ( tmp1 / npcol ) * npcol;
+   kb    = n    - tmp1 * nb;
+
+   Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda );
+   Mindxg2p( n, nb, nb, Bcol, 0, npcol );
+
+   if( ( Anp > 0 ) && ( Alcol != Bcol ) )
+   {
+      if( mycol == Bcol  )
+      { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); }
+      else if( mycol == Alcol )
+      { (void) HPL_recv( XC, Anp, Bcol,  Rmsgid, Rcomm ); }
+   }
+   Rmsgid = ( Rmsgid + 2 >
+              MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 );
+   if( mycol != Alcol )
+   { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; }
+/*
+ * Set up lookahead (W is the receive buffer for the partial updates)
+ */
+   n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb );
+   if( Anp > 0 )
+   {
+      W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) );
+      if( W == NULL )
+      { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); }
+      Wfr = 1;
+   }
+
+   Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda );
+   tmp1    = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+   if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); }
+   if( mycol == Alcol )
+   {
+      Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq );
+      if( myrow == Alrow )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, Xd, 1 );
+      }
+   }
+
+   rowprev = Alrow; Alrow = MModSub1( Alrow, nprow );
+   colprev = Alcol; Alcol = MModSub1( Alcol, npcol );
+   kbprev  = kb; n -= kb;
+   tmp1    = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+/*
+ * Start the operations
+ */
+   while( n > 0 )
+   {
+      if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; }
+      if( myrow == Alrow ) { Anp -= kb; }
+/*
+ * Broadcast  (decreasing-ring)  of  previous solution block in previous
+ * process column,  compute  partial update of current block and send it
+ * to current process column.
+ */
+      if( mycol == colprev )
+      {
+/*
+ * Send previous solution block in process row above
+ */
+         if( myrow == rowprev )
+         {
+            if( GridIsNot1xQ )
+               (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                                Cmsgid, Ccomm );
+         }
+         else
+         {
+            (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+         } 
+/*
+ * Compute partial update of previous solution block and send it to cur-
+ * rent column
+ */
+         if( n1pprev > 0 )
+         {
+            tmp1 = Anpprev - n1pprev;
+            HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev,
+                       -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone,
+                       XC+tmp1, 1 );
+            if( GridIsNotPx1 )
+               (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm );
+         }
+/*
+ * Finish  the (decreasing-ring) broadcast of the solution block in pre-
+ * vious process column
+ */
+         if( ( myrow != rowprev ) &&
+             ( myrow != MModAdd1( rowprev, nprow ) ) )
+            (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+      }
+      else if( mycol == Alcol )
+      {
+/*
+ * Current  column  receives  and accumulates partial update of previous
+ * solution block
+ */
+         if( n1pprev > 0 )
+         {
+            (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm );
+            HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 );
+         }
+      }
+/*
+ * Solve current diagonal block 
+ */
+      if( ( mycol == Alcol ) && ( myrow == Alrow ) )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 );
+      }
+/*
+ * Finish previous update
+ */
+      if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
+         HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone,
+                    Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 );
+/*
+ * Save info of current step and update info for the next step
+ */
+      if( mycol == Alcol ) { Xdprev   = Xd; Aprev = Aptr; }
+      if( myrow == Alrow ) { Anpprev -= kb; }
+      rowprev = Alrow; colprev = Alcol;
+      n1pprev = n1p;   kbprev  = kb; n -= kb;
+      Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol );
+      tmp1  = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+      MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+      Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? 
+                 MSGID_BEGIN_PTRSV   : Rmsgid+2 );
+      Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ?
+                 MSGID_BEGIN_PTRSV+1 : Cmsgid+2 );
+   }
+/*
+ * Replicate last solution block
+ */
+   if( mycol == colprev )
+      (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev,
+                            Ccomm );
+
+   if( Wfr  ) free( W  );   /* Wfr is set only when W was allocated */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+/*
+ * End of HPL_pdtrsv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
new file mode 100644
index 000000000..7e31ddcd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
@@ -0,0 +1,442 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  if  PBCST  is not NULL on entry,  IFLAG  indicates
+ *         whether or not the broadcast has been completed.  Otherwise,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. If NN is negative, all PANEL->nq columns are used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
new file mode 100644
index 000000000..faa3ef207
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  if  PBCST  is not NULL on entry,  IFLAG  indicates
+ *         whether or not the broadcast has been completed.  Otherwise,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. If NN is negative, all PANEL->nq columns are used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */ 
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
new file mode 100644
index 000000000..a16aa26a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry.  Otherwise (a
+ *         NULL PBCST), IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero; n = min( NN, PANEL->nq ).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..   (LDU: leading dimension passed with Uptr)
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Probe the panel broadcast; test drives the chunked update loops below
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update at most nb columns at a time, then probe the broadcast again
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Select the swapping algorithm (cached in the static fswap / tswap).
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
new file mode 100644
index 000000000..81e6cc4b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTT broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry.  Otherwise (a
+ *         NULL PBCST), IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero; n = min( NN, PANEL->nq ).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..   (LDU: leading dimension passed with Uptr)
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Probe the panel broadcast; test drives the chunked update loops below
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update at most nb columns at a time, then probe the broadcast again
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Select the swapping algorithm (cached in the static fswap / tswap).
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c
new file mode 100644
index 000000000..bf7cc4503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_perm combines two index arrays  and  generates the  corresponding
+ * permutation. First, this function computes the inverse of LINDXA, and
+ * then combines it with LINDXAU. Second, in order to be able to perform
+ * the permutation in place,  LINDXAU  is  overwritten by a  sequence of
+ * swaps  producing  the  same  result.   What we  ultimately  want  to
+ * achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+ * call to this function,  this in place permutation can be performed by
+ * for i in [0..N) swap U[i] with U[LINDXAU[i]].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the length of the arrays  LINDXA  and
+ *         LINDXAU. N should be at least zero.
+ *
+ * LINDXA  (global input/output)         int *
+ *         On entry,  LINDXA  is an array of dimension N  containing the
+ *         source indexes. On exit,  LINDXA  contains the combined index
+ *         array, i.e. LINDXAU[ inverse(LINDXA)[i] ] for each i.
+ *
+ * LINDXAU (global input/output)         int *
+ *         On entry,  LINDXAU is an array of dimension N  containing the
+ *         target indexes.  On exit,  LINDXAU  contains  the sequence of
+ *         swaps  that  should  be  applied  in  increasing  order   to
+ *         permute the underlying array U in place.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension N (overwritten).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j, k, fndd;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Invert LINDXA into IWORK, compose with LINDXAU, reset IWORK to identity
+ */
+   for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; }
+   for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; }
+/* Rebuild LINDXAU one position at a time as a sequence of swaps. */
+   for( i = 0; i < N; i++ )
+   {
+      /* find j with LINDXA[j] == i; unbounded scan, a match must exist */
+      j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--;
+      /* find k with IWORK[k]  == j; unbounded scan, a match must exist */
+      k = 0; do { fndd = ( IWORK[k]  == j ); k++; } while( !fndd ); k--;
+      /* swap IWORK[i] and IWORK[k] (j reused as temp); LINDXAU[i] = k  */
+      j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j;
+      LINDXAU[i] = k;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array  IPID  that contains the source and final
+ * destination  of  matrix rows  resulting  from  the  application  of N
+ * interchanges  as computed by the  LU  factorization  with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long,  since  there are N rows to swap and
+ * broadcast. The length of this array  must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing  IPID  goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *  
+ * For every row src IA + i with i in [0..N) to be swapped with row  dst
+ * such that dst is given by DPIV[i]:
+ *  
+ * Is row  src  the destination  of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *     Yes:  update  this destination  with dst.  For  example,  if  the
+ * pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+ * was thought so far ...
+ *     No :  add  the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *  
+ * Is row  dst  different  from src the destination of a previous row of
+ * the current block, i.e., is there k odd such that IPID(k) is equal to
+ * dst ?
+ *     Yes:  update  IPID(k) with src.  For example,  if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+ * fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *     No : add  the  pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *  
+ * Note that when src is equal to dst, the pair (dst,src)  should not be
+ * added to  IPID  in  order  to avoid duplicated entries in this array.
+ * During  the construction of the array  IPID,  we  make  sure that the
+ * first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+ * For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information; DPIV, jb and ia are read here (N is jb).
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in  IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N.  On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting  from  the  application of the  N  interchanges  as
+ *         specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst  = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+/* Pivot 0 is recorded above; fold in the remaining jb-1 interchanges. */
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;
+/* j scans odd slots of IPID only, so a zero fnds/fndd means not found. */
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K;     off = 2; IPID[lst] = src; }
+         else        { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {
+         fndd = 0;     /* src != dst: look for both in the odd slots */
+         do
+         {
+            if     ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off  = 2; }
+         else        {                 IPID[fnds] = dst; off  = 0; }
+         if( !fndd ) { lst = *K+off;   IPID[lst ] = dst; off += 2; }
+         else        { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Swap pair lst into slot i so that IPID[2*i+1] equals src = ia + i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j  ]; IPID[j  ] = IPID[lst  ]; IPID[lst  ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.
+ *  
+ * On entry, the array  IPID  of length K is such that the row of global
+ * index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+ * Let  IA  be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index  IPID(2*k+1).  The question then, is to determine
+ * which rows should ultimately be part of U.
+ *  
+ * First, some rows of the process ICURROW  may be swapped locally.  One
+ * of these rows belongs to U, the other one belongs to my local piece of
+ * A.  The other  rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent  along,
+ * and  grabbed by the other processes  as we  progress in the  exchange
+ * phase.
+ *  
+ * So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N,  this row is locally swapped and should be copied into  U  at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is at least  N,  then the row IPID(2*i) should  be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *  
+ * If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to  A  remotely.  Since this  process will however send this array U,
+ * this row is  copied into  U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+ * IPID(2*k1+1)-IA.
+ *  
+ * It is thus  important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+ * doing so,  U  is formed, and the local copy  is performed in just one
+ * sweep.
+ *  
+ * Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU  contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *  
+ * Second, if I am not the current row process  ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed,  they  are swapped with one
+ * row  of  the  current  block  of rows,  and  the  main  factorization
+ * algorithm proceeds one row after each other.  The processes different
+ * from ICURROW,  should  exchange and accumulate  those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *  
+ * In processes different from  ICURROW,  the  initial packing algorithm
+ * proceeds as follows.  Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+ * and  this particular row of U should be first copied into my piece of
+ * A, at A(il,:),  where  il is the  local row  index  corresponding  to
+ * IPID(2*i). Now,initially, this row will be packed into workspace, say
+ * as the kth row of  that  work array.  The  following  algorithm  sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in  A  where  this
+ * row of U should be copied, i.e il.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is not in ICURROW,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *  
+ * Since we are at it, we also globally figure  out  how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to  figure it on  the fly  during the  bi-directional exchange phase.
+ * This information is kept in the array  LLEN  of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information (grid, prow, jb, nb, ia, ii are read here).
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (local output)                int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+ *         contains how many rows every process has.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges.
+ */
+   myrow   = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow;        jb    = PANEL->jb;
+   nb      = PANEL->nb;          ia    = PANEL->ia;
+   iroff   = PANEL->ii;
+/* Reset the per-process-row counters before counting source rows. */
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+/* Walk the (src,dst) pairs of IPID: src = IPID[i], dst = IPID[i+1]. */
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+/* Every pair is counted in LLEN; only my own source rows are listed. */
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+/* In the current row process, dst selects U (>= 0) or local A (< 0). */
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;   /* find the pair whose source IPID[j] is dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               LINDXAU[ip] = IPID[j-1] - ia;
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+/* ip counts the entries stored so far in LINDXA and LINDXAU. */
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.  In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit,  IPA  specifies  the number of rows that the current
+ *         process row has that either belong to U  or should be swapped
+ *         with remote rows of A.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in  the  processes  before  process  IPMAP[i]  after the sort
+ *         with the convention that IPLEN[nprow]  is the total number of
+ *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * PERMU   (global output)               int *
+ *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+ *         contains  a sequence of permutations,  that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
+   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
+   iroff = PANEL->ii;          icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;   /* iwork aliases IWORK starting at offset jb */
+ 
+   if( myrow == icurrow )
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+ 
+         if( srcrow == icurrow )
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+ 
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;
+ 
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;   /* scan the even (src) entries of IPID for dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+ 
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
+               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[ip] is the local index of the row of A that belongs into U
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[ipU] is the local (current) position  index in U
+ * PERMU[ipU] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;   /* scan the even (src) entries of IPID for dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutations
+ * that need to be applied to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0; 
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx10
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_plindx10
+( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx10 computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+ * contain the logarithmic mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in the processes before process IPMAP[i] after the sort, with
+ *         the convention that IPLEN[nprow] is the total number of rows.
+ *         In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+ *         rows of  A  that should be moved for each process.  IPLEN  is
+ *         such that the number of rows of the source process row can be
+ *         computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+ *         this  array are sorted  so  that  the quantities IPLEN[i+1] -
+ *         IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, i, ia, icurrow, jb, nb,
+                              nprow, src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+   nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb;
+   ia    = PANEL->ia;          icurrow = PANEL->prow;
+/*
+ * Compute  redundantly  the local number of rows  that each process has
+ * and that belong to U in IPLEN[1 .. nprow] (row dstrow -> IPLEN[dstrow+1])
+ */
+   for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0;
+ 
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+      if( srcrow == icurrow )
+      {
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+         if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++;
+      }
+   }
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ * (the inverse of IPMAP)
+ */
+   HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * End of HPL_plindx10
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c
new file mode 100644
index 000000000..e68590a01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c
@@ -0,0 +1,225 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollN
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollN rolls the local arrays containing the local pieces of U, so
+ * that on exit to this function  U  is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of  U.  N must be
+ *         at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,IPLEN[NPROW]).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype               type[2];
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR,
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );   /* l = k / 2 */
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )   /* odd: exchange with prev */
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il      = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else                                /* even: exchange with next */
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE,
+                                      &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+      }
+ 
+      if( lengthS > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE,
+                                      &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_SEND] );
+      }
+
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_RECV] );
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); }
+/*
+ * End of HPL_rollN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c
new file mode 100644
index 000000000..0160c9412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c
@@ -0,0 +1,259 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollT
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollT rolls the local arrays containing the local pieces of U, so
+ * that on exit to this function  U  is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the local number of rows of  U.  N must
+ *         be at least zero; when N <= 0 the routine returns immediately.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,N). Messages carry length*LDU doubles.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype               type[2];
+#endif
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR, 
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;   /* nothing to roll */
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: npm1 steps, each pairing this process with prev or next
+ */
+   mydist = IPMAPM1[myrow];   /* position of this row in the IPMAP order */
+   prev   = IPMAP[MModSub1( mydist, nprow )];   /* presumably ring predecessor - TODO confirm */
+   next   = IPMAP[MModAdd1( mydist, nprow )];   /* presumably ring successor - TODO confirm */
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );   /* l = k / 2 */
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )   /* mydist + k odd: pair with prev */
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );   /* piece to send */
+         il    = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else                                /* mydist + k even: pair with next */
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );   /* piece to send */
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )   /* post the non-blocking receive first */
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
+                                           &type[I_RECV] );
+            else
+               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
+                                       &type[I_RECV] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+#else
+/*
+ * In our case, LDU is N - Do not use the MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
+                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
+#endif
+      }
+ 
+      if( lengthS > 0 )   /* then the blocking send to the same partner */
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
+                                             &type[I_SEND] );
+            else
+               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
+                                         &type[I_SEND] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_SEND] );
+#else
+/*
+ * In our case, LDU is N - Do not use the MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
+                               MPI_DOUBLE, partner, Cmsgid, comm );
+#endif
+      }
+
+      if( lengthR > 0 )   /* finally complete the receive posted above */
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#if 0
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_RECV] );
+#endif
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )   /* first failure skips all later MPI calls */
+   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
+/*
+ * End of HPL_rollT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c
new file mode 100644
index 000000000..202611e7f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c
@@ -0,0 +1,303 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadN
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadN spreads the local array containing local pieces of U, so
+ * that on exit to this function,  a piece of  U  is contained in every
+ * process row. The array IPLEN contains the number of rows of U,  that
+ * should be spread on any given process row. This function also probes
+ * for the presence of the column panel PBCST. In case of success, this
+ * panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+ * mechanism will be disabled (no NULL test is done here - TODO confirm).
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies  the  local number of columns of U. N
+ *         must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U. Each message is N blocks of lbuf doubles, LDU apart.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process before process IPMAP[i], with the  convention
+ *         that IPLEN[nprow] is the total number of rows. In other words
+ *         IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+ *         should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype              type;
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left  (ip2 starts as the largest power of 2 <= npm1)
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;   /* only positions 0..SRCDIST take part */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];   /* not read again in this branch */
+
+      do
+      {
+         mask ^= ip2;   /* clear the current bit from mask */
+
+         if( ( mydist & mask ) == 0 )   /* no lower bit set: takes part now */
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); 
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )   /* current bit set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )   /* bit clear: send if partner is in range */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;   /* nprow now counts positions from SRCDIST on */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;   /* mydist is now relative to SRCDIST */
+      lgth    = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U to the right - IPLEN and IPMAP are indexed with a SRCDIST offset
+ */
+      do
+      {
+         mask ^= ip2;   /* clear the current bit from mask */
+
+         if( ( mydist & mask ) == 0 )   /* no lower bit set: takes part now */
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )   /* current bit set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )   /* bit clear: send if partner is in range */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )   /* first failure skips all later MPI calls */
+   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
+/*
+ * End of HPL_spreadN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c
new file mode 100644
index 000000000..1adf93507
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c
@@ -0,0 +1,372 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadT
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadT spreads  the local array containing local pieces of U, so
+ * that on exit to this function,  a piece of  U  is contained in every
+ * process row.  The array  IPLEN  contains the number of columns of U,
+ * that should be spread on any given process row.  This function  also
+ * probes for the presence of  the column panel  PBCST.  If  available,
+ * this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+ * probing mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the local number of rows of U. N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process before process IPMAP[i], with the  convention
+ *         that IPLEN[nprow] is the total number of rows. In other words
+ *         IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+ *         should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype              type;
+#endif
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];
+
+      do
+      {
+         mask ^= ip2;
+
+         if( ( mydist & mask ) == 0 )
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case, LDU is N - do not use the MPI Datatypes
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow )
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case, LDU is N - do not use the MPI Datatypes
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;
+/*
+ * Spread to the right - offset the IPLEN and IPMAP arrays
+ */
+      lgth = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U
+ */
+      do
+      {
+         mask ^= ip2;
+
+         if( ( mydist & mask ) == 0 )
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case, LDU is N - do not use the MPI Datatypes
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow )
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * In our case, LDU is N - do not use the MPI Datatypes
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
+/*
+ * End of HPL_spreadT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o
new file mode 100644
index 000000000..8f1c51fed
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o
new file mode 100644
index 000000000..bcdbb6bc8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o
new file mode 100644
index 000000000..eebf1d2bd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o
new file mode 100644
index 000000000..7f9f518d0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o
new file mode 100644
index 000000000..2e9264e1b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o
new file mode 100644
index 000000000..a60d80722
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o
new file mode 100644
index 000000000..bee048eab
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o
new file mode 100644
index 000000000..503cc4120
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o
new file mode 100644
index 000000000..40903b766
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o
new file mode 100644
index 000000000..a9d2aa518
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o
new file mode 100644
index 000000000..90ba3bb3c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o
new file mode 100644
index 000000000..c824b6c3d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o
new file mode 100644
index 000000000..e29f83012
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o
new file mode 100644
index 000000000..0c246fbd0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o
new file mode 100644
index 000000000..363c2ffc4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o
new file mode 100644
index 000000000..edfda7f58
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o
new file mode 100644
index 000000000..76fa9d64f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o
new file mode 100644
index 000000000..9e136a0dc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o
new file mode 100644
index 000000000..b4871bfcc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o
new file mode 100644
index 000000000..4fbb806ae
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o
new file mode 100644
index 000000000..04dcb0fa6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o
new file mode 100644
index 000000000..26c36d981
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o
new file mode 100644
index 000000000..513ee6fe1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o
new file mode 100644
index 000000000..a4ecb7f87
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc
@@ -0,0 +1 @@
+/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#  Headers that every object rule below lists as a prerequisite; INCdir is not set in this file (presumably Make.inc provides it - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#  Objects the lib.grd rule archives into $(HPLlib); each one has its own compile rule further down.
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+#  all and lib name actions, not files: declare them phony so a stray file called "all" or "lib" cannot mask them.
+.PHONY  : all lib
+all     : lib
+#  lib.grd is a stamp file: it is touched once the objects have been archived into $(HPLlib) and indexed.
+lib     : lib.grd
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#  One explicit rule per object: the source is the same-named .c file one directory up, and $< expands to it.
+HPL_pipid.o : ../HPL_pipid.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx0.o : ../HPL_plindx0.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_perm.o : ../HPL_perm.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_logsort.o : ../HPL_logsort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx10.o : ../HPL_plindx10.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx1.o : ../HPL_plindx1.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_spreadN.o : ../HPL_spreadN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_spreadT.o : ../HPL_spreadT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rollN.o : ../HPL_rollN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rollT.o : ../HPL_rollT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_equil.o : ../HPL_equil.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # clean names an action, not a file; without this a file called "clean" would disable it
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml
new file mode 100644
index 000000000..19e73e079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml
@@ -0,0 +1,1000 @@
+---
+MainSourceFile:  MainSrcFiles_placehold
+Replacements:
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6545
+    Length:          0
+    ReplacementText: "#include <sycl/sycl.hpp>\n#include <dpct/dpct.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6822
+    Length:          0
+    ReplacementText: "\n#include <dpct/blas_utils.hpp>\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6825
+    Length:          18
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6843
+    Length:          26
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6869
+    Length:          20
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          6956
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7044
+    Length:          197
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7334
+    Length:          0
+    ReplacementText: ' try '
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7364
+    Length:          0
+    ReplacementText: "        /*\n        DPCT1010:1: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n        */\n"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7372
+    Length:          9
+    ReplacementText: 'dpct::err0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7388
+    Length:          18
+    ReplacementText: '0'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7416
+    Length:          199
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7739
+    Length:          23
+    ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          7772
+    Length:          208
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          8006
+    Length:          0
+    ReplacementText: "\ncatch (sycl::exception const &exc) {\n  std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n  std::exit(1);\n}"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          8954
+    Length:          0
+    ReplacementText: "\n   dpct::device_ext &dev_ct1 = dpct::get_current_device();\n   sycl::queue &q_ct1 = dev_ct1.in_order_queue();"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9513
+    Length:          54
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device<double>(K * LDA, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9587
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9637
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9662
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9687
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device<double>(N *  LDB, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9762
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9813
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9838
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9863
+    Length:          54
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrC = sycl::malloc_device<double>(N * LDC, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9937
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          9987
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10012
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10025
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10054
+    Length:          85
+    ReplacementText: 'oneapi::mkl::blas::column_major::gemm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC).wait()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10145
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10187
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10237
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10262
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10269
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10299
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrA, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10322
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrB, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10345
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrC, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          10637
+    Length:          0
+    ReplacementText: "\n   dpct::device_ext &dev_ct1 = dpct::get_current_device();\n   sycl::queue &q_ct1 = dev_ct1.in_order_queue();"
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11149
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device<double>(M * LDA, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11224
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11271
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11296
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11334
+    Length:          55
+    ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device<double>(N * LDB, q_ct1))'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11409
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11456
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11481
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11488
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11522
+    Length:          62
+    ReplacementText: 'oneapi::mkl::blas::column_major::trsm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB).wait()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11595
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11642
+    Length:          10
+    ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11689
+    Length:          24
+    ReplacementText: ''
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11714
+    Length:          0
+    ReplacementText: '.wait())'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11726
+    Length:          23
+    ReplacementText: 'dev_ct1.queues_wait_and_throw()'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11755
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrA, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+  - FilePath:        '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Offset:          11778
+    Length:          17
+    ReplacementText: 'sycl::free(devPtrB, q_ct1)'
+    ConstantFlag:    ''
+    ConstantOffset:  0
+    InitStr:         ''
+    NewHostVarName:  ''
+    BlockLevelFormatFlag: false
+MainSourceFilesDigest:
+  - MainSourceFile:  '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp'
+    Digest:          c9ea63d69505b8c70080ff9792b77dd8
+DpctVersion:     18.0.0
+MainHelperFileName: ''
+USMLevel:        ''
+FeatureMap:      {}
+CompileTargets:
+  /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl:
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pddriver.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdinfo.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptest/HPL_pdtest.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+  /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a:
+    - MigratedFileName: './src/auxil/HPL_dlacpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlatcpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_fprintf.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_warn.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_abort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlaprnt.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlange.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/auxil/HPL_dlamch.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dcopy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_daxpy.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dscal.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_idamax.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dgemv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dtrsv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dger.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dgemm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/blas/HPL_dtrsm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_1ring.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_1rinM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_2ring.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_2rinM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_blong.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_blonM.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_packL.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_copyL.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_binit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_bcast.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_bwait.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_send.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_recv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/comm/HPL_sdrv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_init.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_pnum.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_info.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_grid_exit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_broadcast.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_reduce.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_all_reduce.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_barrier.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_min.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_max.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/grid/HPL_sum.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_new.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_init.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_disp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/panel/HPL_pdpanel_free.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2l.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2lp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxg2p.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_indxl2g.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_infog2l.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_numroc.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_numrocI.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp00N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp10N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp01N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp01T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp02N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp03N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp03T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp04N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp04T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp05N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp05T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp06N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_dlaswp06T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pwarn.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pabort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlaprnt.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlamch.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pauxil/HPL_pdlange.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocmax.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocswpN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_dlocswpT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdmxswp.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpancrN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpancrT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanllN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanllT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanrlN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdpanrlT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanllN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanllT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpancrN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpancrT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanrlN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdrpanrlT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pfact/HPL_pdfact.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pipid.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx0.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp00N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp00T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_perm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_logsort.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx10.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_plindx1.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_spreadN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_spreadT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_rollN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_rollT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_equil.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp01N.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdlaswp01T.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateNN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateNT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateTN.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdupdateTT.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdtrsv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesv0.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesvK1.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesvK2.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './src/pgesv/HPL_pdgesv.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_dmatgen.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_ladd.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_lmul.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_xjumpm.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_jumpit.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_rand.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/matgen/HPL_setran.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer_cputime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/timer/HPL_timer_walltime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/pmatgen/HPL_pdmatgen.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer_cputime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+    - MigratedFileName: './testing/ptimer/HPL_ptimer_walltime.c'
+      CompileOptions:  '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include '
+      Compiler:        cc
+  libdgemm.so.1.0.1:
+    - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp'
+      CompileOptions:  '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) '
+      Compiler:        cc
+    - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp'
+      CompileOptions:  '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) '
+      Compiler:        cc
+OptionMap:
+  AnalysisScopePath:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3'
+    Specified:       false
+  AsyncHandler:
+    Value:           'false'
+    Specified:       false
+  CommentsEnabled:
+    Value:           'false'
+    Specified:       false
+  CompilationsDir:
+    Value:           '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3'
+    Specified:       true
+  CtadEnabled:
+    Value:           'false'
+    Specified:       false
+  EnablepProfiling:
+    Value:           'false'
+    Specified:       false
+  ExperimentalFlag:
+    Value:           '0'
+    Specified:       false
+  ExplicitClNamespace:
+    Value:           'false'
+    Specified:       false
+  ExplicitNamespace:
+    Value:           '20'
+    Specified:       false
+  ExtensionDDFlag:
+    Value:           '0'
+    Specified:       false
+  ExtensionDEFlag:
+    Value:           '4294967295'
+    Specified:       false
+  HelperFuncPreferenceFlag:
+    Value:           '0'
+    Specified:       false
+  NDRangeDim:
+    Value:           '3'
+    Specified:       false
+  NoDRYPattern:
+    Value:           'false'
+    Specified:       false
+  NoUseGenericSpace:
+    Value:           ''
+    Specified:       true
+  OptimizeMigration:
+    Value:           'false'
+    Specified:       false
+  ProcessAll:
+    Value:           'false'
+    Specified:       false
+  RuleFile:
+    Value:           ''
+    Specified:       false
+  SyclNamedLambda:
+    Value:           'false'
+    Specified:       false
+  UsmLevel:
+    Value:           '1'
+    Specified:       false
+...
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct
new file mode 100644
index 000000000..15b4e8109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct
@@ -0,0 +1,1018 @@
+CC := icpx
+
+LD := $(CC)
+
+#DPCT2001:4: You can link with more library by add them here.
+LIB := -lmpi
+
+FLAGS :=  -fPIC
+
+ifeq ($(shell which $(CC)),)
+    $(error ERROR - $(CC) compiler not found)
+endif
+
+ROOT_DIR     := $(shell dirname $(shell which $(CC)))
+INCLUDE_SYCL := $(ROOT_DIR)/../include
+INCLUDE_CL   := $(ROOT_DIR)/../include/sycl
+
+TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_0 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_1 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_2 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_3 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_4 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_5 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_6 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_7 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_8 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c
+TARGET_0_OBJ_9 = ./testing/ptest/HPL_pddriver.o
+TARGET_0_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c
+TARGET_0_OBJ_10 = ./testing/ptest/HPL_pdinfo.o
+TARGET_0_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c
+TARGET_0_OBJ_11 = ./testing/ptest/HPL_pdtest.o
+TARGET_0_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c
+TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o
+TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c
+TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o
+TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_2 = ./src/auxil/HPL_fprintf.c
+TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o
+TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c
+TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o
+TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_4 = ./src/auxil/HPL_abort.c
+TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o
+TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c
+TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o
+TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c
+TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o
+TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c
+TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o
+TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS}
+
+TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c
+TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o
+TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c
+TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o
+TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c
+TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o
+TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c
+TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o
+TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c
+TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o
+TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c
+TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o
+TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_14 = ./src/blas/HPL_dger.c
+TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o
+TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c
+TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o
+TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c
+TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o
+TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c
+TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o
+TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c
+TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o
+TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c
+TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o
+TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c
+TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o
+TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_21 = ./src/comm/HPL_blong.c
+TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o
+TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c
+TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o
+TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_23 = ./src/comm/HPL_packL.c
+TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o
+TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c
+TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o
+TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_25 = ./src/comm/HPL_binit.c
+TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o
+TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c
+TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o
+TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c
+TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o
+TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_28 = ./src/comm/HPL_send.c
+TARGET_1_OBJ_28 = ./src/comm/HPL_send.o
+TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_29 = ./src/comm/HPL_recv.c
+TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o
+TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c
+TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o
+TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c
+TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o
+TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c
+TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o
+TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c
+TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o
+TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c
+TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o
+TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c
+TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o
+TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c
+TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o
+TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c
+TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o
+TARGET_1_FLAG_37 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c
+TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o
+TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_39 = ./src/grid/HPL_min.c
+TARGET_1_OBJ_39 = ./src/grid/HPL_min.o
+TARGET_1_FLAG_39 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_40 = ./src/grid/HPL_max.c
+TARGET_1_OBJ_40 = ./src/grid/HPL_max.o
+TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_41 = ./src/grid/HPL_sum.c
+TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o
+TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c
+TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o
+TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c
+TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o
+TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c
+TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o
+TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c
+TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o
+TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_46 = ./src/pauxil/HPL_indxg2l.c
+TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o
+TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c
+TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o
+TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_48 = ./src/pauxil/HPL_indxg2p.c
+TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o
+TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c
+TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o
+TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c
+TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o
+TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c
+TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o
+TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c
+TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o
+TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c
+TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o
+TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c
+TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o
+TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c
+TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o
+TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c
+TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o
+TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c
+TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o
+TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c
+TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o
+TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c
+TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o
+TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c
+TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o
+TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c
+TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o
+TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c
+TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o
+TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_63 = ./src/pauxil/HPL_dlaswp05T.c
+TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o
+TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c
+TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o
+TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_65 = ./src/pauxil/HPL_dlaswp06T.c
+TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o
+TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c
+TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o
+TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c
+TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o
+TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c
+TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o
+TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c
+TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o
+TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c
+TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o
+TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c
+TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o
+TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c
+TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o
+TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c
+TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o
+TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c
+TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o
+TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c
+TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o
+TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c
+TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o
+TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c
+TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o
+TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c
+TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o
+TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c
+TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o
+TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c
+TARGET_1_OBJ_80 = ./src/pfact/HPL_pdpanrlT.o
+TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c
+TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o
+TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c
+TARGET_1_OBJ_82 = ./src/pfact/HPL_pdrpanllT.o
+TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c
+TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o
+TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c
+TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o
+TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c
+TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o
+TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c
+TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o
+TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c
+TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o
+TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c
+TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o
+TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c
+TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o
+TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c
+TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o
+TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c
+TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o
+TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c
+TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o
+TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c
+TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o
+TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c
+TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o
+TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c
+TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o
+TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c
+TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o
+TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c
+TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o
+TARGET_1_FLAG_97 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c
+TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o
+TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c
+TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o
+TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c
+TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o
+TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c
+TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o
+TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c
+TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o
+TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c
+TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o
+TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c
+TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o
+TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c
+TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o
+TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c
+TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o
+TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c
+TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o
+TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c
+TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o
+TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c
+TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o
+TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c
+TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o
+TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c
+TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o
+TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c
+TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o
+TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c
+TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o
+TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c
+TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o
+TARGET_1_FLAG_114 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c
+TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o
+TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c
+TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o
+TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c
+TARGET_1_OBJ_117 = ./testing/matgen/HPL_rand.o
+TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c
+TARGET_1_OBJ_118 = ./testing/matgen/HPL_setran.o
+TARGET_1_FLAG_118 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c
+TARGET_1_OBJ_119 = ./testing/timer/HPL_timer.o
+TARGET_1_FLAG_119 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c
+TARGET_1_OBJ_120 = ./testing/timer/HPL_timer_cputime.o
+TARGET_1_FLAG_120 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c
+TARGET_1_OBJ_121 = ./testing/timer/HPL_timer_walltime.o
+TARGET_1_FLAG_121 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c
+TARGET_1_OBJ_122 = ./testing/pmatgen/HPL_pdmatgen.o
+TARGET_1_FLAG_122 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c
+TARGET_1_OBJ_123 = ./testing/ptimer/HPL_ptimer.o
+TARGET_1_FLAG_123 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c
+TARGET_1_OBJ_124 = ./testing/ptimer/HPL_ptimer_cputime.o
+TARGET_1_FLAG_124 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c
+TARGET_1_OBJ_125 = ./testing/ptimer/HPL_ptimer_walltime.o
+TARGET_1_FLAG_125 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS}
+
+TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_0 = ./src/cuda/cuda_dgemm.cpp.dp.o
+TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I ${MKLROOT}/include  -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS}
+
+TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp
+TARGET_2_OBJ_1 = ./src/cuda/cuda_dgemm.cpp.dp.o
+TARGET_2_FLAG_1 = -O0 -DMPI -I ./include -I ${MKLROOT}/include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS}
+
+TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl
+TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a
+TARGET_2 := libdgemm.so.1.0.1
+
+TARGET :=   ${TARGET_1} ${TARGET_2} ${TARGET_0} 
+.PHONY:all clean
+OBJS_0 :=  ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11}
+OBJS_1 :=  ${TARGET_1_OBJ_0} ${TARGET_1_OBJ_1} ${TARGET_1_OBJ_2} ${TARGET_1_OBJ_3} ${TARGET_1_OBJ_4} ${TARGET_1_OBJ_5} ${TARGET_1_OBJ_6} ${TARGET_1_OBJ_7} ${TARGET_1_OBJ_8} ${TARGET_1_OBJ_9} ${TARGET_1_OBJ_10} ${TARGET_1_OBJ_11} ${TARGET_1_OBJ_12} ${TARGET_1_OBJ_13} ${TARGET_1_OBJ_14} ${TARGET_1_OBJ_15} ${TARGET_1_OBJ_16} ${TARGET_1_OBJ_17} ${TARGET_1_OBJ_18} ${TARGET_1_OBJ_19} ${TARGET_1_OBJ_20} ${TARGET_1_OBJ_21} ${TARGET_1_OBJ_22} ${TARGET_1_OBJ_23} ${TARGET_1_OBJ_24} ${TARGET_1_OBJ_25} ${TARGET_1_OBJ_26} ${TARGET_1_OBJ_27} ${TARGET_1_OBJ_28} ${TARGET_1_OBJ_29} ${TARGET_1_OBJ_30} ${TARGET_1_OBJ_31} ${TARGET_1_OBJ_32} ${TARGET_1_OBJ_33} ${TARGET_1_OBJ_34} ${TARGET_1_OBJ_35} ${TARGET_1_OBJ_36} ${TARGET_1_OBJ_37} ${TARGET_1_OBJ_38} ${TARGET_1_OBJ_39} ${TARGET_1_OBJ_40} ${TARGET_1_OBJ_41} ${TARGET_1_OBJ_42} ${TARGET_1_OBJ_43} ${TARGET_1_OBJ_44} ${TARGET_1_OBJ_45} ${TARGET_1_OBJ_46} ${TARGET_1_OBJ_47} ${TARGET_1_OBJ_48} ${TARGET_1_OBJ_49} ${TARGET_1_OBJ_50} ${TARGET_1_OBJ_51} ${TARGET_1_OBJ_52} ${TARGET_1_OBJ_53} ${TARGET_1_OBJ_54} ${TARGET_1_OBJ_55} ${TARGET_1_OBJ_56} ${TARGET_1_OBJ_57} ${TARGET_1_OBJ_58} ${TARGET_1_OBJ_59} ${TARGET_1_OBJ_60} ${TARGET_1_OBJ_61} ${TARGET_1_OBJ_62} ${TARGET_1_OBJ_63} ${TARGET_1_OBJ_64} ${TARGET_1_OBJ_65} ${TARGET_1_OBJ_66} ${TARGET_1_OBJ_67} ${TARGET_1_OBJ_68} ${TARGET_1_OBJ_69} ${TARGET_1_OBJ_70} ${TARGET_1_OBJ_71} ${TARGET_1_OBJ_72} ${TARGET_1_OBJ_73} ${TARGET_1_OBJ_74} ${TARGET_1_OBJ_75} ${TARGET_1_OBJ_76} ${TARGET_1_OBJ_77} ${TARGET_1_OBJ_78} ${TARGET_1_OBJ_79} ${TARGET_1_OBJ_80} ${TARGET_1_OBJ_81} ${TARGET_1_OBJ_82} ${TARGET_1_OBJ_83} ${TARGET_1_OBJ_84} ${TARGET_1_OBJ_85} ${TARGET_1_OBJ_86} ${TARGET_1_OBJ_87} ${TARGET_1_OBJ_88} ${TARGET_1_OBJ_89} ${TARGET_1_OBJ_90} ${TARGET_1_OBJ_91} ${TARGET_1_OBJ_92} ${TARGET_1_OBJ_93} ${TARGET_1_OBJ_94} ${TARGET_1_OBJ_95} ${TARGET_1_OBJ_96} ${TARGET_1_OBJ_97} ${TARGET_1_OBJ_98} ${TARGET_1_OBJ_99} ${TARGET_1_OBJ_100} ${TARGET_1_OBJ_101} ${TARGET_1_OBJ_102} ${TARGET_1_OBJ_103} 
${TARGET_1_OBJ_104} ${TARGET_1_OBJ_105} ${TARGET_1_OBJ_106} ${TARGET_1_OBJ_107} ${TARGET_1_OBJ_108} ${TARGET_1_OBJ_109} ${TARGET_1_OBJ_110} ${TARGET_1_OBJ_111} ${TARGET_1_OBJ_112} ${TARGET_1_OBJ_113} ${TARGET_1_OBJ_114} ${TARGET_1_OBJ_115} ${TARGET_1_OBJ_116} ${TARGET_1_OBJ_117} ${TARGET_1_OBJ_118} ${TARGET_1_OBJ_119} ${TARGET_1_OBJ_120} ${TARGET_1_OBJ_121} ${TARGET_1_OBJ_122} ${TARGET_1_OBJ_123} ${TARGET_1_OBJ_124} ${TARGET_1_OBJ_125}
+OBJS_2 :=  ${TARGET_2_OBJ_0} ${TARGET_2_OBJ_1}
+all: $(TARGET)
+$(TARGET_0): $(OBJS_0)
+	$(CC) -fsycl -o $@ $^ $(LIB) -qmkl  /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a libdgemm.so.1.0.1
+
+$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0)
+	cc -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0)
+
+$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1)
+	cc -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1)
+
+$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2)
+	cc -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2)
+
+$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3)
+	cc -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3)
+
+$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4)
+	cc -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4)
+
+$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5)
+	cc -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5)
+
+$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6)
+	cc -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6)
+
+$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7)
+	cc -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7)
+
+$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8)
+	cc -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8)
+
+$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9)
+	cc -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9)
+
+$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10)
+	cc -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10)
+
+$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11)
+	cc -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11)
+
+$(TARGET_1): $(OBJS_1)
+	ar -r $@ $^ $(LIB) 
+
+$(TARGET_1_OBJ_0):$(TARGET_1_SRC_0)
+	cc -c ${TARGET_1_SRC_0} -o ${TARGET_1_OBJ_0} $(TARGET_1_FLAG_0)
+
+$(TARGET_1_OBJ_1):$(TARGET_1_SRC_1)
+	cc -c ${TARGET_1_SRC_1} -o ${TARGET_1_OBJ_1} $(TARGET_1_FLAG_1)
+
+$(TARGET_1_OBJ_2):$(TARGET_1_SRC_2)
+	cc -c ${TARGET_1_SRC_2} -o ${TARGET_1_OBJ_2} $(TARGET_1_FLAG_2)
+
+$(TARGET_1_OBJ_3):$(TARGET_1_SRC_3)
+	cc -c ${TARGET_1_SRC_3} -o ${TARGET_1_OBJ_3} $(TARGET_1_FLAG_3)
+
+$(TARGET_1_OBJ_4):$(TARGET_1_SRC_4)
+	cc -c ${TARGET_1_SRC_4} -o ${TARGET_1_OBJ_4} $(TARGET_1_FLAG_4)
+
+$(TARGET_1_OBJ_5):$(TARGET_1_SRC_5)
+	cc -c ${TARGET_1_SRC_5} -o ${TARGET_1_OBJ_5} $(TARGET_1_FLAG_5)
+
+$(TARGET_1_OBJ_6):$(TARGET_1_SRC_6)
+	cc -c ${TARGET_1_SRC_6} -o ${TARGET_1_OBJ_6} $(TARGET_1_FLAG_6)
+
+$(TARGET_1_OBJ_7):$(TARGET_1_SRC_7)
+	cc -c ${TARGET_1_SRC_7} -o ${TARGET_1_OBJ_7} $(TARGET_1_FLAG_7)
+
+$(TARGET_1_OBJ_8):$(TARGET_1_SRC_8)
+	cc -c ${TARGET_1_SRC_8} -o ${TARGET_1_OBJ_8} $(TARGET_1_FLAG_8)
+
+$(TARGET_1_OBJ_9):$(TARGET_1_SRC_9)
+	cc -c ${TARGET_1_SRC_9} -o ${TARGET_1_OBJ_9} $(TARGET_1_FLAG_9)
+
+$(TARGET_1_OBJ_10):$(TARGET_1_SRC_10)
+	cc -c ${TARGET_1_SRC_10} -o ${TARGET_1_OBJ_10} $(TARGET_1_FLAG_10)
+
+$(TARGET_1_OBJ_11):$(TARGET_1_SRC_11)
+	cc -c ${TARGET_1_SRC_11} -o ${TARGET_1_OBJ_11} $(TARGET_1_FLAG_11)
+
+$(TARGET_1_OBJ_12):$(TARGET_1_SRC_12)
+	cc -c ${TARGET_1_SRC_12} -o ${TARGET_1_OBJ_12} $(TARGET_1_FLAG_12)
+
+$(TARGET_1_OBJ_13):$(TARGET_1_SRC_13)
+	cc -c ${TARGET_1_SRC_13} -o ${TARGET_1_OBJ_13} $(TARGET_1_FLAG_13)
+
+$(TARGET_1_OBJ_14):$(TARGET_1_SRC_14)
+	cc -c ${TARGET_1_SRC_14} -o ${TARGET_1_OBJ_14} $(TARGET_1_FLAG_14)
+
+$(TARGET_1_OBJ_15):$(TARGET_1_SRC_15)
+	cc -c ${TARGET_1_SRC_15} -o ${TARGET_1_OBJ_15} $(TARGET_1_FLAG_15)
+
+$(TARGET_1_OBJ_16):$(TARGET_1_SRC_16)
+	cc -c ${TARGET_1_SRC_16} -o ${TARGET_1_OBJ_16} $(TARGET_1_FLAG_16)
+
+$(TARGET_1_OBJ_17):$(TARGET_1_SRC_17)
+	cc -c ${TARGET_1_SRC_17} -o ${TARGET_1_OBJ_17} $(TARGET_1_FLAG_17)
+
+$(TARGET_1_OBJ_18):$(TARGET_1_SRC_18)
+	cc -c ${TARGET_1_SRC_18} -o ${TARGET_1_OBJ_18} $(TARGET_1_FLAG_18)
+
+$(TARGET_1_OBJ_19):$(TARGET_1_SRC_19)
+	cc -c ${TARGET_1_SRC_19} -o ${TARGET_1_OBJ_19} $(TARGET_1_FLAG_19)
+
+$(TARGET_1_OBJ_20):$(TARGET_1_SRC_20)
+	cc -c ${TARGET_1_SRC_20} -o ${TARGET_1_OBJ_20} $(TARGET_1_FLAG_20)
+
+$(TARGET_1_OBJ_21):$(TARGET_1_SRC_21)
+	cc -c ${TARGET_1_SRC_21} -o ${TARGET_1_OBJ_21} $(TARGET_1_FLAG_21)
+
+$(TARGET_1_OBJ_22):$(TARGET_1_SRC_22)
+	cc -c ${TARGET_1_SRC_22} -o ${TARGET_1_OBJ_22} $(TARGET_1_FLAG_22)
+
+$(TARGET_1_OBJ_23):$(TARGET_1_SRC_23)
+	cc -c ${TARGET_1_SRC_23} -o ${TARGET_1_OBJ_23} $(TARGET_1_FLAG_23)
+
+$(TARGET_1_OBJ_24):$(TARGET_1_SRC_24)
+	cc -c ${TARGET_1_SRC_24} -o ${TARGET_1_OBJ_24} $(TARGET_1_FLAG_24)
+
+$(TARGET_1_OBJ_25):$(TARGET_1_SRC_25)
+	cc -c ${TARGET_1_SRC_25} -o ${TARGET_1_OBJ_25} $(TARGET_1_FLAG_25)
+
+$(TARGET_1_OBJ_26):$(TARGET_1_SRC_26)
+	cc -c ${TARGET_1_SRC_26} -o ${TARGET_1_OBJ_26} $(TARGET_1_FLAG_26)
+
+$(TARGET_1_OBJ_27):$(TARGET_1_SRC_27)
+	cc -c ${TARGET_1_SRC_27} -o ${TARGET_1_OBJ_27} $(TARGET_1_FLAG_27)
+
+$(TARGET_1_OBJ_28):$(TARGET_1_SRC_28)
+	cc -c ${TARGET_1_SRC_28} -o ${TARGET_1_OBJ_28} $(TARGET_1_FLAG_28)
+
+$(TARGET_1_OBJ_29):$(TARGET_1_SRC_29)
+	cc -c ${TARGET_1_SRC_29} -o ${TARGET_1_OBJ_29} $(TARGET_1_FLAG_29)
+
+$(TARGET_1_OBJ_30):$(TARGET_1_SRC_30)
+	cc -c ${TARGET_1_SRC_30} -o ${TARGET_1_OBJ_30} $(TARGET_1_FLAG_30)
+
+$(TARGET_1_OBJ_31):$(TARGET_1_SRC_31)
+	cc -c ${TARGET_1_SRC_31} -o ${TARGET_1_OBJ_31} $(TARGET_1_FLAG_31)
+
+$(TARGET_1_OBJ_32):$(TARGET_1_SRC_32)
+	cc -c ${TARGET_1_SRC_32} -o ${TARGET_1_OBJ_32} $(TARGET_1_FLAG_32)
+
+$(TARGET_1_OBJ_33):$(TARGET_1_SRC_33)
+	cc -c ${TARGET_1_SRC_33} -o ${TARGET_1_OBJ_33} $(TARGET_1_FLAG_33)
+
+$(TARGET_1_OBJ_34):$(TARGET_1_SRC_34)
+	cc -c ${TARGET_1_SRC_34} -o ${TARGET_1_OBJ_34} $(TARGET_1_FLAG_34)
+
+$(TARGET_1_OBJ_35):$(TARGET_1_SRC_35)
+	cc -c ${TARGET_1_SRC_35} -o ${TARGET_1_OBJ_35} $(TARGET_1_FLAG_35)
+
+$(TARGET_1_OBJ_36):$(TARGET_1_SRC_36)
+	cc -c ${TARGET_1_SRC_36} -o ${TARGET_1_OBJ_36} $(TARGET_1_FLAG_36)
+
+$(TARGET_1_OBJ_37):$(TARGET_1_SRC_37)
+	cc -c ${TARGET_1_SRC_37} -o ${TARGET_1_OBJ_37} $(TARGET_1_FLAG_37)
+
+$(TARGET_1_OBJ_38):$(TARGET_1_SRC_38)
+	cc -c ${TARGET_1_SRC_38} -o ${TARGET_1_OBJ_38} $(TARGET_1_FLAG_38)
+
+$(TARGET_1_OBJ_39):$(TARGET_1_SRC_39)
+	cc -c ${TARGET_1_SRC_39} -o ${TARGET_1_OBJ_39} $(TARGET_1_FLAG_39)
+
+$(TARGET_1_OBJ_40):$(TARGET_1_SRC_40)
+	cc -c ${TARGET_1_SRC_40} -o ${TARGET_1_OBJ_40} $(TARGET_1_FLAG_40)
+
+$(TARGET_1_OBJ_41):$(TARGET_1_SRC_41)
+	cc -c ${TARGET_1_SRC_41} -o ${TARGET_1_OBJ_41} $(TARGET_1_FLAG_41)
+
+$(TARGET_1_OBJ_42):$(TARGET_1_SRC_42)
+	cc -c ${TARGET_1_SRC_42} -o ${TARGET_1_OBJ_42} $(TARGET_1_FLAG_42)
+
+$(TARGET_1_OBJ_43):$(TARGET_1_SRC_43)
+	cc -c ${TARGET_1_SRC_43} -o ${TARGET_1_OBJ_43} $(TARGET_1_FLAG_43)
+
+$(TARGET_1_OBJ_44):$(TARGET_1_SRC_44)
+	cc -c ${TARGET_1_SRC_44} -o ${TARGET_1_OBJ_44} $(TARGET_1_FLAG_44)
+
+$(TARGET_1_OBJ_45):$(TARGET_1_SRC_45)
+	cc -c ${TARGET_1_SRC_45} -o ${TARGET_1_OBJ_45} $(TARGET_1_FLAG_45)
+
+$(TARGET_1_OBJ_46):$(TARGET_1_SRC_46)
+	cc -c ${TARGET_1_SRC_46} -o ${TARGET_1_OBJ_46} $(TARGET_1_FLAG_46)
+
+$(TARGET_1_OBJ_47):$(TARGET_1_SRC_47)
+	cc -c ${TARGET_1_SRC_47} -o ${TARGET_1_OBJ_47} $(TARGET_1_FLAG_47)
+
+$(TARGET_1_OBJ_48):$(TARGET_1_SRC_48)
+	cc -c ${TARGET_1_SRC_48} -o ${TARGET_1_OBJ_48} $(TARGET_1_FLAG_48)
+
+$(TARGET_1_OBJ_49):$(TARGET_1_SRC_49)
+	cc -c ${TARGET_1_SRC_49} -o ${TARGET_1_OBJ_49} $(TARGET_1_FLAG_49)
+
+$(TARGET_1_OBJ_50):$(TARGET_1_SRC_50)
+	cc -c ${TARGET_1_SRC_50} -o ${TARGET_1_OBJ_50} $(TARGET_1_FLAG_50)
+
+$(TARGET_1_OBJ_51):$(TARGET_1_SRC_51)
+	cc -c ${TARGET_1_SRC_51} -o ${TARGET_1_OBJ_51} $(TARGET_1_FLAG_51)
+
+$(TARGET_1_OBJ_52):$(TARGET_1_SRC_52)
+	cc -c ${TARGET_1_SRC_52} -o ${TARGET_1_OBJ_52} $(TARGET_1_FLAG_52)
+
+$(TARGET_1_OBJ_53):$(TARGET_1_SRC_53)
+	cc -c ${TARGET_1_SRC_53} -o ${TARGET_1_OBJ_53} $(TARGET_1_FLAG_53)
+
+$(TARGET_1_OBJ_54):$(TARGET_1_SRC_54)
+	cc -c ${TARGET_1_SRC_54} -o ${TARGET_1_OBJ_54} $(TARGET_1_FLAG_54)
+
+$(TARGET_1_OBJ_55):$(TARGET_1_SRC_55)
+	cc -c ${TARGET_1_SRC_55} -o ${TARGET_1_OBJ_55} $(TARGET_1_FLAG_55)
+
+$(TARGET_1_OBJ_56):$(TARGET_1_SRC_56)
+	cc -c ${TARGET_1_SRC_56} -o ${TARGET_1_OBJ_56} $(TARGET_1_FLAG_56)
+
+$(TARGET_1_OBJ_57):$(TARGET_1_SRC_57)
+	cc -c ${TARGET_1_SRC_57} -o ${TARGET_1_OBJ_57} $(TARGET_1_FLAG_57)
+
+$(TARGET_1_OBJ_58):$(TARGET_1_SRC_58)
+	cc -c ${TARGET_1_SRC_58} -o ${TARGET_1_OBJ_58} $(TARGET_1_FLAG_58)
+
+$(TARGET_1_OBJ_59):$(TARGET_1_SRC_59)
+	cc -c ${TARGET_1_SRC_59} -o ${TARGET_1_OBJ_59} $(TARGET_1_FLAG_59)
+
+$(TARGET_1_OBJ_60):$(TARGET_1_SRC_60)
+	cc -c ${TARGET_1_SRC_60} -o ${TARGET_1_OBJ_60} $(TARGET_1_FLAG_60)
+
+$(TARGET_1_OBJ_61):$(TARGET_1_SRC_61)
+	cc -c ${TARGET_1_SRC_61} -o ${TARGET_1_OBJ_61} $(TARGET_1_FLAG_61)
+
+$(TARGET_1_OBJ_62):$(TARGET_1_SRC_62)
+	cc -c ${TARGET_1_SRC_62} -o ${TARGET_1_OBJ_62} $(TARGET_1_FLAG_62)
+
+$(TARGET_1_OBJ_63):$(TARGET_1_SRC_63)
+	cc -c ${TARGET_1_SRC_63} -o ${TARGET_1_OBJ_63} $(TARGET_1_FLAG_63)
+
+$(TARGET_1_OBJ_64):$(TARGET_1_SRC_64)
+	cc -c ${TARGET_1_SRC_64} -o ${TARGET_1_OBJ_64} $(TARGET_1_FLAG_64)
+
+$(TARGET_1_OBJ_65):$(TARGET_1_SRC_65)
+	cc -c ${TARGET_1_SRC_65} -o ${TARGET_1_OBJ_65} $(TARGET_1_FLAG_65)
+
+$(TARGET_1_OBJ_66):$(TARGET_1_SRC_66)
+	cc -c ${TARGET_1_SRC_66} -o ${TARGET_1_OBJ_66} $(TARGET_1_FLAG_66)
+
+$(TARGET_1_OBJ_67):$(TARGET_1_SRC_67)
+	cc -c ${TARGET_1_SRC_67} -o ${TARGET_1_OBJ_67} $(TARGET_1_FLAG_67)
+
+$(TARGET_1_OBJ_68):$(TARGET_1_SRC_68)
+	cc -c ${TARGET_1_SRC_68} -o ${TARGET_1_OBJ_68} $(TARGET_1_FLAG_68)
+
+$(TARGET_1_OBJ_69):$(TARGET_1_SRC_69)
+	cc -c ${TARGET_1_SRC_69} -o ${TARGET_1_OBJ_69} $(TARGET_1_FLAG_69)
+
+$(TARGET_1_OBJ_70):$(TARGET_1_SRC_70)
+	cc -c ${TARGET_1_SRC_70} -o ${TARGET_1_OBJ_70} $(TARGET_1_FLAG_70)
+
+$(TARGET_1_OBJ_71):$(TARGET_1_SRC_71)
+	cc -c ${TARGET_1_SRC_71} -o ${TARGET_1_OBJ_71} $(TARGET_1_FLAG_71)
+
+$(TARGET_1_OBJ_72):$(TARGET_1_SRC_72)
+	cc -c ${TARGET_1_SRC_72} -o ${TARGET_1_OBJ_72} $(TARGET_1_FLAG_72)
+
+$(TARGET_1_OBJ_73):$(TARGET_1_SRC_73)
+	cc -c ${TARGET_1_SRC_73} -o ${TARGET_1_OBJ_73} $(TARGET_1_FLAG_73)
+
+$(TARGET_1_OBJ_74):$(TARGET_1_SRC_74)
+	cc -c ${TARGET_1_SRC_74} -o ${TARGET_1_OBJ_74} $(TARGET_1_FLAG_74)
+
+$(TARGET_1_OBJ_75):$(TARGET_1_SRC_75)
+	cc -c ${TARGET_1_SRC_75} -o ${TARGET_1_OBJ_75} $(TARGET_1_FLAG_75)
+
+$(TARGET_1_OBJ_76):$(TARGET_1_SRC_76)
+	cc -c ${TARGET_1_SRC_76} -o ${TARGET_1_OBJ_76} $(TARGET_1_FLAG_76)
+
+$(TARGET_1_OBJ_77):$(TARGET_1_SRC_77)
+	cc -c ${TARGET_1_SRC_77} -o ${TARGET_1_OBJ_77} $(TARGET_1_FLAG_77)
+
+$(TARGET_1_OBJ_78):$(TARGET_1_SRC_78)
+	cc -c ${TARGET_1_SRC_78} -o ${TARGET_1_OBJ_78} $(TARGET_1_FLAG_78)
+
+$(TARGET_1_OBJ_79):$(TARGET_1_SRC_79)
+	cc -c ${TARGET_1_SRC_79} -o ${TARGET_1_OBJ_79} $(TARGET_1_FLAG_79)
+
+$(TARGET_1_OBJ_80):$(TARGET_1_SRC_80)
+	cc -c ${TARGET_1_SRC_80} -o ${TARGET_1_OBJ_80} $(TARGET_1_FLAG_80)
+
+$(TARGET_1_OBJ_81):$(TARGET_1_SRC_81)
+	cc -c ${TARGET_1_SRC_81} -o ${TARGET_1_OBJ_81} $(TARGET_1_FLAG_81)
+
+$(TARGET_1_OBJ_82):$(TARGET_1_SRC_82)
+	cc -c ${TARGET_1_SRC_82} -o ${TARGET_1_OBJ_82} $(TARGET_1_FLAG_82)
+
+$(TARGET_1_OBJ_83):$(TARGET_1_SRC_83)
+	cc -c ${TARGET_1_SRC_83} -o ${TARGET_1_OBJ_83} $(TARGET_1_FLAG_83)
+
+$(TARGET_1_OBJ_84):$(TARGET_1_SRC_84)
+	cc -c ${TARGET_1_SRC_84} -o ${TARGET_1_OBJ_84} $(TARGET_1_FLAG_84)
+
+$(TARGET_1_OBJ_85):$(TARGET_1_SRC_85)
+	cc -c ${TARGET_1_SRC_85} -o ${TARGET_1_OBJ_85} $(TARGET_1_FLAG_85)
+
+$(TARGET_1_OBJ_86):$(TARGET_1_SRC_86)
+	cc -c ${TARGET_1_SRC_86} -o ${TARGET_1_OBJ_86} $(TARGET_1_FLAG_86)
+
+$(TARGET_1_OBJ_87):$(TARGET_1_SRC_87)
+	cc -c ${TARGET_1_SRC_87} -o ${TARGET_1_OBJ_87} $(TARGET_1_FLAG_87)
+
+$(TARGET_1_OBJ_88):$(TARGET_1_SRC_88)
+	cc -c ${TARGET_1_SRC_88} -o ${TARGET_1_OBJ_88} $(TARGET_1_FLAG_88)
+
+$(TARGET_1_OBJ_89):$(TARGET_1_SRC_89)
+	cc -c ${TARGET_1_SRC_89} -o ${TARGET_1_OBJ_89} $(TARGET_1_FLAG_89)
+
+$(TARGET_1_OBJ_90):$(TARGET_1_SRC_90)
+	cc -c ${TARGET_1_SRC_90} -o ${TARGET_1_OBJ_90} $(TARGET_1_FLAG_90)
+
+$(TARGET_1_OBJ_91):$(TARGET_1_SRC_91)
+	cc -c ${TARGET_1_SRC_91} -o ${TARGET_1_OBJ_91} $(TARGET_1_FLAG_91)
+
+$(TARGET_1_OBJ_92):$(TARGET_1_SRC_92)
+	cc -c ${TARGET_1_SRC_92} -o ${TARGET_1_OBJ_92} $(TARGET_1_FLAG_92)
+
+$(TARGET_1_OBJ_93):$(TARGET_1_SRC_93)
+	cc -c ${TARGET_1_SRC_93} -o ${TARGET_1_OBJ_93} $(TARGET_1_FLAG_93)
+
+$(TARGET_1_OBJ_94):$(TARGET_1_SRC_94)
+	cc -c ${TARGET_1_SRC_94} -o ${TARGET_1_OBJ_94} $(TARGET_1_FLAG_94)
+
+$(TARGET_1_OBJ_95):$(TARGET_1_SRC_95)
+	cc -c ${TARGET_1_SRC_95} -o ${TARGET_1_OBJ_95} $(TARGET_1_FLAG_95)
+
+$(TARGET_1_OBJ_96):$(TARGET_1_SRC_96)
+	cc -c ${TARGET_1_SRC_96} -o ${TARGET_1_OBJ_96} $(TARGET_1_FLAG_96)
+
+$(TARGET_1_OBJ_97):$(TARGET_1_SRC_97)
+	cc -c ${TARGET_1_SRC_97} -o ${TARGET_1_OBJ_97} $(TARGET_1_FLAG_97)
+
+$(TARGET_1_OBJ_98):$(TARGET_1_SRC_98)
+	cc -c ${TARGET_1_SRC_98} -o ${TARGET_1_OBJ_98} $(TARGET_1_FLAG_98)
+
+$(TARGET_1_OBJ_99):$(TARGET_1_SRC_99)
+	cc -c ${TARGET_1_SRC_99} -o ${TARGET_1_OBJ_99} $(TARGET_1_FLAG_99)
+
+$(TARGET_1_OBJ_100):$(TARGET_1_SRC_100)
+	cc -c ${TARGET_1_SRC_100} -o ${TARGET_1_OBJ_100} $(TARGET_1_FLAG_100)
+
+$(TARGET_1_OBJ_101):$(TARGET_1_SRC_101)
+	cc -c ${TARGET_1_SRC_101} -o ${TARGET_1_OBJ_101} $(TARGET_1_FLAG_101)
+
+$(TARGET_1_OBJ_102):$(TARGET_1_SRC_102)
+	cc -c ${TARGET_1_SRC_102} -o ${TARGET_1_OBJ_102} $(TARGET_1_FLAG_102)
+
+$(TARGET_1_OBJ_103):$(TARGET_1_SRC_103)
+	cc -c ${TARGET_1_SRC_103} -o ${TARGET_1_OBJ_103} $(TARGET_1_FLAG_103)
+
+$(TARGET_1_OBJ_104):$(TARGET_1_SRC_104)
+	cc -c ${TARGET_1_SRC_104} -o ${TARGET_1_OBJ_104} $(TARGET_1_FLAG_104)
+
+$(TARGET_1_OBJ_105):$(TARGET_1_SRC_105)
+	cc -c ${TARGET_1_SRC_105} -o ${TARGET_1_OBJ_105} $(TARGET_1_FLAG_105)
+
+$(TARGET_1_OBJ_106):$(TARGET_1_SRC_106)
+	cc -c ${TARGET_1_SRC_106} -o ${TARGET_1_OBJ_106} $(TARGET_1_FLAG_106)
+
+$(TARGET_1_OBJ_107):$(TARGET_1_SRC_107)
+	cc -c ${TARGET_1_SRC_107} -o ${TARGET_1_OBJ_107} $(TARGET_1_FLAG_107)
+
+$(TARGET_1_OBJ_108):$(TARGET_1_SRC_108)
+	cc -c ${TARGET_1_SRC_108} -o ${TARGET_1_OBJ_108} $(TARGET_1_FLAG_108)
+
+$(TARGET_1_OBJ_109):$(TARGET_1_SRC_109)
+	cc -c ${TARGET_1_SRC_109} -o ${TARGET_1_OBJ_109} $(TARGET_1_FLAG_109)
+
+$(TARGET_1_OBJ_110):$(TARGET_1_SRC_110)
+	cc -c ${TARGET_1_SRC_110} -o ${TARGET_1_OBJ_110} $(TARGET_1_FLAG_110)
+
+$(TARGET_1_OBJ_111):$(TARGET_1_SRC_111)
+	cc -c ${TARGET_1_SRC_111} -o ${TARGET_1_OBJ_111} $(TARGET_1_FLAG_111)
+
+$(TARGET_1_OBJ_112):$(TARGET_1_SRC_112)
+	cc -c ${TARGET_1_SRC_112} -o ${TARGET_1_OBJ_112} $(TARGET_1_FLAG_112)
+
+$(TARGET_1_OBJ_113):$(TARGET_1_SRC_113)
+	cc -c ${TARGET_1_SRC_113} -o ${TARGET_1_OBJ_113} $(TARGET_1_FLAG_113)
+
+$(TARGET_1_OBJ_114):$(TARGET_1_SRC_114)
+	cc -c ${TARGET_1_SRC_114} -o ${TARGET_1_OBJ_114} $(TARGET_1_FLAG_114)
+
+$(TARGET_1_OBJ_115):$(TARGET_1_SRC_115)
+	cc -c ${TARGET_1_SRC_115} -o ${TARGET_1_OBJ_115} $(TARGET_1_FLAG_115)
+
+$(TARGET_1_OBJ_116):$(TARGET_1_SRC_116)
+	cc -c ${TARGET_1_SRC_116} -o ${TARGET_1_OBJ_116} $(TARGET_1_FLAG_116)
+
+$(TARGET_1_OBJ_117):$(TARGET_1_SRC_117)
+	cc -c ${TARGET_1_SRC_117} -o ${TARGET_1_OBJ_117} $(TARGET_1_FLAG_117)
+
+$(TARGET_1_OBJ_118):$(TARGET_1_SRC_118)
+	cc -c ${TARGET_1_SRC_118} -o ${TARGET_1_OBJ_118} $(TARGET_1_FLAG_118)
+
+$(TARGET_1_OBJ_119):$(TARGET_1_SRC_119)
+	cc -c ${TARGET_1_SRC_119} -o ${TARGET_1_OBJ_119} $(TARGET_1_FLAG_119)
+
+$(TARGET_1_OBJ_120):$(TARGET_1_SRC_120)
+	cc -c ${TARGET_1_SRC_120} -o ${TARGET_1_OBJ_120} $(TARGET_1_FLAG_120)
+
+$(TARGET_1_OBJ_121):$(TARGET_1_SRC_121)
+	cc -c ${TARGET_1_SRC_121} -o ${TARGET_1_OBJ_121} $(TARGET_1_FLAG_121)
+
+$(TARGET_1_OBJ_122):$(TARGET_1_SRC_122)
+	cc -c ${TARGET_1_SRC_122} -o ${TARGET_1_OBJ_122} $(TARGET_1_FLAG_122)
+
+$(TARGET_1_OBJ_123):$(TARGET_1_SRC_123)
+	cc -c ${TARGET_1_SRC_123} -o ${TARGET_1_OBJ_123} $(TARGET_1_FLAG_123)
+
+$(TARGET_1_OBJ_124):$(TARGET_1_SRC_124)
+	cc -c ${TARGET_1_SRC_124} -o ${TARGET_1_OBJ_124} $(TARGET_1_FLAG_124)
+
+$(TARGET_1_OBJ_125):$(TARGET_1_SRC_125)
+	cc -c ${TARGET_1_SRC_125} -o ${TARGET_1_OBJ_125} $(TARGET_1_FLAG_125)
+
+$(TARGET_2): $(OBJS_2)
+	$(CC) -fsycl -shared -o $@ $^ $(LIB) -qmkl
+
+$(TARGET_2_OBJ_0):$(TARGET_2_SRC_0)
+	cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0)
+
+$(TARGET_2_OBJ_1):$(TARGET_2_SRC_1)
+	icpx -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1)
+
+clean:
+	rm -f  ${OBJS_0} ${OBJS_1} ${OBJS_2} $(TARGET)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h
new file mode 100644
index 000000000..6d131963f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_H
+#define HPL_H
+/*
+ * ---------------------------------------------------------------------
+ * HPL default compile options that can be overridden in the Make.<arch>
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_NO_MPI_DATATYPE         /* Use MPI user-defined data type */
+#define HPL_USE_MPI_DATATYPE
+#endif
+ 
+#ifndef HPL_COPY_L  /* do not copy L, use MPI user-defined data types */
+#define HPL_NO_COPY_L
+#endif
+ 
+#ifndef HPL_DETAILED_TIMING         /* Do not enable detailed timings */
+#define HPL_NO_DETAILED_TIMING
+#endif
+ 
+#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
+#ifndef HPL_CALL_CBLAS                       /* there can be only one */
+#define HPL_CALL_FBLAS
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_timer.h"
+#include "hpl_matgen.h"
+#include "hpl_test.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+#include "hpl_ptest.h"
+
+#endif
+/*
+ * End of hpl.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h
new file mode 100644
index 000000000..861caf380
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_AUXIL_H
+#define HPL_AUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM;
+
+typedef enum
+{
+   HPL_MACH_EPS   = 900,                /* relative machine precision */
+   HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */
+   HPL_MACH_BASE  = 902,                /* base = base of the machine */
+   HPL_MACH_PREC  = 903,                          /* prec  = eps*base */
+   HPL_MACH_MLEN  = 904,   /* number of (base) digits in the mantissa */
+   HPL_MACH_RND   = 905,        /* 1.0 if rounding occurs in addition */
+   HPL_MACH_EMIN  = 906,   /* min exponent before (gradual) underflow */
+   HPL_MACH_RMIN  = 907,        /* underflow threshold base**(emin-1) */
+   HPL_MACH_EMAX  = 908,          /* largest exponent before overflow */
+   HPL_MACH_RMAX  = 909  /* overflow threshold - (base**emax)*(1-eps) */
+ 
+} HPL_T_MACH;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_fprintf
+STDC_ARGS( (
+   FILE *,
+   const char *,
+   ...
+) );
+void                             HPL_warn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_abort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_dlacpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlatcpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlaprnt
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_dlange
+STDC_ARGS( (
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+double                           HPL_dlamch
+STDC_ARGS( (
+   const HPL_T_MACH
+) );
+
+#endif
+/*
+ * End of hpl_auxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h
new file mode 100644
index 000000000..2a510471a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h
@@ -0,0 +1,630 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_BLAS_H
+#define HPL_BLAS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+
+
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+enum HPL_ORDER
+{  HplRowMajor = 101,  HplColumnMajor  = 102 };
+enum HPL_TRANS
+{  HplNoTrans  = 111,  HplTrans        = 112,  HplConjTrans    = 113 };
+enum HPL_UPLO
+{  HplUpper    = 121,  HplLower        = 122 };
+enum HPL_DIAG
+{  HplNonUnit  = 131,  HplUnit         = 132 };
+enum HPL_SIDE
+{  HplLeft     = 141,  HplRight        = 142 }; 
+
+
+#ifdef HPL_CALL_CBLAS
+
+
+/*
+ * ---------------------------------------------------------------------
+ * The C interface of the BLAS is available ...
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    CBLAS_INDEX         int
+ 
+#define    CBLAS_ORDER         HPL_ORDER  /* CBLAS names -> HPL_ORDER */
+#define    CblasRowMajor       HplRowMajor
+#define    CblasColMajor       HplColumnMajor /* enumerator's real name */
+ 
+#define    CBLAS_TRANSPOSE     HPL_TRANS
+#define    CblasNoTrans        HplNoTrans
+#define    CblasTrans          HplTrans
+#define    CblasConjTrans      HplConjTrans
+ 
+#define    CBLAS_UPLO          HPL_UPLO
+#define    CblasUpper          HplUpper
+#define    CblasLower          HplLower
+ 
+#define    CBLAS_DIAG          HPL_DIAG
+#define    CblasNonUnit        HplNonUnit
+#define    CblasUnit           HplUnit
+ 
+#define    CBLAS_SIDE          HPL_SIDE
+#define    CblasLeft           HplLeft
+#define    CblasRight          HplRight
+/*
+ * ---------------------------------------------------------------------
+ * CBLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+CBLAS_INDEX       cblas_idamax
+STDC_ARGS(
+(  const int,       const double *,  const int ) );
+void              cblas_dswap
+STDC_ARGS(
+(  const int,       double *,        const int,       double *,
+   const int ) );
+void              cblas_dcopy
+STDC_ARGS(
+(  const int,       const double *,  const int,       double *,
+   const int ) );
+void              cblas_daxpy
+STDC_ARGS(
+(  const int,       const double,    const double *,  const int,
+   double *,        const int ) );
+void              cblas_dscal
+STDC_ARGS(
+(  const int,       const double,    double *,        const int ) );
+
+void              cblas_dgemv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const int,       const int,       const double,    const double *,
+   const int,       const double *,  const int,       const double,
+   double *,        const int ) );
+
+void              cblas_dger
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const int,       const int,
+   const double,    const double *,  const int,       const double *,
+   const int,       double *,        const int ) );
+void              cblas_dtrsv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_UPLO,
+   const enum CBLAS_TRANSPOSE,       const enum CBLAS_DIAG,
+   const int,       const double *,  const int,       double *,
+   const int ) );
+
+void              cblas_dgemm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void              cblas_dtrsm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+void             dpcpp_dgemm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void             dpcpp_dtrsm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+/*
+ * ---------------------------------------------------------------------
+ * HPL C BLAS macro definition
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_dswap           cblas_dswap
+#define    HPL_dcopy           cblas_dcopy
+#define    HPL_daxpy           cblas_daxpy
+#define    HPL_dscal           cblas_dscal
+#define    HPL_idamax          cblas_idamax
+
+#define    HPL_dgemv           cblas_dgemv
+#define    HPL_dtrsv           cblas_dtrsv
+#define    HPL_dger            cblas_dger
+
+//#define    HPL_dgemm           cblas_dgemm
+//#define    HPL_dtrsm           cblas_dtrsm
+#define    HPL_dgemm           dpcpp_dgemm
+#define    HPL_dtrsm           dpcpp_dtrsm  
+
+#endif
+
+//#define    HPL_hello           sss_gemm 
+
+#ifdef HPL_CALL_FBLAS
+/*
+ * ---------------------------------------------------------------------
+ * Use the Fortran 77 interface of the BLAS ...
+ * ---------------------------------------------------------------------
+ * Defaults: Add_, F77_INTEGER=int, StringSunStyle
+ * ---------------------------------------------------------------------
+ */
+#ifndef NoChange
+#ifndef UpCase
+#ifndef Add__
+#ifndef Add_
+
+#define Add_
+
+#endif
+#endif
+#endif
+#endif
+
+#ifndef F77_INTEGER
+#define    F77_INTEGER         int
+#else
+#define    HPL_USE_F77_INTEGER_DEF
+#endif
+
+#ifndef StringCrayStyle
+#ifndef StringStructVal
+#ifndef StringStructPtr
+#ifndef StringSunStyle
+
+#define StringSunStyle
+
+#endif
+#endif
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 <-> C interface
+ * ---------------------------------------------------------------------
+ *
+ * These macros identify how Fortran routines will be called.
+ *
+ * Add_     : the Fortran compiler expects the name of C functions to be
+ * in all lower case and to have an underscore postfixed it (Suns, Intel
+ * compilers expect this).
+ *
+ * NoChange : the Fortran compiler expects the name of C functions to be
+ * in all lower case (IBM RS6K compilers do this).
+ *
+ * UpCase   : the Fortran compiler expects the name of C functions to be
+ * in all upcase. (Cray compilers expect this).
+ *
+ * Add__    : the Fortran compiler in use is f2c, a Fortran to C conver-
+ * ter.
+ */
+#ifdef NoChange
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm(...)
+ */
+#define    F77dswap               dswap
+#define    F77dscal               dscal
+#define    F77dcopy               dcopy
+#define    F77daxpy               daxpy
+#define    F77idamax              idamax
+
+#define    F77dgemv               dgemv
+#define    F77dtrsv               dtrsv
+#define    F77dger                dger
+
+#define    F77dgemm               dgemm
+#define    F77dtrsm               dtrsm
+
+#endif
+
+#ifdef UpCase
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          DGEMM(...)
+ */
+#ifdef CRAY_BLAS
+                                                                                
+#define    F77dswap               SSWAP
+#define    F77dscal               SSCAL
+#define    F77dcopy               SCOPY
+#define    F77daxpy               SAXPY
+#define    F77idamax              ISAMAX
+                                                                                
+#define    F77dgemv               SGEMV
+#define    F77dtrsv               STRSV
+#define    F77dger                SGER
+                                                                                
+#define    F77dgemm               SGEMM
+#define    F77dtrsm               STRSM
+                                                                                
+#else
+
+#define    F77dswap               DSWAP
+#define    F77dscal               DSCAL
+#define    F77dcopy               DCOPY
+#define    F77daxpy               DAXPY
+#define    F77idamax              IDAMAX
+
+#define    F77dgemv               DGEMV
+#define    F77dtrsv               DTRSV
+#define    F77dger                DGER
+
+#define    F77dgemm               DGEMM
+#define    F77dtrsm               DTRSM
+
+#endif
+
+#endif
+
+#ifdef Add_
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+
+#endif
+
+#ifdef Add__
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+ 
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+ 
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+//#define    F77hello               sss_gemm
+ 
+#endif
+//#define    F77hello               sss_gemm
+/*
+ * ---------------------------------------------------------------------
+ * Typedef definitions and conversion utilities
+ * ---------------------------------------------------------------------
+ */
+#ifdef StringCrayStyle
+
+#include <fortran.h>
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            _fcd
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(_fcdtocp(c) ))
+#define    HPL_C2F_CHAR(c)     (_cptofcd(&(c), 1))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructVal
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c.cp))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructPtr
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c->cp))
+
+#define    F77_CHAR_DECL       F77_CHAR *        /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringSunStyle
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            char *
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c))
+#define    HPL_C2F_CHAR(c)     (&(c))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+#define    F77_1_CHAR          , F77_INTEGER
+#define    F77_2_CHAR          F77_1_CHAR F77_1_CHAR
+#define    F77_3_CHAR          F77_2_CHAR F77_1_CHAR
+#define    F77_4_CHAR          F77_3_CHAR F77_1_CHAR
+
+#endif
+/* ------------------------------------------------------------------ */
+
+#ifndef F77_1_CHAR
+#define    F77_1_CHAR
+#define    F77_2_CHAR
+#define    F77_3_CHAR
+#define    F77_4_CHAR
+#endif
+
+#define    F77_INT_DECL        const F77_INTEGER *   /* input integer */
+#define    F77_SIN_DECL        const double *         /* input scalar */
+#define    F77_VIN_DECL        const double *         /* input vector */
+#define    F77_VINOUT_DECL     double *        /* input/output vector */
+#define    F77_MIN_DECL        const double *         /* input matrix */
+#define    F77_MINOUT_DECL     double *        /* input/output matrix */
+ 
+#ifdef CRAY_PVP_ENV                      /* Type of FORTRAN functions */
+#define    F77_VOID_FUN        extern fortran void      /* subroutine */
+#define    F77_INT_FUN         extern fortran int /* integer function */
+#else
+#define    F77_VOID_FUN        extern void              /* subroutine */
+#define    F77_INT_FUN         extern int         /* integer function */
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 BLAS function prototypes
+ * ---------------------------------------------------------------------
+ */
+F77_VOID_FUN    F77dswap
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dscal
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_VOID_FUN    F77dcopy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77daxpy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_INT_FUN     F77idamax
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL ) );
+
+F77_VOID_FUN    F77dgemv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL     F77_1_CHAR ) );
+F77_VOID_FUN    F77dger
+STDC_ARGS(
+(  F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,
+   F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dtrsv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL
+   F77_3_CHAR ) );
+
+F77_VOID_FUN    F77dgemm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,
+   F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,    F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL     F77_2_CHAR ) );
+F77_VOID_FUN    F77dtrsm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,
+   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,
+   F77_INT_DECL,    F77_MINOUT_DECL, F77_INT_DECL     F77_4_CHAR ) );
+
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * HPL BLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_CALL_CBLAS
+
+int                              HPL_idamax
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int
+) );
+void                             HPL_daxpy
+STDC_ARGS( (
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dcopy
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dscal
+STDC_ARGS( (
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dswap
+STDC_ARGS( (
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dger
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dtrsv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_hello
+STDC_ARGS( (
+) );
+#endif
+void                             HPL_dtrsm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_SIDE,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+
+//#endif
+
+#endif
+/*
+ * End of hpl_blas.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h
new file mode 100644
index 000000000..e3ba51a57
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h
@@ -0,0 +1,161 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_COMM_H
+#define HPL_COMM_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_1RING         = 401,                        /* Increasing ring */
+   HPL_1RING_M       = 402,             /* Increasing ring (modified) */
+   HPL_2RING         = 403,                      /* Increasing 2-ring */
+   HPL_2RING_M       = 404,           /* Increasing 2-ring (modified) */
+   HPL_BLONG         = 405,                         /* long broadcast */
+   HPL_BLONG_M       = 406               /* long broadcast (modified) */
+} HPL_T_TOP;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_FAILURE            0
+#define    HPL_SUCCESS            1
+#define    HPL_KEEP_TESTING       2
+/*
+ * ---------------------------------------------------------------------
+ * comm function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_send
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_recv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_sdrv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_binit
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_bcast
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *
+) );
+int                              HPL_bwait
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_packL
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_copyL
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blong STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+
+#endif
+/*
+ * End of hpl_comm.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h
new file mode 100644
index 000000000..ce671cf2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h
@@ -0,0 +1,87 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GESV_H
+#define HPL_GESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
+   HPL_CROUT         = 302,                  /* Crout lu fact variant */
+   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
+} HPL_T_FACT;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void              HPL_dgesv
+STDC_ARGS(
+(  const int,       const int,       const int,       const HPL_T_FACT,
+   const HPL_T_FACT,                 const int,       double *,
+   const int,       int * ) );
+void              HPL_ipid
+STDC_ARGS(
+(  const int,       double *,        int *,           int *,
+   int *,           int *,           int *,           int *,
+   const int,       const int,       const int,       const int,
+   const int ) );
+
+#endif
+/*
+ * End of hpl_gesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h
new file mode 100644
index 000000000..1895a5ed4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h
@@ -0,0 +1,212 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GRID_H
+#define HPL_GRID_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum { HPL_INT       = 100, HPL_DOUBLE       = 101 } HPL_T_TYPE;
+ 
+typedef enum
+{
+   HPL_ROW_MAJOR     = 201,
+   HPL_COLUMN_MAJOR  = 202
+} HPL_T_ORDER;
+
+typedef struct HPL_S_grid
+{
+   MPI_Comm        all_comm;                     /* grid communicator */
+   MPI_Comm        row_comm;                      /* row communicator */
+   MPI_Comm        col_comm;                   /* column communicator */
+   HPL_T_ORDER     order;        /* ordering of the procs in the grid */
+   int             iam;                        /* my rank in the grid */
+   int             myrow;                /* my row number in the grid */
+   int             mycol;             /* my column number in the grid */
+   int             nprow;          /* the total # of rows in the grid */
+   int             npcol;       /* the total # of columns in the grid */
+   int             nprocs;        /* the total # of procs in the grid */
+   int             row_ip2;          /* largest power of two <= nprow */
+   int             row_hdim;     /* row_ip2 procs hypercube dimension */
+   int             row_ip2m1;      /* largest power of two <= nprow-1 */
+   int             row_mask;        /* row_ip2m1 procs hypercube mask */
+   int             col_ip2;          /* largest power of two <= npcol */
+   int             col_hdim;     /* col_ip2 procs hypercube dimension */
+   int             col_ip2m1;      /* largest power of two <= npcol-1 */
+   int             col_mask;        /* col_ip2m1 procs hypercube mask */
+} HPL_T_grid;
+
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_OP)
+(  const int,       const void *,    void *,          const HPL_T_TYPE );
+/*
+ * ---------------------------------------------------------------------
+ * #define macro definitions
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_2_MPI_TYPE( typ ) /* HPL_INT -> MPI_INT, else MPI_DOUBLE */ \
+                         ( ( (typ) == HPL_INT ? MPI_INT : MPI_DOUBLE ) )
+/*
+ * The following macros perform common modulo operations.  All macros
+ * except MPosMod assume their arguments are < d (i.e., the arguments
+ * are themselves within modulo range).
+ */
+                /* increment with mod; do/while(0) makes it one statement */
+#define    MModInc(I, d)       do { if(++(I) == (d)) (I) = 0; } while(0)
+                /* decrement with mod; do/while(0) makes it one statement */
+#define    MModDec(I, d)       do { if(--(I) == -1) (I) = (d)-1; } while(0)
+                                                   /* positive modulo */
+#define    MPosMod(I, d)       ( (I) - ((I)/(d))*(d) )
+                                                   /* add two numbers */
+#define    MModAdd(I1, I2, d) \
+           ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) )
+                                                        /* add 1 to # */
+#define    MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 )
+                                              /* subtract two numbers */
+#define    MModSub(I1, I2, d) \
+           ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) )
+                                                      /* sub 1 from # */
+#define    MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 )
+/*
+ * ---------------------------------------------------------------------
+ * grid function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_grid_init
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_ORDER,
+   const int,
+   const int,
+   HPL_T_grid *
+) );
+int                              HPL_grid_exit
+STDC_ARGS( (
+   HPL_T_grid *
+) );
+
+int                              HPL_grid_info
+STDC_ARGS( (
+   const HPL_T_grid *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_pnum
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int
+) );
+
+int                              HPL_barrier
+STDC_ARGS( (
+   MPI_Comm
+) );
+int                              HPL_broadcast
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_all_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   MPI_Comm
+) );
+
+void                             HPL_max
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_min
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_sum
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+
+#endif
+/*
+ * End of hpl_grid.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h
new file mode 100644
index 000000000..de6503eea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MATGEN_H
+#define HPL_MATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MULT0         1284865837
+#define    HPL_MULT1         1481765933
+#define    HPL_IADD0         1
+#define    HPL_IADD1         0
+#define    HPL_DIVFAC        2147483648.0
+#define    HPL_POW16         65536.0
+#define    HPL_HALF          0.5
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dmatgen
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+void                             HPL_lmul
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_ladd
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_xjumpm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_setran
+STDC_ARGS( (
+   const int,
+   int *
+) );
+void                             HPL_jumpit
+STDC_ARGS( (
+   int *,
+   int *,
+   int *,
+   int *
+) );
+double                           HPL_rand STDC_ARGS( ( void ) );
+
+#endif
+/*
+ * End of hpl_matgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h
new file mode 100644
index 000000000..ea421a403
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h
@@ -0,0 +1,110 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MISC_H
+#define HPL_MISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#if ( defined(__STDC__) || defined(__cplusplus) ) && !defined(STDC_HEADERS)
+#define STDC_HEADERS   /* ISO C or C++ compiler: real prototypes available */
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef STDC_HEADERS
+#include <stdarg.h>
+#define STDC_ARGS(p)           p
+#else
+#include <varargs.h>
+#define STDC_ARGS(p)           ()
+#endif
+
+#ifdef HPL_CALL_VSIPL
+#include <vsip.h>
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_rone             1.0
+#define    HPL_rtwo             2.0
+#define    HPL_rzero            0.0
+/*
+ * ---------------------------------------------------------------------
+ * #define macro definitions
+ * ---------------------------------------------------------------------
+ */
+#define    Mabs( a_ )          ( ( (a_) <   0  ) ? -(a_) : (a_) )
+#define    Mmin( a_, b_ )      ( ( (a_) < (b_) ) ?  (a_) : (b_) )
+#define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
+
+#define    Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b))))
+#define    Mceil(a,b)           ( ( (a)+(b)-1 ) / (b) )
+#define    Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b))))
+
+#define    Mupcase(C)          (((C)>96 && (C)<123) ? (C) & 0xDF : (C))
+#define    Mlowcase(C)         (((C)>64 && (C)< 91) ? (C) | 32   : (C))
+/*
+ * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and
+ * also less silly errors ...
+ */
+#define    Mptr( a_, i_, j_, lda_ ) \
+   ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) )
+/*
+ * Align pointer
+ */
+#define    HPL_PTR( ptr_, al_ ) \
+                      ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) 
+#endif
+/*
+ * End of hpl_misc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h
new file mode 100644
index 000000000..d5ba2939c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PANEL_H
+#define HPL_PANEL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_panel
+{
+   struct HPL_S_grid   * grid;             /* ptr to the process grid */
+   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
+   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
+   double              * A;              /* ptr to trailing part of A */
+   double              * WORK;                          /* work space */
+   double              * L2;                              /* ptr to L */
+   double              * L1;       /* ptr to jb x jb upper block of A */
+   double              * DPIV;    /* ptr to replicated jb pivot array */
+   double              * DINFO;      /* ptr to replicated scalar info */
+   double              * U;                               /* ptr to U */
+   int                 * IWORK;     /* integer workspace for swapping */
+   void                * * * buffers[2];   /* buffers for panel bcast */
+   int                 counts [2];          /* counts for panel bcast */
+   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
+   MPI_Request         request[1];        /* requests for panel bcast */
+   MPI_Status          status [1];          /* status for panel bcast */
+   int                 nb;            /* distribution blocking factor */
+   int                 jb;                             /* panel width */
+   int                 m;   /* global # of rows of trailing part of A */
+   int                 n;   /* global # of cols of trailing part of A */
+   int                 ia;  /* global row index of trailing part of A */
+   int                 ja;  /* global col index of trailing part of A */
+   int                 mp;   /* local # of rows of trailing part of A */
+   int                 nq;   /* local # of cols of trailing part of A */
+   int                 ii;   /* local row index of trailing part of A */
+   int                 jj;   /* local col index of trailing part of A */
+   int                 lda;           /* local leading dim of array A */
+   int                 prow;  /* proc. row owning 1st row of trail. A */
+   int                 pcol;  /* proc. col owning 1st col of trail. A */
+   int                 msgid;           /* message id for panel bcast */
+   int                 ldl2;         /* local leading dim of array L2 */
+   int                 len;      /* length of the buffer to broadcast */
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * Ablock;                           /* A block */
+   vsip_block_d        * L1block;                         /* L1 block */
+   vsip_block_d        * L2block;                         /* L2 block */
+   vsip_block_d        * Ublock;                           /* U block */
+#endif
+} HPL_T_panel;
+
+/*
+ * ---------------------------------------------------------------------
+ * panel function prototypes
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pgesv.h"
+
+void                             HPL_pdpanel_new
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel * *
+) );
+void                             HPL_pdpanel_init
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel *
+) );
+int                              HPL_pdpanel_disp
+STDC_ARGS( (
+   HPL_T_panel * *
+) );
+int                              HPL_pdpanel_free
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+
+#endif
+/*
+ * End of hpl_panel.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h
new file mode 100644
index 000000000..1fd0ee457
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h
@@ -0,0 +1,505 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PAUXIL_H
+#define HPL_PAUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Mindxg2p sets proc_ to the process coordinate owning the global index
+ * ig_; proc_ = src_ when ig_ < inb_, src_ < 0 or nprocs_ <= 1.
+ */
+#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
+           {                                                           \
+              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
+                  ( (nprocs_) > 1 ) )                                  \
+              {                                                        \
+                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 proc_ = (src_);                                       \
+              }                                                        \
+           }
+/* Mindxg2l sets il_ from the global index ig_; proc_ is not referenced. */
+#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*( j__ - i__ ) +                           \
+                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
+                         (ig_) - (inb_) : (ig_) );                     \
+              }                                                        \
+           }
+/* Mindxg2lp sets il_ with the same formula as Mindxg2l, and also proc_. */
+#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) )                                 \
+              { il_ = (ig_); proc_ = (src_); }                         \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*(j__-i__) +                               \
+                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
+                         (ig_) - (inb_) : (ig_) );                     \
+                 proc_  = (src_) + 1 + i__;                            \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+           }
+/*
+ * Mindxl2g computes the global index ig_ corresponding to the local
+ * index il_ in process proc_.
+ */
+#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 if( (proc_) == (src_) )                               \
+                 {                                                     \
+                    if( (il_) < (inb_) ) ig_ = (il_);                  \
+                    else                 ig_ = (il_) +                 \
+                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
+                 }                                                     \
+                 else if( (proc_) < (src_) )                           \
+                 {                                                     \
+                    ig_ = (il_) + (inb_) +                             \
+                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
+                                   (proc_)-(src_)-1+(nprocs_) );       \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    ig_ =  (il_) + (inb_) +                            \
+                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
+                           (proc_)-(src_)-1 );                         \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 ig_ = (il_);                                          \
+              }                                                        \
+           }
+/*
+ * MnumrocI computes the # of local indexes  np_ residing in the process
+ * of coordinate  proc_  corresponding to the interval of global indexes
+ * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
+ * src_,  and that the indexes are distributed from src_ using the para-
+ * meters inb_, nb_ and nprocs_.
+ */
+#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
+                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
+                 {                                                     \
+                    nblk__ = (-inb__) / (nb_) + 1;                     \
+                    src__  = (src_) + nblk__;                          \
+                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
+                    inb__ += nblk__*(nb_);                             \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == src__ ) np_ = (n_);              \
+                       else                   np_ = 0;                 \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == (src_) ) np_ = (n_);             \
+                       else                    np_ = 0;                \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 np_ = (n_);                                           \
+              }                                                        \
+           }
+/* Mnumroc is MnumrocI with the first global index i_ fixed to 0. */
+#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
+           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_indxg2lp
+STDC_ARGS( (
+   int *,
+   int *,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2l
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2p
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxl2g
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_infog2l
+STDC_ARGS( (
+   int,
+   int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_numroc
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_numrocI
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+
+void                             HPL_dlaswp00N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp10N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp01N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp01T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp02N
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp03N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp03T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp04N
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp04T
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp06N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp06T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+
+void                             HPL_pabort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pwarn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pdlaprnt
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_pdlamch
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_MACH
+) );
+double                           HPL_pdlange
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pauxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h
new file mode 100644
index 000000000..09eee79ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PFACT_H
+#define HPL_PFACT_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_PFA_FUN)         /* panel fact function pointer */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_RFA_FUN)     /* recursive fact function pointer */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_UPD_FUN)             /* update function pointer */
+(  HPL_T_panel *,   int *,           HPL_T_panel *,   const int ); 
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dlocmax
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_dlocswpN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_dlocswpT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdmxswp
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdrpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdfact
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+#endif
+/*
+ * End of hpl_pfact.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h
new file mode 100644
index 000000000..3ca576c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h
@@ -0,0 +1,346 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PGESV_H
+#define HPL_PGESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+#include "hpl_comm.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum                         /* selects the swapping algorithm */
+{
+   HPL_SWAP00        = 451,                      /* Use HPL_pdlaswp00 */
+   HPL_SWAP01        = 452,                      /* Use HPL_pdlaswp01 */
+   HPL_SW_MIX        = 453, /* Use HPL_pdlaswp00_ for small number of */
+                            /* columns, and HPL_pdlaswp01_ otherwise. */
+   HPL_NO_SWP        = 499  /* presumably no swapping -- TODO confirm */
+} HPL_T_SWAP;
+
+typedef struct HPL_S_palg        /* algorithm options used by the solve */
+{
+   HPL_T_TOP           btopo;               /* row broadcast topology */
+   int                 depth;                     /* look-ahead depth */
+   int                 nbdiv;            /* recursive division factor */
+   int                 nbmin;         /* recursion stopping criterion */
+   HPL_T_FACT          pfact;                   /* panel fact variant */
+   HPL_T_FACT          rfact;               /* recursive fact variant */
+   HPL_T_PFA_FUN       pffun;              /* panel fact function ptr */
+   HPL_T_RFA_FUN       rffun;          /* recursive fact function ptr */
+   HPL_T_UPD_FUN       upfun;                      /* update function */
+   HPL_T_SWAP          fswap;                   /* Swapping algorithm */
+   int                 fsthr;                   /* Swapping threshold */
+   int                 equil;                        /* Equilibration */
+   int                 align;              /* data alignment constant */
+} HPL_T_palg;
+
+typedef struct HPL_S_pmat     /* local A, solution X and their sizes */
+{
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * block;
+#endif
+   double              * A;            /* pointer to local piece of A */
+   double              * X;             /* pointer to solution vector */
+   int                 n;                      /* global problem size */
+   int                 nb;                         /* blocking factor */
+   int                 ld;                 /* local leading dimension */
+   int                 mp;                    /* local number of rows */
+   int                 nq;                 /* local number of columns */
+   int                 info;                    /* computational flag */
+} HPL_T_pmat;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    MSGID_BEGIN_PFACT   1001              /* message id ranges */
+#define    MSGID_END_PFACT     2000
+#define    MSGID_BEGIN_FACT    2001
+#define    MSGID_END_FACT      3000
+#define    MSGID_BEGIN_PTRSV   3001
+#define    MSGID_END_PTRSV     4000
+ 
+#define    MSGID_BEGIN_COLL    9001
+#define    MSGID_END_COLL     10000
+/*
+ * ---------------------------------------------------------------------
+ * Macro definitions: MNxtMgid gives id_+1, or beg_ if that is > end_
+ * ---------------------------------------------------------------------
+ */
+#define    MNxtMgid( id_, beg_, end_ ) \
+                             (( (id_)+1 > (end_) ?  (beg_) : (id_)+1 ))
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pipid
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   int *
+) );
+void                             HPL_plindx0
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdlaswp00N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp00T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_perm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_logsort
+STDC_ARGS( (
+   const int,
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx10
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx1
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_spreadN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_spreadT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_equil
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_TRANS,
+   const int,
+   double *,
+   const int,
+   int *,
+   const int *,
+   const int *,
+   int *
+) );
+void                             HPL_rollN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_rollT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_pdlaswp01N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp01T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdupdateNN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateNT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdgesv0
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK1
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK2
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+ 
+void                             HPL_pdtrsv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_pmat *
+) );
+
+#endif
+/*
+ * End of hpl_pgesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h
new file mode 100644
index 000000000..1091b0f60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h
@@ -0,0 +1,77 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMATGEN_H
+#define HPL_PMATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_matgen.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdmatgen
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pmatgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h
new file mode 100644
index 000000000..23550d47b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h
@@ -0,0 +1,59 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMISC_H
+#define HPL_PMISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "mpi.h"
+
+#endif
+/*
+ * End of hpl_pmisc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h
new file mode 100644
index 000000000..5777bd536
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTEST_H
+#define HPL_PTEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_test     /* test tolerances, output and counters */
+{
+   double              epsil;                      /* epsilon machine */
+   double              thrsh;                            /* threshold */
+   FILE *              outfp;       /* output stream (only in proc 0) */
+   int                 kfail;                    /* # of tests failed */
+   int                 kpass;                    /* # of tests passed */
+   int                 kskip;                   /* # of tests skipped */
+   int                 ktest;                /* total number of tests */
+} HPL_T_test;
+
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants for testing only
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_LINE_MAX         256
+#define    HPL_MAX_PARAM         20
+#define    HPL_ISEED            100
+/*
+ * ---------------------------------------------------------------------
+ * global timers for timing analysis only
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_DETAILED_TIMING
+#define    HPL_TIMING_BEG        11 /* timer 0 reserved, used by main */
+#define    HPL_TIMING_N           6 /* number of timers defined below */
+#define    HPL_TIMING_RPFACT     11 /* starting from here, contiguous */
+#define    HPL_TIMING_PFACT      12
+#define    HPL_TIMING_MXSWP      13
+#define    HPL_TIMING_UPDATE     14
+#define    HPL_TIMING_LASWP      15
+#define    HPL_TIMING_PTRSV      16
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdinfo
+STDC_ARGS( (
+   HPL_T_test *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_ORDER *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   HPL_T_TOP *,
+   int *,
+   int *,
+   HPL_T_SWAP *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdtest
+STDC_ARGS( (
+   HPL_T_test *,
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_ptest.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h
new file mode 100644
index 000000000..43c8fe33a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h
@@ -0,0 +1,96 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTIMER_H /* include guard: the declarations below are seen once per translation unit */
+#define HPL_PTIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h" /* NOTE(review): expected to supply MPI_Comm and STDC_ARGS used below -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants (one integer and two floating-point values)
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NPTIMER             64 /* NOTE(review): presumably the number of timer slots -- confirm at its users */
+#define    HPL_PTIMER_STARTFLAG   5.0 /* NOTE(review): floating-point marker value; its meaning is not visible in this header */
+#define    HPL_PTIMER_ERROR      -1.0 /* NOTE(review): presumably an error result of the double-valued routines -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions: selectors passed by value to the routines below
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{  HPL_WALL_PTIME = 101, HPL_CPU_PTIME  = 102 } HPL_T_PTIME; /* taken by HPL_ptimer_combine and HPL_ptimer_inquire */
+
+typedef enum
+{ HPL_AMAX_PTIME  = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME  = 203 }
+HPL_T_PTIME_OP; /* taken by HPL_ptimer_combine only */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (parameter names are omitted, except for comm)
+ * ---------------------------------------------------------------------
+ */
+double          HPL_ptimer_cputime   STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_walltime  STDC_ARGS(     ( void      ) );
+
+void            HPL_ptimer           STDC_ARGS(     ( const int ) );
+void            HPL_ptimer_boot      STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_combine /* the only routine declared here that takes an MPI_Comm */
+STDC_ARGS(
+(  MPI_Comm comm,   const HPL_T_PTIME_OP,             const HPL_T_PTIME,
+   const int,       const int,       double * ) );
+void            HPL_ptimer_disable   STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_enable    STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_inquire /* takes an HPL_T_PTIME selector and an int, returns double */
+STDC_ARGS(
+(  const HPL_T_PTIME,                const int ) );
+
+#endif /* HPL_PTIMER_H */
+/*
+ * End of hpl_ptimer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h
new file mode 100644
index 000000000..1eedc97e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h
@@ -0,0 +1,80 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TEST_H /* include guard: the declarations below are seen once per translation unit */
+#define HPL_TEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h" /* NOTE(review): presumably supplies FILE and STDC_ARGS used below -- confirm */
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_matgen.h"
+#include "hpl_timer.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (parameter names are omitted, types only)
+ * ---------------------------------------------------------------------
+ */
+void            HPL_dinfo /* 14 parameters, every one a pointer; the first is a FILE * * */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   HPL_T_FACT *,    int *,           int *,           int *, 
+   int *,           int *,           HPL_T_FACT *,    int *,
+   double *,        double * ) );
+void            HPL_dtest /* 12 parameters: a FILE *, eight by-value arguments, then three int * */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   HPL_T_FACT,      HPL_T_FACT,      const int,       const double,
+   const double,    int *,           int *,           int * ) );
+
+#endif /* HPL_TEST_H */
+/*
+ * End of hpl_test.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h
new file mode 100644
index 000000000..4c91700ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TIMER_H /* include guard: the declarations below are seen once per translation unit */
+#define HPL_TIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h" /* NOTE(review): expected to supply STDC_ARGS used below -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants (one integer and two floating-point values)
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NTIMER              64 /* NOTE(review): presumably the number of timer slots -- confirm at its users */
+#define    HPL_TIMER_STARTFLAG    5.0 /* NOTE(review): floating-point marker value; its meaning is not visible in this header */
+#define    HPL_TIMER_ERROR       -1.0 /* NOTE(review): presumably an error result of the double-valued routines -- confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions: selector passed by value to HPL_timer_inquire
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{  HPL_WALL_TIME = 101, HPL_CPU_TIME  = 102 } HPL_T_TIME; /* numeric values match HPL_WALL_PTIME / HPL_CPU_PTIME in hpl_ptimer.h */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (parameter names are omitted, types only)
+ * ---------------------------------------------------------------------
+ */
+double          HPL_timer_cputime    STDC_ARGS(     ( void      ) );
+double          HPL_timer_walltime   STDC_ARGS(     ( void      ) );
+
+void            HPL_timer            STDC_ARGS(     ( const int ) );
+void            HPL_timer_boot       STDC_ARGS(     ( void      ) );
+void            HPL_timer_enable     STDC_ARGS(     ( void      ) );
+void            HPL_timer_disable    STDC_ARGS(     ( void      ) );
+double          HPL_timer_inquire /* takes an HPL_T_TIME selector and an int, returns double */
+STDC_ARGS(
+(  const HPL_T_TIME,                 const int ) );
+
+#endif /* HPL_TIMER_H */
+/*
+ * End of hpl_timer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1
new file mode 100755
index 000000000..1f2100053
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1 differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c
new file mode 100644
index 000000000..bf0c5e727
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c
@@ -0,0 +1,129 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_abort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_abort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_abort  displays  a formatted error message on stderr  and  halts
+ * execution with exit status 0. ANSI builds truncate over-long messages.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128]; /* formatted message, at most 127 characters plus the terminator */
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded: never writes past cline */
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+   (void) vsprintf( cline, FORM, argptr ); /* NOTE(review): still unbounded on pre-ANSI builds */
+#endif
+   va_end( argptr ); 
+/* Display an error message (LINE is printed only when it is positive),
+ * then terminate; the exit status is 0, so callers see a normal exit. */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR in function", SRNAME, cline );
+   else
+      HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+   exit( 0 );
+/*
+ * End of HPL_abort
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o
new file mode 100644
index 000000000..394d35b67
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c
new file mode 100644
index 000000000..ec71180eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c
@@ -0,0 +1,343 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors (the directives quoted in this comment are inactive; the live defaults follow it)
+ * #ifndef HPL_LACPY_M_DEPTH
+ * #define    HPL_LACPY_M_DEPTH       32
+ * #define    HPL_LACPY_LOG2_M_DEPTH   5
+ * #endif
+ * #ifndef HPL_LACPY_N_DEPTH
+ * #define    HPL_LACPY_N_DEPTH        4
+ * #define    HPL_LACPY_LOG2_N_DEPTH   2
+ * #endif
+ */
+#ifndef HPL_LACPY_M_DEPTH /* defaults apply only when the build did not predefine the row depth */
+#define    HPL_LACPY_M_DEPTH        4 /* row unrolling depth: step of the inner i loop in HPL_dlacpy */
+#define    HPL_LACPY_LOG2_M_DEPTH   2 /* log2 of the depth above, used as a shift count; predefining the depth alone leaves this undefined */
+#endif
+#ifndef HPL_LACPY_N_DEPTH /* defaults apply only when the build did not predefine the column depth */
+#define    HPL_LACPY_N_DEPTH        2 /* column unrolling depth: HPL_dlacpy has code paths for 1, 2 and 4 only */
+#define    HPL_LACPY_LOG2_N_DEPTH   1 /* log2 of the depth above, used as a shift count; predefining the depth alone leaves this undefined */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlacpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlacpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlacpy copies an array A into an array B.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of rows of the arrays A and
+ *         B. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies  the number of columns of the arrays A
+ *         and B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LACPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+   const double               * A0 = A,              * A1 = A +     LDA;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+   const double               * A0 = A,              * A1 = A +     LDA,
+                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = ( (unsigned int)(LDA) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incA0 = (unsigned int)(LDA) - M,
+                              incB0 = (unsigned int)(LDB) - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LACPY_USE_COPY
+   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
+                                      HPL_LACPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
+                                      HPL_LACPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 0] = A0[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
+#endif
+
+#if ( HPL_LACPY_M_DEPTH >  1 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 1] = A0[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
+         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
+         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
+         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
+         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
+         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
+         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
+         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
+         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
+         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
+         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
+         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
+         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
+         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
+         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
+         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
+#endif
+
+#endif
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
+         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         *B0 = *A0; B0++; A0++;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
+#endif
+      }
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
+           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
+      {
+         B0[ 0] = A0[ 0];
+#if ( HPL_LACPY_M_DEPTH >  1 )
+         B0[ 1] = A0[ 1];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#endif
+      }
+      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlacpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o
new file mode 100644
index 000000000..565483191
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c
new file mode 100644
index 000000000..c685f0d5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c
@@ -0,0 +1,876 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static function prototypes
+ * ---------------------------------------------------------------------
+ */
+static void     HPL_dlamc1   /* ( BETA, T, RND, IEEE1 ) -- all four are outputs */
+STDC_ARGS(
+(  int *,           int *,           int *,           int * ) );
+static void     HPL_dlamc2   /* ( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) -- all outputs */
+STDC_ARGS(
+(  int *,           int *,           int *,           double *,
+   int *,           double *,        int *,           double * ) );
+static double   HPL_dlamc3   /* ( A, B ) -- returns A + B through a function call */
+STDC_ARGS(
+(  const double,    const double ) );
+static void     HPL_dlamc4   /* ( EMIN, START, BASE ) -- EMIN is the output */
+STDC_ARGS(
+(  int *,           const double,    const int ) );
+static void     HPL_dlamc5   /* ( BETA, P, EMIN, IEEE, EMAX, RMAX ) -- EMAX, RMAX are outputs */
+STDC_ARGS(
+(  const int,       const int,       const int,       const int,
+   int *,           double * ) );
+static double   HPL_dipow    /* called as HPL_dipow( x, n ); presumably x**n -- TODO confirm */
+STDC_ARGS(
+(  const double,    const int ) );
+
+#ifdef STDC_HEADERS
+double HPL_dlamch
+(
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_dlamch
+( CMACH )
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlamch determines  machine-specific  arithmetic constants such as
+ * the relative machine precision  (eps),  the safe minimum (sfmin) such
+ * that 1 / sfmin does not overflow, the base of the machine (base), the
+ * precision (prec), the  number of (base) digits  in the  mantissa (t),
+ * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+ * minimum exponent before  (gradual)  underflow (emin),  the  underflow
+ * threshold (rmin) base**(emin-1), the largest exponent before overflow
+ * (emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+ *
+ * Notes
+ * =====
+ * 
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See                                       
+ *  
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ *  
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ * 
+ * Arguments
+ * =========
+ *
+ * CMACH   (local input)                 const HPL_T_MACH
+ *         Specifies the value to be returned by HPL_dlamch             
+ *            = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+ *            = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+ *            = HPL_MACH_BASE,  HPL_dlamch := base                      
+ *            = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+ *            = HPL_MACH_MLEN,  HPL_dlamch := t                         
+ *            = HPL_MACH_RND,   HPL_dlamch := rnd                       
+ *            = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+ *            = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+ *            = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+ *            = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+ *          
+ *         where                                                        
+ *          
+ *            eps   = relative machine precision,                       
+ *            sfmin = safe minimum,                                     
+ *            base  = base of the machine,                              
+ *            prec  = eps*base,                                         
+ *            t     = number of digits in the mantissa,                 
+ *            rnd   = 1.0 if rounding occurs in addition,               
+ *            emin  = minimum exponent before underflow,                
+ *            rmin  = underflow threshold,                              
+ *            emax  = largest exponent before overflow,                 
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables (the static ones keep the first call's results) ..
+ */
+   static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
+                              rmax, prec;
+   double                     small;    /* 1 / rmax; used on first call only */
+   static int                 first=1;  /* cleared once constants are cached */
+   int                        beta=0, imax=0, imin=0, it=0, lrnd=0;
+/* ..
+ * .. Executable Statements: compute everything once, then dispatch ..
+ */
+   if( first != 0 )
+   {
+      first = 0;
+      HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); /* eps is overwritten just below */
+      base  = (double)(beta);  t     = (double)(it);
+      if( lrnd != 0 )
+      { rnd = HPL_rone;  eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; }
+      else
+      { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it );            }
+      prec  = eps * base;  emin  = (double)(imin); emax  = (double)(imax);
+      sfmin = rmin;        small = HPL_rone / rmax;
+/*
+ * Use  SMALL  plus a bit,  to avoid the possibility of rounding causing
+ * overflow when computing  1/sfmin.
+ */
+      if( small >= sfmin ) sfmin = small * ( HPL_rone + eps );
+   }
+
+   if( CMACH == HPL_MACH_EPS   ) return( eps   );
+   if( CMACH == HPL_MACH_SFMIN ) return( sfmin );
+   if( CMACH == HPL_MACH_BASE  ) return( base  );
+   if( CMACH == HPL_MACH_PREC  ) return( prec  );
+   if( CMACH == HPL_MACH_MLEN  ) return( t     );
+   if( CMACH == HPL_MACH_RND   ) return( rnd   );
+   if( CMACH == HPL_MACH_EMIN  ) return( emin  );
+   if( CMACH == HPL_MACH_RMIN  ) return( rmin  );
+   if( CMACH == HPL_MACH_EMAX  ) return( emax  );
+   if( CMACH == HPL_MACH_RMAX  ) return( rmax  );
+
+   return( eps );   /* any other CMACH value falls back to eps, the documented default */
+/*
+ * End of HPL_dlamch
+ */
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc1
+(
+   int                        * BETA,
+   int                        * T,
+   int                        * RND,
+   int                        * IEEE1
+)
+#else
+static void HPL_dlamc1
+( BETA, T, RND, IEEE1 )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * IEEE1, * RND, * T;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
+ * and IEEE1.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition.  This may not be a  reliable guide to the
+ *         way in which the machine performs its arithmetic.
+ *
+ * IEEE1   (local output)              int *
+ *         Specifies  whether  rounding  appears  to be done in the IEEE
+ *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables (the static ones keep the first call's results) ..
+ */
+   double                     a, b, c, f, one, qtr, savec, t1, t2;
+   static int                 first=1, lbeta, lieee1, lrnd, lt;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; one = HPL_rone;
+/*
+ * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
+ * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
+ * that relevant values are stored and not held in registers, or are not
+ * affected by optimizers.
+ *
+ * Compute  a = 2.0**m  with the  smallest  positive integer m such that
+ * fl( a + 1.0 ) == a.
+ */
+      a = HPL_rone; c = HPL_rone;
+      do
+      { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
+      while( c == HPL_rone );   /* c is fl( fl( a + 1 ) - a ) */
+/*
+ * Now compute b = 2.0**m with the smallest positive integer m such that
+ * fl( a + b ) > a.
+ */
+      b = HPL_rone; c = HPL_dlamc3( a, b );
+      while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
+/*
+ * Now compute the base.  a and c  are  neighbouring floating point num-
+ * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
+ * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
+ * BETA and not (BETA-1).
+ */
+      qtr = one / 4.0; savec = c;   /* savec keeps c for the IEEE test below */
+      c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
+/*
+ * Now  determine  whether  rounding or chopping occurs, by adding a bit
+ * less than BETA/2 and a bit more than BETA/2 to a.
+ */
+      b = (double)(lbeta);
+      f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( c == a ) { lrnd = 1; } else { lrnd = 0; }   /* a bit less than BETA/2 left a unchanged */
+      f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;     /* a bit more than BETA/2 must change a    */
+/*
+ * Try  and decide whether rounding is done in the  IEEE  round to nea-
+ * rest style.  b/2 is half a unit in the last place of the two numbers
+ * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
+ * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
+ * to savec should change savec.
+ */
+      t1 = HPL_dlamc3( b / HPL_rtwo, a );
+      t2 = HPL_dlamc3( b / HPL_rtwo, savec );
+      if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
+      else                                                  lieee1 = 0;
+/*
+ * Now find the mantissa, T. It should be the integer part of log to the
+ * base BETA of a, however it is safer to determine T by powering. So we
+ * find T as the smallest positive integer for which the computed value
+ * fl( fl( beta**t + 1.0 ) - beta**t ) is no longer equal to 1.0.
+ */
+      lt = 0; a = HPL_rone; c = HPL_rone;
+
+      do
+      {
+         lt++; a *= (double)(lbeta);   /* a == lbeta**lt */
+         c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
+      } while( c == HPL_rone );
+   }
+
+   *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;   /* copied out on every call */
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc2
+(
+   int                        * BETA, 
+   int                        * T,
+   int                        * RND,
+   double                     * EPS,
+   int                        * EMIN,
+   double                     * RMIN,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * EMAX, * EMIN, * RND, * T;
+   double                     * EPS, * RMAX, * RMIN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc2  determines the machine  parameters specified in its argu-
+ * ment list.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
+ * based on a function PARANOIA  by  W. Kahan of the University of Cali-
+ * fornia at Berkeley for the computation of the  relative machine epsi-
+ * lon eps.
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition. This may not be a reliable  guide to  the
+ *         way in which the machine performs its arithmetic.
+ *
+ * EPS     (local output)              double *
+ *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
+ *         where fl denotes the computed value.
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before (gradual) underflow occurs.
+ *
+ * RMIN    (local output)              double *
+ *         The smallest  normalized  number  for  the  machine, given by
+ *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
+ *         of BETA.
+ *
+ * EMAX    (local output)              int *
+ *         The maximum exponent before overflow occurs.
+ *
+ * RMAX    (local output)              double *
+ *         The  largest  positive  number  for  the  machine,  given  by
+ *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
+ *         value of BETA.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables (the static ones persist between calls) ..
+ */
+   static double              leps, lrmax, lrmin;
+   double                     a, b, c, half, one, rbase, sixth, small,
+                              third, two, zero;
+   static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
+                              lt=0;
+   int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
+                              lrnd=0, ngnmin=0, ngpmin=0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
+/*
+ * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
+ * T, RND, EPS, EMIN and RMIN.
+ *
+ * Throughout this routine we use the function HPL_dlamc3 to ensure that
+ * relevant values are stored and not held in registers,  or are not af-
+ * fected by optimizers.
+ *
+ * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
+ */
+      HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
+/*
+ * Start to find eps; a = HPL_dipow( lbeta, -lt ) is the first estimate.
+ */
+      b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
+/*
+ * Try some tricks to see whether or not this is the correct  EPS.
+ */
+      b     = two / 3.0; 
+      half  = one / HPL_rtwo;
+      sixth = HPL_dlamc3( b, -half );
+      third = HPL_dlamc3( sixth, sixth );
+      b     = HPL_dlamc3( third, -half );
+      b     = HPL_dlamc3( b, sixth );
+      b     = Mabs( b ); if( b < leps ) b = leps;
+
+      leps = HPL_rone;
+
+      while( ( leps > b ) && ( b > zero ) )
+      {
+         leps = b;
+         c = HPL_dlamc3( half * leps,
+                         HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
+         c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
+         c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
+      }
+      if( a < leps ) leps = a;   /* keep the smaller of the two estimates */
+/*
+ * Computation of EPS complete.
+ *
+ * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
+ * dividing a by BETA until (gradual) underflow occurs. This is detected
+ * when we cannot recover the previous a.
+ */
+      rbase = one / (double)(lbeta); small = one;
+      for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
+      a = HPL_dlamc3( one, small );
+      HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
+      HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
+
+      ieee = 0;
+
+      if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
+      {
+         if( ngpmin == gpmin )
+         {
+/*
+ * Non twos-complement machines, no gradual underflow; e.g.,  VAX
+ */
+            lemin = ngpmin;
+         }
+         else if( ( gpmin-ngpmin ) == 3 )
+         {
+/*
+ * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
+ * dard followers
+ */
+            lemin = ngpmin - 1 + lt; ieee = 1;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, gpmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
+      {
+         if( Mabs( ngpmin-ngnmin ) == 1 )
+         {
+/*
+ * Twos-complement machines, no gradual underflow; e.g., CYBER 205
+ */
+            lemin = Mmax( ngpmin, ngnmin );
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
+      {
+         if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
+         {
+/*
+ * Twos-complement machines with gradual underflow; no known machine
+ */
+            lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else
+      {
+/*
+ * A guess; no known machine
+ */
+         lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
+         lemin = Mmin( lemin, gnmin ); iwarn = 1;
+      }
+/*
+ * Comment out this if block if EMIN is ok
+ */
+      if( iwarn != 0 )   /* iwarn is static and is only ever set to 1 above */
+      {
+         first = 1;      /* re-arm: the whole computation is redone on the next call */
+         HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
+"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
+"If, after inspection, the value EMIN looks acceptable, please comment ",
+"out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
+"otherwise supply EMIN explicitly." );
+      }
+/*
+ * Assume IEEE arithmetic if we found denormalised  numbers above, or if
+ * arithmetic seems to round in the  IEEE style,  determined  in routine
+ * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
+ * ever, faulty machines may have one or the other.
+ */
+      if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
+      else                                   ieee = 0;
+/*
+ * Compute  RMIN by successive division by  BETA. We could compute  RMIN
+ * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
+ * tation.
+ */
+      lrmin = HPL_rone;
+      for( i = 0; i < 1 - lemin; i++ )   /* 1 - lemin multiplications by 1 / lbeta */
+         lrmin = HPL_dlamc3( lrmin*rbase, zero );
+/*
+ * Finally, call HPL_dlamc5 to compute emax and rmax.
+ */
+      HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
+   }
+   *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
+   *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dlamc3( const double A, const double B )
+#else
+static double HPL_dlamc3( A, B )
+/*
+ * .. Scalar Arguments ..
+ */
+   const double               A, B;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc3  is intended to force a and b  to be stored prior to doing
+ * the addition of  a  and  b,  for  use  in situations where optimizers
+ * might hold one of these in a register.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc3.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * A, B    (local input)               double
+ *         The values a and b.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements (the sum below is the entire body) ..
+ */
+   return( A + B );   /* NOTE(review): store-forcing relies on this being a real call -- confirm it is not inlined */
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc4
+(
+   int                        * EMIN,
+   const double               START,
+   const int                  BASE
+)
+#else
+static void HPL_dlamc4( EMIN, START, BASE )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * EMIN;
+   const int                  BASE;
+   const double               START;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc4 is a service function for HPL_dlamc2.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc4.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before  (gradual) underflow, computed by
+ *         setting A = START and dividing  by  BASE until the previous A
+ *         can not be recovered.
+ *
+ * START   (local input)               double
+ *         The starting point for determining EMIN.
+ *
+ * BASE    (local input)               int
+ *         The base of the machine.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   a     = START; one = HPL_rone; rbase = one / (double)(BASE);
+   zero  = HPL_rzero;
+   *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;   /* c1..d2 are reassigned before the loop test */
+
+   do
+   {
+      (*EMIN)--; a = b1;                                     /* one more division succeeded  */
+      b1 = HPL_dlamc3( a /  BASE,  zero );                   /* next candidate, by division  */
+      c1 = HPL_dlamc3( b1 *  BASE, zero );                   /* recover a by multiplication  */
+      d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;   /* recover a by repeated adding */
+      b2 = HPL_dlamc3( a * rbase,  zero );                   /* same steps, using 1 / BASE   */
+      c2 = HPL_dlamc3( b2 / rbase, zero );
+      d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
+   } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );   /* stop once any recovery fails */
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc5
+(
+   const int                  BETA,
+   const int                  P, 
+   const int                  EMIN,
+   const int                  IEEE,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  BETA, EMIN, IEEE, P; 
+   int                        * EMAX;
+   double                     * RMAX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
+ * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+ * approximately to a power of 2.  It will fail  on machines where  this
+ * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
+ * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
+ * too large (i.e. too close to zero), probably with overflow.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc5.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local input)               int
+ *         The base of floating-point arithmetic.
+ *
+ * P       (local input)               int
+ *         The number of base BETA digits in the mantissa of a floating-
+ *         point value.
+ *
+ * EMIN    (local input)               int
+ *         The minimum exponent before (gradual) underflow.
+ *
+ * IEEE    (local input)               int
+ *         A logical flag specifying whether or not  the arithmetic sys-
+ *         tem is thought to comply with the IEEE standard.
+ *
+ * EMAX    (local output)              int *
+ *         The largest exponent before overflow.
+ *
+ * RMAX    (local output)              double *
+ *         The largest machine floating-point number.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     oldy=HPL_rzero, recbas, y, z;
+   int                        exbits=1, expsum, i, lexp=1, nbits, try,
+                              uexp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
+ * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
+ * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
+ * required number RMAX).
+ */
+l_10:
+   try = (int)( (unsigned int)(lexp) << 1 );
+   if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
+
+   if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
+/*
+ * Now -lexp is less than or equal to EMIN, and -uexp is greater than or
+ * equal to EMIN. exbits is the number of bits needed to store the expo-
+ * nent.
+ */
+   if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
+   { expsum = (int)( (unsigned int)(lexp) << 1 ); }
+   else
+   { expsum = (int)( (unsigned int)(uexp) << 1 ); }
+/*
+ * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
+ */
+   *EMAX = expsum + EMIN - 1;
+/*
+ * nbits  is  the total number of bits needed to store a  floating-point
+ * number.
+ */
+   nbits = 1 + exbits + P;
+
+   if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
+   {
+/*
+ * Either there are an odd number of bits used to store a floating-point
+ * number, which is unlikely, or some bits are not used in the represen-
+ * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
+ * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
+ * which is perhaps the most likely. We have to assume the last alterna-
+ * tive.  If this is true,  then we need to reduce  EMAX  by one because
+ * there must be some way of representing zero  in an  implicit-bit sys-
+ * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
+ */
+      (*EMAX)--;
+   }
+
+   if( IEEE != 0 )
+   {
+/*
+ * Assume we are on an IEEE  machine which reserves one exponent for in-
+ * finity and NaN.
+ */
+      (*EMAX)--;
+   }
+/*
+ * Now create RMAX, the largest machine number, which should be equal to
+ * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
+ * careful that the result is less than 1.0.
+ */
+   recbas = HPL_rone / (double)(BETA);
+   z      = (double)(BETA) - HPL_rone;
+   y      = HPL_rzero;
+
+   for( i = 0; i < P; i++ )
+   { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
+
+   if( y >= HPL_rone ) y = oldy;
+/*
+ * Now multiply by BETA**EMAX to get RMAX.
+ */
+   for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
+
+   *RMAX = y;
+/*
+ * End of HPL_dlamc5
+ */
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dipow
+(
+   const double               X,
+   const int                  N
+)
+#else
+static double HPL_dipow( X, N )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  N;
+   const double               X;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dipow computes the integer n-th power of a real scalar x.
+ *
+ * Arguments
+ * =========
+ *
+ * X       (local input)               const double
+ *         The real scalar x.  If x is zero, HPL_rzero is returned.
+ *
+ * N       (local input)               const int
+ *         The integer power to raise x to;  N < 0 raises 1/x to -N.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     r, y=HPL_rone;
+   unsigned int               k, n;
+/*
+ * Form |N| in unsigned arithmetic, since -N overflows for N == INT_MIN.
+ */
+   if( X == HPL_rzero ) return( HPL_rzero );
+   if( N < 0 ) { n = 0U - (unsigned int)(N); r = HPL_rone / X; }
+   else        { n = (unsigned int)(N);      r = X;            }
+   for( k = 0; k < n; k++ ) y *= r;
+   return( y );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o
new file mode 100644
index 000000000..ecbe3bc06
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c
new file mode 100644
index 000000000..82f118b6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_dlange
+(
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_dlange
+( NORM, M, N, A, LDA )
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlange computes, for the M-by-N matrix A stored column by column
+ * with a column stride of LDA, the quantity selected by NORM:
+ *
+ *    HPL_NORM_A :  the largest absolute value of an entry of A,
+ *    HPL_NORM_1 :  the largest column sum of absolute values,
+ *    HPL_NORM_I :  the largest row sum of absolute values.
+ *
+ * HPL_rzero is returned when M or N is not positive, or when NORM is
+ * none of the above.  HPL_abort is called if malloc() fails.
+ *
+ * Arguments
+ * =========
+ *
+ * NORM    (local input)                 const HPL_T_NORM
+ *         On entry,  NORM  selects the quantity to be returned,  as
+ *         listed above.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  is the number of rows of A.  When M is not
+ *         positive, HPL_rzero is returned.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  is the number of columns of A.  When N is not
+ *         positive, HPL_rzero is returned.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to the first entry of the matrix;  the
+ *         entry in row i and column j is read from A[i + j * LDA].
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  is the distance, counted in elements, from
+ *         one column of A to the next one.  LDA is expected to be at
+ *         least max(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * col = A;
+   double                     acc, res = HPL_rzero, * sums = NULL;
+   int                        ic, imax, ir;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest absolute entry:  col walks over the columns of A, advancing
+ * by LDA elements each time.
+ */
+      for( ic = 0; ic < N; ic++, col += LDA )
+         for( ir = 0; ir < M; ir++ )
+            res = Mmax( res, Mabs( col[ir] ) );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm:  sums[ic] receives the absolute sum of column ic.
+ */
+      sums = (double*)malloc( (size_t)(N) * sizeof( double ) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( ic = 0; ic < N; ic++, col += LDA )
+         {
+            for( acc = HPL_rzero, ir = 0; ir < M; ir++ ) acc += Mabs( col[ir] );
+            sums[ic] = acc;
+         }
+/*
+ * The result is the largest of these column sums.
+ */
+         imax = HPL_idamax( N, sums, 1 );
+         res  = Mabs( sums[imax] );
+         free( sums );
+      }
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm:  sums[ir] accumulates the absolute sum of row ir.
+ */
+      sums = (double*)malloc( (size_t)(M) * sizeof( double ) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( ir = 0; ir < M; ir++ ) sums[ir] = HPL_rzero;
+
+         for( ic = 0; ic < N; ic++, col += LDA )
+         {
+            for( ir = 0; ir < M; ir++ ) sums[ir] += Mabs( col[ir] );
+         }
+/*
+ * The result is the largest of these row sums.
+ */
+         imax = HPL_idamax( M, sums, 1 );
+         res  = Mabs( sums[imax] );
+         free( sums );
+      }
+   }
+
+   return( res );
+/*
+ * End of HPL_dlange
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o
new file mode 100644
index 000000000..869bd9c89
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c
new file mode 100644
index 000000000..f29df3cd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c
@@ -0,0 +1,130 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt writes every entry of an M-by-N matrix A to standard er-
+ * ror, one entry per line and column after column, in the form
+ * CMATNM(row,column)=value.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  is the number of rows of A to be printed.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  is the number of columns of A to be printed.
+ *
+ * A       (local input)                 double *
+ *         On entry, A  points to an array of dimension (LDA,N).
+ *
+ * IA      (local input)                 const int
+ *         On entry, IA is the index printed for the first row.
+ *
+ * JA      (local input)                 const int
+ *         On entry, JA is the index printed for the first column.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *
+ * CMATNM  (local input)                 const char *
+ *         On entry, CMATNM is the matrix name used as line prefix.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     val;
+   int                        icol = 0, irow;
+/* ..
+ * .. Executable Statements ..
+ */
+   while( icol < N )
+   {
+      for( irow = 0; irow < M; irow++ )
+      {
+         val = *(Mptr( A, irow, icol, LDA ));
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n",
+                      CMATNM, IA + irow, JA + icol, val );
+      }
+      icol++;
+   }
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o
new file mode 100644
index 000000000..2fe4d970e
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LATCPY_M_DEPTH
+ * #define    HPL_LATCPY_M_DEPTH      32
+ * #define    HPL_LATCPY_LOG2_M_DEPTH  5
+ * #endif
+ * #ifndef HPL_LATCPY_N_DEPTH
+ * #define    HPL_LATCPY_N_DEPTH       4
+ * #define    HPL_LATCPY_LOG2_N_DEPTH  2
+ * #endif
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH       4
+#define    HPL_LATCPY_LOG2_M_DEPTH  2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH       2
+#define    HPL_LATCPY_LOG2_N_DEPTH  1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of  rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the number of  rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B0 += LDB ) HPL_dcopy( M, A0+j, LDA, B0, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o
new file mode 100644
index 000000000..5ebbb92b0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_fprintf is a wrapper around fprintf flushing the output stream.
+ * The formatted text is truncated to fit the local 256-byte buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[256];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/* vsnprintf bounds the write to cline, so long output cannot overflow it */
+   (void) fprintf( STREAM, "%s", cline );
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o
new file mode 100644
index 000000000..28a92f79f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_warn displays an error message.
+ * The formatted message is truncated to fit the local 128-byte buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display an error message (the line number is shown only when LINE > 0)
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o
new file mode 100644
index 000000000..1464ddcb8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+/* Compiled only when HPL_daxpy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y.
+ * The body is chosen by HPL_CALL_CBLAS / HPL_CALL_VSIPL / HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+/* Nothing is done unless N > 0 and alpha differs from HPL_rzero. */
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* 4-way unrolled pass over the first nu = 4 * ( N / 4 ) entries */
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+/* Write back y + alpha * x for the four entries just loaded */
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+ 
+            X  += incX4;
+            Y  += incY4;
+ 
+         } while( X != StX );
+      }
+/* Clean-up loop for the remaining N - nu ( < 4 ) entries */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         y0  = (*Y);
+ 
+         *Y  = y0 + alpha * x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o
new file mode 100644
index 000000000..ff89e13c3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+/* Compiled only when HPL_dcopy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ * The body is chosen by HPL_CALL_CBLAS / HPL_CALL_VSIPL / HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+/* Nothing is done unless N > 0. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* 8-way unrolled pass over the first nu = 8 * ( N / 8 ) entries */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+/* Store the eight values just loaded into the matching entries of Y */
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+ 
+            X  += incX8;
+            Y  += incY8;
+ 
+         } while( X != StX );
+      }
+/* Clean-up loop for the remaining N - nu ( < 8 ) entries */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         *Y  = x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dcopy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o
new file mode 100644
index 000000000..d0bc0e6e6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            scale;
+   int                        col, offA, offB, offC, p, row;
+/* C(:,col) := BETA*C(:,col) + sum_p ( ALPHA*B(p,col) ) * A(:,p), per column */
+   for( col = 0, offB = 0, offC = 0; col < N; col++, offB += LDB, offC += LDC )
+   {
+      HPL_dscal( M, BETA, C + offC, 1 );
+      for( p = 0, offA = 0; p < K; p++, offA += LDA )
+      {
+         scale = ALPHA * B[offB + p];
+         for( row = 0; row < M; row++ )
+         { C[offC + row] += A[offA + row] * scale; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            scale;
+   int                        col, offA, offB, offC, p, row;
+/* Same update as the NN kernel, but B is read as B(col,p), i.e. B[col+p*LDB] */
+   for( col = 0, offC = 0; col < N; col++, offC += LDC )
+   {
+      HPL_dscal( M, BETA, C + offC, 1 );
+      for( p = 0, offA = 0, offB = col; p < K; p++, offA += LDA, offB += LDB )
+      {
+         scale = ALPHA * B[offB];
+         for( row = 0; row < M; row++ )
+         { C[offC + row] += A[offA + row] * scale; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;
+   int                        col, offA, offB, offC, p, row;
+/* C(row,col) := BETA*C(row,col) + ALPHA * sum_p A[p+row*LDA] * B[p+col*LDB] */
+   for( col = 0, offB = 0, offC = 0; col < N; col++, offB += LDB, offC += LDC )
+   {
+      for( row = 0, offA = 0; row < M; row++, offA += LDA )
+      {
+         dot = HPL_rzero;
+         for( p = 0; p < K; p++ )
+         { dot += A[offA + p] * B[offB + p]; }
+         C[offC + row] = ( BETA == HPL_rzero ? HPL_rzero
+                                             : C[offC + row] * BETA );
+         C[offC + row] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;
+   int                        col, offA, offB, offC, p, row;
+/* C(row,col) := BETA*C(row,col) + ALPHA * sum_p A[p+row*LDA] * B[col+p*LDB] */
+   for( col = 0, offC = 0; col < N; col++, offC += LDC )
+   {
+      for( row = 0, offA = 0; row < M; row++, offA += LDA )
+      {
+         dot = HPL_rzero;
+         for( p = 0, offB = col; p < K; p++, offB += LDB )
+         { dot += A[offA + p] * B[offB]; }
+         C[offC + row] = ( BETA == HPL_rzero ? HPL_rzero
+                                             : C[offC + row] * BETA );
+         C[offC + row] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemm0
+(
+   const enum HPL_TRANS       TRANSA,
+   const enum HPL_TRANS       TRANSB,
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+                        BETA, C, LDC )
+   const enum HPL_TRANS       TRANSA, TRANSB;
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   int                        i, j;
+/* C := ALPHA*op(A)*op(B) + BETA*C; quick return when C stays unchanged. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
+         ( BETA == HPL_rone ) ) ) return;
+
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ ) for( i = 0; i < M; i++ )   /* C := BETA * C */
+      {  C[i+j*LDC] = ( BETA == HPL_rzero ? HPL_rzero : BETA * C[i+j*LDC] ); }
+      return;
+   }
+
+   if( TRANSB == HplNoTrans )
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+   else
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANSA,
+   const enum HPL_TRANS             TRANSB,
+   const int                        M,
+   const int                        N,
+   const int                        K,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   B,
+   const int                        LDB,
+   const double                     BETA,
+   double *                         C,
+   const int                        LDC
+)
+#else
+void HPL_dgemm
+( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANSA;
+   const enum HPL_TRANS             TRANSB;
+   const int                        M;
+   const int                        N;
+   const int                        K;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   B;
+   const int                        LDB;
+   const double                     BETA;
+   double *                         C;
+   const int                        LDC;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemm performs one of the matrix-matrix operations
+ *  
+ *     C := alpha * op( A ) * op( B ) + beta * C
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+ * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:
+ *            ORDER = HplRowMajor,
+ *            ORDER = HplColumnMajor.
+ *
+ * TRANSA  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSA  specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSA==HplNoTrans    : op( A ) = A,
+ *            TRANSA==HplTrans      : op( A ) = A^T,
+ *            TRANSA==HplConjTrans  : op( A ) = A^T.
+ *
+ * TRANSB  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSB  specifies the form of  op(B)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSB==HplNoTrans    : op( B ) = B,
+ *            TRANSB==HplTrans      : op( B ) = B^T,
+ *            TRANSB==HplConjTrans  : op( B ) = B^T.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the  number  of rows  of the  matrix
+ *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the number  of columns of the matrix
+ *         op(B)  and  the number of columns of the matrix  C. N must be
+ *         at least zero.
+ *
+ * K       (local input)                 const int
+ *         On entry,  K  specifies  the  number of columns of the matrix
+ *         op(A) and the number of rows of the matrix op(B).  K  must
+ *         be at least  zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero  then the elements of the matrices A and B
+ *         need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
+ *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+ *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+ *         the array  A must contain the matrix A, otherwise the leading
+ *         k  by  m  part of the array  A  must  contain the  matrix  A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA  specifies the first dimension of A as declared
+ *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+ *         LDA must be at least max(1,m), otherwise LDA must be at least
+ *         max(1,k).
+ *
+ * B       (local input)                 const double *
+ *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
+ *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+ *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+ *         the array  B must contain the matrix B, otherwise the leading
+ *         n  by  k  part of the array  B  must  contain  the matrix  B.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB  specifies the first dimension of B as declared
+ *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+ *         LDB must be at least max(1,k), otherwise LDB must be at least
+ *         max(1,n).
+ *
+ * BETA    (local input)                 const double
+ *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+ *         supplied  as  zero  then  the  elements of the matrix C  need
+ *         not be set on input.
+ *
+ * C       (local input/output)          double *
+ *         On entry,  C  is an array of dimension (LDC,n). Before entry,
+ *         the  leading m by n part  of  the  array  C  must contain the
+ *         matrix C,  except when beta is zero, in which case C need not
+ *         be set on entry. On exit, the array  C  is overwritten by the
+ *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+ *
+ * LDC     (local input)                 const int
+ *         On entry, LDC  specifies the first dimension of C as declared
+ *         in  the   calling  (sub)  program.   LDC  must  be  at  least
+ *         max(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+/* Forward to the CBLAS implementation (no per-call tracing on stdout). */
+   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
+                  C, LDC );
+   }
+   else
+   {
+      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
+                  C, LDC );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
+                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77K                 K
+#define F77lda               LDA
+#define F77ldb               LDB
+#define F77ldc               LDC
+#endif
+   char                      ctransa, ctransb;
+
+   if(      TRANSA == HplNoTrans ) ctransa = 'N';
+   else if( TRANSA == HplTrans   ) ctransa = 'T';
+   else                            ctransa = 'C';
+ 
+   if(      TRANSB == HplNoTrans ) ctransb = 'N';
+   else if( TRANSB == HplTrans   ) ctransb = 'T';
+   else                            ctransb = 'C';
+
+   if( ORDER == HplColumnMajor )
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+   }
+   else
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dgemm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o
new file mode 100644
index 000000000..12e87044c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c
new file mode 100644
index 000000000..6366c5a48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c
@@ -0,0 +1,326 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * X,
+   const int                  INCX,
+   const double               BETA,
+   double                     * Y,
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{
+/*
+ * y := ALPHA * op( A ) * x + BETA * y,  A being M by N, stored by columns.
+ */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/*
+ * Quick return when y stays unchanged; y has M entries (NoTrans) or N.
+ */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
+ 
+   if( ALPHA == HPL_rzero )   /* y := BETA * y over the actual length of y */
+   { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; }
+   if( TRANS == HplNoTrans )
+   {
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else
+   {
+      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
+         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   X;
+   const int                        INCX;
+   const double                     BETA;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemv performs one of the matrix-vector operations
+ *  
+ *     y := alpha * op( A ) * x + beta * y,
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T,
+ *  
+ * alpha and beta are scalars,  x and y are vectors and  A  is an m
+ * by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:
+ *            ORDER = HplRowMajor,
+ *            ORDER = HplColumnMajor.
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies the  operation to be performed as
+ *         follows:
+ *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+ *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  A and X  need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array holding the vector x: n
+ *         entries when  TRANS = HplNoTrans,  m entries otherwise.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * BETA    (local input)                 const double
+ *         On entry, BETA  specifies the scalar beta.    When  BETA   is
+ *         supplied as zero then  Y  need not be set on input.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array holding the vector y: m
+ *         entries when  TRANS = HplNoTrans,  n entries otherwise.
+ *         Before entry with BETA non-zero, the incremented array Y must
+ *         contain the vector  y.  On exit,  Y  is  overwritten  by  the
+ *         updated vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+   else
+   {  /* row-major storage: flip TRANS and swap M/N for the column-major kernel */
+      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;   /* addressable copies */
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftran;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   char                      ctran;
+
+   if( ORDER == HplColumnMajor )
+   {
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+   else
+   {
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );   /* flipped; M/N swapped below */
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+
+#endif
+/*
+ * End of HPL_dgemv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o
new file mode 100644
index 000000000..a9b801898
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c
new file mode 100644
index 000000000..5ea702778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dger
+
+#ifdef STDC_HEADERS
+void HPL_dger( const enum HPL_ORDER ORDER, const int M, const int N,
+               const double ALPHA, const double * X, const int INCX,
+               double * Y, const int INCY, double * A, const int LDA )
+#else
+void HPL_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+   const enum HPL_ORDER             ORDER;
+   const int                        M, N, INCX, INCY, LDA;
+   const double                     ALPHA;
+   const double *                   X;
+   double *                         Y;
+   double *                         A;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dger adds a scaled outer product of two vectors to a matrix:
+ *
+ *     A := A + alpha * x * y^T,
+ *
+ * with alpha a scalar, x a vector of m elements, y a vector of n elements
+ * and A a matrix of m rows and n columns.
+ *
+ * The implementation is chosen at compile time:  HPL_CALL_CBLAS forwards
+ * to cblas_dger,  HPL_CALL_FBLAS forwards to F77dger  (with the roles of
+ * x and y exchanged for row-major storage), and HPL_CALL_VSIPL uses the
+ * plain C loops below.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         Storage format of A:  HplColumnMajor or HplRowMajor.  In the
+ *         loop and Fortran variants, any value other than HplColumnMajor
+ *         is handled as row-major.
+ *
+ * M       (local input)                 const int
+ *         Number of rows of the matrix A; expected to be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of the matrix A; expected to be at least
+ *         zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         The scalar alpha.  In the loop variant a zero ALPHA returns
+ *         before X and Y are read.
+ *
+ * X       (local input)                 const double *
+ *         Incremented array of at least ( 1 + ( m - 1 ) * abs( INCX ) )
+ *         entries holding the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         Stride between consecutive elements of X; must not be zero.
+ *
+ * Y       (local input)                 double *
+ *         Incremented array of at least ( 1 + ( n - 1 ) * abs( INCY ) )
+ *         entries holding the vector y.
+ *
+ * INCY    (local input)                 const int
+ *         Stride between consecutive elements of Y; must not be zero.
+ *
+ * A       (local input/output)          double *
+ *         Array of at least LDA * n entries whose leading m by n part
+ *         holds the matrix on entry and the updated matrix on exit.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of A as declared by the caller;  must be at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           scaled;
+   int                       abase, col, row;
+
+/*
+ * Nothing to update for an empty matrix or a zero scaling factor
+ */
+   if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return;
+
+   if( ORDER == HplColumnMajor )
+   {
+/*
+ * Column col of A starts at col * LDA:  A(:,col) += ( alpha*y(col) ) * x
+ */
+      for( col = 0; col < N; col++ )
+      {
+         abase  = col * LDA;
+         scaled = ALPHA * Y[col*INCY];
+         for( row = 0; row < M; row++ )
+         { A[abase+row] += X[row*INCX] * scaled; }
+      }
+   }
+   else
+   {
+/*
+ * Row row of A starts at row * LDA:  A(row,:) += ( alpha*x(row) ) * y
+ */
+      for( row = 0; row < M; row++ )
+      {
+         abase  = row * LDA;
+         scaled = ALPHA * X[row*INCX];
+         for( col = 0; col < N; col++ )
+         { A[abase+col] += Y[col*INCY] * scaled; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M;
+   const F77_INTEGER         F77N    = N;
+   const F77_INTEGER         F77lda  = LDA;
+   const F77_INTEGER         F77incx = INCX;
+   const F77_INTEGER         F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+/*
+ * Row-major storage is passed on as the transposed column-major problem:
+ * dimensions and vectors trade places.
+ */
+   if( ORDER != HplColumnMajor )
+   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
+   else
+   {  F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); }
+#endif
+/*
+ * End of HPL_dger
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o
new file mode 100644
index 000000000..255cfa4b2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c
new file mode 100644
index 000000000..7e041991f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dscal
+
+#ifdef STDC_HEADERS
+void HPL_dscal
+(
+   const int                        N,
+   const double                     ALPHA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dscal
+( N, ALPHA, X, INCX )
+   const int                        N;
+   const double                     ALPHA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dscal scales the vector x by alpha.
+ * The code path is chosen by HPL_CALL_CBLAS, HPL_CALL_VSIPL or HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are  scaled
+ *         by the scalar alpha.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dscal( N, ALPHA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   register const double     alpha = ALPHA;
+   const double              * StX;   /* where the unrolled loop stops */
+   register int              i;
+   int                       nu;      /* N rounded down to a multiple of 8 */
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( ( N > 0 ) && ( alpha != HPL_rone ) )
+   {
+      if( alpha == HPL_rzero )
+      {  /* alpha equals HPL_rzero: store HPL_rzero without reading X */
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = (double *)X + nu * INCX;
+            /* eight entries per pass; X itself is advanced until it hits StX */
+            do
+            {
+               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
+               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
+               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
+               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8;
+
+            } while( X != StX );
+         }
+         /* the remaining N - nu ( < 8 ) entries, one at a time */
+         for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; }
+      }
+      else
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+         {
+            StX = X + nu * INCX;
+            /* per pass: load eight entries, scale them, then store them back */
+            do
+            {
+               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
+               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;
+ 
+               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
+               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;
+ 
+               X  += incX8;
+ 
+            } while( X != StX );
+         }
+         /* the remaining N - nu ( < 8 ) entries, one at a time */
+         for( i = N - nu; i != 0; i-- )
+         { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;   /* local copy passed by address */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+
+   F77dscal( &F77N, &alpha, X, &F77incx );
+#endif
+/*
+ * End of HPL_dscal
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o
new file mode 100644
index 000000000..4cb4cd8c9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c
new file mode 100644
index 000000000..a336a7d29
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c
@@ -0,0 +1,977 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsm
+
+#ifdef HPL_CALL_VSIPL
+
+/*
+ * HPL_dtrsmLLNN overwrites each of the N columns of B (M entries, stride
+ * LDB) as follows: scale the column by ALPHA, then for k = 0..M-1 divide
+ * entry k by A[k+k*LDA] and subtract B[k] * A[i+k*LDA] from every entry
+ * i > k (forward substitution with the lower triangle of A).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ ) { B[b0+row] *= ALPHA; }
+      for( piv = 0; piv < M; piv++ )
+      {
+         a0 = piv * LDA;
+         B[b0+piv] /= A[a0+piv];
+         for( row = piv+1; row < M; row++ )
+         { B[b0+row] -= B[b0+piv] * A[a0+row]; }
+      }
+   }
+}
+
+/*
+ * HPL_dtrsmLLNU overwrites each of the N columns of B (M entries, stride
+ * LDB) as follows: scale the column by ALPHA, then for k = 0..M-1
+ * subtract B[k] * A[i+k*LDA] from every entry i > k. The diagonal of A
+ * is never read (forward substitution with an implicit unit diagonal).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLNU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ ) { B[b0+row] *= ALPHA; }
+      for( piv = 0; piv < M; piv++ )
+      {
+         a0 = piv * LDA;
+         for( row = piv+1; row < M; row++ )
+         { B[b0+row] -= B[b0+piv] * A[a0+row]; }
+      }
+   }
+}
+
+/*
+ * HPL_dtrsmLLTN overwrites each of the N columns of B (M entries, stride
+ * LDB), walking the entries i = M-1 down to 0:
+ *    B[i] := ( ALPHA * B[i] - sum_{k>i} A[k+i*LDA] * B[k] ) / A[i+i*LDA]
+ * where the B[k], k > i, are the values already overwritten (backward
+ * substitution with the transpose of the lower triangle of A).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = M-1; row >= 0; row-- )
+      {
+         a0  = row * LDA;
+         acc = ALPHA * B[b0+row];
+         for( piv = row+1; piv < M; piv++ )
+         { acc -= A[a0+piv] * B[b0+piv]; }
+         acc /= A[a0+row];
+         B[b0+row] = acc;
+      }
+   }
+}
+
+/*
+ * HPL_dtrsmLLTU overwrites each of the N columns of B (M entries, stride
+ * LDB), walking the entries i = M-1 down to 0:
+ *    B[i] := ALPHA * B[i] - sum_{k>i} A[k+i*LDA] * B[k]
+ * where the B[k], k > i, are the values already overwritten. The diagonal
+ * of A is never read (backward substitution, implicit unit diagonal).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLLTU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = M-1; row >= 0; row-- )
+      {
+         a0  = row * LDA;
+         acc = ALPHA * B[b0+row];
+         for( piv = row+1; piv < M; piv++ )
+         { acc -= A[a0+piv] * B[b0+piv]; }
+         B[b0+row] = acc;
+      }
+   }
+}
+
+/*
+ * HPL_dtrsmLUNN overwrites each of the N columns of B (M entries, stride
+ * LDB) as follows: scale the column by ALPHA, then for k = M-1 down to 0
+ * divide entry k by A[k+k*LDA] and subtract B[k] * A[i+k*LDA] from every
+ * entry i < k (backward substitution with the upper triangle of A).
+ * Only indices i <= k of column k of A are read.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ ) { B[b0+row] *= ALPHA; }
+      for( piv = M-1; piv >= 0; piv-- )
+      {
+         a0 = piv * LDA;
+         B[b0+piv] /= A[a0+piv];
+         for( row = 0; row < piv; row++ )
+         { B[b0+row] -= B[b0+piv] * A[a0+row]; }
+      }
+   }
+}
+
+
+/*
+ * HPL_dtrsmLUNU overwrites each of the N columns of B (M entries, stride
+ * LDB) as follows: scale the column by ALPHA, then for k = M-1 down to 0
+ * subtract B[k] * A[i+k*LDA] from every entry i < k. The diagonal of A
+ * is never read (backward substitution with the upper triangle of A and
+ * an implicit unit diagonal).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUNU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ ) { B[b0+row] *= ALPHA; }
+      for( piv = M-1; piv >= 0; piv-- )
+      {
+         a0 = piv * LDA;
+         for( row = 0; row < piv; row++ )
+         { B[b0+row] -= B[b0+piv] * A[a0+row]; }
+      }
+   }
+}
+
+
+/*
+ * HPL_dtrsmLUTN overwrites each column of B, for i = 0..M-1 in turn, with
+ *    B[i] := ( ALPHA * B[i] - sum_{k<i} A[k+i*LDA] * B[k] ) / A[i+i*LDA]
+ * using the B[k], k < i, already overwritten (N columns, stride LDB).
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         a0  = row * LDA;
+         acc = ALPHA * B[b0+row];
+         for( piv = 0; piv < row; piv++ )
+         { acc -= A[a0+piv] * B[b0+piv]; }
+         acc /= A[a0+row];
+         B[b0+row] = acc;
+      }
+   }
+}
+
+
+/*
+ * HPL_dtrsmLUTU overwrites each column of B, for i = 0..M-1 in turn, with
+ *    B[i] := ALPHA * B[i] - sum_{k<i} A[k+i*LDA] * B[k]
+ * using the B[k], k < i, already overwritten; A's diagonal is not read.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            acc;
+   int                        a0, b0, col, piv, row;
+
+   for( col = 0; col < N; col++ )
+   {
+      b0 = col * LDB;
+      for( row = 0; row < M; row++ )
+      {
+         a0  = row * LDA;
+         acc = ALPHA * B[b0+row];
+         for( piv = 0; piv < row; piv++ )
+         { acc -= A[a0+piv] * B[b0+piv]; }
+         B[b0+row] = acc;
+      }
+   }
+}
+
+
+/*
+ * HPL_dtrsmRLNN overwrites the N columns of B (M entries each, stride
+ * LDB), visiting the columns j = N-1 down to 0:
+ *    B(:,j) := ( ALPHA * B(:,j) - sum_{k>j} A[k+j*LDA] * B(:,k) )
+ *              / A[j+j*LDA]
+ * where the columns k > j are the ones already overwritten.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, bj, bk, col, piv, row;
+
+   for( col = N-1; col >= 0; col-- )
+   {
+      a0 = col * LDA; bj = col * LDB;
+      for( row = 0; row < M; row++ ) { B[bj+row] *= ALPHA; }
+      for( piv = col+1; piv < N; piv++ )
+      {
+         bk = piv * LDB;
+         for( row = 0; row < M; row++ )
+         { B[bj+row] -= A[a0+piv] * B[bk+row]; }
+      }
+      for( row = 0; row < M; row++ ) { B[bj+row] /= A[a0+col]; }
+   }
+}
+
+
+/*
+ * HPL_dtrsmRLNU overwrites the N columns of B (M entries each, stride
+ * LDB), visiting the columns j = N-1 down to 0:
+ *    B(:,j) := ALPHA * B(:,j) - sum_{k>j} A[k+j*LDA] * B(:,k)
+ * where the columns k > j are the ones already overwritten. The diagonal
+ * of A is never read.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        a0, bj, bk, col, piv, row;
+
+   for( col = N-1; col >= 0; col-- )
+   {
+      a0 = col * LDA; bj = col * LDB;
+      for( row = 0; row < M; row++ ) { B[bj+row] *= ALPHA; }
+      for( piv = col+1; piv < N; piv++ )
+      {
+         bk = piv * LDB;
+         for( row = 0; row < M; row++ )
+         { B[bj+row] -= A[a0+piv] * B[bk+row]; }
+      }
+   }
+}
+
+
+/*
+ * HPL_dtrsmRLTN processes the columns k = 0..N-1 of B (M entries each,
+ * stride LDB) in order: divide column k by A[k+k*LDA], subtract
+ * A[j+k*LDA] * B(:,k) from every later column j > k, and finally scale
+ * column k by ALPHA.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTN( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            ajk;
+   int                        a0, bj, bk, col, piv, row;
+
+   for( piv = 0; piv < N; piv++ )
+   {
+      a0 = piv * LDA; bk = piv * LDB;
+      for( row = 0; row < M; row++ ) { B[bk+row] /= A[a0+piv]; }
+      for( col = piv+1; col < N; col++ )
+      {
+         bj  = col * LDB;
+         ajk = A[a0+col];
+         for( row = 0; row < M; row++ )
+         { B[bj+row] -= ajk * B[bk+row]; }
+      }
+      for( row = 0; row < M; row++ ) { B[bk+row] *= ALPHA; }
+   }
+}
+
+
+/*
+ * HPL_dtrsmRLTU processes the columns k = 0..N-1 of B (M entries each,
+ * stride LDB) in order: subtract A[j+k*LDA] * B(:,k) from every later
+ * column j > k, then scale column k by ALPHA. The diagonal of A is
+ * never read.
+ */
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTU( const int M, const int N, const double ALPHA,
+                           const double * A, const int LDA,
+                           double * B, const int LDB )
+#else
+static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  M, N, LDA, LDB;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            ajk;
+   int                        a0, bj, bk, col, piv, row;
+
+   for( piv = 0; piv < N; piv++ )
+   {
+      a0 = piv * LDA; bk = piv * LDB;
+      for( col = piv+1; col < N; col++ )
+      {
+         bj  = col * LDB;
+         ajk = A[a0+col];
+         for( row = 0; row < M; row++ )
+         { B[bj+row] -= ajk * B[bk+row]; }
+      }
+      for( row = 0; row < M; row++ ) { B[bk+row] *= ALPHA; }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNN
+(
+   const int                  M,     /* number of rows of B */
+   const int                  N,     /* number of columns of B; order of A */
+   const double               ALPHA, /* scalar applied to each column before elimination */
+   const double               * A,   /* triangular factor, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * B,   /* right-hand side, overwritten in place */
+   const int                  LDB    /* stride between columns of B */
+)
+#else
+static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / NoTrans / Non-unit kernel: overwrites B with X where X * A = ALPHA * B */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) /* columns of B, ascending */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* B(:,j) *= ALPHA */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) /* iakj walks A(k,j) above the diagonal */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* B(:,j) -= A(k,j) * B(:,k), columns k < j already solved */
+      }
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } /* B(:,j) /= A(j,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNU
+(
+   const int                  M,     /* number of rows of B */
+   const int                  N,     /* number of columns of B; order of A */
+   const double               ALPHA, /* scalar applied to each column before elimination */
+   const double               * A,   /* triangular factor, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * B,   /* right-hand side, overwritten in place */
+   const int                  LDB    /* stride between columns of B */
+)
+#else
+static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / NoTrans / Unit kernel: as HPL_dtrsmRUNN but the diagonal of A is never read */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) /* columns of B, ascending */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* B(:,j) *= ALPHA */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) /* iakj walks A(k,j) above the diagonal */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* B(:,j) -= A(k,j) * B(:,k), columns k < j already solved */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTN
+(
+   const int                  M,     /* number of rows of B */
+   const int                  N,     /* number of columns of B; order of A */
+   const double               ALPHA, /* scalar applied to each solved column */
+   const double               * A,   /* triangular factor, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * B,   /* right-hand side, overwritten in place */
+   const int                  LDB    /* stride between columns of B */
+)
+#else
+static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / Trans / Non-unit kernel: overwrites B with X where X * A^T = ALPHA * B */
+   register double            t0;
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB; /* columns of B, descending */
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } /* B(:,k) /= A(k,k) */
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) /* iajk walks A(j,k) above the diagonal */
+      {
+         t0 = A[iajk]; /* t0 = A(j,k) */
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* B(:,j) -= A(j,k) * B(:,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* column k is final: scale it */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTU
+(
+   const int                  M,     /* number of rows of B */
+   const int                  N,     /* number of columns of B; order of A */
+   const double               ALPHA, /* scalar applied to each solved column */
+   const double               * A,   /* triangular factor, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * B,   /* right-hand side, overwritten in place */
+   const int                  LDB    /* stride between columns of B */
+)
+#else
+static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / Trans / Unit kernel: as HPL_dtrsmRUTN but the diagonal of A is never read */
+   register double            t0;
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB; /* columns of B, descending */
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) /* iajk walks A(j,k) above the diagonal */
+      {
+         t0 = A[iajk]; /* t0 = A(j,k) */
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* B(:,j) -= A(j,k) * B(:,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* column k is final: scale it */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsm0
+(
+   const enum HPL_SIDE        SIDE,  /* HplLeft selects the L* kernels, anything else the R* kernels */
+   const enum HPL_UPLO        UPLO,  /* HplUpper selects the *U** kernels, anything else the *L** kernels */
+   const enum HPL_TRANS       TRANS, /* HplNoTrans selects the **N* kernels, anything else the **T* kernels */
+   const enum HPL_DIAG        DIAG,  /* HplNonUnit selects the ***N kernels, anything else the ***U kernels */
+   const int                  M,     /* number of rows of B */
+   const int                  N,     /* number of columns of B */
+   const double               ALPHA, /* scalar; zero short-circuits to a zero fill of B */
+   const double               * A,   /* triangular factor, forwarded unchanged */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * B,   /* right-hand side, overwritten in place */
+   const int                  LDB    /* stride between columns of B */
+)
+#else
+static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_SIDE        SIDE;
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Dispatcher: picks one of the sixteen HPL_dtrsm<side><uplo><trans><diag> kernels */
+   int                        i, j;
+
+   if( ( M == 0 ) || ( N == 0 ) ) return; /* empty B: nothing to solve */
+ 
+   if( ALPHA == HPL_rzero ) /* zero scalar: the result is the zero matrix, A is not read */
+   {
+      for( j = 0; j < N; j++ )
+      {  for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } /* B(i,j) = 0 */
+      return;
+   }
+
+   if( SIDE == HplLeft ) /* left-side kernels */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else /* every TRANS value other than HplNoTrans is handled as a transpose */
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else /* every UPLO value other than HplUpper is handled as lower */
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+   else /* every SIDE value other than HplLeft is handled as right */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_SIDE              SIDE,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dtrsm
+( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_SIDE              SIDE;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsm solves one of the matrix equations
+ *  
+ *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ *  
+ * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+ * non-unit, upper or lower triangular matrix and op(A) is one of
+ *  
+ *    op( A ) = A   or   op( A ) = A^T.
+ *  
+ * The matrix X is overwritten on B.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * SIDE    (local input)                 const enum HPL_SIDE
+ *         On entry, SIDE  specifies  whether  op(A) appears on the left
+ *         or right of X as follows:
+ *            SIDE==HplLeft    op( A ) * X = alpha * B,
+ *            SIDE==HplRight   X * op( A ) = alpha * B.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry, TRANS  specifies the form of  op(A)  to be used  in
+ *         the matrix-matrix operation as follows:
+ *            TRANS==HplNoTrans    : op( A ) = A,
+ *            TRANS==HplTrans      : op( A ) = A^T,
+ *            TRANS==HplConjTrans  : op( A ) = A^T.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the  matrix B.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix B.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero then the elements of the matrix B need not
+ *         be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+ *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+ *         k by k upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower on  entry,
+ *         the  leading k by k lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+ *         are not referenced  either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+ *
+ * B       (local input/output)          double *
+ *         On entry,  B  points  to an array of size equal to or greater
+ *         than LDB * n.  Before entry, the leading  m by n  part of the
+ *         array B must contain the matrix B, except when alpha is zero,
+ *         in which case B need not be set on entry.  On exit, the array
+ *         B is overwritten by the m by n solution matrix.
+ *
+ * LDB     (local input)                 const int
+ *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
+ *         declared  in  the  calling  (sub) program.  LDB  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); /* arguments forwarded unchanged */
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor ) /* the local HPL_dtrsm0 kernels index their operands column by column */
+   {
+      HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+   }
+   else /* any other ORDER: reuse the same kernels with SIDE and UPLO flipped and M, N exchanged */
+   {
+      HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
+                  ( UPLO == HplLower ? HplUpper : HplLower ),
+                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA; /* addressable copy: F77dtrsm receives &alpha */
+#ifdef StringSunStyle
+#if defined( HPL_USE_F77_INTEGER_DEF )
+   F77_INTEGER               IONE = 1; /* passed four times as trailing arguments, once per character flag */
+#else
+   int                       IONE = 1; /* passed four times as trailing arguments, once per character flag */
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,
+                             F77lda = LDA, F77ldb = LDB;
+#else
+#define  F77M                M
+#define  F77N                N
+#define  F77lda              LDA
+#define  F77ldb              LDB
+#endif
+   char                      cside, cuplo, ctran, cdiag; /* one-character flags handed to F77dtrsm */
+
+   if(      TRANS == HplNoTrans ) ctran = 'N';
+   else if( TRANS == HplTrans   ) ctran = 'T';
+   else                           ctran = 'C'; /* every remaining TRANS value maps to 'C' */
+   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
+
+   if( ORDER == HplColumnMajor ) /* flags and dimensions passed straight through */
+   {
+      cside = ( SIDE == HplRight ? 'R' : 'L' );
+      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+   else /* any other ORDER: side and uplo flags are flipped and F77N / F77M are exchanged in the calls below */
+   {
+      cside = ( SIDE == HplRight ? 'L' : 'R' );
+      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dtrsm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o
new file mode 100644
index 000000000..339a5635f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c
new file mode 100644
index 000000000..99e84f073
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c
@@ -0,0 +1,520 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNN
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Lower / NoTrans / Non-unit kernel: overwrites X with the solution x of A * x = b */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0;
+
+   for( j = 0, jaj = 0, jx  = 0; j < N; j++, jaj += ldap1, jx += INCX ) /* jaj indexes the diagonal A(j,j) */
+   {
+      X[jx] /= A[jaj]; t0 = X[jx]; /* x(j) is final after the division */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX; /* iaij walks A(i,j) below the diagonal */
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; } /* x(i) -= x(j) * A(i,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNU
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Lower / NoTrans / Unit kernel: as HPL_dtrsvLNN but the diagonal of A is never read */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0;
+
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) /* jaj is the index of A(j,j) */
+   {
+      t0 = X[jx]; /* x(j) is already final */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX; /* iaij walks A(i,j) below the diagonal */
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; } /* x(i) -= x(j) * A(i,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTN
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Lower / Trans / Non-unit kernel: overwrites X with the solution x of A^T * x = b */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0;
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX; /* last unknown first; jaj indexes A(j,j) */
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {
+      t0 = X[jx]; /* accumulate x(j) in t0 */
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX; /* iaij walks A(i,j) below the diagonal */
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; } /* t0 -= A(i,j) * x(i) */
+      t0 /= A[jaj]; X[jx] = t0; /* divide by A(j,j) and store x(j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTU
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Lower / Trans / Unit kernel: as HPL_dtrsvLTN but the diagonal of A is never read */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0;
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX; /* last unknown first; jaj is the index of A(j,j) */
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {
+      t0 = X[jx]; /* accumulate x(j) in t0 */
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX; /* iaij walks A(i,j) below the diagonal */
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; } /* t0 -= A(i,j) * x(i) */
+      X[jx] = t0; /* store x(j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNN
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper / NoTrans / Non-unit kernel: overwrites X with the solution x of A * x = b */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX; /* last unknown first; jaj indexes the top of column j */
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {
+      X[jx] /= A[j+jaj]; t0 = X[jx]; /* x(j) /= A(j,j); x(j) is now final */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) /* iaij walks A(i,j) above the diagonal */
+      { X[ix] -= t0 * A[iaij]; } /* x(i) -= x(j) * A(i,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNU
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper / NoTrans / Unit kernel: as HPL_dtrsvUNN but the diagonal of A is never read */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX; /* last unknown first; jaj indexes the top of column j */
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {
+      t0 = X[jx]; /* x(j) is already final */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) /* iaij walks A(i,j) above the diagonal */
+      { X[ix] -= t0 * A[iaij]; } /* x(i) -= x(j) * A(i,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTN
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper / Trans / Non-unit kernel: overwrites X with the solution x of A^T * x = b */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+
+   for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) /* first unknown first; jaj indexes the top of column j */
+   {
+      t0 = X[jx]; /* accumulate x(j) in t0 */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) /* iaij walks A(i,j) above the diagonal */
+      { t0 -= A[iaij] * X[ix]; } /* t0 -= A(i,j) * x(i) */
+      t0 /= A[iaij]; X[jx] = t0; /* the loop exits with iaij == jaj + j, so A[iaij] is the diagonal A(j,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTU
+(
+   const int                  N,     /* order of A; number of elements of X touched */
+   const double               * A,   /* triangular matrix, element (r,c) at A[r+c*LDA] */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place */
+   const int                  INCX   /* stride between consecutive elements of X */
+)
+#else
+static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper / Trans / Unit kernel: as HPL_dtrsvUTN but the diagonal of A is never read */
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) /* first unknown first; jaj indexes the top of column j */
+   {
+      t0 = X[jx]; /* accumulate x(j) in t0 */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) /* iaij walks A(i,j) above the diagonal */
+      { t0 -= A[iaij] * X[ix]; } /* t0 -= A(i,j) * x(i) */
+      X[jx] = t0; /* store x(j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsv0
+(
+   const enum HPL_UPLO        UPLO,  /* HplUpper selects the U** kernels, anything else the L** kernels */
+   const enum HPL_TRANS       TRANS, /* HplNoTrans selects the *N* kernels, anything else the *T* kernels */
+   const enum HPL_DIAG        DIAG,  /* HplNonUnit selects the **N kernels, anything else the **U kernels */
+   const int                  N,     /* order of A; zero returns immediately */
+   const double               * A,   /* triangular matrix, forwarded unchanged */
+   const int                  LDA,   /* stride between columns of A */
+   double                     * X,   /* right-hand side, overwritten in place by the kernel */
+   const int                  INCX   /* stride between consecutive elements of X */
+) 
+#else
+static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Dispatcher: picks one of the eight HPL_dtrsv<uplo><trans><diag> kernels */
+   if( N == 0 ) return; /* empty system: nothing to solve */
+ 
+   if( UPLO == HplUpper ) /* upper-triangular kernels */
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUNU( N,    A, LDA, X, INCX ); }
+      }
+      else /* every TRANS value other than HplNoTrans is handled as a transpose */
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+   else /* every UPLO value other than HplUpper is handled as lower */
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLNU( N,    A, LDA, X, INCX ); }
+      }
+      else
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dtrsv
+( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsv solves one of the systems of equations
+ *  
+ *     A * x = b,   or   A^T * x = b,
+ *  
+ * where b and x are n-element vectors and  A  is an n by n non-unit, or
+ * unit, upper or lower triangular matrix.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies  the equations  to  be  solved as
+ *         follows:
+ *            TRANS==HplNoTrans     A   * x = b,
+ *            TRANS==HplTrans       A^T * x = b.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the order of the matrix A. N must be at
+ *         least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+ *         n by n upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower  on entry,
+ *         the  leading n by n lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,n).
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         Before entry,  the  incremented array  X  must contain  the n
+ *         element right-hand side vector b. On exit,  X  is overwritten
+ *         with the solution vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+   }
+   else  /* not column-major: flip UPLO and TRANS for HPL_dtrsv0 */
+   {
+      HPL_dtrsv0( ( UPLO  == HplUpper   ? HplLower : HplUpper   ),
+                  ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  DIAG, N, A, LDA, X, INCX );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF 
+   const F77_INTEGER         F77N = N, F77lda = LDA, F77incx = INCX;
+#else
+#define F77N              N
+#define F77lda            LDA
+#define F77incx           INCX
+#endif
+   char                      cuplo, ctran, cdiag; /* 1-char flags for F77dtrsv */
+
+   if( ORDER == HplColumnMajor )
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+   }
+   else  /* not column-major: pass the flipped uplo and trans flags */
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+   }
+   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
+
+#ifdef StringSunStyle
+   F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
+             IONE, IONE, IONE ); /* presumably the lengths of the 3 flags */
+#endif
+#ifdef StringCrayStyle
+   ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+   fuplo = HPL_C2F_CHAR( cuplo );
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructVal
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructPtr
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
+#endif
+
+#endif
+/*
+ * End of HPL_dtrsv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o
new file mode 100644
index 000000000..2930120c9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c
new file mode 100644
index 000000000..5ceabdf25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_idamax
+
+#ifdef STDC_HEADERS
+int HPL_idamax
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX
+)
+#else
+int HPL_idamax
+( N, X, INCX )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_idamax returns  the index in an n-vector  x  of the first element
+ * having maximum absolute value.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   return( (int)(cblas_idamax( N, X, INCX )) );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           absxi, smax = HPL_rzero, x0, x1, x2, x3,
+                             x4, x5, x6, x7;
+   const double              * StX;
+   register int              imax = 0, i = 0, j;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu: N rounded down to a multiple of 8 */
+      {
+         StX = X + nu * INCX;
+/* Examine 8 elements per pass; i is the 0-based index of each element. */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+/* Strict '>' keeps the lowest index when several elements tie. */
+            absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+ 
+            X    += incX8;
+ 
+         } while( X != StX );
+      }
+ 
+      for( j = N - nu; j != 0; j-- ) /* the N - nu leftover elements */
+      {
+         x0    = (*X);
+         absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+         i    += 1;
+         X    += INCX;
+      }
+   }
+   return( imax ); /* stays 0 when N <= 0 */
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   int                       imax = 0;
+/* The - 1 presumably turns a 1-based Fortran index into a 0-based one. */
+   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
+   return( imax );
+#endif
+/*
+ * End of HPL_idamax
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o
new file mode 100644
index 000000000..b765e7be6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c
new file mode 100644
index 000000000..dd03b79b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+/* HPL_binit_1rinM: copy the panel and/or create its MPI data type. */
+#ifdef STDC_HEADERS
+int HPL_binit_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements .. (nothing to do if no panel or npcol <= 1)
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+ 
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by PANEL's buffers[0] / counts[0] / dtypes[0] */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len values of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: the root sends the panel to its next neighbor and,  when
+ * size > 2, to the one after that.  Any other process probes its sender
+ * (the root for those two, else the previous process); if the message is
+ * there it receives it and forwards it to next,  unless it is the root's
+ * next neighbor or the last of the ring. If not there: HPL_KEEP_TESTING.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );
+ 
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next,
+                          size ), msgid, comm );
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( size > 2 ) && 
+          ( MModSub1( prev, size ) == root ) ) partner = root;
+      else                                     partner = prev;
+
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( prev != root ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE (also in *IFLAG).
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements .. (nothing to do if no panel or npcol <= 1)
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Free the MPI data type PANEL->dtypes[0] if one is in use; else no-op
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o
new file mode 100644
index 000000000..6753a83a3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c
new file mode 100644
index 000000000..dd5eb2d12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+/* HPL_binit_1ring: copy the panel and/or create its MPI data type. */
+#ifdef STDC_HEADERS
+int HPL_binit_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements .. (nothing to do if no panel or npcol <= 1)
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+ 
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by PANEL's buffers[0] / counts[0] / dtypes[0] */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* Message is PANEL->len values of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, prev, rank, root,
+                              size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process, probe for message. If the message is here,
+ * then receive it, and  if I am not the last process of the ring, then
+ * forward it to the next.  Otherwise, inform the caller that the panel
+ * has still not been received.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank,
+                       size ), msgid, comm );
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+
+      ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
+                             comm, &PANEL->status[0] );
+            next = MModAdd1( rank, size );
+            if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
+                                msgid, comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE (also in *IFLAG).
+ */  
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements .. (nothing to do if no panel or npcol <= 1)
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Free the MPI data type PANEL->dtypes[0] if one is in use; else no-op
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o
new file mode 100644
index 000000000..5ce4009a8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c
new file mode 100644
index 000000000..56581ea0d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c
@@ -0,0 +1,236 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user asked not to use MPI data types */
+#ifndef HPL_COPY_L       /* ... and did not request the copy of L ... */
+#define HPL_COPY_L   /* that combination is not supported: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_binit_2rinM prepares PANEL for the 2rinM broadcast variant.  It
+ * returns HPL_SUCCESS,  or HPL_FAILURE  when  HPL_packL  does not return
+ * MPI_SUCCESS.
+ *
+ * A NULL panel, or PANEL->grid->npcol <= 1, requires no preparation.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer:  always when no  MPI  data
+ * type is used, only on request (HPL_COPY_L) otherwise.
+ */
+   HPL_copyL( PANEL );
+#endif
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_binit_2rinM
+ */
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by entry 0 of PANEL's buffers / counts / dtypes */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, roo2, root, size;
+/*
+ * Returns, and stores in *IFLAG, HPL_SUCCESS/HPL_FAILURE/HPL_KEEP_TESTING.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: the root process sends to its two right neighbors and to
+ * the mid-process roo2.  Every other process probes for the message; if
+ * it is there, it is received and then forwarded to the next process,
+ * except by the root's right neighbor and by the last process of either
+ * ring.  Otherwise the caller is told to keep testing.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* The root only sends; every other process probes, receives, forwards */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+/* size > 2: also send to MModAdd1( next, size ) and to roo2 */
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         if( MModAdd1( next, size ) != roo2 )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE,
+                             MModAdd1( next, size ), msgid, comm );
+         }
+/* roo2 is sent to once only, even if it equals MModAdd1( next, size ) */
+         if( ierr == MPI_SUCCESS )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                             comm );
+         }
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( prev == root ) || ( rank == roo2 ) ||
+          ( MModSub1( prev,  size )  == root ) ) partner = root;
+      else                                       partner = prev;
+/* Poll for the panel coming from partner; go != 0 means it is there */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) && ( prev != root ) &&
+                ( next != roo2        ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_bwait_2rinM releases the MPI data type  PANEL->dtypes[0]  when
+ * HPL_USE_MPI_DATATYPE is defined.  It returns HPL_SUCCESS, or
+ * HPL_FAILURE when MPI_Type_free does not return MPI_SUCCESS.
+ *
+ * A NULL panel, or PANEL->grid->npcol <= 1, requires no work.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Release the MPI data type held in PANEL->dtypes[0]
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o
new file mode 100644
index 000000000..9c738a796
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c
new file mode 100644
index 000000000..f0e6e2647
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user asked not to use MPI data types */
+#ifndef HPL_COPY_L       /* ... and did not request the copy of L ... */
+#define HPL_COPY_L   /* that combination is not supported: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_binit_2ring prepares PANEL for the 2ring broadcast variant.  It
+ * returns HPL_SUCCESS,  or HPL_FAILURE  when  HPL_packL  does not return
+ * MPI_SUCCESS.
+ *
+ * A NULL panel, or PANEL->grid->npcol <= 1, requires no preparation.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer:  always when no  MPI  data
+ * type is used, only on request (HPL_COPY_L) otherwise.
+ */
+   HPL_copyL( PANEL );
+#endif
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_binit_2ring
+ */
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by entry 0 of PANEL's buffers / counts / dtypes */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of type MPI_DOUBLE starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, rank,
+                              roo2, root, size;
+/*
+ * Returns, and stores in *IFLAG, HPL_SUCCESS/HPL_FAILURE/HPL_KEEP_TESTING.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: the root process sends to its right neighbor and,  when
+ * size > 2, to the mid-process roo2.  Every other process probes for the
+ * message; if it is there, it is received and then forwarded to the next
+ * process unless that one is roo2 or the root (end of a ring). Otherwise
+ * the caller is told to keep testing (HPL_KEEP_TESTING).
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* The root only sends; every other process probes, receives, forwards */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                          comm );
+      }
+   }
+   else
+   {
+      partner = MModSub1( rank, size );
+      if( ( partner == root ) || ( rank == roo2 ) ) partner = root;
+/* Poll for the panel coming from partner; go != 0 means it is there */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( next != roo2 ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_bwait_2ring releases the MPI data type  PANEL->dtypes[0]  when
+ * HPL_USE_MPI_DATATYPE is defined.  It returns HPL_SUCCESS, or
+ * HPL_FAILURE when MPI_Type_free does not return MPI_SUCCESS.
+ *
+ * A NULL panel, or PANEL->grid->npcol <= 1, requires no work.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Release the MPI data type held in PANEL->dtypes[0]
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o
new file mode 100644
index 000000000..1de2094e7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c
new file mode 100644
index 000000000..100161152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bcast
+(
+   HPL_T_panel *                    PANEL,
+   int *                            IFLAG
+)
+#else
+int HPL_bcast
+( PANEL, IFLAG )
+   HPL_T_panel *                    PANEL;
+   int *                            IFLAG;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bcast broadcasts  the  current  panel.  Successful  completion is
+ * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+ * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+ * not completed, in which case this function should be called again.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * IFLAG   (output)                      int *
+ *         On exit,  IFLAG  indicates  whether  or not the broadcast has
+ *         occurred.  It is set on every return path.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Retrieve the selected virtual broadcast topology  and dispatch on it.
+ * An unknown topology broadcasts nothing:  report success via IFLAG too,
+ * so that the caller never reads an unset flag.
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break;
+      case HPL_1RING   : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break;
+      case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break;
+      case HPL_2RING   : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break;
+      case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
+      case HPL_BLONG   : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
+      default          : ierr = *IFLAG = HPL_SUCCESS;
+   }
+   return( ierr );
+/*
+ * End of HPL_bcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o
new file mode 100644
index 000000000..5ce7d8a27
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c
new file mode 100644
index 000000000..3daf72b7d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_binit
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_binit
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_binit initializes  a  row  broadcast.  Successful  completion  is
+ * indicated by the returned error code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.  When NULL, HPL_SUCCESS is returned at once.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL )           return( HPL_SUCCESS );
+   if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS );
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_binit_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_binit_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_binit_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_binit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o
new file mode 100644
index 000000000..c9f9da1e6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c
new file mode 100644
index 000000000..5fa221937
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c
@@ -0,0 +1,445 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif                                        /* ! defined HPL_COPY_L */
+#endif                                 /* defined HPL_NO_MPI_DATATYPE */
+
+#define   I_SEND    0  /* send slot of PANEL->buffers, counts, dtypes */
+#define   I_RECV    1  /* recv slot of PANEL->buffers, counts, dtypes */
+
+#ifdef STDC_HEADERS
+int HPL_binit_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Initialization step of the  HPL_BLONG_M  broadcast:  when required,
+ * the panel is copied into a contiguous buffer by HPL_copyL.  The value
+ * returned is always HPL_SUCCESS.
+ */
+/*
+ * Nothing to do without a panel or with a single process column
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * The copy is skipped only if MPI datatypes are used without HPL_COPY_L
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Datatype path: every message is described by the I_SEND or I_RECV slot of PANEL */
+#define   _M_BUFF_S1        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S1       PANEL->counts[I_SEND]
+#define   _M_TYPE_S1        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_S2        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S2       PANEL->counts[I_SEND]
+#define   _M_TYPE_S2        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R1        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R1       PANEL->counts[I_RECV]
+#define   _M_TYPE_R1        PANEL->dtypes[I_RECV]
+
+#define   _M_BUFF_R2        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R2       PANEL->counts[I_RECV]
+#define   _M_TYPE_R2        PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+
+#else                                  /* ! defined HPL_USE_MPI_DATATYPE */
+/* Plain path: MPI_DOUBLE data read from / written to PANEL->L2; 1 = all PANEL->len items */
+#define   _M_BUFF_S1        (void *)(PANEL->L2)
+#define   _M_COUNT_S1       PANEL->len
+#define   _M_TYPE_S1        MPI_DOUBLE
+/* 2 = one piece: lbuf items at offset ibuf (locals of the function using the macro) */
+#define   _M_BUFF_S2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S2       lbuf
+#define   _M_TYPE_S2        MPI_DOUBLE
+ 
+#define   _M_BUFF_R1        (void *)(PANEL->L2)
+#define   _M_COUNT_R1       PANEL->len
+#define   _M_TYPE_R1        MPI_DOUBLE
+ 
+#define   _M_BUFF_R2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R2       lbuf
+#define   _M_TYPE_R2        MPI_DOUBLE
+/* ROLL = separate send (ibufS, lbufS) and receive (ibufR, lbufR) ranges of PANEL->L2 */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+
+#endif                                    /* defined HPL_USE_MPI_DATATYPE */
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blonM
+(
+   HPL_T_panel                * PANEL, /* panel to broadcast (NULL: no-op) */
+   int                        * IFLAG  /* out: same code as the return value */
+)
+#else
+int HPL_bcast_blonM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/* Broadcast PANEL from column PANEL->pcol over PANEL->grid->row_comm.
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf,
+                              ibufR, ibufS, dummy=0, indx, ip2=1, k, l,
+                              lbuf, lbufR, lbufS, mask=1, msgid, mydist,
+                              mydist2, next, npm1, npm2, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: the root process sends the whole panel (PANEL->len items)
+ * to its right neighbor with a synchronous send;  that neighbor receives
+ * it and returns.  The root  then  spreads  pieces of the panel over the
+ * other npcol - 2 processes.  The HPL_KEEP_TESTING path, which tells the
+ * caller that the panel has not been received yet,  depends on the probe
+ * that is currently disabled below.
+ */
+   comm = PANEL->grid->row_comm; rank  = PANEL->grid->mycol;
+   root = PANEL->pcol;           msgid = PANEL->msgid;
+   prev = MModSub1( rank, size );
+ 
+   if( rank == root )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   HPL_packL( PANEL, 0, PANEL->len, I_SEND );
+#endif
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1,
+                             MModAdd1( rank, size ), msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+   else if( prev == root )
+   {
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ *
+ *    ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] );
+ */
+      if( ierr == MPI_SUCCESS )
+      {          /* go keeps its initial value 1 while the probe is off */
+         if( go != 0 )
+         {
+#ifdef HPL_USE_MPI_DATATYPE
+            ierr =      HPL_packL( PANEL, 0, PANEL->len, I_RECV );
+#endif
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1,
+                                  root, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+      }
+   }
+/*
+ * if I am just after the root, exit now. The message receive  completed
+ * successfully, this guy is done. If there are only 2 processes in each 
+ * row of processes, we are done as well.
+ */
+   if( ( prev == root ) || ( size == 2 ) )
+   {
+      *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+      return( *IFLAG );
+   }
+/*
+ * Otherwise, proceed with broadcast -  Spread  the panel across process
+ * columns
+ */
+   npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len;
+/* After the loop: ip2 = largest power of 2 <= npm2,  mask = 2 * ip2 - 1 */
+   k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   if( rank == root ) mydist2 = ( mydist = 0 );
+   else   mydist2 = ( mydist  = MModSub( rank, root, size ) - 1 );
+/* npm1 pieces of count items each; the last one runs up to COUNT (see the roll phase) */
+   indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 );
+ 
+   do
+   {
+      mask ^= ip2;
+
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+
+         partner = mydist ^ ip2;
+
+         if( ( mydist & ip2 ) != 0 )
+         {                                   /* receiving side of this step */
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+  
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                   msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < npm1 )
+         {                                     /* sending side of this step */
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces: npm2 exchange steps with the prev / next neighbors
+ */
+   prev = MModSub1( rank, size );
+   if( MModSub1( prev, size ) == root ) prev = root; /* skip root's neighbor */
+   next = MModAdd1( rank, size );
+   if( rank == root ) next = MModAdd1( next, size ); /* skip root's neighbor */
+
+   for( k = 0; k < npm2; k++ )
+   {
+      l = ( k >> 1 );
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+ 
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+ 
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/* Performs no communication: every path returns HPL_SUCCESS. */
+   if( PANEL != NULL )
+   {
+      if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+   }
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o
new file mode 100644
index 000000000..0e2589292
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c
new file mode 100644
index 000000000..e57f11bcc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c
@@ -0,0 +1,363 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif                                        /* ! defined HPL_COPY_L */
+#endif                                 /* defined HPL_NO_MPI_DATATYPE */
+
+#define   I_SEND    0  /* send slot of PANEL->buffers, counts, dtypes */
+#define   I_RECV    1  /* recv slot of PANEL->buffers, counts, dtypes */
+
+#ifdef STDC_HEADERS
+int HPL_binit_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Initialization step  of the  HPL_BLONG  broadcast:  when  required,
+ * the panel is copied into a contiguous buffer by HPL_copyL.  The value
+ * returned is always HPL_SUCCESS.
+ */
+/*
+ * Nothing to do without a panel or with a single process column
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * The copy is skipped only if MPI datatypes are used without HPL_COPY_L
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Datatype path: every message is described by the I_SEND or I_RECV slot of PANEL */
+#define   _M_BUFF_S         PANEL->buffers[I_SEND]
+#define   _M_COUNT_S        PANEL->counts[I_SEND]
+#define   _M_TYPE_S         PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R         PANEL->buffers[I_RECV]
+#define   _M_COUNT_R        PANEL->counts[I_RECV]
+#define   _M_TYPE_R         PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+ 
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+ 
+#else                                  /* ! defined HPL_USE_MPI_DATATYPE */
+/* Plain path: lbuf MPI_DOUBLE items at offset ibuf of PANEL->L2 (locals of the macro user) */
+#define   _M_BUFF_S         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S        lbuf
+#define   _M_TYPE_S         MPI_DOUBLE
+ 
+#define   _M_BUFF_R         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R        lbuf
+#define   _M_TYPE_R         MPI_DOUBLE
+/* ROLL = separate send (ibufS, lbufS) and receive (ibufR, lbufR) ranges of PANEL->L2 */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+ 
+#endif                                    /* defined HPL_USE_MPI_DATATYPE */
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blong
+(
+   HPL_T_panel                * PANEL, /* panel to broadcast (NULL: no-op) */
+   int                        * IFLAG  /* out: same code as the return value */
+)
+#else
+int HPL_bcast_blong( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/* Broadcast PANEL from column PANEL->pcol over PANEL->grid->row_comm.
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, dummy=0, ierr=MPI_SUCCESS,
+                              ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf,
+                              lbufR, lbufS, mask, msgid, mydist, mydist2,
+                              next, npm1, partner, prev, rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  the panel is spread in pieces  over the  npcol  process
+ * columns, starting from the root. Every transfer is blocking (MPI_Recv
+ * / MPI_Ssend):  the probe that would let  a non-root  process  report
+ * HPL_KEEP_TESTING to the caller  is compiled out below  (#if 0),  so
+ * this function only ever reports HPL_SUCCESS or HPL_FAILURE.
+ */
+   comm    = PANEL->grid->row_comm;  rank  = PANEL->grid->mycol;
+   mask    = PANEL->grid->col_mask;  ip2   = PANEL->grid->col_ip2m1;
+   root    = PANEL->pcol;            msgid = PANEL->msgid;
+   COUNT   = PANEL->len;             npm1  = size - 1;
+   mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2;
+   count   = COUNT / size; count = Mmax( count, 1 );
+/*
+ * Spread the panel across process columns
+ */
+   do
+   {
+      mask ^= ip2;
+ 
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+ 
+         partner = mydist ^ ip2;
+ 
+         if( ( mydist & ip2 ) != 0 )
+         {                                   /* receiving side of this step */
+            partner = MModAdd( root, partner, size );
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on 
+ * some machines. It is disabled for now;  note that  it uses a variable
+ * go that is not declared in this function: it cannot be enabled as is.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                     msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < size )
+         {                                     /* sending side of this step */
+            partner = MModAdd( root, partner, size );
+ 
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+ 
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces: npm1 exchange steps with the prev / next neighbors
+ */
+   prev = MModSub1( rank, size ); next = MModAdd1( rank, size );
+
+   for( k = 0; k < npm1; k++ )
+   {
+      l = ( k >> 1 ); 
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/* Performs no communication: every path returns HPL_SUCCESS. */
+   if( PANEL != NULL )
+   {
+      if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+   }
+
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o
new file mode 100644
index 000000000..3a0c08b06
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c
new file mode 100644
index 000000000..a2e0f4df8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bwait
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_bwait
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_bwait  hands the panel over to the wait routine that matches the
+ * broadcast topology stored in  PANEL->algo->btopo  and  returns  that
+ * routine's return value. HPL_SUCCESS is returned directly when the grid
+ * has at most one process column or when the topology is not listed.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Return at once when the grid has at most one process column; the
+ * topology is not looked at in that case
+ */
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the selected virtual broadcast topology
+ */
+   switch( PANEL->algo->btopo )
+   {
+      case HPL_1RING_M : return( HPL_bwait_1rinM( PANEL ) );
+      case HPL_1RING   : return( HPL_bwait_1ring( PANEL ) );
+      case HPL_2RING_M : return( HPL_bwait_2rinM( PANEL ) );
+      case HPL_2RING   : return( HPL_bwait_2ring( PANEL ) );
+      case HPL_BLONG_M : return( HPL_bwait_blonM( PANEL ) );
+      case HPL_BLONG   : return( HPL_bwait_blong( PANEL ) );
+      default          : break;
+   }
+/*
+ * Topology not handled by the switch above: report success
+ */
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_bwait
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o
new file mode 100644
index 000000000..03ee92ae4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c
new file mode 100644
index 000000000..04f765a6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_copyL
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_copyL
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_copyL copies  the panel stored in  PANEL->A  into the workspace
+ * PANEL->L2  with  one call to  HPL_dlacpy.   Only  the processes with
+ * grid->mycol == PANEL->pcol  do the copy.  On the process row having
+ * grid->myrow == PANEL->prow  the source is offset by jb  and  mp-jb is
+ * passed as first dimension; elsewhere mp is passed with no offset.
+ *
+ * The copy of this panel  into  a contiguous buffer  can be enforced by
+ * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        ioff, jb, lda;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Processes outside the process column PANEL->pcol do not copy anything
+ */
+   if( PANEL->grid->mycol != PANEL->pcol ) return;
+
+   jb = PANEL->jb; lda = PANEL->lda;
+/*
+ * Row offset handed to Mptr: jb on the process row PANEL->prow,  zero on
+ * every other process row
+ */
+   ioff = ( PANEL->grid->myrow == PANEL->prow ? jb : 0 );
+   HPL_dlacpy( PANEL->mp - ioff, jb, Mptr( PANEL->A, ioff, -jb, lda ),
+               lda, PANEL->L2, PANEL->ldl2 );
+/*
+ * End of HPL_copyL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o
new file mode 100644
index 000000000..7db34d0b4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c
new file mode 100644
index 000000000..8a70ef83d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c
@@ -0,0 +1,245 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_packL
+(
+   HPL_T_panel *                    PANEL,
+   const int                        INDEX,
+   const int                        LEN,
+   const int                        IBUF
+)
+#else
+int HPL_packL
+( PANEL, INDEX, LEN, IBUF )
+   HPL_T_panel *                    PANEL;
+   const int                        INDEX;
+   const int                        LEN;
+   const int                        IBUF;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_packL builds and commits the MPI data type for the panel to be
+ * broadcast and stores it, with its buffer and count,  in entry IBUF of
+ * PANEL.  The MPI error code is returned (MPI_SUCCESS on success).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * INDEX   (input)                       const int
+ *         On entry,  INDEX is the offset of the first entry of the packed
+ *         buffer being broadcast.
+ *
+ * LEN     (input)                       const int
+ *         On entry, LEN is the length of the packed buffer.
+ *
+ * IBUF    (input)                       const int
+ *         On entry, IBUF  specifies the panel buffer/count/type entries
+ *         that should be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+#ifndef HPL_COPY_L
+   MPI_Datatype               * type = NULL;
+   void                       * * * bufs = NULL;
+   double                     * A;
+   int                        * blen = NULL;
+   MPI_Aint                   * disp = NULL;
+   int                        curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1,
+                              jb, jbm, jbp1, lda, len, m, m1, nbufs;
+#else
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_COPY_L
+/*
+ * Panel + L1 + DPIV  have been copied into a contiguous buffer - Create
+ * and commit a contiguous data type of LEN doubles starting at L2+INDEX
+ */
+   PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX);
+   PANEL->counts [IBUF] = 1;
+
+   ierr =      MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+   return( ierr );
+#else
+/*
+ * Panel is not contiguous (because of LDA and also L1 + DPIV) -  Create
+ * and commit a struct data type
+ */
+   jbp1 = ( jb = PANEL->jb ) + 1;
+/*
+ * Temporaries to create the type struct (jb + 1 entries each).
+ */
+   bufs = (void     * * *)malloc( jbp1 * sizeof( void * *     ) );
+   blen = (int          *)malloc( jbp1 * sizeof( int          ) );
+   disp = (MPI_Aint     *)malloc( jbp1 * sizeof( MPI_Aint     ) );
+   type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) );
+ 
+   if( ( bufs != NULL ) && ( blen != NULL ) &&
+       ( disp != NULL ) && ( type != NULL ) )
+   {
+      m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow );
+      if( curr != 0 ) m -= jb;
+ 
+      len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m;
+ 
+      if( ( m > 0 ) && ( ibuf < jbm ) )
+      {
+/*
+ * Retrieve proper pointers depending on process row and column
+ */
+         if( PANEL->grid->mycol == PANEL->pcol )
+         {
+            lda = PANEL->lda;
+            if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); }
+            else            { A = Mptr( PANEL->A,  0, -jb, lda ); }
+         }
+         else { lda = PANEL->ldl2; A = PANEL->L2; }
+/*
+ * Pack the first (partial) column of L: j1 = ibuf / m, i1 = ibuf % m
+ */
+         m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m );
+         m1 = Mmin( len, m1 );
+ 
+         bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda ));
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = m1;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+         nbufs++; len -= m1; j1++; ibuf += m1;
+/*
+ * Pack the remaining columns of L (at most m entries per column)
+ */
+         while( ( len > 0 ) && ( j1 < jb ) )
+         {
+            m1 = Mmin( len, m );
+ 
+            bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda ));
+            type[nbufs] = MPI_DOUBLE;
+            blen[nbufs] = m1;
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+            nbufs++; len -= m1; j1++; ibuf += m1;
+         }
+      }
+/*
+ * Pack L1, DPIV, DINFO: whatever is left of len, taken from PANEL->L1
+ */
+      if( len > 0 )
+      {                                            /* L1, DPIV, DINFO */
+         bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm);
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = len;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+         nbufs++;
+      }
+/* make disp[] relative to disp[0]; "disp[0] = 0" runs once, after the loop */
+      for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0;
+ 
+      PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1;
+/*
+ * construct the struct type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_create_struct( nbufs, blen, disp, type,
+                                   &PANEL->dtypes[IBUF] );
+/*
+ * release temporaries
+ */
+      if( bufs ) free( bufs );
+      if( blen ) free( blen );
+      if( disp ) free( disp );
+      if( type ) free( type );
+/*
+ * commit the type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+      return( ierr );
+   }
+   else
+   {
+/*
+ * Memory allocation failed -> abort
+ */
+      HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" );
+      return( MPI_SUCCESS );    /* never executed (hopefully ...) */
+   }
+#endif
+#else
+          /* HPL_USE_MPI_DATATYPE not defined - this routine is not expected
+             to be called then; as a safeguard it just reports success.   */
+   return( MPI_SUCCESS );   
+#endif
+/*
+ * End of HPL_packL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o
new file mode 100644
index 000000000..609133f84
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c
new file mode 100644
index 000000000..ff426891c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c
@@ -0,0 +1,142 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_recv
+(
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              SRC,
+   int                              RTAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_recv
+( RBUF, RCOUNT, SRC, RTAG, COMM )
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              SRC;
+   int                              RTAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_recv receives RCOUNT double precision entries into RBUF from the
+ * process SRC with a blocking MPI_Recv.  HPL_SUCCESS is returned when
+ * every MPI call made returned MPI_SUCCESS, HPL_FAILURE otherwise.  If
+ * RCOUNT is less than or equal to zero,  the function returns at once
+ * with HPL_SUCCESS and no MPI call is made.
+ *
+ * Arguments
+ * =========
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * SRC     (local input)                 int
+ *         On entry, SRC  specifies the rank of the  sending  process in
+ *         the communication space defined by COMM.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        mpierr;
+   MPI_Status                 stat;
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               dtype;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT <= 0 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Receive one element of a temporary contiguous type of RCOUNT doubles
+ */
+   mpierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &dtype );
+   if( mpierr == MPI_SUCCESS ) mpierr = MPI_Type_commit( &dtype );
+   if( mpierr == MPI_SUCCESS )
+      mpierr = MPI_Recv( (void *)(RBUF), 1, dtype, SRC, RTAG, COMM, &stat );
+   if( mpierr == MPI_SUCCESS ) mpierr = MPI_Type_free( &dtype );
+#else
+   mpierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG,
+                      COMM, &stat );
+#endif
+   if( mpierr != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_recv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o
new file mode 100644
index 000000000..a87fbb1f9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c
new file mode 100644
index 000000000..0b2363563
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c
@@ -0,0 +1,239 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_sdrv
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              STAG,
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              RTAG,
+   int                              PARTNER,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_sdrv
+( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              STAG;
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              RTAG;
+   int                              PARTNER;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is
+ * to allow for some experimentation and tuning of this simple function.
+ * Messages  of  length  less than  or  equal to zero  are not sent  nor
+ * received.  Successful completion  is  indicated by the returned error
+ * code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number  of double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG  specifies the message tag to be used for the
+ *         sending communication operation.
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  specifies the message tag to be used for the
+ *         receiving communication operation.
+ *
+ * PARTNER (local input)                 int
+ *         On entry,  PARTNER  specifies  the rank of the  collaborative
+ *         process in the communication space defined by COMM.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type[2];
+#endif
+   MPI_Request                request;
+   MPI_Status                 status;
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT > 0 )
+   {
+      if( SCOUNT > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER,
+                                RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER,
+                               STAG, COMM );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[1] );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                                PARTNER, RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE,
+                               PARTNER, STAG, COMM );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#endif
+      }
+      else
+      {
+/*
+ * Blocking receive
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG,
+                               COMM, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+         ierr =      MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                               PARTNER, RTAG, COMM, &status );
+#endif
+      }
+   }
+   else if( SCOUNT > 0 )
+   {
+/*
+ * Blocking send
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+      ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG,
+                          COMM );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &type[1] );
+#else
+      ierr =      MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER,
+                            STAG, COMM );
+#endif
+   }
+   else { ierr = MPI_SUCCESS; }
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_sdrv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o
new file mode 100644
index 000000000..8d188a0ec
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c
new file mode 100644
index 000000000..9e9868594
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_send
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              DEST,
+   int                              STAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_send
+( SBUF, SCOUNT, DEST, STAG, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              DEST;
+   int                              STAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_send is a simple wrapper around  MPI_Send.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * HPL_SUCCESS.  In the case of messages of length less than or equal to
+ * zero, this function returns immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number of  double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * DEST    (local input)                 int
+ *         On entry, DEST specifies the rank of the receiving process in
+ *         the communication space defined by COMM.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+#endif
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( SCOUNT <= 0 ) return( HPL_SUCCESS );
+
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_free( &type );
+#else
+   ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); 
+/*
+ * End of HPL_send
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o
new file mode 100644
index 000000000..6f242b1ed
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp
new file mode 100644
index 000000000..644503181
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp
@@ -0,0 +1,310 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+#define NUMBER_OF_STREAMS 4 // not referenced elsewhere in this file
+#define CHUNK_SIZE 512 // not referenced elsewhere in this file
+#define NN 64 // dpcpp_dgemm stays on the host when N < NN
+#define NM 128 // dpcpp_dgemm stays on the host when M < NM
+#define ERRCODE(e) (-(__LINE__ * 1000 + (e))) // negative code built from the source line and e; unused in this file
+//#define DEVICE_DEBUG
+//#ifdef MPI
+//#include <mpi.h>
+//#endif
+
+// NOTE(review): _GNU_SOURCE may already be predefined by the compiler driver - confirm no redefinition warning.
+#define _GNU_SOURCE
+
+#define CUDA_ERROR_CHECK
+#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
+#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
+
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <array>
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/times.h>
+#include <sys/time.h>
+
+#include <iostream>
+#include <chrono>
+#include <dpct/blas_utils.hpp>
+
+#include "mkl.h"
+
+extern "C" {
+// Performs no work: the CUDA_ERROR_CHECK section is empty, so err, file and line are unused.
+inline void __cudaSafeCall(dpct::err0 err, const char *file, const int line)
+{
+    #ifdef CUDA_ERROR_CHECK
+
+#endif
+
+    return;
+}
+// Waits on the current device's queues; file and line are unused. See the handler below for escaping sycl::exception.
+inline void __cudaCheckError(const char *file, const int line) try {
+#ifdef CUDA_ERROR_CHECK
+        /*
+        DPCT1010:1: SYCL uses exceptions to report errors and does not use the
+        error codes. The call was replaced with 0. You need to rewrite this
+        code.
+        */
+        dpct::err0 err = 0;
+
+        // More careful checking. However, this will affect performance.
+        // Comment away if needed.
+        err = DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw());
+        // err is assigned above but never read afterwards.
+#endif
+
+    return;
+}
+catch (sycl::exception const &exc) {
+  std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+            << ", line:" << __LINE__ << std::endl;
+  std::exit(1);
+}
+// C-linkage declarations of the two BLAS-style entry points defined later in this file.
+    void dpcpp_dgemm 
+        (   const int ORDER,
+            const int TRANSA,   const int TRANSB,
+            const int M,        const int N,        const int K,       
+            const double ALPHA, const double *A,    const int LDA,
+            const double *B,    const int LDB,      const double BETA,    
+            double *C,          const int LDC);
+
+    void dpcpp_dtrsm(
+       int HPL_ORDER,
+       int HPL_SIDE,
+       int HPL_UPLO,
+       int HPL_TRANS,
+       int HPL_DIAG,
+       const int,
+       const int,
+       const double,
+       const double *,
+       const int,
+       double *,
+       const int);
+}
+
+
+void dpcpp_dgemm 
+(   const int ORDER,   const int TRANSA,    const int TRANSB,       
+    const int M,       const int N,         const int K,       
+    const double ALPHA,const double *A,     const int LDA,
+    const double *B,   const int LDB,       
+    const double BETA, double *C,         const int LDC)
+{
+/*
+ * C := ALPHA * A * B + BETA * C, always column-major and non-transposed
+ * (ORDER, TRANSA and TRANSB are not consulted).  Small problems are run by
+ * cblas_dgemm on the host; the rest are staged through device memory.
+ */
+   dpct::device_ext &dev_ct1 = dpct::get_current_device();
+   sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+
+    // K == 0 is not an early exit: the host path below still applies BETA to C.
+    if ((M==0)||(N==0)) return;
+
+    if ( (N) < NN || (M) < NM || (K) < 128){ 
+         #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on CPU" << std::endl; 
+         #endif
+         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,  M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+          return;
+    }    
+
+    #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on GPU" << std::endl; 
+    #endif
+    // Element counts are formed in size_t: int products such as N * LDC can overflow for large N and LDC.
+    const size_t elemsA = (size_t)K * LDA, elemsB = (size_t)N * LDB, elemsC = (size_t)N * LDC;
+    double *devPtrA, *devPtrB, *devPtrC;
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrA = sycl::malloc_device<double>(elemsA, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrA, &A[0], elemsA * sizeof(double)).wait()));
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrB = sycl::malloc_device<double>(elemsB, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrB, &B[0], elemsB * sizeof(double)).wait()));
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrC = sycl::malloc_device<double>(elemsC, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrC, &C[0], elemsC * sizeof(double)).wait()));
+
+    dev_ct1.queues_wait_and_throw();
+    oneapi::mkl::blas::column_major::gemm(
+        *dpct::get_current_device().get_saved_queue(),
+        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M,
+        N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC)
+        .wait();
+    dev_ct1.queues_wait_and_throw();
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(&C[0], devPtrC, elemsC * sizeof(double)).wait()));
+    dev_ct1.queues_wait_and_throw();
+    sycl::free(devPtrA, q_ct1);
+    sycl::free(devPtrB, q_ct1);
+    sycl::free(devPtrC, q_ct1);
+}
+  
+void dpcpp_dtrsm
+(  const int ORDER,           const int SIDE,
+   const int UPLO,            const int TRANS,
+   const int DIAG,            const int M,       const int N,
+   const double ALPHA,    const double* A,  const int LDA,       double* B,
+   const int LDB)
+{
+/*
+ * Triangular solve that overwrites the M x N matrix B.  The operation is
+ * always column-major, left-side, lower, non-transposed and unit-diagonal;
+ * ORDER, SIDE, UPLO, TRANS and DIAG are not consulted.  Small problems are
+ * run by cblas_dtrsm on the host; the rest are staged through device memory.
+ */
+   dpct::device_ext &dev_ct1 = dpct::get_current_device();
+   sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+
+    if ((M==0)||(N==0)) return;
+
+    if ( (M) < 512 || (N) < 2*(M)){
+        #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on CPU" << std::endl; 
+        #endif
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+        return;
+    } 
+
+    #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on GPU" << std::endl; 
+    #endif
+    // Element counts are formed in size_t: int products such as N * LDB can overflow for large N and LDB.
+    const size_t elemsA = (size_t)M * LDA, elemsB = (size_t)N * LDB;
+    double *devPtrA, *devPtrB;
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrA = sycl::malloc_device<double>(elemsA, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrA, A, elemsA * sizeof(double)).wait()));
+
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        devPtrB = sycl::malloc_device<double>(elemsB, q_ct1)));
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(devPtrB, B, elemsB * sizeof(double)).wait()));
+    dev_ct1.queues_wait_and_throw();
+
+    oneapi::mkl::blas::column_major::trsm(
+        *dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left,
+        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
+        oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB)
+        .wait();
+
+    dev_ct1.queues_wait_and_throw();
+    CudaSafeCall(DPCT_CHECK_ERROR(
+        q_ct1.memcpy(B, devPtrB, elemsB * sizeof(double)).wait()));
+
+    dev_ct1.queues_wait_and_throw();
+    sycl::free(devPtrA, q_ct1);
+    sycl::free(devPtrB, q_ct1);
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o
new file mode 100644
index 000000000..5284ec6c1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce combines  BUFFER  across the processes of  COMM  and
+ * leaves the result on all of them.  It calls HPL_reduce (passing 0,
+ * presumably the root rank) and then HPL_broadcast with ROOT = 0.  If
+ * HPL_reduce fails its code is returned and no broadcast is attempted.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry, the local data to be combined.  On exit, the data
+ *         delivered by the broadcast from rank 0.
+ *
+ * COUNT   (global input)                const int
+ *         Number of entries in BUFFER.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         Type of the entries in BUFFER.
+ *
+ * OP      (global input)                const HPL_T_OP
+ *         Combine operation forwarded to HPL_reduce.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const int                  reduce_status =
+      HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM );
+/* ..
+ * .. Executable Statements ..
+ */
+   return( ( reduce_status == MPI_SUCCESS ) ?
+           HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) : reduce_status );
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o
new file mode 100644
index 000000000..ac0f38d00
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_barrier implements its synchronisation by broadcasting one dummy
+ * integer from rank 0 with HPL_broadcast, whose status is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        token = 0;
+   void *                     payload = (void *)( &token );
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_broadcast( payload, 1, HPL_INT, 0, COMM ) );
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o
new file mode 100644
index 000000000..b842da4f7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_broadcast
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_broadcast
+( BUFFER, COUNT, DTYPE, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_broadcast broadcasts  a message from the process with rank ROOT to
+ * all processes in the group and returns MPI_SUCCESS or an MPI error code.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be broadcast. On
+ *         exit, this array contains the broadcast data and is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         If COUNT <= 0, MPI_SUCCESS is returned and nothing is sent.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the coordinate of the source process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, 
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+   MPI_Status                 status;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+/* size / rank may be left unset by a failed query: bail out before use */
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( ( mpierr != MPI_SUCCESS ) || ( size <= 1 ) ) return( mpierr );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   if( mpierr != MPI_SUCCESS ) return( mpierr );
+/* ip2 = largest power of two <= size-1,  mask = 2*ip2 - 1 */
+   kk = size - 1;
+   while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   mydist = MModSub( rank, ROOT, size );
+   do                                  /* one pass per bit, high to low */
+   {
+      mask ^= ip2;
+      if( ( mydist & mask ) == 0 )
+      {
+         partner = mydist ^ ip2;
+         if( mydist & ip2 )            /* bit set: receive at this step */
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Recv(  BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                 partner, tag, COMM, &status );
+         }
+         else if( partner < size )     /* bit clear: send if partner exists */
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                partner, tag, COMM );
+         }
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      }
+      ip2 >>= 1;
+   } while( ip2 );
+   return( hplerr );
+/*
+ * End of HPL_broadcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o
new file mode 100644
index 000000000..1862a82f8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c
new file mode 100644
index 000000000..f0d00b065
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_exit
+(
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_exit
+( GRID )
+   HPL_T_grid *                     GRID;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_exit frees the row, column and global communicators of GRID
+ * (only when all_comm is not MPI_COMM_NULL) and resets its other fields.
+ * It returns MPI_SUCCESS, or the code of the last failing MPI_Comm_free.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry, the process grid to release.  On exit,  order  is
+ *         HPL_COLUMN_MAJOR and the other fields assigned here are -1.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm *                 doomed[3];
+   int                        k, rc, status = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->all_comm != MPI_COMM_NULL )
+   {
+      doomed[0] = &(GRID->row_comm);
+      doomed[1] = &(GRID->col_comm);
+      doomed[2] = &(GRID->all_comm);
+      for( k = 0; k < 3; k++ )
+      {
+         rc = MPI_Comm_free( doomed[k] );
+         if( rc != MPI_SUCCESS ) status = rc;
+      }
+   }
+/* put the descriptor back into a recognisable "no grid" state */
+   GRID->order   = HPL_COLUMN_MAJOR;
+   GRID->iam     = GRID->myrow    = GRID->mycol     = -1;
+   GRID->nprow   = GRID->npcol    = GRID->nprocs    = -1;
+   GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1;
+   GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1;
+   return( status );
+/*
+ * End of HPL_grid_exit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o
new file mode 100644
index 000000000..75a094fff
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c
new file mode 100644
index 000000000..95c5a7315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_info
+(
+   const HPL_T_grid *               GRID,
+   int *                            NPROW,
+   int *                            NPCOL,
+   int *                            MYROW,
+   int *                            MYCOL
+)
+#else
+int HPL_grid_info
+( GRID, NPROW, NPCOL, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   int *                            NPROW;
+   int *                            NPCOL;
+   int *                            MYROW;
+   int *                            MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_info copies the grid shape  (nprow, npcol)  and the calling
+ * process coordinates  (myrow, mycol)  out of  GRID  into the four output
+ * arguments.  No MPI routine is called;  MPI_SUCCESS is always returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NPROW   (global output)               int *
+ *         On exit,  NPROW receives GRID->nprow, the number of process
+ *         rows in the grid.
+ *
+ * NPCOL   (global output)               int *
+ *         On exit,  NPCOL receives GRID->npcol.
+ *
+ * MYROW   (global output)               int *
+ *         On exit,  MYROW receives GRID->myrow.
+ *
+ * MYCOL   (global output)               int *
+ *         On exit,  MYCOL receives GRID->mycol.
+ *
+ * None of the four output pointers is checked:  each one is dereferenced
+ * unconditionally and must therefore be valid.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   *NPROW = GRID->nprow;
+   *NPCOL = GRID->npcol;
+   *MYROW = GRID->myrow;
+   *MYCOL = GRID->mycol;
+
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_grid_info
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o
new file mode 100644
index 000000000..0b216fcf1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c
new file mode 100644
index 000000000..52111ac52
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_init
+(
+   MPI_Comm                         COMM,
+   const HPL_T_ORDER                ORDER,
+   const int                        NPROW,
+   const int                        NPCOL,
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_init
+( COMM, ORDER, NPROW, NPCOL, GRID )
+   MPI_Comm                         COMM;
+   const HPL_T_ORDER                ORDER;
+   const int                        NPROW;
+   const int                        NPCOL;
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_init creates a NPROW x NPCOL  process  grid using column- or
+ * row-major ordering from an initial collection of processes identified
+ * by an  MPI  communicator.  Successful  completion is indicated by the
+ * returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+ * implementation. The coordinates of processes that are not part of the
+ * grid are set to values outside of [0..NPROW) x [0..NPCOL).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         On entry,  COMM  is  the  MPI  communicator  identifying  the
+ *         initial  collection  of  processes out of which  the  grid is
+ *         formed.
+ *
+ * ORDER   (global input)                const HPL_T_ORDER
+ *         On entry, ORDER specifies how the processes should be ordered
+ *         in the grid as follows:
+ *            ORDER = HPL_ROW_MAJOR    row-major    ordering;
+ *            otherwise (HPL_COLUMN_MAJOR) column-major ordering;
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid to be created. NPROW must be at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid to be created. NPCOL must be at least one.
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information to be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hdim, hplerr=MPI_SUCCESS, ierr, ip2, k,
+                              mask, mycol, myrow, nprocs, rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size );
+/*
+ * Abort if the grid is empty or needs more processes than COMM holds
+ */
+   nprocs = NPROW * NPCOL;
+   if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) )
+   { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); }
+/*
+ * Row- or column-major mapping of the rank in COMM to grid coordinates
+ */
+   if( ORDER == HPL_ROW_MAJOR )
+   {
+      GRID->order = HPL_ROW_MAJOR;
+      myrow = rank / NPCOL; mycol = rank - myrow * NPCOL;
+   }
+   else
+   {
+      GRID->order = HPL_COLUMN_MAJOR;
+      mycol = rank / NPROW; myrow = rank - mycol * NPROW;
+   }
+   GRID->iam   = rank;  GRID->myrow = myrow; GRID->mycol  = mycol;
+   GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs;
+/*
+ * row_ip2   : largest power of two <= nprow;
+ * row_hdim  : row_ip2 procs hypercube dim, i.e. log2( row_ip2 );
+ * row_ip2m1 : largest power of two <= nprow-1  (1 when nprow is 1);
+ * row_mask  : row_ip2m1 procs hypercube mask, i.e. 2 * row_ip2m1 - 1;
+ */
+   hdim = 0; ip2 = 1; k = NPROW;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->row_ip2 = ip2; GRID->row_hdim = hdim; 
+
+   mask = ip2 = 1;    k = NPROW - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->row_ip2m1 = ip2; GRID->row_mask = mask; 
+/*
+ * col_ip2   : largest power of two <= npcol;
+ * col_hdim  : col_ip2 procs hypercube dim, i.e. log2( col_ip2 );
+ * col_ip2m1 : largest power of two <= npcol-1  (1 when npcol is 1);
+ * col_mask  : col_ip2m1 procs hypercube mask, i.e. 2 * col_ip2m1 - 1;
+ */
+   hdim = 0; ip2 = 1; k = NPCOL;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->col_ip2 = ip2; GRID->col_hdim = hdim; 
+
+   mask = ip2 = 1;    k = NPCOL - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->col_ip2m1 = ip2; GRID->col_mask = mask; 
+/*
+ * Build the communicator of all grid processes;  leave if I am not part
+ * of this grid. Then create the row- and column communicators.
+ */
+   ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ),
+                          rank, &(GRID->all_comm) );
+   if( GRID->all_comm == MPI_COMM_NULL ) return( ierr );
+
+   ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   return( hplerr );
+/*
+ * End of HPL_grid_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o
new file mode 100644
index 000000000..7bad72781
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c
new file mode 100644
index 000000000..002aabe01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_max
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_max
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_max combines (max) two buffers:  for each i in [0..N),  INOUT[i]
+ * is overwritten with Mmax( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies  the  length  of  the  buffers  to  be
+ *         combined. N must be at least zero.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  input-output  buffer  to be
+ *         combined.  On exit,  the  entries of this array  contain  the
+ *         combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE gives the operand type (HPL_INT, else double).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * a = (const int *)(IN);
+      int             * b = (int *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+   else
+   {
+      const double    * a = (const double *)(IN);
+      double          * b = (double *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] );
+   }
+/*
+ * End of HPL_max
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o
new file mode 100644
index 000000000..5cb94b4ef
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c
new file mode 100644
index 000000000..a99e5e58a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_min
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_min
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_min combines (min) two buffers:  for each i in [0..N),  INOUT[i]
+ * is overwritten with Mmin( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies  the  length  of  the  buffers  to  be
+ *         combined. N must be at least zero.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  input-output  buffer  to be
+ *         combined.  On exit,  the  entries of this array  contain  the
+ *         combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE gives the operand type (HPL_INT, else double).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * a = (const int *)(IN);
+      int             * b = (int *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] );
+   }
+   else
+   {
+      const double    * a = (const double *)(IN);
+      double          * b = (double *)(INOUT);
+      for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] );
+   }
+/*
+ * End of HPL_min
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o
new file mode 100644
index 000000000..144fc1ec6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c
new file mode 100644
index 000000000..c80885b9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pnum
+(
+   const HPL_T_grid *               GRID,
+   const int                        MYROW,
+   const int                        MYCOL
+)
+#else
+int HPL_pnum
+( GRID, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   const int                        MYROW;
+   const int                        MYCOL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pnum determines  the  rank  of a  process  as a function  of  its
+ * coordinates in the grid, following the ordering in GRID->order.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry,  MYROW  specifies the row coordinate of the process
+ *         whose rank is to be determined. MYROW must be greater than or
+ *         equal to zero and less than NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry,  MYCOL  specifies  the  column  coordinate  of  the
+ *         process whose rank is to be determined. MYCOL must be greater
+ *         than or equal to zero and less than NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Executable Statements ..
+ */
+   if( GRID->order == HPL_ROW_MAJOR )
+      return( MYROW * GRID->npcol + MYCOL );
+   else
+      return( MYCOL * GRID->nprow + MYROW );
+/*
+ * End of HPL_pnum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o
new file mode 100644
index 000000000..8da27eae3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c
new file mode 100644
index 000000000..417c21163
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_reduce
+( BUFFER, COUNT, DTYPE, OP, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_reduce performs a global reduce operation across all processes of
+ * a group.  Note that the input buffer is used as a work array,  so its
+ * original data is corrupted in all processes but the accumulating one.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+ *         exit,  and  in process of rank  ROOT  this array contains the
+ *         reduced data.  This  buffer  is also used as workspace during
+ *         the operation in the other processes of the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         On entry, OP is a pointer to the local combine function.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank in COMM of the accumulating process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+   void                       * buffer = NULL;
+   int                        hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0,
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( size  == 1 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   i = size - 1; while( i > 1 ) { i >>= 1; d++; } /* d = ceil(log2(size)) */
+
+   if( DTYPE == HPL_INT )
+      buffer = (void *)( (int *)   malloc( (size_t)(COUNT) * 
+                                           sizeof( int    ) ) );
+   else
+      buffer = (void *)( (double *)malloc( (size_t)(COUNT) *
+                                           sizeof( double ) ) );
+
+   if( !( buffer ) )
+   { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); }
+
+   if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 )
+   {
+      do
+      {
+         mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                            MModAdd( ROOT, ip2, size ), tag, COMM,
+                            &status );
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         OP( COUNT, buffer, BUFFER, DTYPE );
+         ip2 <<= 1; d--;
+      } while( d );
+   }
+   else
+   {
+      do
+      {
+         if( ( mydist & mask ) == 0 )
+         {
+            partner = mydist ^ ip2;
+
+            if( mydist & ip2 )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                  partner, tag, COMM );
+            }
+            else if( partner < size )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr  = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                   partner, tag, COMM, &status );
+               OP( COUNT, buffer, BUFFER, DTYPE );
+            }
+            if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         }
+         mask ^= ip2; ip2 <<= 1; d--;
+      } while( d );
+   }
+   if( buffer ) free( buffer );
+
+   return( hplerr );
+/*
+ * End of HPL_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o
new file mode 100644
index 000000000..d731bed78
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c
new file mode 100644
index 000000000..34cf87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_sum
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_sum
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sum adds the buffer IN to the buffer INOUT, entry by entry.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         Number of entries to combine.  Nothing is done  when N is not
+ *         positive.
+ *
+ * IN      (input)                       const void *
+ *         Buffer whose entries are added to INOUT;  it is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, the second operand.  On exit, entry i holds the sum
+ *         of the entries i of IN and INOUT.  Entries are read as int if
+ *         DTYPE is HPL_INT and as double otherwise.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         Type of the entries stored in both buffers.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        k = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {                           /* every other type is summed as double */
+      double          * acc = (double *)(INOUT);
+      const double    * src = (const double *)(IN);
+      while( k < N ) { acc[k] += src[k]; k++; }
+   }
+   else
+   {                                               /* integer buffers */
+      int             * acc = (int *)(INOUT);
+      const int       * src = (const int *)(IN);
+      while( k < N ) { acc[k] += src[k]; k++; }
+   }
+/*
+ * End of HPL_sum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o
new file mode 100644
index 000000000..99df49676
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c
new file mode 100644
index 000000000..757dad242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_disp
+(
+   HPL_T_panel * *                  PANEL
+)
+#else
+int HPL_pdpanel_disp
+( PANEL )
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_disp deallocates  the  panel  structure  and  resources  and
+ * stores the error code returned by the panel factorization.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry,  PANEL  points  to  the  address  of the panel data
+ *         structure to be deallocated.  When  *PANEL is NULL nothing is
+ *         released and MPI_SUCCESS is returned.  On exit, *PANEL is set
+ *         to NULL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables .. */
+   int                        mpierr = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Deallocate the panel resources and structure; a NULL panel is skipped
+ */
+   if( *PANEL != NULL )
+   { mpierr = HPL_pdpanel_free( *PANEL ); free( *PANEL ); }
+   *PANEL = NULL;
+
+   return( mpierr );
+/*
+ * End of HPL_pdpanel_disp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o
new file mode 100644
index 000000000..22d8bd1b5
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c
new file mode 100644
index 000000000..38b5b0d97
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c
@@ -0,0 +1,104 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_free
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_pdpanel_free
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_free deallocates  the panel resources  and  stores the error
+ * code returned by the panel factorization.  The workspace pointers  are
+ * reset to NULL on exit so that no dangling pointer is left in the panel.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points  to  the  panel data  structure from
+ *         which the resources should be deallocated.  A NULL PANEL is a
+ *         no-op.  MPI_SUCCESS is always returned.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Executable Statements .. */
+   if( PANEL == NULL ) return( MPI_SUCCESS );
+/* DINFO is reset to NULL below: only read it while it is still valid */
+   if( PANEL->DINFO && PANEL->pmat->info == 0 )
+      PANEL->pmat->info = *(PANEL->DINFO);
+#ifdef HPL_CALL_VSIPL
+/* Release the blocks */
+   (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE );
+   (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE );
+   if( PANEL->grid->nprow > 1 )
+      (void) vsip_blockrelease_d( PANEL->Ublock,  VSIP_TRUE );
+/* Destroy blocks */
+   vsip_blockdestroy_d( PANEL->L1block );
+   vsip_blockdestroy_d( PANEL->L2block );
+   if( PANEL->grid->nprow > 1 )
+      vsip_blockdestroy_d( PANEL->Ublock );
+#endif
+   if( PANEL->WORK  ) free( PANEL->WORK  );
+   if( PANEL->IWORK ) free( PANEL->IWORK );
+/* Reset the workspace pointers, the same set HPL_pdpanel_init clears */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+
+   return( MPI_SUCCESS );
+/* End of HPL_pdpanel_free */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o
new file mode 100644
index 000000000..66da88393
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c
new file mode 100644
index 000000000..9e35c7fb4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c
@@ -0,0 +1,348 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_init
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdpanel_init
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_init initializes a panel data structure  and allocates its
+ * WORK and IWORK buffers;  HPL_pabort is called if an allocation fails.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry,  JB  specifies the number of columns of the  panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   size_t                     dalign;
+   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
+                              ml2, mp, mycol, myrow, nb, npcol, nprow,
+                              nq, nu;
+/* ..
+ * .. Executable Statements ..
+ */
+   PANEL->grid    = GRID;                  /* ptr to the process grid */
+   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
+   PANEL->pmat    = A;                 /* ptr to the local array info */
+
+   myrow = GRID->myrow; mycol = GRID->mycol;
+   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
+
+   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
+                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
+   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
+   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
+                                         /* ptr to trailing part of A */
+   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
+/*
+ * Workspace pointers are initialized to NULL.
+ */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+/*
+ * Local lengths, indexes process coordinates
+ */
+   PANEL->nb      = nb;               /* distribution blocking factor */
+   PANEL->jb      = JB;                                /* panel width */
+   PANEL->m       = M;      /* global # of rows of trailing part of A */
+   PANEL->n       = N;      /* global # of cols of trailing part of A */
+   PANEL->ia      = IA;     /* global row index of trailing part of A */
+   PANEL->ja      = JA;     /* global col index of trailing part of A */
+   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
+   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
+   PANEL->ii      = ii;      /* local row index of trailing part of A */
+   PANEL->jj      = jj;      /* local col index of trailing part of A */
+   PANEL->lda     = A->ld;            /* local leading dim of array A */
+   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
+   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
+   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
+/*
+ * Initialize  ldl2 and len  to temporary dummy values;  both are set to
+ * their final values below, once the workspace layout is known.
+ */
+   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
+   PANEL->len     = 0;           /* length of the buffer to broadcast */
+/*
+ * Figure out the exact amount of workspace  needed by the factorization
+ * and the update - Allocate that space - Finish the panel data structu-
+ * re initialization.
+ *
+ * L1:    JB x JB in all processes
+ * DPIV:  JB      in all processes
+ * DINFO: 1       in all processes
+ *
+ * We make sure that those three arrays are contiguous in memory for the
+ * later panel broadcast.  We  also  choose  to put this amount of space 
+ * right  after L2 (when it exists) so that one can receive a contiguous
+ * buffer.
+ */
+   dalign = ALGO->align * sizeof( double );
+
+   if( npcol == 1 )                             /* P x 1 process grid */
+   {                                     /* space for L1, DPIV, DINFO */
+      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
+      if( nprow > 1 )                                 /* space for U */
+      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * 
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure  -  Always re-use A in
+ * the only process column
+ */
+      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
+      PANEL->ldl2  = A->ld;
+      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->DPIV  = PANEL->L1    + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
+   }
+   else
+   {                                        /* space for L2, L1, DPIV */
+      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
+      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
+#ifdef HPL_COPY_L
+      lwork = ALGO->align + PANEL->len;
+#else
+      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
+#endif
+      if( nprow > 1 )                                 /* space for U */
+      { 
+         nu = ( mycol == icurcol ? nq - JB : nq );
+         lwork += JB * Mmax( 0, nu );
+      }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure - Re-use A in the cur-
+ * rent process column when HPL_COPY_L is not defined.
+ */
+#ifdef HPL_COPY_L
+      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->ldl2  = Mmax( 1, ml2 );
+      PANEL->L1    = PANEL->L2 + ml2 * JB;
+#else
+      if( mycol == icurcol )
+      {
+         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
+         PANEL->ldl2 = A->ld;
+         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
+      }
+      else
+      {
+         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
+         PANEL->ldl2 = Mmax( 1, ml2 );
+         PANEL->L1   = PANEL->L2 + ml2 * JB;
+      } 
+#endif
+      PANEL->DPIV  = PANEL->L1   + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
+   }
+#ifdef HPL_CALL_VSIPL
+   PANEL->Ablock  = A->block;
+/*
+ * Create blocks and bind them to the data pointers
+ */
+   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
+                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
+   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
+                                      (vsip_length)(PANEL->ldl2*JB),
+                                      VSIP_MEM_NONE );
+   if( nprow > 1 )
+   { 
+      nu = ( mycol == icurcol ? nq - JB : nq );
+      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
+                                        (vsip_length)(JB * Mmax( 0, nu )),
+                                        VSIP_MEM_NONE );
+   }
+   else { PANEL->Ublock = A->block; }
+#endif
+/*
+ * If nprow is 1, we just allocate an array of JB integers for the swap.
+ * When nprow > 1, we allocate the space for the index arrays immediate-
+ * ly. The exact size of this array depends on the swapping routine that
+ * will be used, so we allocate the maximum:
+ *
+ *    IWORK[0] is of size at most 1      +
+ *    IPL      is of size at most 1      +
+ *    IPID     is of size at most 4 * JB +
+ *
+ *    For HPL_pdlaswp00:
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       llen     is of size at most NPROW  +
+ *       llen_sv  is of size at most NPROW.
+ *
+ *    For HPL_pdlaswp01:
+ *       ipA      is of size at most 1      +
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       iplen    is of size at most NPROW  + 1 +
+ *       ipmap    is of size at most NPROW  +
+ *       ipmapm1  is of size at most NPROW  +
+ *       permU    is of size at most JB     +
+ *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
+ *
+ * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
+ *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ *
+ * We use the first entry of this work array  to  indicate  whether  the
+ * local  index arrays  have already been computed,  and  if  yes,  by
+ * which function:
+ *    IWORK[0] = -1: no index arrays have been computed so far;
+ *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
+ *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
+ * This avoids some redundant and useless computations.
+ */
+   if( nprow == 1 ) { lwork = JB; }
+   else             
+   {
+      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
+      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
+   }
+
+   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+
+   if( PANEL->IWORK == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
+                       /* Initialize the first entry of the workarray */
+   *(PANEL->IWORK) = -1;
+/*
+ * End of HPL_pdpanel_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o
new file mode 100644
index 000000000..8e9fd1360
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c
new file mode 100644
index 000000000..1dbd8a18f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c
@@ -0,0 +1,152 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_new
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel * *                  PANEL
+)
+#else
+void HPL_pdpanel_new
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_new  allocates a panel data structure with malloc,  lets
+ * HPL_pdpanel_init fill it in, and returns its address through PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         Process grid information;  forwarded  to  HPL_pdpanel_init
+ *         unchanged.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         Algorithmic parameters;  forwarded  to  HPL_pdpanel_init
+ *         unchanged.
+ *
+ * M       (local input)                 const int
+ *         Global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Global number of columns of the panel and trailing
+ *         submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         Number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         Data structure containing the local array information.
+ *         Forwarded to HPL_pdpanel_init.
+ *
+ * IA      (global input)                const int
+ *         Global row index identifying the panel and trailing
+ *         submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         Global column index identifying the panel and trailing
+ *         submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         Row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         Address of the pointer  that receives  the newly created
+ *         and initialized panel data structure.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * panel = NULL;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Get storage for the panel structure; report an allocation failure
+ */
+   panel = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) );
+   if( panel == NULL )
+      HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" );
+
+/* Fill in the new panel, then hand its address back to the caller */
+   HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, panel );
+   *PANEL = panel;
+/*
+ * End of HPL_pdpanel_new
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o
new file mode 100644
index 000000000..b63cf0f8a
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c
new file mode 100644
index 000000000..7ad5a1a99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP00N_DEPTH
+#define    HPL_LASWP00N_DEPTH       32
+#define    HPL_LASWP00N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp00N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp00N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp00N applies a sequence of  M  local row swaps to the matrix
+ * A: for k = 0 .. M-1 in turn, rows k and IPIV[k] of A are exchanged.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         Number of row interchanges to perform (length of IPIV).
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of the array A.
+ *         N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an array of dimension (LDA,N).
+ *         The row interchanges are applied in place, so that on exit
+ *         A holds the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         Array of size M holding the pivoting information: for k in
+ *         [0..M), local rows k and IPIV[k] of A are interchanged. An
+ *         entry with IPIV[k] = k leaves row k untouched.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register double            tmp;
+   double                     * ri, * rp;
+   const int                  step = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP00N_LOG2_DEPTH );
+   int                        piv, ntail, nblk;
+   register int               row, col;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( M <= 0 || N <= 0 ) return;
+/* Split the N columns into full unrolled blocks plus a remainder */
+   nblk  = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
+                  << HPL_LASWP00N_LOG2_DEPTH );
+   ntail = N - nblk;
+   for( col = 0; col < nblk; col += HPL_LASWP00N_DEPTH, A += step )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         piv = IPIV[row];
+         if( piv == row ) continue;
+         ri = A + row;
+         rp = A + piv;
+
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#if ( HPL_LASWP00N_DEPTH >  1 )
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  2 )
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  4 )
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  8 )
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH > 16 )
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+         tmp = *ri; *ri = *rp; *rp = tmp; ri += LDA; rp += LDA;
+#endif
+      }
+   }
+
+   if( ntail > 0 )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         piv = IPIV[row];
+         if( piv == row ) continue;
+         ri = A + row;
+         rp = A + piv;
+         for( col = 0; col < ntail; col++, ri += LDA, rp += LDA )
+         { tmp = *ri; *ri = *rp; *rp = tmp; }
+      }
+   }
+/*
+ * End of HPL_dlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o
new file mode 100644
index 000000000..acc66206b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c
new file mode 100644
index 000000000..786d1eff4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c
@@ -0,0 +1,209 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP01N_DEPTH
+#define    HPL_LASWP01N_DEPTH      32
+#define    HPL_LASWP01N_LOG2_DEPTH  5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01N copies  scattered rows  of  A  into itself  and into an
+ * array  U.  The row offsets in  A  of the source rows are specified by
+ * LINDXA.  The destinations of those rows are specified by LINDXAU.   A
+ * non-negative value of LINDXAU  indicates that the destination array is
+ * U, and A otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). The rows
+ *         of  A  specified by  LINDXA  are copied within this array U at
+ *         the positions indicated by non-negative values of LINDXAU.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A non-negative
+ *         value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+ *         should be copied into U at the position LINDXAU[i]; otherwise
+ *         the row  LINDXA[i]  of  A  should be moved  at  the  position
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP01N_LOG2_DEPTH );
+   int                        lda1, nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) <<
+                            HPL_LASWP01N_LOG2_DEPTH ) );
+/* Destination: LINDXAU[i] >= 0 -> row of U; else row -LINDXAU[i] of A */
+   for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; }
+/* Negate before the cast: (size_t) of a negative index would wrap. */
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#if ( HPL_LASWP01N_DEPTH >  1 )
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  2 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  4 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  8 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH > 16 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+      }
+   }
+/* Remaining nr columns: same row mapping, one column at a time */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; }
+         for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp01N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.o
new file mode 100644
index 000000000..8722bb62c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.c
new file mode 100644
index 000000000..429cfb6f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.c
@@ -0,0 +1,252 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor. DEPTH must equal 2^LOG2_DEPTH and be one of 1, 2, 4, 8, 16, 32; defining DEPTH externally skips both defaults, so LOG2_DEPTH must then be supplied too.
+ */
+#ifndef HPL_LASWP01T_DEPTH
+#define    HPL_LASWP01T_DEPTH       32   /* columns of A handled per unrolled pass */
+#define    HPL_LASWP01T_LOG2_DEPTH   5   /* log2( HPL_LASWP01T_DEPTH ), used for the shifts */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01T copies  scattered rows  of  A  into itself  and into an
+ * array U.  The row offsets in  A  of the source rows  are specified by
+ * LINDXA.  The destinations of those rows are specified by LINDXAU.  A
+ * non-negative  LINDXAU[i]  indicates that the destination array is  U,
+ * and A otherwise. Rows of A are stored as columns in U.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions indicated by non-negative values of  LINDXAU.
+ *         The rows of A are stored as columns in U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U where the rows of  A  should be
+ *         copied to. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  When
+ *         LINDXAU[i] is non-negative (>= 0), the row  LINDXA[i]  of  A
+ *         is copied into column LINDXAU[i] of U  (at U + LINDXAU[i]*LDU);
+ *         otherwise the row  LINDXA[i]  of  A  is moved to the position
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA = LDA * 2^LOG2_DEPTH and incU = 2^LOG2_DEPTH are the per-pass advances of A and U)
+ */
+   double                     * a0, * a1;        /* source / destination cursors */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH );   /* NOTE(review): assumes LDA << LOG2_DEPTH fits in int -- confirm */
+   int                        nu, nr;            /* unrolled / leftover column counts */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;        /* nothing to copy */
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu is the remainder. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) <<
+                            HPL_LASWP01T_LOG2_DEPTH ) );
+/* Unrolled passes: each pass handles DEPTH consecutive columns of A for all M rows, then advances A and U. */
+   for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);            /* first source element of row LINDXA[i] in this pass */
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            /* destination is contiguous in U: a1[k] receives the row entry from column k of this pass */
+            a1[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            a1[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA;
+            a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA;
+            a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA;
+            a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA;
+            a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA;
+            a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA;
+            a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA;
+            a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA;
+            a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA;
+            a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA;
+            a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA;
+            a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA;
+#endif
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);        /* LINDXAU[i] < 0 here; the negative index is converted to size_t before the subtraction */
+            /* move within A: source and destination cursors both step by LDA */
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+         }
+      }
+   }
+/* Remainder: A and U were advanced past the nu unrolled columns by the loop above, so they are used as-is here. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; }
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp01T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.o
new file mode 100644
index 000000000..8061746bb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.c
new file mode 100644
index 000000000..45c2f5f1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.c
@@ -0,0 +1,205 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor. DEPTH must equal 2^LOG2_DEPTH and be one of 1, 2, 4, 8, 16, 32; defining DEPTH externally skips both defaults, so LOG2_DEPTH must then be supplied too.
+ */
+#ifndef HPL_LASWP02N_DEPTH
+#define    HPL_LASWP02N_DEPTH       32   /* columns of A handled per unrolled pass */
+#define    HPL_LASWP02N_LOG2_DEPTH   5   /* log2( HPL_LASWP02N_DEPTH ), used for the shifts */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp02N
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         W0,
+   double *                         W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp02N
+( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         W0;
+   double *                         W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp02N packs scattered rows of an array  A  into workspace  W.
+ * The row offsets in A are specified by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         copied into W. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         copied into W. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be copied into W.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * W0      (local input/output)          double *
+ *         On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+ *         the destination offsets in U where the columns of W should be
+ *         copied: W0[i*LDW] = (double)(LINDXAU[i]) for i in [0..M).
+ *
+ * W       (local output)                double *
+ *         On entry, W  is an array of size (LDW,M). On exit, W contains
+ *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+ *         in W(:,i).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied into W.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M  that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the rows of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA = LDA * 2^LOG2_DEPTH is the per-pass advance of the A cursor A0)
+ */
+   const double               * A0 = A, * a0;    /* A0: start of the current column panel; a0: source cursor */
+   double                     * w0;              /* destination cursor inside W */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP02N_LOG2_DEPTH );   /* NOTE(review): assumes LDA << LOG2_DEPTH fits in int -- confirm */
+   int                        nr, nu;            /* leftover / unrolled column counts */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;        /* nothing to pack */
+/* Record each index LINDXAU[i], converted to double, in W0 at stride LDW. */
+   for( i = 0; i < M; i++ ) 
+      *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]);
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu is the remainder. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) <<
+                          HPL_LASWP02N_LOG2_DEPTH ) );
+/* Unrolled passes: each pass packs DEPTH consecutive columns of A for all M rows, then advances A0 and W. */
+   for( j = 0; j < nu;
+        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         /* row LINDXA[i] of A (stride LDA) is written contiguously into column i of W */
+         w0[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP02N_DEPTH >  1 )
+         w0[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  2 )
+         w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  4 )
+         w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA;
+         w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  8 )
+         w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA;
+         w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA;
+         w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA;
+         w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH > 16 )
+         w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA;
+         w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA;
+         w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA;
+         w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA;
+         w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA;
+         w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA;
+         w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA;
+         w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA;
+#endif
+      }
+   }
+/* Remainder: A0 and W were advanced past the nu unrolled columns by the loop above, so they are used as-is here. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp02N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.o
new file mode 100644
index 000000000..22f23ffdc
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c
new file mode 100644
index 000000000..760732a8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c
@@ -0,0 +1,194 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor. DEPTH must equal 2^LOG2_DEPTH and be one of 1, 2, 4, 8, 16, 32; defining DEPTH externally skips both defaults, so LOG2_DEPTH must then be supplied too.
+ */
+#ifndef HPL_LASWP03N_DEPTH
+#define    HPL_LASWP03N_DEPTH       32   /* entries of each W column handled per unrolled pass */
+#define    HPL_LASWP03N_LOG2_DEPTH   5   /* log2( HPL_LASWP03N_DEPTH ), used for the shifts */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03N
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03N
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03N copies columns of  W  into  rows  of an  array  U.  The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N).  Columns
+ *         of W are copied as rows within this array U at  the positions
+ *         specified in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the row W0(i*LDW) of U (elements LDU apart).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incU = LDU * 2^LOG2_DEPTH is the per-pass advance of U)
+ */
+   const double               * w = W, * w0;     /* w: start of the current slice of W; w0: source cursor */
+   double                     * u0;              /* destination cursor inside U */
+   const int                  incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP03N_LOG2_DEPTH );   /* NOTE(review): assumes LDU << LOG2_DEPTH fits in int -- confirm */
+   int                        nr, nu;            /* leftover / unrolled entry counts */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;        /* nothing to copy */
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu is the remainder. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) <<
+                          HPL_LASWP03N_LOG2_DEPTH ) );
+/* Unrolled passes: each pass scatters DEPTH entries of every W column into U, then advances U and w. */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) ));   /* offset read from W0[i*LDW], stored as a double */
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         /* NOTE(review): the double -> size_t conversion above presumably expects a non-negative integral value -- confirm against the caller */
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP03N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+/* Remainder: U and w were advanced past the nu unrolled entries by the loop above, so they are used as-is here. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.o
new file mode 100644
index 000000000..dd84de51f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c
new file mode 100644
index 000000000..fece692ce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor.  Only HPL_LASWP03T_DEPTH is tested here, so an
+ * override must also define HPL_LASWP03T_LOG2_DEPTH (DEPTH == 1<<LOG2_DEPTH) */
+#ifndef HPL_LASWP03T_DEPTH
+#define    HPL_LASWP03T_DEPTH       32
+#define    HPL_LASWP03T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03T
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03T
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03T copies  columns of W into an array U.  The  destination
+ * in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M).  Columns
+ *         of W are copied within the array U at the positions specified
+ *         in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incU: advance of U, in entries, per unrolled pass)
+ */
+   const double               * w = W, * w0; 
+   double                     * u0;
+   const int                  incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH );
+   int                        nr, nu;   /* nu: unrolled part of N, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to copy */
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
+                          HPL_LASWP03T_LOG2_DEPTH ) );
+   /* Unrolled pass: each iteration copies one #if-unrolled chunk per column */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         /* u0: current chunk of the U column named by W0; w0: chunk of W(:,i) */
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP03T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP03T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+   /* Remainder: U and w now point past the nu entries already copied */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.o
new file mode 100644
index 000000000..ef64f8ad7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.c
new file mode 100644
index 000000000..4f9c490a5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.c
@@ -0,0 +1,285 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor.  Only HPL_LASWP04N_DEPTH is tested here, so an
+ * override must also define HPL_LASWP04N_LOG2_DEPTH (DEPTH == 1<<LOG2_DEPTH) */
+#ifndef HPL_LASWP04N_DEPTH
+#define    HPL_LASWP04N_DEPTH       32
+#define    HPL_LASWP04N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04N
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04N
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U
+ * with columns of W. In addition M1 - M0 columns of  W  are copied into
+ * rows of U.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of rows of U that should be
+ *         copied into  A  and replaced by columns of  W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies the number of columns of W that should
+ *         be copied into rows of U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points to  an array of dimension (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M1).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0 is an array of size (M1-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.  Only the entries W0(i*LDW), i in [M0..M1), are read.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of leading dimension LDW.  For  i in
+ *         [0..M0),  W(:,i)  replaces the row  LINDXAU(i)  of U;  for  i in
+ *         [M0..M1), W(:,i) is copied into the row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which rows of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  incA/incU = LDA/LDU * 2^LOG2_DEPTH: advance of A/U
+ * per unrolled pass.  NOTE(review): formed in int - confirm it cannot wrap. */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) << 
+                                            HPL_LASWP04N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP04N_LOG2_DEPTH );
+   int                        nr, nu;   /* nu: unrolled part of N, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
+                          HPL_LASWP04N_LOG2_DEPTH ) );
+   /* Unrolled pass: each iteration handles one #if-unrolled chunk of columns */
+   for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04N_DEPTH )
+   {
+      for( i =  0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         /* per column: save the U entry into A, then overwrite it from W */
+         *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[22]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU;
+#endif
+      }
+      /* i in [M0..M1): W(:,i) only overwrites the U row named by W0(i*LDW) */
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+   /* Remainder: A, U and w now point past the nu columns already handled */
+   if( nr )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { *a0 = *u0; *u0 = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.o
new file mode 100644
index 000000000..d55277d49
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.c
new file mode 100644
index 000000000..9cbb4c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor.  Only HPL_LASWP04T_DEPTH is tested here, so an
+ * override must also define HPL_LASWP04T_LOG2_DEPTH (DEPTH == 1<<LOG2_DEPTH) */
+#ifndef HPL_LASWP04T_DEPTH
+#define    HPL_LASWP04T_DEPTH       32
+#define    HPL_LASWP04T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04T
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04T
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
+ * columns of U with columns of W. In addition M1 - M0 columns of W  are
+ * copied into U.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of columns of U that should
+ *         be copied into A and replaced by columns of W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies  the number of columns of W that  will
+ *         be copied into U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the length of the columns of  U  that
+ *         will be copied into rows of A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0 is an array of size (M1-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.  Only the entries W0(i*LDW), i in [M0..M1), are read.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of leading dimension LDW.  For  i in
+ *         [0..M0),  W(:,i)  replaces the column LINDXAU(i) of U;  for i in
+ *         [M0..M1), W(:,i) is copied into the column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which columns of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the  local column indexes of  U  that should be copied into A
+ *         and replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (element offsets below are formed in size_t so that
+ * products such as LINDXAU[i] * LDU or i * LDW are never evaluated in int) */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP04T_LOG2_DEPTH ),
+                              incU = (   1 << HPL_LASWP04T_LOG2_DEPTH );
+   int                        nr, nu;   /* nu: unrolled part of N, nr: rest */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+   /* nu = N rounded down to a multiple of 2^LOG2_DEPTH, nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
+                          HPL_LASWP04T_LOG2_DEPTH ) );
+   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04T_DEPTH )
+   {
+      for( i = 0; i < M0; i++ )   /* save U column into A row, then load W */
+      {
+         a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
+         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
+         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
+         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
+         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
+         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
+         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
+         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
+         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
+         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
+         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
+         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
+         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
+         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
+         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
+         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
+         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
+         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
+         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
+         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
+         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
+         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
+         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
+         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
+         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
+         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
+#endif
+      }
+      for( i = M0; i < M1; i++ )   /* W only overwrites the U column from W0 */
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+   if( nr > 0 )   /* remainder: A, U and w now point past the nu entries done */
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); w0 = w + (size_t)(i) * (size_t)(LDW);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.o
new file mode 100644
index 000000000..8382d0d39
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.c
new file mode 100644
index 000000000..3edcf91a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth.  DEPTH must equal 2^LOG2_DEPTH and be <= 32.
+ */
+#ifndef HPL_LASWP05N_DEPTH
+#define    HPL_LASWP05N_DEPTH       32
+#define    HPL_LASWP05N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05N copies the rows of U whose indexes are given by LINDXAU
+ * into the rows of A at the positions indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of U that should be
+ *         copied into A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points to an array of dimension  (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local row indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA, incU: pointer step per DEPTH columns)
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP05N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of DEPTH;  nr = N - nu  (< DEPTH). */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
+                            HPL_LASWP05N_LOG2_DEPTH ) );
+/* Unrolled part: DEPTH entries of each selected row are copied per pass. */
+   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP05N_DEPTH >  1 )
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  2 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  4 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  8 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH > 16 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Cleanup: the remaining nr entries of each row, copied by a plain loop. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.o
new file mode 100644
index 000000000..cfedb5ffe
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c
new file mode 100644
index 000000000..0adaa102d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c
@@ -0,0 +1,196 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth.  DEPTH must equal 2^LOG2_DEPTH and be <= 32.
+ */
+#ifndef HPL_LASWP05T_DEPTH
+#define    HPL_LASWP05T_DEPTH       32
+#define    HPL_LASWP05T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05T copies the columns of U whose indexes are given by
+ * LINDXAU into the rows of A at the positions indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the number of columns of U to copy into A. M >= 0.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that will
+ *         be copied into rows of A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA: A step per DEPTH columns; incU = DEPTH)
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of DEPTH;  nr = N - nu  (< DEPTH). */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
+                            HPL_LASWP05T_LOG2_DEPTH ) );
+/* Unrolled part: DEPTH contiguous entries u0[0..DEPTH-1] go to one row of A. */
+   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+
+         *a0 = u0[ 0]; a0 += LDA;
+#if ( HPL_LASWP05T_DEPTH >  1 )
+         *a0 = u0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  2 )
+         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  4 )
+         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  8 )
+         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
+         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
+         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
+         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH > 16 )
+         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
+         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
+         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
+         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
+         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
+         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
+         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
+         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
+#endif
+      }
+   }
+/* Cleanup: the remaining nr entries of each column, copied by a plain loop. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o
new file mode 100644
index 000000000..f995aa8f5
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c
new file mode 100644
index 000000000..a74bae75c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c
@@ -0,0 +1,206 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth.  DEPTH must equal 2^LOG2_DEPTH and be <= 32.
+ */
+#ifndef HPL_LASWP06N_DEPTH
+#define    HPL_LASWP06N_DEPTH       32
+#define    HPL_LASWP06N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06N
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06N swaps row i of U with the row of A at position
+ * LINDXA[i], for i = 0 .. M-1.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with rows of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with rows of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  have been
+ *         swapped with the first M rows of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,N).  This
+ *         array contains the rows of U that are to be swapped with rows
+ *         of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (r: swap temporary; incA, incU: step per DEPTH columns)
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP06N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of DEPTH;  nr = N - nu  (< DEPTH). */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) <<
+                            HPL_LASWP06N_LOG2_DEPTH ) );
+/* Unrolled part: DEPTH entries of each row pair are swapped per pass. */
+   for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP06N_DEPTH >  1 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  2 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  4 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  8 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH > 16 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Cleanup: the remaining nr entries of each row, swapped by a plain loop. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { r = *a0; *a0 = *u0; *u0 = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o
new file mode 100644
index 000000000..ccc0984e9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c
new file mode 100644
index 000000000..fb53c2a31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c
@@ -0,0 +1,207 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP06T_DEPTH
+#define    HPL_LASWP06T_DEPTH       32
+#define    HPL_LASWP06T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06T
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06T swaps  columns  of  U  with  rows  of  A  at  positions
+ * indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with columns of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with columns of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns of  U  that are to be swapped with
+ *         rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to swap */
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) <<
+                            HPL_LASWP06T_LOG2_DEPTH ) ); /* nu: unrolled, nr: rest */
+
+   for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU )
+   {  /* one pass swaps HPL_LASWP06T_DEPTH entries of each selected row */
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);           /* row LINDXA[i] of A */
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);   /* column i of U      */
+
+         r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA;
+#if ( HPL_LASWP06T_DEPTH >  1 )
+         r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  2 )
+         r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  4 )
+         r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  8 )
+         r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA;
+         r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA;
+         r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA;
+         r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA;
+         r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA;
+         r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA;
+         r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH > 16 )
+         r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA;
+         r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA;
+         r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA;
+         r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA;
+         r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA;
+         r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA;
+         r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA;
+         r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA;
+         r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA;
+         r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA;
+         r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA;
+         r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA;
+         r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA;
+         r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA;
+         r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA;
+         r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA;
+#endif
+      }
+   }
+
+   if( nr > 0 )   /* leftover nr = N - nu entries of each selected row */
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA )
+         { r = *a0; *a0 = u0[j]; u0[j] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o
new file mode 100644
index 000000000..1e2d93537
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c
new file mode 100644
index 000000000..7dbf934f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP10N_DEPTH
+#define    HPL_LASWP10N_DEPTH       32
+#define    HPL_LASWP10N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp10N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp10N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp10N performs a sequence  of  local column interchanges on a
+ * matrix A.  One column interchange is initiated  for columns 0 through
+ * N-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the array A. M
+ *         must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of the array A. N
+ *         must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an  array of  dimension (LDA,N).  This
+ *         array contains the columns onto which the interchanges should
+ *         be applied. On exit, A contains the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry, IPIV[j] is the column to swap with column j, j < N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * a0, * a1;
+   const int                  incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH );
+   int                        jp, mr, mu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to swap */
+
+   mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH )
+                            << HPL_LASWP10N_LOG2_DEPTH ) );
+
+   for( j = 0; j < N; j++ )
+   {
+      if( j != ( jp = IPIV[j] ) )   /* skip columns that stay in place */
+      {
+         a0 = A + (size_t)(j)  * (size_t)(LDA);   /* size_t: no int overflow */
+         a1 = A + (size_t)(jp) * (size_t)(LDA);   /* assumes jp >= 0         */
+         for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA )
+         {  /* unrolled part: incA rows of both columns per pass */
+            r = *a0;    *a0    = *a1;    *a1    = r;
+#if ( HPL_LASWP10N_DEPTH >  1 )
+            r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  2 )
+            r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r;
+            r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  4 )
+            r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r;
+            r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r;
+            r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r;
+            r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  8 )
+            r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r;
+            r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r;
+            r = a0[10]; a0[10] = a1[10]; a1[10] = r;
+            r = a0[11]; a0[11] = a1[11]; a1[11] = r;
+            r = a0[12]; a0[12] = a1[12]; a1[12] = r;
+            r = a0[13]; a0[13] = a1[13]; a1[13] = r;
+            r = a0[14]; a0[14] = a1[14]; a1[14] = r;
+            r = a0[15]; a0[15] = a1[15]; a1[15] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH > 16 )
+            r = a0[16]; a0[16] = a1[16]; a1[16] = r;
+            r = a0[17]; a0[17] = a1[17]; a1[17] = r;
+            r = a0[18]; a0[18] = a1[18]; a1[18] = r;
+            r = a0[19]; a0[19] = a1[19]; a1[19] = r;
+            r = a0[20]; a0[20] = a1[20]; a1[20] = r;
+            r = a0[21]; a0[21] = a1[21]; a1[21] = r;
+            r = a0[22]; a0[22] = a1[22]; a1[22] = r;
+            r = a0[23]; a0[23] = a1[23]; a1[23] = r;
+            r = a0[24]; a0[24] = a1[24]; a1[24] = r;
+            r = a0[25]; a0[25] = a1[25]; a1[25] = r;
+            r = a0[26]; a0[26] = a1[26]; a1[26] = r;
+            r = a0[27]; a0[27] = a1[27]; a1[27] = r;
+            r = a0[28]; a0[28] = a1[28]; a1[28] = r;
+            r = a0[29]; a0[29] = a1[29]; a1[29] = r;
+            r = a0[30]; a0[30] = a1[30]; a1[30] = r;
+            r = a0[31]; a0[31] = a1[31]; a1[31] = r;
+#endif
+         }
+
+         for( i = 0; i < mr; i++ )   /* leftover mr = M - mu rows */
+         { r = a0[i]; a0[i] = a1[i]; a1[i] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp10N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o
new file mode 100644
index 000000000..90f330e48
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c
new file mode 100644
index 000000000..e1b5bbfac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2l
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2l
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2l computes and returns the local index of the matrix entry
+ * pointed to by the global index IG.  The returned local index  is the
+ * same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated,  in  which  case  this  routine returns IG in all
+ *         processes. Otherwise, the value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( IG );
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on 
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1, 
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+   j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+   return( NB * (j - i) +   /* test below is nonzero iff MYROC >= 1 */
+           ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) );
+/*
+ * End of HPL_indxg2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o
new file mode 100644
index 000000000..e62d9ce51
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c
new file mode 100644
index 000000000..74662f9d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c
@@ -0,0 +1,176 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_indxg2lp
+(
+   int *                            IL,
+   int *                            PROC,
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+void HPL_indxg2lp
+( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
+   int *                            IL;
+   int *                            PROC;
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2lp computes the local index of a matrix entry pointed to by
+ * the global index IG  as well as the process coordinate that possesses
+ * this entry. The local returned index is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (output)                      int *
+ *         On exit, IL specifies the local index corresponding to IG. IL
+ *         is at least zero.
+ *
+ * PROC    (output)                      int *
+ *         On exit,  PROC  is the  coordinate of the process  owning the
+ *         entry specified by IG. PROC is at least zero  and  less  than
+ *         NPROCS, except that SRCPROC is returned as is when it is -1.
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated, and IL = IG is returned in all processes.  Else,
+ *         SRCPROC owns the first block and is used to compute PROC.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      *IL   = IG;
+      *PROC = SRCPROC;
+   }
+   else
+   {
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1,
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+      j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * IG  is in block  1 + ( IG - INB ) / NB.  Add this to SRCPROC and take
+ * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */
+      *PROC = SRCPROC + 1 + i;
+      *PROC = MPosMod( *PROC, NPROCS );
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+      *IL = NB * (j - i) +   /* test below is nonzero iff MYROC >= 1 */
+            ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG );
+   }
+/*
+ * End of HPL_indxg2lp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o
new file mode 100644
index 000000000..a5da4d443
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c
new file mode 100644
index 000000000..d0e75f516
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c
@@ -0,0 +1,128 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2p
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2p
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2p returns the coordinate of the process that possesses the
+ * entry of a matrix specified by the global index IG.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         is in [0, NPROCS),  or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        proc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG belongs to the first block, or the data is not distributed
+ * (SRCPROC is -1), or there is just one process in this dimension.
+ */
+      return( SRCPROC );
+/*
+ * Otherwise,  IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC
+ * and reduce the sum  modulo NPROCS  with MPosMod  (definition  of  the
+ * block-cyclic data distribution).
+ */
+   proc = SRCPROC + 1 + ( IG - INB ) / NB;
+   return( MPosMod( proc, NPROCS ) );
+/*
+ * End of HPL_indxg2p
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o
new file mode 100644
index 000000000..59e79159d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c
new file mode 100644
index 000000000..7f139425a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c
@@ -0,0 +1,164 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxl2g
+(
+   const int                        IL,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxl2g
+( IL, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        IL;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxl2g returns the global index of the  matrix  entry pointed to
+ * by the local index IL of the process indicated by PROC.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (input)                       const int
+ *         On entry, IL specifies the local  index of the matrix  entry.
+ *         IL must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC  specifies the coordinate of the process whose
+ *         local index  IL  is to be converted.  PROC  must  be at least
+ *         zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         is in [0, NPROCS),  or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * The data is not distributed (SRCPROC is -1), or there is just one
+ * process in this dimension of the grid: local and global indexes match.
+ */
+      return( IL );
+   }
+   else if( PROC == SRCPROC )
+   {
+/*
+ * If I am SRCPROC, my first block is of size INB
+ */
+      if( IL < INB )
+/*
+ * If  IL  belongs to the first block,  the local and global indexes are
+ * equal.
+ */
+         return ( IL );
+/*
+ * The  number  of  entire  blocks  before  the  one  IL  belongs  to is
+ * ( IL - INB ) / NB + 1.  Each of the other NPROCS-1 processes thus has
+ * NB*( ( IL-INB )/NB + 1 ) entries  that are  globally  before  the
+ * global entry corresponding to IL.
+ */
+      return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL );
+   }
+   else if( PROC < SRCPROC )
+   {
+/*
+ * Otherwise, the process of coordinate  MOD(SRCPROC+1, NPROCS) owns the
+ * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro-
+ * cesses between that process and  PROC  not  included  when going from
+ * left to right on the process line  with  possible wrap around.  These
+ * IPROC  processes  have  one more  NB  block than the other processes,
+ * which own IL / NB blocks of size NB.
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB );
+   }
+   else
+   {
+/*
+ * Same reasoning as above with IPROC = PROC - SRCPROC - 1 (no wrap).
+ */
+      return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1        )+IL+INB );
+   }
+/*
+ * End of HPL_indxl2g
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o
new file mode 100644
index 000000000..739c73a85
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c
new file mode 100644
index 000000000..2580f2ad4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c
@@ -0,0 +1,382 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_infog2l
+(
+   int                              I,
+   int                              J,
+   const int                        IMB,
+   const int                        MB,
+   const int                        INB,
+   const int                        NB,
+   const int                        RSRC,
+   const int                        CSRC,
+   const int                        MYROW,
+   const int                        MYCOL,
+   const int                        NPROW,
+   const int                        NPCOL,
+   int *                            II,
+   int *                            JJ,
+   int *                            PROW,
+   int *                            PCOL
+)
+#else
+void HPL_infog2l
+( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
+   int                              I;
+   int                              J;
+   const int                        IMB;
+   const int                        MB;
+   const int                        INB;
+   const int                        NB;
+   const int                        RSRC;
+   const int                        CSRC;
+   const int                        MYROW;
+   const int                        MYCOL;
+   const int                        NPROW;
+   const int                        NPCOL;
+   int *                            II;
+   int *                            JJ;
+   int *                            PROW;
+   int *                            PCOL;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_infog2l computes the starting local index II, JJ corresponding to
+ * the submatrix starting globally at the entry pointed by  I,  J.  This
+ * routine returns the coordinates in the grid of the process owning the
+ * matrix entry of global indexes I, J, namely PROW and PCOL.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                int
+ *         On entry,  I  specifies  the  global  row index of the matrix
+ *         entry. I must be at least zero.
+ *
+ * J       (global input)                int
+ *         On entry,  J  specifies the global column index of the matrix
+ *         entry. J must be at least zero.
+ *
+ * IMB     (global input)                const int
+ *         On entry,  IMB  specifies  the size of the first row block of
+ *         the global matrix. IMB must be at least one.
+ *
+ * MB      (global input)                const int
+ *         On entry,  MB specifies the blocking factor used to partition
+ *         and  distribute the rows of the matrix A.  MB  must be larger
+ *         than one.
+ *
+ * INB     (global input)                const int
+ *         On entry, INB specifies the size of the first column block of
+ *         the global matrix. INB must be at least one.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the columns of the matrix A. NB must be larger
+ *         than one.
+ *
+ * RSRC    (global input)                const int
+ *         On entry,  RSRC  specifies  the row coordinate of the process
+ *         that possesses the first row block. RSRC is -1 (rows are not
+ *         distributed) or at least zero and strictly less than NPROW.
+ *
+ * CSRC    (global input)                const int
+ *         On entry, CSRC specifies the column coordinate of the process
+ *         that possesses the first column block.  CSRC is -1  (columns
+ *         not distributed) or at least zero and strictly below NPCOL.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * II      (local output)                int *
+ *         On exit, II  specifies the  local  starting  row index of the
+ *         submatrix. On exit, II is at least 0.
+ *
+ * JJ      (local output)                int *
+ *         On exit, JJ  specifies the local starting column index of the
+ *         submatrix. On exit, JJ is at least 0.
+ *
+ * PROW    (global output)               int *
+ *         On exit, PROW is the row coordinate of the process owning the
+ *         entry specified by the global index I.  PROW is at least zero
+ *         and less than NPROW.
+ *
+ * PCOL    (global output)               int *
+ *         On exit, PCOL  is the column coordinate of the process owning
+ *         the entry specified by the global index J.  PCOL  is at least
+ *         zero and less than NPCOL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int            ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
+/* ..
+ * .. Executable Statements ..
+ */
+   imb   = IMB;
+   *PROW = RSRC;
+
+   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
+   {
+/*
+ * The data is not distributed,  or there is just one process row in the
+ * grid.
+ */
+     *II = I;
+   }
+   else if( I < imb )
+   {
+/*
+ * I refers to an entry in the first block of rows
+ */
+     *II = ( MYROW == *PROW ? I : 0 );
+   }
+   else
+   {
+      mb   = MB;
+      rsrc = *PROW;
+/*
+ * The discussion goes as follows:  compute  my distance from the source
+ * process so that  within  this process coordinate system,  the  source
+ * process   is  the  process  such  that  mydist = 0,  or  equivalently
+ * MYROW == rsrc.
+ *
+ * Find  out  the global coordinate of the block I belongs to (nblocks),
+ * as well as the minimum local number of blocks that every process has.
+ *
+ * when mydist < nblocks-ilocblk*NPROW,   I own ilocblk + 1 full blocks,
+ * when mydist > nblocks-ilocblk*NPROW,   I own ilocblk     full blocks,
+ * when mydist = nblocks-ilocblk*NPROW,   I own ilocblk     full blocks
+ * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */
+      if( MYROW == rsrc )
+      {
+/*
+ * I refers  to an entry  that is not in the first block, find out which
+ * process has it.
+ */
+         nblocks = ( I - imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Since  mydist = 0  and nblocks - ilocblk * NPROW >= 0, there are only
+ * three possible cases:
+ *
+ *   1) When  mydist = nblocks - ilocblk * NPROW = 0  and  I do  not own
+ *      I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this
+ *      case  cannot  happen  when  ilocblk is zero, since nblocks is at
+ *      least one.
+ *
+ *   2) When  mydist = nblocks - ilocblk * NPROW = 0  and  I own  I,  in
+ *      which  case  I  and  II  can  respectively  be  written as IMB + 
+ *      (nblocks-1)*MB + IL  and  IMB + (ilocblk-1) * MB + IL.  That  is
+ *      II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
+ *      when ilocblk is zero, since nblocks is at least one.
+ *
+ *   3) mydist = 0 < nblocks - ilocblk * NPROW,  the source process owns
+ *      ilocblk+1 full blocks,  and  therefore  II = IMB + ilocblk * MB.
+ *      Note that when ilocblk is zero, II is just IMB.
+ */
+         if( nblocks < NPROW )
+         {
+            *II = imb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            if( ilocblk * NPROW >= nblocks )
+            {
+               *II = ( ( MYROW == *PROW ) ?
+                       I   + ( ilocblk - nblocks ) * mb :
+                       imb + ( ilocblk - 1       ) * mb );
+            }
+            else
+            {
+               *II =  imb + ilocblk * mb;
+            }
+         }
+      }
+      else
+      {
+/*
+ * I is not in the first block:  find out which process has it.  Note
+ * that I is decremented by imb here and is used shifted further below.
+ */
+         nblocks = ( I -= imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the  source process is the process such that
+ * mydist=0.
+ */
+         if( ( mydist  = MYROW - rsrc ) < 0 ) mydist += NPROW;
+/*
+ * When mydist <  nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
+ * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I,  I own ilocblk
+ * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
+ * blocks and I,  in which case I can be written as IMB + (nblocks-1)*MB
+ * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB.
+ */
+         if( nblocks < NPROW )
+         {
+            mydist -= nblocks;
+            *II     = ( ( mydist < 0 ) ? mb :
+                        ( ( MYROW == *PROW ) ?
+                          I + ( 1 - nblocks ) * mb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            mydist -= nblocks - ilocblk * NPROW;
+            *II     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
+                        ( ( MYROW == *PROW ) ?
+                          ( ilocblk - nblocks + 1 ) * mb + I :
+                          ilocblk * mb ) );
+         }
+      }
+   }
+/*
+ * Same computation for the columns (J, INB, NB, CSRC, MYCOL, NPCOL)
+ */
+   inb   = INB;
+   *PCOL = CSRC;
+
+   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
+   {
+      *JJ = J;
+   }
+   else if( J < inb )
+   {
+      *JJ = ( MYCOL == *PCOL ? J : 0 );
+   }
+   else
+   {
+      nb   = NB;
+      csrc = *PCOL;
+
+      if( MYCOL == csrc )
+      {
+         nblocks = ( J - inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            *JJ = inb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            if( ilocblk * NPCOL >= nblocks )
+            {
+               *JJ = ( ( MYCOL == *PCOL ) ?
+                       J   + ( ilocblk - nblocks ) * nb :
+                       inb + ( ilocblk - 1       ) * nb );
+            }
+            else
+            {
+               *JJ = inb + ilocblk * nb;
+            }
+         }
+      }
+      else
+      {
+         nblocks = ( J -= inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            mydist -= nblocks;
+            *JJ     = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
+                        J + ( 1 - nblocks )*nb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            mydist -= nblocks - ilocblk * NPCOL;
+            *JJ     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
+                        ( ( MYCOL == *PCOL ) ?
+                          ( ilocblk - nblocks + 1 ) * nb + J :
+                          ilocblk * nb ) );
+         }
+      }
+   }
+/*
+ * End of HPL_infog2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o
new file mode 100644
index 000000000..60e9f71da
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c
new file mode 100644
index 000000000..39cd736d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numroc
+(
+   const int                        N,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numroc
+( N, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numroc returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index 0.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Delegate to HPL_numrocI with the starting global index fixed to 0.
+ */
+   return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) );
+/*
+ * End of HPL_numroc
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o
new file mode 100644
index 000000000..5c9ee9fd6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c
new file mode 100644
index 000000000..70f3497de
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c
@@ -0,0 +1,243 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numrocI
+(
+   const int                        N,
+   const int                        I,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numrocI
+( N, I, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        I;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numrocI returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index I.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * I       (input)                       const int
+ *         On entry, I  specifies the global index of the  first row  or
+ *         column being dealt out. I must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ilocblk, inb, mydist, nblocks, srcproc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid.
+ */
+      return( N );
+/*
+ * Compute coordinate of process owning I and corresponding INB
+ */
+   srcproc = SRCPROC;
+
+   if( ( inb = INB - I ) <= 0 )
+   {
+/*
+ * I is not in the first block, find out which process has it and update
+ * the size of first block
+ */
+      srcproc += ( nblocks = (-inb) / NB + 1 ); 
+      srcproc -= ( srcproc / NPROCS ) * NPROCS;
+      inb     += nblocks * NB;
+   }
+/*
+ * Now  everything  is  just like  N, I=0, INB, NB, srcproc, NPROCS. The
+ * discussion goes as follows:  compute my distance from the source pro-
+ * cess  so that within this process coordinate system,  the source pro-
+ * cess is the process such that mydist = 0, or PROC == srcproc.
+ *
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries. Then remark that
+ *
+ * when  mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks,
+ * when  mydist > nblocks - ilocblk*NPROCS, I own ilocblk   full blocks,
+ * when  mydist = nblocks - ilocblk*NPROCS, either the last block is not
+ * full and I own it,  or the last block is full and I am the first pro-
+ * cess owning only ilocblk full blocks.
+ */
+   if( PROC == srcproc )
+   {
+/*
+ * I am the source process, i.e. I own I (mydist=0).  When N <= inb, the
+ * answer is simply N.
+ */
+      if( N <= inb ) return( N );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries.
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Since  mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only
+ * two possible cases:
+ *
+ *   1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di-
+ *      vides the global number of full blocks,  then the source process
+ *      srcproc owns one more block than the other processes;  and N can
+ *      be rewritten as N = INB + (nblocks-1) * NB + LNB  with  LNB >= 0
+ *      size of the last block. Similarly, the local value Np correspon-
+ *      ding to N can be written as  Np = INB + (ilocblk-1) * NB + LNB =
+ *      N + ( ilocblk-1 - (nblocks-1) )*NB.  Note  that this case cannot
+ *      happen when ilocblk is zero, since nblocks is at least one.
+ *
+ *   2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only
+ *      owns full blocks,  and  therefore Np = INB + ilocblk * NB.  Note
+ *      that when ilocblk is zero, Np is just INB.
+ */
+      if( nblocks < NPROCS ) return( inb );
+ 
+      ilocblk = nblocks / NPROCS;
+      return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB :
+              N + ( ilocblk - nblocks ) * NB );
+   }
+   else
+   {
+/*
+ * I am not the source process. When N <= inb, the answer is simply 0.
+ */
+      if( N <= inb ) return( 0 );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the source  process is the process such that
+ * mydist=0.
+ */
+      if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS;
+/*
+ * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk   full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist = nblocks - ilocblk*NPROCS,
+ * either the last block is not full and I own it, in which case
+ *    N = INB + (nblocks - 1)*NB + LNB with  LNB  the  size  of the last
+ *    block such that NB > LNB > 0;  the local value Np corresponding to
+ *    N is given by  Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB;
+ * or the  last  block  is  full  and I am the first process owning only
+ *    ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and
+ *    Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB.
+ */
+      if( nblocks < NPROCS )
+         return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 :
+                 N - inb + NB * ( 1 - nblocks ) ) );
+ 
+      ilocblk = nblocks / NPROCS;
+      mydist -= nblocks - ilocblk * NPROCS;
+      return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
+              ( ( mydist > 0 ) ? ilocblk * NB :
+                N - inb + NB * ( ilocblk - nblocks + 1 ) ) );
+   }
+/*
+ * End of HPL_numrocI
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o
new file mode 100644
index 000000000..d6d0bbf65
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c
new file mode 100644
index 000000000..268975fc1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c
@@ -0,0 +1,137 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pabort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pabort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pabort displays an error message on stderr and halts execution.
+ * The formatted message is truncated to fit the local 128-byte buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/*
+ * Format the message; the write is bounded so that cline is not overrun.
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( stderr,
+                   "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+
+   MPI_Abort( MPI_COMM_WORLD, -1 );
+   exit( -1 );
+/*
+ * End of HPL_pabort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o
new file mode 100644
index 000000000..2bf97bc44
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c
new file mode 100644
index 000000000..73cf649da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c
@@ -0,0 +1,143 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlamch
+(
+   MPI_Comm                         COMM,
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_pdlamch
+( COMM, CMACH )
+   MPI_Comm                         COMM;
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlamch  fetches  from  HPL_dlamch  the  local  machine-specific
+ * arithmetic constant selected by CMACH and,  for some selectors, hands
+ * it to HPL_all_reduce over COMM before returning it:
+ *
+ *    eps, sfmin, emin, rmin : reduced with HPL_max,
+ *    emax, rmax             : reduced with HPL_min,
+ *    any other selector     : returned as computed locally.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * CMACH   (global input)                const HPL_T_MACH
+ *         Specifies the value to be returned by HPL_pdlamch
+ *            = HPL_MACH_EPS,   HPL_pdlamch := eps (default)
+ *            = HPL_MACH_SFMIN, HPL_pdlamch := sfmin
+ *            = HPL_MACH_BASE,  HPL_pdlamch := base
+ *            = HPL_MACH_PREC,  HPL_pdlamch := eps*base
+ *            = HPL_MACH_MLEN,  HPL_pdlamch := t
+ *            = HPL_MACH_RND,   HPL_pdlamch := rnd
+ *            = HPL_MACH_EMIN,  HPL_pdlamch := emin
+ *            = HPL_MACH_RMIN,  HPL_pdlamch := rmin
+ *            = HPL_MACH_EMAX,  HPL_pdlamch := emax
+ *            = HPL_MACH_RMAX,  HPL_pdlamch := rmax
+ *
+ *         where
+ *
+ *            eps   = relative machine precision,
+ *            sfmin = safe minimum,
+ *            base  = base of the machine,
+ *            prec  = eps*base,
+ *            t     = number of digits in the mantissa,
+ *            rnd   = 1.0 if rounding occurs in addition,
+ *            emin  = minimum exponent before underflow,
+ *            rmin  = underflow threshold,
+ *            emax  = largest exponent before overflow,
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     value = HPL_dlamch( CMACH );
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * eps, sfmin, emin and rmin go through HPL_all_reduce with HPL_max.
+ */
+   if( ( CMACH == HPL_MACH_EPS  ) || ( CMACH == HPL_MACH_SFMIN ) ||
+       ( CMACH == HPL_MACH_EMIN ) || ( CMACH == HPL_MACH_RMIN  ) )
+   {
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE, HPL_max,
+                             COMM );
+   }
+/*
+ * emax and rmax go through HPL_all_reduce with HPL_min.
+ */
+   else if( ( CMACH == HPL_MACH_EMAX ) || ( CMACH == HPL_MACH_RMAX ) )
+   {
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE, HPL_min,
+                             COMM );
+   }
+
+/*
+ * Every other selector is returned exactly as computed locally.
+ */
+   return( value );
+/*
+ * End of HPL_pdlamch
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o
new file mode 100644
index 000000000..c7731580e
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c
new file mode 100644
index 000000000..40bdcc36b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c
@@ -0,0 +1,242 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid *               GRID,
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid *               GRID;
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange returns  the value of the one norm,  or the infinity norm,
+ * or the element of largest absolute value of a distributed matrix A:  
+ *  
+ *  
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+ *    norm1(A),        when NORM = HPL_NORM_1,                          
+ *    normI(A),        when NORM = HPL_NORM_I,                          
+ *  
+ * where norm1 is the one norm  (maximum column sum)  and  normI  is the
+ * infinity norm (maximum row sum). max(abs(A(i,j))) is not a matrix norm.
+ * When min(M,N) is zero, v0 (HPL_rzero) is returned before any reduction.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         On entry,  NORM  specifies  the  value to be returned by this
+ *         function as described above.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+ *         that contains the local pieces of the distributed matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     s, v0=HPL_rzero, * work = NULL;
+   MPI_Comm                   Acomm, Ccomm, Rcomm;
+   int                        ii, jj, mp, mycol, myrow, npcol, nprow,
+                              nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Ccomm = GRID->col_comm;
+   Acomm = GRID->all_comm;
+
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( Mmin( M, N ) == 0 ) { return( v0 ); }
+   else if( NORM == HPL_NORM_A )
+   {
+/*
+ * max( abs( A ) ): scan the local mp x nq piece, then HPL_max over Acomm
+ */
+      if( ( nq > 0 ) && ( mp > 0 ) )
+      {
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { v0 = Mmax( v0, Mabs( *A ) ); A++; }
+            A += LDA - mp;   /* A advanced mp above: LDA in total per column */
+         }
+      }
+      (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                         Acomm );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * Find norm_1( A ): work[jj] = sum of abs values of local column jj.
+ */
+      if( nq > 0 )
+      {
+         work = (double*)malloc( (size_t)(nq) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            s = HPL_rzero;
+            for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; }
+            work[jj] = s; A += LDA - mp;
+         }
+/*
+ * Find sum of global matrix columns, store on row 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum,
+                            0, Ccomm );
+/*
+ * Find maximum sum of columns for 1-norm
+ */
+         if( myrow == 0 )
+         { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in row 0, store result in process (0,0)
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                            Rcomm );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Find norm_inf( A ): work[ii] = sum of abs values of local row ii.
+ */
+      if( mp > 0 )
+      {
+         work = (double*)malloc( (size_t)(mp) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { work[ii] += Mabs( *A ); A++; }
+            A += LDA - mp;
+         }
+/*       
+ * Find sum of global matrix rows, store on column 0 of process grid
+ */      
+         (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum,
+                            0, Rcomm );
+/*       
+ * Find maximum sum of rows for inf-norm
+ */      
+         if( mycol == 0 )
+         { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in column 0, store result in process (0,0)
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max,
+                            0, Ccomm );
+   }
+/*
+ * Broadcast answer to every process in the grid
+ */
+   (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm );
+
+   return( v0 );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o
new file mode 100644
index 000000000..b9e697826
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c
new file mode 100644
index 000000000..20f11129a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c
@@ -0,0 +1,236 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        IAROW,
+   const int                        IACOL,
+   const char *                     CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        IAROW;
+   const int                        IACOL;
+   const char *                     CMATNM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints to standard error a distributed matrix A. Pieces
+ * not owned by process (0,0) are sent to it and printed there;  (0,0)
+ * calls HPL_pabort if it cannot allocate its NB-double receive buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Acomm;
+   double                     * buf = NULL;
+   int                        h, i, ib, icurcol=IACOL, icurrow=IAROW,
+                              ii=0, j, jb, jj=0, mycol, myrow, npcol,
+                              nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm; 
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf == NULL ) )
+   { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+               {
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               }
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, Acomm );
+                  HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW;
+      }
+      if( mycol == icurcol ) jj += jb;
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o
new file mode 100644
index 000000000..f2f86a8bd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.
+ * The formatted text is truncated to sizeof( cline ) - 1 characters.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded */
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o
new file mode 100644
index 000000000..1d409181d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c
new file mode 100644
index 000000000..644641412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c
@@ -0,0 +1,149 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax  searches the local part of the current column  for  its
+ * entry of maximum absolute value and stores the outcome in WORK[0:3]:
+ * the local maximum absolute value scalar,  its local row index,  its
+ * global row index and the coordinate of the process owning this max.
+ * When N is less than 1,  WORK[0:2] is set to zero and WORK[3]  to  the
+ * total number of process rows.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         Pointer to the data structure holding the panel information.
+ *
+ * N       (local input)                 const int
+ *         Local number of rows of the column of  A  that  is  searched.
+ *
+ * II      (local input)                 const int
+ *         Row offset of the searched column with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         Column offset of the searched column w.r.t. the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         Work array of size at least 4.  On exit it holds:
+ *            WORK[0]   the local maximum absolute value scalar,
+ *            WORK[1]   the corresponding local  row index,
+ *            WORK[2]   the corresponding global row index,
+ *            WORK[3]   the coordinate of the process owning this max.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * col;
+   int                        bsize, iglob, iloc, irow, mycoord, ncoord;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 )
+   {
+/*
+ * No local row of A:  WORK[3] is set to the number of process rows  so
+ * that this "ghost" row will never be used, even if the current column
+ * of A contains only zeros.
+ */
+      WORK[0] = HPL_rzero;
+      WORK[1] = HPL_rzero;
+      WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+      return;
+   }
+/*
+ * Grid coordinates, blocking factor and start of the column to search.
+ */
+   mycoord = PANEL->grid->myrow;
+   ncoord  = PANEL->grid->nprow;
+   bsize   = PANEL->nb;
+   col     = Mptr( PANEL->A, II, JJ, PANEL->lda );
+/*
+ * Index of the max within the column,  its row index  irow  (offset by
+ * PANEL->ii + II) and the global row index computed from it.
+ */
+   iloc = HPL_idamax( N, col, 1 );
+   irow = PANEL->ii + II + iloc;
+   Mindxl2g( iglob, irow, bsize, bsize, mycoord, 0, ncoord );
+/*
+ * Pack the result into WORK[0:3].
+ */
+   WORK[0] = col[iloc];          WORK[1] = (double)(iloc);
+   WORK[2] = (double)(iglob);    WORK[3] = (double)(mycoord);
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o
new file mode 100644
index 000000000..c6f19ce77
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth (must equal 2^HPL_LOCSWP_LOG2_DEPTH, at most 32)
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information. On exit, A, L1 and DINFO may be updated.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The current row is read from the
+ *         next N0 entries; the rest of this array is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+/* Wmx = max row at WORK+4, Wr0 = current row at Wmx+n0; Wmx[JJ] := gmax */
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, JJ, 0, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A,  II,     0, lda );
+               A2 = Mptr( A1,        ilindx, 0, lda );
+/* first nu entries: unrolled by HPL_LOCSWP_DEPTH; the nr others follow */
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1 (after storing the pivot back into A).
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A,  II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * set INFO to ia + JJ + 1, unless it already holds a nonzero value.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o
new file mode 100644
index 000000000..09d9b9dfe
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpT
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpT performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * transpose form.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH ); /* n0, low bits cleared */
+   nr    = n0 - nu;               /* leftover for the cleanup loops */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, 0, JJ, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II,     0, lda );
+               A2 = Mptr( A1,       ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
+                    L   += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
+                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
+                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
+                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
+                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
+                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
+                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
+                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
+                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
+                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
+                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
+                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
+                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
+                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
+                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
+                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
+                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
+                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
+                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
+                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
+                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
+                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
+                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
+                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
+                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
+                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
+                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
+#endif
+               }
+
+               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
+               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
+                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
+                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
+                  L[10]=Wmx[10]; L[14]=Wmx[14];
+                  L[11]=Wmx[11]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=Wmx[16]; L[20]=Wmx[20];
+                  L[17]=Wmx[17]; L[21]=Wmx[21];
+                  L[18]=Wmx[18]; L[22]=Wmx[22];
+                  L[19]=Wmx[19]; L[23]=Wmx[23];
+                  L[24]=Wmx[24]; L[28]=Wmx[28];
+                  L[25]=Wmx[25]; L[29]=Wmx[29];
+                  L[26]=Wmx[26]; L[30]=Wmx[30];
+                  L[27]=Wmx[27]; L[31]=Wmx[31];
+#endif
+               }
+               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+            {
+               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda;
+               L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda;
+               L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda;
+               L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda;
+               L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda;
+               L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda;
+               L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda;
+               L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda;
+               L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda;
+               L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda;
+               L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda;
+               L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } 
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+         {
+            L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11];
+            L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19];
+            L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23];
+            L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27];
+            L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31];
+#endif
+         }
+         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+      {
+         L[ 0]=Wr0[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         L[ 1]=Wr0[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
+         L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21];
+         L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23];
+         L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29];
+         L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31];
+#endif
+      }
+      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
+/*
+ * Set INFO.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o
new file mode 100644
index 000000000..674e04044
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c
new file mode 100644
index 000000000..1d99c6e14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c
@@ -0,0 +1,141 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdfact
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdfact
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdfact recursively factorizes a  1-dimensional  panel of columns.
+ * The  RPFACT  function pointer specifies the recursive algorithm to be
+ * used, either Crout, Left- or Right looking.  NBMIN allows to vary the
+ * recursive stopping criterion in terms of the number of columns in the
+ * panel, and  NDIV allows to specify the number of subpanels each panel
+ * should be divided into. Usually  a value of 2 will be chosen. Finally
+ * PFACT is a function pointer specifying the non-recursive algorithm to
+ * be used  on at most NBMIN columns. One can also choose here  between
+ * Crout, Left- or Right looking.  Empirical tests seem to indicate that
+ * values of 4 or 8 for NBMIN give the best results.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   void                       * vptr = NULL;
+   int                        align, jb;
+/* ..
+ * .. Executable Statements ..
+ */
+   jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb;
+/* n/ja above are updated unconditionally; stop unless mycol==pcol, jb>0 */
+   if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+   align = PANEL->algo->align;
+   vptr  = (void *)malloc( ( (size_t)(align) + 
+              (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) *
+              sizeof(double) );      /* align + 2*(4+2*jb) doubles */
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
+/*
+ * Factor the panel - Update the panel pointers
+ */
+   PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr,
+                       ((size_t)(align) * sizeof(double) ) ) );
+   if( vptr ) free( vptr );
+
+   PANEL->A   = Mptr( PANEL->A, 0, jb, PANEL->lda );
+   PANEL->nq -= jb; PANEL->jj += jb;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * End of HPL_pdfact
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o
new file mode 100644
index 000000000..56b7bfa6c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c
new file mode 100644
index 000000000..b14452197
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c
@@ -0,0 +1,311 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmxswp
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_pdmxswp
+( PANEL, M, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmxswp swaps  and  broadcasts  the  absolute value max row using
+ * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by
+ *  
+ *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ *  
+ * where  lat and bdwth are the latency and bandwidth of the network for
+ * double precision real elements.  Communication  only  occurs  in  one
+ * process  column. Mono-directional links  will cause the communication
+ * cost to double.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of the matrix
+ *         column on which this function operates.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
+ *         routine to  initialize  the first four entries of this array.
+ *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+ *         Note that this is also the  JJth  row  (or column) of L1. The
+ *         remaining part is used as a temporary array.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax, tmp1;
+   double                     * A0, * Wmx, * Wwork;
+   HPL_T_grid                 * grid;
+   MPI_Comm                   comm;
+   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
+   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
+                              mydis_, myrow, n0, nprow, partner, rcnt,
+                              root, scnt, size_;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
+/*
+ * ip2  : the smallest power of two less than or equal to nprow;
+ * hdim : dimension of the hypercube made of those ip2 processes;
+ * Np2  : logical flag indicating whether or not nprow is a power of 2;
+ */
+   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
+   hdim    = (unsigned int)(grid->row_hdim);     n0  = PANEL->jb;
+   icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 );
+   mydist  = MModSub( myrow, icurrow, nprow );
+/*
+ * Set up pointers in workspace:  WORK and Wwork  point to the beginning
+ * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
+ * owning the local (before combine) and global (after combine) absolute
+ * value max. A0 points to the copy of the current row of the matrix.
+ */
+   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
+   Wwork = WORK + cnt0;
+/*
+ * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
+ * with max in current column). If I am the current process row, pack in
+ * addition the current row of A in A0[0:N0-1].  If I do not own any row
+ * of A, then zero out Wmx[0:N0-1].
+ */
+   if( M > 0 )
+   {
+      lda = PANEL->lda;
+      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
+                 Wmx, 1 );
+      if( myrow == icurrow )
+      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
+   }
+   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
+/*
+ * Combine the results (bi-directional exchange):  the process coordina-
+ * tes are relative to icurrow,  this allows to reduce the communication
+ * volume when nprow is not a power of 2.
+ *
+ * When nprow is not a power of 2:  proc[i-ip2] receives local data from
+ * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
+ * sends to proc[ip2] the current row of A  for later broadcast in procs
+ * [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         if( mydist == (int)(ip2) )
+            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         if( mydist == 0 )
+            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+ 
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
+      }
+   }
+
+   if( mydist < (int)(ip2) )
+   {
+/*
+ * power of 2 part of the processes collection: processes  [0..ip2)  are
+ * combining (binary exchange); proc[0] has two rows to send, but one to
+ * receive.  At every step  k  in [0..hdim) of the algorithm,  a process 
+ * pair exchanging 2 rows is such that  mydist >> (k+1)  is 0.  Among those
+ * processes the ones  that are sending one more row than  what they are
+ * receiving are such that mydist >> k is equal to 0.
+ */
+      k = 0; ipow = 1;
+ 
+      while( k < hdim )
+      {
+         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
+         {
+            if( ( (unsigned int)(mydist) >> k ) == 0 )
+            { scnt = cnt0; rcnt = cnt_; }
+            else
+            { scnt = cnt_; rcnt = cnt0; }
+         }
+         else { scnt = rcnt = cnt_; }
+ 
+         partner = (int)( (unsigned int)(mydist) ^ ipow );
+         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
+                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
+                          nprow ), comm );
+ 
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         {
+            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
+                       WORK, 1 );
+         }
+         else if( rcnt == cnt0 )
+         { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); }
+ 
+         ipow <<= 1; k++;
+      }
+   }
+   else if( size_ > 1 )
+   {
+/*
+ * proc[ip2] broadcast current row of A to procs [ip2+1..nprow).
+ */
+      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
+      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+ 
+      root   = MModAdd( icurrow, (int)(ip2), nprow );
+      mydis_ = MModSub( myrow,   root,       nprow );
+ 
+      do
+      {
+         mask ^= ip2_;
+         if( ( mydis_ & mask ) == 0 )
+         {
+            partner = (int)(mydis_ ^ ip2_);
+            if( ( mydis_ & ip2_ ) != 0 )
+            {
+               (void) HPL_recv( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+            else if( partner < size_ )
+            {
+               (void) HPL_send( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+         }
+         ip2_ >>= 1;
+      } while( ip2_ > 0 );
+   }
+/*
+ * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
+ * sends the pivot row to proc[i]  along  with the first four entries of
+ * the WORK array.
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+   }
+/*
+ * Save the global pivot index in pivot array
+ */
+   (PANEL->DPIV)[JJ] = WORK[2];
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+/*
+ * End of HPL_pdmxswp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o
new file mode 100644
index 000000000..f1d41539a
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c
new file mode 100644
index 000000000..4ea170b73
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in no-transpose form (i.e. just like the input
+ * matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         Workarray of size at least 2*(4+2*N0), with N0 = PANEL->jb.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )                    /* every column but the last */
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Compute row jj of L1;  skipped for the first column  (kk == 0)
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj, jj+1, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1,  kk, Nm1 );
+         Xv1 = vsip_msubview_d( Xv0, jj,    ICOFF, 1,  kk  );
+         Yv1 = vsip_msubview_d( Xv0, jj,    jj+1,  1,  Nm1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 ); 
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
+                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
+                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )         /* never divide by zero */
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,          jj+1,            kk+1,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      vsip_mdestroy_d( Yv1 );
+      vsip_mdestroy_d( Xv1 );
+      vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
+                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ),
+                 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )            /* never divide by zero */
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o
new file mode 100644
index 000000000..4e646a182
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c
new file mode 100644
index 000000000..50ed300aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c
@@ -0,0 +1,267 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         Workarray of size at least 2*(4+2*N0), with N0 = PANEL->jb.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )                    /* every column but the last */
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Compute row jj of L1 (stored as a column here); skipped when kk == 0
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj+1, jj, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, jj+1,  ICOFF, Nm1, kk );
+         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj,    kk,   1 );
+         Yv1 = vsip_msubview_d( Xv0, jj+1,  jj,    Nm1,  1 );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
+                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
+                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )         /* never divide by zero */
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,           ICOFF,           1,   kk+1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF,
+                 n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )            /* never divide by zero */
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o
new file mode 100644
index 000000000..02f30764d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c
new file mode 100644
index 000000000..fa471198d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow==prow */
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, 
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr,  1 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,        jj+1,              kk,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )   /* copy kk L1 entries into A, drop one local row */
+      {
+         HPL_dcopy( kk, L1ptr,  1, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o
new file mode 100644
index 000000000..0bcc4417f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c
new file mode 100644
index 000000000..a6e1b67bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow==prow */
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans,   HplUnit, kk,
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,         ICOFF,             1,   kk );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )   /* copy kk L1 entries into A, drop one local row */
+      {
+         HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o
new file mode 100644
index 000000000..4fbf6ebca
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c
new file mode 100644
index 000000000..0a3b9a542
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c
@@ -0,0 +1,250 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlN factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop is unrolled.  The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow==prow */
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+#ifdef HPL_CALL_VSIPL
+      if( Nm1 > 1 )
+      {
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+      }
+#else
+      if( Nm1 > 1 )
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda );
+#endif
+/*
+ * Same thing as above but with worse data access on y (A += x * y^T)
+ * (reference only:  L1 and n0 are not declared in this routine)
+ *    if( Nm1 > 1 )
+ *       HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+ *                 Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ),
+ *                 lda );
+ */  
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o
new file mode 100644
index 000000000..4ccb67fe7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c
new file mode 100644
index 000000000..68c1afc02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlT factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of the main loop  is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column.  This lets  the  current
+ * column pass through  cache only once  at  each  step.   The   current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt, * L1;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M,
+                              n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+
+      if( Nm1 > 1 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); 
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ),
+                   lda );
+#endif
+      }
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o
new file mode 100644
index 000000000..75bdb487b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c
new file mode 100644
index 000000000..348d7ebe6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrN recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant of the usual one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrN( PANEL, m, jb, ioff, WORK );
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff,  ICOFF,   jb, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff,  ioff+jb, jb,  n );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj,  n );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n,
+                    jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, 
+                    Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o
new file mode 100644
index 000000000..ca755e4a1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c
new file mode 100644
index 000000000..a1ecfac2c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrT recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+ * The lower triangular N0-by-N0  upper block of the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                   VSIP_MAT_TRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrT( PANEL, m, jb, ioff, WORK );
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff,  n, jb );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF,   ioff, jj, jb );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                      VSIP_MAT_NTRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb,
+                    jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                    Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o
new file mode 100644
index 000000000..2ae6cc537
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c
new file mode 100644
index 000000000..4dbc13b44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllN recursively  factorizes  a panel  of columns using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (enforced by the test above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Recursively factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit,
+                 jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ),
+                 n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av1 (m x jj), Lv1 (jj x jb), Av2 (m x jb)
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m,  jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllN( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o
new file mode 100644
index 000000000..330396b19
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c
new file mode 100644
index 000000000..887caeb87
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllT recursively  factorizes  a panel of columns  using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+ * transpose form.
+ *
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (enforced by the test above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Replicated solve - Local update - Recursively factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr,
+                 jj, 0, n0 ), n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av1 (m x jj), Lv1 (jb x jj), Av2 (m x jb)
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb,  jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av1 );
+      (void) vsip_mdestroy_d( Av2 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllT( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o
new file mode 100644
index 000000000..546461349
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c
new file mode 100644
index 000000000..22f105cf4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlN recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (enforced by the test above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Recursively factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                 HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av1 (m x jb), Lv1 (jb x n), Av2 (m x n)
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o
new file mode 100644
index 000000000..56ede64e7
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c
new file mode 100644
index 000000000..a77301b9b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlT recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* leaf: no further recursion */
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n = columns right of this block */
+/*
+ * Factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }   /* advance local row window by jb */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m,  n );  /* m-by-n trailing block, as in the dgemm path */
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );   /* last block may be narrower than nb */
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o
new file mode 100644
index 000000000..22cbbc0cf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c
new file mode 100644
index 000000000..b917a6525
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_equil
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_TRANS             TRANS,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   int *                            IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1,
+   int *                            IWORK
+)
+#else
+void HPL_equil
+( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_TRANS             TRANS;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   int *                            IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_equil equilibrates  the  local  pieces  of U, so that on exit to
+ * this function, pieces of U contained in every process row are of the
+ * same size. This phase makes the rolling phase optimal.  In addition,
+ * this  function probes  for  the  column panel L and forwards it when
+ * possible.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be equilibrated) information.
+ *
+ * TRANS   (global input)                const enum HPL_TRANS
+ *         On entry, TRANS specifies whether  U  is stored in transposed
+ *         or non-transposed form.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of rows or columns of  U. N
+ *         must be at least 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+ *         non-transposed form, and MAX(1,N) otherwise.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in process IPMAP[i].  Overwritten on exit when U is re-spread.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension NPROW+1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, ip, ipU, ipcur, iprow, iptgt, lastrow,
+                              left, npm1, nprow, ll, llU, llcur, lltgt,
+                              right, slen, smax, smin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; /* nprow <= 2: nothing is done */
+/*
+ * If the current distribution of the pieces of U is already optimal for
+ * the rolling phase, then return immediately. The  optimal distribution
+ * is such that ip processes have smax items and the remaining processes
+ * only have smin items. Another way to check this is to verify that all
+ * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */
+   smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow;   /* slen/nprow rounded up (slen >= 0) */
+   ip   = slen - nprow * ( smin = slen / nprow );       /* ip = slen % nprow                 */
+
+   iprow = 0;
+   do
+   {
+      ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++;
+   } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) );
+
+   if( iprow == nprow ) return; /* NOTE(review): the last ll is never tested against smin/smax - confirm intended */
+/*
+ * Now,  we are sure  the distribution of the pieces of U is not optimal
+ * with respect to the rolling phase,  thus  perform  equilibration.  Go
+ * through the list of processes:  Processes  that have rows that do not
+ * belong to them  with respect to the optimal mapping spread them  in a
+ * logarithmic fashion. To simplify a little bit the implementation, and
+ * mainly the packing, a source process row spreads its data to its left
+ * first, and then to its right.
+ */
+   IWORK[nprow] = slen;
+
+   for( iprow = 0; iprow < nprow; iprow++ )
+   {
+      llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] );   /* current start and length of this piece */
+      if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax;      }
+      else             { lltgt = smin; iptgt = iprow * smin + ip; }
+
+      left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); /* piece starts before / ends past its target range */
+/*
+ * If I have something to spread to either the left or the right
+ */
+      if( ( llU > 0 ) && ( left || right ) )
+      {        /* Figure out how much every other process should have */
+
+         ipcur = ipU; llcur = llU;
+
+         for( i = 0; i < nprow; i++ )
+         {
+            if( i < ip ) { lltgt = smax; iptgt = i * smax;      }
+            else         { lltgt = smin; iptgt = i * smin + ip; }
+            lastrow = iptgt + lltgt - 1;   /* last index of process i's target range */
+
+            if( ( lastrow >= ipcur ) && ( llcur > 0 ) )
+            { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; }
+            else { ll = 0; }
+
+            IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; /* IWORK[i+1]-IWORK[i] = ll items assigned to i */
+         }
+/*
+ * Equilibration phase
+ */
+         if( TRANS == HplNoTrans )
+         {
+            if( left  )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+         else
+         {
+            if( left  )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+      }
+   }
+/*
+ * Finally update  IPLEN  with the indexes corresponding to the new dis-
+ * tribution of U - IPLEN[nprow] remained unchanged.
+ */
+   for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip );
+/*
+ * End of HPL_equil
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o
new file mode 100644
index 000000000..5551089fd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c
new file mode 100644
index 000000000..0715159bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c
@@ -0,0 +1,185 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_logsort
+(
+   const int                        NPROCS,
+   const int                        ICURROC,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_logsort
+( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
+   const int                        NPROCS;
+   const int                        ICURROC;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_logsort computes an array  IPMAP  and  its inverse  IPMAPM1  that
+ * contain the logarithmic sorted processes id with respect to the local
+ * number of rows of  U  that they own. This is necessary to ensure that
+ * the logarithmic spreading of U is optimal in terms of number of steps
+ * and communication volume as well.  In other words, the largest pieces
+ * of U will be sent a minimal number of times.
+ *
+ * Arguments
+ * =========
+ *
+ * NPROCS  (global input)                const int
+ *         On entry, NPROCS  specifies the number of process rows in the
+ *         process grid. NPROCS is at least one.
+ *
+ * ICURROC (global input)                const int
+ *         On entry, ICURROC is the source process row.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROCS+1,  such that
+ *         IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+ *         that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+ *         rows of U  in the processes before process IPMAP[i] after the
+ *         sort,  with  the convention that  IPLEN[NPROCS] is  the total
+ *         number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+ *         IPLEN[i] is  the  number of rows of A that should be moved to
+ *         the process IPMAP[i].  IPLEN  is such that the number of rows
+ *         of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myroc] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROCS)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dist, i, ip, iplen_i, iplen_j, itmp, j, k;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the  logarithmic distance between process j and process 0, as
+ * well as the maximum logarithmic distance. IPMAPM1 is workarray here.
+ */
+   for( j = 0, dist = 0; j < NPROCS; j++ )
+   {
+      IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0;
+      do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip );   /* itmp = number of set bits in j */
+      IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp;
+   }
+/*
+ * Shift IPLEN[1..NPROCS]  by ICURROC places,  so that  IPLEN[1]  is now
+ * what used to be IPLEN[ICURROC+1]. IPMAP was initialized above so that
+ * IPMAP[0] corresponds to ICURROC.
+ */
+   for( j = 0; j < ICURROC; j++ )
+   {
+      for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i];
+      IPLEN[NPROCS] = itmp;   /* one left-rotation of IPLEN[1..NPROCS] per pass */
+   }
+/*
+ * logarithmic sort
+ */
+   for( k = 1; k <= dist; k++ )
+   {
+      for( j = 1; j < NPROCS; j++ )
+      {
+         if( IPMAPM1[j] == k )
+         {
+            for( i = 2; i < NPROCS; i++ )
+            {
+               if( k < IPMAPM1[i] )
+               {
+                  iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1];
+
+                  if( iplen_j < iplen_i )   /* larger piece moves to the slot with smaller distance k */
+                  {
+                     IPLEN[j+1] = iplen_i;  IPLEN[i+1] = iplen_j;
+                     itmp       = IPMAP[j]; IPMAP[j]   = IPMAP[i];
+                     IPMAP[i]   = itmp;
+                  }
+               }
+            }
+         }
+      }
+   }
+/*
+ * Compute IPLEN and IPMAPM1 (the inverse of IPMAP)
+ */
+   IPLEN[0] = 0;
+
+   for( i = 0; i < NPROCS; i++ )
+   {
+      IPMAPM1[ IPMAP[i] ] = i;
+      IPLEN[i+1]         += IPLEN[i];   /* turn per-process counts into running offsets */
+   }
+/*
+ * End of HPL_logsort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o
new file mode 100644
index 000000000..bc6a54df3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c
new file mode 100644
index 000000000..ced74269e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting (the right hand side is column N+1) and, if A->info
+ * is still 0 afterwards, solves the upper triangular system. The "right
+ * looking" variant is run with or without look-ahead.  The lower factor
+ * is left unpivoted and the pivots are not returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.  A->info is reset to 0 when A->n > 0.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( A->n <= 0 ) return;       /* empty system: A->info is not touched */
+
+   A->info = 0;
+/* depth 0 or one process column: no look-ahead; otherwise look-ahead */
+   if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) )
+   {
+      HPL_pdgesv0(  GRID, ALGO, A );
+   }
+   else
+   {
+      HPL_pdgesvK2( GRID, ALGO, A );
+   }
+/*
+ * Solve upper triangular system, only if A->info is still 0
+ */
+   if( A->info == 0 ) HPL_pdtrsv( GRID, A );
+/*
+ * End of HPL_pdgesv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o
new file mode 100644
index 000000000..eebf1d2bd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c
new file mode 100644
index 000000000..d79b6fa55
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv0
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv0
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv0 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting (the right hand side is column N+1), using the "right
+ * looking" variant without look-ahead.  A single panel is created once
+ * and re-initialized for every block of nb columns.  The lower factor is
+ * left unpivoted and the pivots are not returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, j, jb, n, nb, tag=MSGID_BEGIN_FACT,
+                              test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N = A->n ) <= 0 ) return;      /* empty system: nothing to factor */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+ 
+   HPL_pdupdate = ALGO->upfun; nb = A->nb;
+/*
+ * Allocate a panel list of length 1 (HPL_pabort on failure), then panel[0]
+ */
+   panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); }
+
+   HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag,
+                    &panel[0] );
+/*
+ * Loop over the columns of A, nb columns at a time (jb <= nb at the end)
+ */
+   for( j = 0; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* process (0,0) and j > 0 only: 2/3*(N^3-n^3) flops over elapsed time (min 1e-6) */
+      if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Release panel resources - re-initialize the same panel[0] at column j
+ */
+      (void) HPL_pdpanel_free( panel[0] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] );
+/*
+ * Factor and broadcast current panel - update
+ */
+      HPL_pdfact(               panel[0] );
+      (void) HPL_binit(         panel[0] );
+      do                           /* call HPL_bcast until test is HPL_SUCCESS */
+      { (void) HPL_bcast(       panel[0], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(         panel[0] );
+      HPL_pdupdate( NULL, NULL, panel[0], -1 ); /* -1: presumably "update all" - TODO confirm */
+/*
+ * Update message id for next factorization
+ */
+      tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Release panel resources and panel list
+ */
+   (void) HPL_pdpanel_disp( &panel[0] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesv0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o
new file mode 100644
index 000000000..542c74bbb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c
new file mode 100644
index 000000000..ff1958cfc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c
@@ -0,0 +1,222 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+#ifdef STDC_HEADERS
+void HPL_pdgesvK1
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK1
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK1 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting (the right hand side is column N+1), using the "right
+ * looking" variant with look-ahead. A new panel is created for each block
+ * column of the main loop and the oldest one is disposed. The lower factor
+ * is left unpivoted and the pivots are not returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb; 
+
+   if( N <= 0 ) return;                 /* empty system: nothing to factor */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels (panel[depth] comes later)
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; } /* jj is never read back */
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel - use long topology for those
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do                           /* call HPL_bcast until test is HPL_SUCCESS */
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-1-k panels in front of me (none for the last)
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* process (0,0) and j > 0 only: 2/3*(N^3-n^3) flops over elapsed time (min 1e-6) */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Allocate current panel resources - Finish latest update - Factor and
+ * broadcast current panel
+ */
+      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
+ 
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(    panel[depth] );       /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Release the resources of panel[0] - shift the panel pointers down by
+ * one (panel[depth] is re-created at the top of the next iteration) -
+ * update the current process column and the message id for broadcast
+ */
+      (void) HPL_pdpanel_disp( &panel[0] );
+      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+ 
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels [0..depth-1] and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp( &panel[k] );
+   }
+ 
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o
new file mode 100644
index 000000000..e84aa62e0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c
new file mode 100644
index 000000000..dec506ab9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c
@@ -0,0 +1,231 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesvK2
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK2
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK2 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting (the right hand side is column N+1), using the "right
+ * looking" variant with look-ahead. The depth+1 panels are created once
+ * and recycled (free + init) through a circular rotation. The lower factor
+ * is left unpivoted and the pivots are not returned.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * p, * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate; 
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb;
+
+   if( N <= 0 ) return;                 /* empty system: nothing to factor */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; } /* jj is never read back */
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Create the last panel, panel[depth] (depth+1 panels in total)
+ */
+   HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart,
+                    jstart, tag, &panel[depth] );
+   tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do                           /* call HPL_bcast until test is HPL_SUCCESS */
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-k-1 panels in front of me (none for the last)
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* process (0,0) and j > 0 only: 2/3*(N^3-n^3) flops over elapsed time (min 1e-6) */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Re-initialize panel[depth] at column j (free + init, no new allocation
+ * of the panel itself) - Finish latest update, Factor and broadcast it
+ */
+      (void) HPL_pdpanel_free( panel[depth] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] );
+
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(       panel[depth] );    /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate( panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Circular shift of the panel pointers:
+ * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp;
+ *
+ * Update the current process column and the message ids for broadcast
+ */
+      p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+      panel[depth] = p;
+
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release all depth+1 panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp(  &panel[k] );
+   }
+   (void) HPL_pdpanel_disp( &panel[depth] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK2
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o
new file mode 100644
index 000000000..97892453c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c
new file mode 100644
index 000000000..b4433e1be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c
@@ -0,0 +1,432 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words. Mono-
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed. While *IFLAG is HPL_KEEP_TESTING, the
+ *         broadcast is probed via HPL_bcast; the outcome is in IFLAG.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  jb   /* leading dimension used for U */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;         /* n entries + 1 index slot per W column */
+/*
+ * Allocate space for temporary W (ldW * jb), plus align doubles of slack
+ */
+   vptr = (void*)malloc( 
+      ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts  lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel.
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of  the algorithm, it is necessary to update the list
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );  /* 1 iff nprow != ip2 */
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o
new file mode 100644
index 000000000..45646d165
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c
new file mode 100644
index 000000000..7a9764c09
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c
@@ -0,0 +1,433 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;
+/*
+ * Allocate space for temporary W (ldW * jb)
+ */
+   vptr = (void*)malloc( ( (size_t)(align) + 
+                           ((size_t)(jb) * (size_t)(ldW))) * 
+                           sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts  lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of  the algorithm, it is necessary to update the list
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o
new file mode 100644
index 000000000..64d2a7b87
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c
new file mode 100644
index 000000000..31f219840
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether to equilibrate; cached in static equil on first call
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  PANEL->IWORK is
+ * split, in order, into iflag(1), ipl(1), ipID(4*jb), ipA(1), lindxA &
+ * lindxAU (2*jb each), iplen(nprow+1), ipmap & ipmapm1 (nprow each),
+ * permU(jb) and iwork of size max( 2 * jb (plindx1), nprow+1(equil)): 
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
+                     0, LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
+                 ipmap, ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp00N( jb, n, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o
new file mode 100644
index 000000000..eb5b938b6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c
new file mode 100644
index 000000000..0c4de2669
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether to equilibrate; cached in static equil on first call
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  PANEL->IWORK is
+ * split, in order, into iflag(1), ipl(1), ipID(4*jb), ipA(1), lindxA &
+ * lindxAU (2*jb each), iplen(nprow+1), ipmap & ipmapm1 (nprow each),
+ * permU(jb) and iwork of size max( 2 * jb (plindx1), nprow+1(equil)): 
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01T was called before: only re-compute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0,
+                     iplen[k], LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap,
+                 ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp10N( n, jb, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o
new file mode 100644
index 000000000..020b40f86
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c
new file mode 100644
index 000000000..d2135130a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c
@@ -0,0 +1,296 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtrsv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_pmat *                     AMAT
+)
+#else
+void HPL_pdtrsv
+( GRID, AMAT )
+   HPL_T_grid *                     GRID;
+   HPL_T_pmat *                     AMAT;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtrsv solves an upper triangular system of linear equations.
+ *  
+ * The rhs is the last column of the N by N+1 matrix A. The solve starts
+ * in the process  column owning the  Nth  column of A, so the rhs b may
+ * need to be moved one process column to the left at the beginning. The
+ * routine therefore needs  a column  vector in every process column but
+ * the one owning  b. The result is  replicated in all process rows, and
+ * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ *  
+ * The algorithm uses decreasing one-ring broadcast in process rows  and
+ * columns  implemented  in terms of  synchronous communication point to
+ * point primitives.  The  lookahead of depth 1 is used to minimize  the
+ * critical path. This entire operation is essentially ``latency'' bound
+ * and an estimate of its running time is given by:
+ *  
+ *    (move rhs) lat + N / ( P bdwth ) +            
+ *    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+ *               gam2 N^2 / ( P Q ),                
+ *  
+ * where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+ * There are  N / NB  diagonal blocks. One must exchange  2  messages of
+ * length NB to compute the next  NB  entries of the vector solution, as
+ * well as performing a total of N^2 floating point operations.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * AMAT    (local input/output)          HPL_T_pmat *
+ *         On entry,  AMAT  points  to the data structure containing the
+ *         local array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Ccomm, Rcomm;
+   double                     * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL,
+                              * XR=NULL, * Xd=NULL, * Xdprev=NULL,
+                              * W=NULL;
+   int                        Alcol, Alrow, Anpprev, Anp, Anq, Bcol,
+                              Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid,
+                              Wfr=0, colprev, kb, kbprev, lda, mycol,
+                              myrow, n, n1, n1p, n1pprev=0, nb, npcol,
+                              nprow, rowprev, tmp1, tmp2;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+   if( ( n = AMAT->n ) <= 0 ) return;
+   nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X;
+
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV;
+   Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1;
+   GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 );
+/*
+ * Move the rhs in the process column owning the last column of A.
+ */
+   Mnumroc( Anp, n, nb, nb, myrow, 0, nprow );
+   Mnumroc( Anq, n, nb, nb, mycol, 0, npcol );
+
+   tmp1  = ( n - 1 ) / nb;
+   Alrow = tmp1 - ( tmp1 / nprow ) * nprow;
+   Alcol = tmp1 - ( tmp1 / npcol ) * npcol;
+   kb    = n    - tmp1 * nb;
+
+   Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda );
+   Mindxg2p( n, nb, nb, Bcol, 0, npcol );
+
+   if( ( Anp > 0 ) && ( Alcol != Bcol ) )
+   {
+      if( mycol == Bcol  )
+      { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); }
+      else if( mycol == Alcol )
+      { (void) HPL_recv( XC, Anp, Bcol,  Rmsgid, Rcomm ); }
+   }
+   Rmsgid = ( Rmsgid + 2 >
+              MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 );
+   if( mycol != Alcol )
+   { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; }
+/*
+ * Set up lookahead
+ */
+   n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb );
+   if( Anp > 0 )
+   {
+      W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) );
+      if( W == NULL )
+      { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); }
+      Wfr = 1;
+   }
+
+   Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda );
+   tmp1    = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+   if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); }
+   if( mycol == Alcol )
+   {
+      Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq );
+      if( myrow == Alrow )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, Xd, 1 );
+      }
+   }
+
+   rowprev = Alrow; Alrow = MModSub1( Alrow, nprow );
+   colprev = Alcol; Alcol = MModSub1( Alcol, npcol );
+   kbprev  = kb; n -= kb;
+   tmp1    = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+/*
+ * Start the operations
+ */
+   while( n > 0 )
+   {
+      if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; }
+      if( myrow == Alrow ) { Anp -= kb; }
+/*
+ * Broadcast  (decreasing-ring)  of  previous solution block in previous
+ * process column,  compute  partial update of current block and send it
+ * to current process column.
+ */
+      if( mycol == colprev )
+      {
+/*
+ * Send previous solution block in process row above
+ */
+         if( myrow == rowprev )
+         {
+            if( GridIsNot1xQ )
+               (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                                Cmsgid, Ccomm );
+         }
+         else
+         {
+            (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+         } 
+/*
+ * Compute partial update of previous solution block and send it to cur-
+ * rent column
+ */
+         if( n1pprev > 0 )
+         {
+            tmp1 = Anpprev - n1pprev;
+            HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev,
+                       -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone,
+                       XC+tmp1, 1 );
+            if( GridIsNotPx1 )
+               (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm );
+         }
+/*
+ * Finish  the (decreasing-ring) broadcast of the solution block in pre-
+ * vious process column
+ */
+         if( ( myrow != rowprev ) &&
+             ( myrow != MModAdd1( rowprev, nprow ) ) )
+            (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+      }
+      else if( mycol == Alcol )
+      {
+/*
+ * Current  column  receives  and accumulates partial update of previous
+ * solution block
+ */
+         if( n1pprev > 0 )
+         {
+            (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm );
+            HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 );
+         }
+      }
+/*
+ * Solve current diagonal block 
+ */
+      if( ( mycol == Alcol ) && ( myrow == Alrow ) )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 );
+      }
+/*
+*  Finish previous update
+*/
+      if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
+         HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone,
+                    Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 );
+/*
+*  Save info of current step and update info for the next step
+*/
+      if( mycol == Alcol ) { Xdprev   = Xd; Aprev = Aptr; }
+      if( myrow == Alrow ) { Anpprev -= kb; }
+      rowprev = Alrow; colprev = Alcol;
+      n1pprev = n1p;   kbprev  = kb; n -= kb;
+      Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol );
+      tmp1  = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+      MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+      Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? 
+                 MSGID_BEGIN_PTRSV   : Rmsgid+2 );
+      Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ?
+                 MSGID_BEGIN_PTRSV+1 : Cmsgid+2 );
+   }
+/*
+ * Replicate last solution block
+ */
+   if( mycol == colprev )
+      (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev,
+                            Ccomm );
+
+   if( Wfr  ) free( W  );
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+/*
+ * End of HPL_pdtrsv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o
new file mode 100644
index 000000000..1b4f1597b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c
new file mode 100644
index 000000000..7e31ddcd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c
@@ -0,0 +1,442 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry.  When PBCST is
+ *         NULL on entry, IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero (NN < 0: n = PANEL->nq).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;          /* cached algo->fsthr */
+   static HPL_T_SWAP         fswap = HPL_NO_SWP; /* cached algo->fswap */
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swap algorithm (swap:broadcast U), cached in statics.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o
new file mode 100644
index 000000000..67ec6202d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c
new file mode 100644
index 000000000..faa3ef207
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */ 
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o
new file mode 100644
index 000000000..f66995f02
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c
new file mode 100644
index 000000000..a16aa26a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTN  broadcasts - forwards  the panel PBCST  and,  at the
+ * same time,  applies the row interchanges  and updates  part  of  the
+ * trailing submatrix using the panel PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. Otherwise (if
+ *         PBCST is NULL), IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN is the local number of columns of the trailing
+ *         submatrix to update from the current position, capped at
+ *         PANEL->nq. If NN is negative, all PANEL->nq columns are used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;           /* cached PANEL->algo->fsthr */
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;  /* cached PANEL->algo->fswap */
+#define LDU                  jb                   /* leading dimension passed with Uptr */
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );   /* a negative NN keeps n = PANEL->nq */
+/*
+ * There is nothing to update; just enforce the panel broadcast and return.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case (PANEL->grid->nprow == 1)
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } /* double pivots -> int, minus PANEL->ii */
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update at most nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm (cached in statics on first use) - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * Return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine) when PBCST is not NULL.
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o
new file mode 100644
index 000000000..dbe1285f1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c
new file mode 100644
index 000000000..81e6cc4b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTT broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o
new file mode 100644
index 000000000..344e0cdc3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c
new file mode 100644
index 000000000..bf7cc4503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_perm combines  two  index  arrays and generates the corresponding
+ * permutation. First, this function computes the inverse of LINDXA, and
+ * then combines it with LINDXAU. Second, in order to be able to perform
+ * the permutation in place,  LINDXAU  is overwritten by the sequence of
+ * swaps  that  produces  the  same  result.  What we ultimately want to
+ * achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+ * call to this function,  this in place permutation can be performed by
+ * for i in [0..N) swap U[i] with U[LINDXAU[i]].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the length of the arrays  LINDXA  and
+ *         LINDXAU. N should be at least zero.
+ *
+ * LINDXA  (global input/output)         int *
+ *         On entry,  LINDXA  is an array of dimension N  containing the
+ *         source indexes. On exit,  LINDXA  contains the combined index
+ *         array. Entries are used to index IWORK (dimension N).
+ *
+ * LINDXAU (global input/output)         int *
+ *         On entry,  LINDXAU is an array of dimension N  containing the
+ *         target indexes.  On exit,  LINDXAU  contains  the sequence of
+ *         swaps,  that  should  be  applied  in  increasing order  to
+ *         permute the underlying array U in place.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j, k, fndd;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Invert LINDXA into IWORK, combine with LINDXAU, reset IWORK to identity
+ */
+   for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; }
+   for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; }
+ 
+   for( i = 0; i < N; i++ )
+   {
+      /* find j with LINDXA[j] == i (scan has no bound: i must occur) */
+      j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--;
+      /* find k with IWORK[k]  == j (scan has no bound: j must occur) */
+      k = 0; do { fndd = ( IWORK[k]  == j ); k++; } while( !fndd ); k--;
+      /* swap IWORK[i] and IWORK[k] (j is the temp); LINDXAU[i] = k */
+      j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j;
+      LINDXAU[i] = k;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o
new file mode 100644
index 000000000..6e8f33ec4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array  IPID  that contains the source and final
+ * destination  of  matrix rows  resulting  from  the  application  of N
+ * interchanges  as computed by the  LU  factorization  with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long,  since  there are N rows to swap and
+ * broadcast. The length of this array  must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing  IPID  goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *  
+ * For every row src IA + i with i in [0..N) to be swapped with row  dst
+ * such that dst is given by DPIV[i]:
+ *  
+ * Is row  src  the destination  of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *     Yes:  update  this destination  with dst.  For  example,  if  the
+ * pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+ * was thought so far ...
+ *     No :  add  the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *  
+ * Is row  dst  different  from src the destination of a previous row of
+ * the current block, i.e., is there k odd such that IPID(k) is equal to
+ * dst ?
+ *     Yes:  update  IPID(k) with src.  For example,  if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+ * fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *     No : add  the  pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *  
+ * Note that when src is equal to dst, the pair (dst,src)  should not be
+ * added to  IPID  in  order  to avoid duplicated entries in this array.
+ * During  the construction of the array  IPID,  we  make  sure that the
+ * first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+ * For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information (DPIV, jb and ia are read from it here).
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in  IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N.  On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting  from  the  application of the  N  interchanges  as
+ *         specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst  = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;        /* j walks the odd (destination) slots */
+
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {  /* src == dst: row ia+i is exchanged with itself */
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K;     off = 2; IPID[lst] = src; }
+         else        { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {  /* src != dst: redirect or append one pair for each row */
+         fndd = 0;
+         do
+         {
+            if     ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off  = 2; }
+         else        {                 IPID[fnds] = dst; off  = 0; }
+         if( !fndd ) { lst = *K+off;   IPID[lst ] = dst; off += 2; }
+         else        { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Swap pairs lst and 2*i so that IPID[2*i+1] is equal to ia + i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j  ]; IPID[j  ] = IPID[lst  ]; IPID[lst  ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;              /* off = entries appended in this pass */
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o
new file mode 100644
index 000000000..13544e481
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.
+ *  
+ * On entry, the array  IPID  of length K is such that the row of global
+ * index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+ * Let  IA  be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index  IPID(2*k+1).  The question then, is to determine
+ * which rows should ultimately be part of U.
+ *  
+ * First, some rows of the process ICURROW  may be swapped locally.  One
+ * of these rows belongs to U, the other one belongs to my local piece of
+ * A.  The other  rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent  along,
+ * and  grabbed by the other processes  as we  progress in the  exchange
+ * phase.
+ *  
+ * So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N,  this row is locally swapped and should be copied into  U  at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *  
+ * If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to  A  remotely.  Since this  process will however send this array U,
+ * this row is  copied into  U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+ * IPID(2*k1+1)-IA.
+ *  
+ * It is thus  important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+ * doing so,  U  is formed, and the local copy  is performed in just one
+ * sweep.
+ *  
+ * Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU  contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *  
+ * Second, if I am not the current row process  ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed,  they  are swapped with one
+ * row  of  the  current  block  of rows,  and  the  main  factorization
+ * algorithm proceeds one row after each other.  The processes different
+ * from ICURROW,  should  exchange and accumulate  those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *  
+ * In processes different from  ICURROW,  the  initial packing algorithm
+ * proceeds as follows.  Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+ * and  this particular row of U should be first copied into my piece of
+ * A, at A(il,:),  where  il is the  local row  index  corresponding  to
+ * IPID(2*i). Now,initially, this row will be packed into workspace, say
+ * as the kth row of  that  work array.  The  following  algorithm  sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in  A  where  this
+ * row of U should be copied, i.e il.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is not in ICURROW,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *  
+ * Since we are at it, we also globally figure  out  how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to  figure it on  the fly  during the  bi-directional exchange phase.
+ * This information is kept in the array  LLEN  of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (local output)                int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+ *         contains how many rows every process has.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges.
+ */
+   myrow   = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow;        jb    = PANEL->jb;
+   nb      = PANEL->nb;          ia    = PANEL->ia;
+   iroff   = PANEL->ii;
+
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               LINDXAU[ip] = IPID[j-1] - ia;
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o
new file mode 100644
index 000000000..b41a64031
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.  In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit,  IPA  specifies  the number of rows that the current
+ *         process row has that either belong to U  or should be swapped
+ *         with remote rows of A.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in  the  processes  before  process  IPMAP[i]  after the sort
+ *         with the convention that IPLEN[nprow]  is the total number of
+ *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * PERMU   (global output)               int *
+ *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+ *         contains  a sequence of permutations,  that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
+   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
+   iroff = PANEL->ii;          icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;   /* iwork aliases the part of IWORK starting at offset jb */
+ 
+   if( myrow == icurrow )   /* this process lies in process row icurrow (PANEL->prow) */
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )   /* IPID holds (src,dst) pairs */
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+ 
+         if( srcrow == icurrow )   /* only pairs whose source row is owned by icurrow */
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+ 
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;   /* local source index, relative to iroff */
+ 
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {  /* local destination with dst - ia < jb: final position is dst - ia */
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;   /* IPLEN[il] serves as a running slot counter */
+            }
+            else if( dstrow != icurrow )
+            {  /* remote destination: look dst up among the src entries of IPID */
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+ 
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];   /* IPID[j-1]: dst paired with the entry examined last */
+               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {  /* local destination with dst - ia >= jb: store iroff - il in LINDXAU */
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;   /* number of LINDXA / LINDXAU entries filled above */
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[ip] is the local index of the row of A that belongs into U
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[ipU] is the local (current) position  index in U
+ * PERMU[ipU] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {  /* same search of the src entries of IPID as in the icurrow branch */
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;   /* IPA is set to zero outside of process row icurrow */
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutations
+ * that needs to be applied to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value: shift every entry up by one slot and zero IPLEN[0]
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0; 
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o
new file mode 100644
index 000000000..5196523b4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx10
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_plindx10
+( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx10 computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+ * contain the logarithmic mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in the processes before process IPMAP[i] after the sort, with
+ *         the convention that IPLEN[nprow] is the total number of rows.
+ *         In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+ *         rows of  A  that should be moved for each process.  IPLEN  is
+ *         such that the number of rows of the source process row can be
+ *         computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+ *         this  array are sorted  so  that  the quantities IPLEN[i+1] -
+ *         IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, i, ia, icurrow, jb, nb,
+                              nprow, src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+   nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb;
+   ia    = PANEL->ia;          icurrow = PANEL->prow;
+/*
+ * Compute  redundantly  the local number of rows  that each process has
+ * and that belong to U in IPLEN[1 .. nprow]  (IPLEN[0] is left at zero)
+ */
+   for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0;   /* clears all NPROW + 1 entries */
+ 
+   for( i = 0; i < K; i += 2 )   /* IPID holds (src,dst) pairs */
+   {
+      src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+      if( srcrow == icurrow )   /* only pairs whose source row is owned by icurrow */
+      {
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+         if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++;   /* remote dst, or local dst with dst - ia < jb */
+      }
+   }
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ * (the inverse of IPMAP)
+ */
+   HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * End of HPL_plindx10
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o
new file mode 100644
index 000000000..b6e933947
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c
new file mode 100644
index 000000000..e68590a01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c
@@ -0,0 +1,225 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollN
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollN rolls the local arrays containing the local pieces of U, so
+ * that on exit to this function  U  is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of  U.  N must be
+ *         at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,IPLEN[NPROW]).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype               type[2];
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR,
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 exchanges with the prev / next sorted neighbour
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );   /* l = k / 2 */
+/* Parity of mydist+k picks the partner: odd -> prev, even -> next */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il      = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+/* Receive first (non-blocking): N blocks of lengthR doubles, stride LDU */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE,
+                                      &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+      }
+ 
+      if( lengthS > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE,
+                                      &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_SEND] );
+      }
+/* Complete the receive posted above, then release its datatype */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_RECV] );
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); }
+/*
+ * End of HPL_rollN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o
new file mode 100644
index 000000000..fe91d1449
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c
new file mode 100644
index 000000000..0160c9412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c
@@ -0,0 +1,259 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollT
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollT rolls the local arrays containing the local pieces of U, so
+ * that on exit to this function  U  is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the local number of rows of  U.  N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,N).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype               type[2];
+#endif
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR, 
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 exchanges with the prev / next sorted neighbour
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );   /* l = k / 2 */
+/* Parity of mydist+k picks the partner: odd -> prev, even -> next */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+/* Post the non-blocking receive before the blocking send below */
+      if( lengthR > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
+                                           &type[I_RECV] );
+            else
+               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
+                                       &type[I_RECV] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+#else
+/*
+ * In our case LDU is N, so the lengthR columns are contiguous - receive
+ * them as lengthR*LDU plain doubles instead of using an MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
+                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
+#endif
+      }
+ 
+      if( lengthS > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
+                                             &type[I_SEND] );
+            else
+               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
+                                         &type[I_SEND] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_SEND] );
+#else
+/*
+ * In our case LDU is N, so the lengthS columns are contiguous - send
+ * them as lengthS*LDU plain doubles instead of using an MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
+                               MPI_DOUBLE, partner, Cmsgid, comm );
+#endif
+      }
+/* Complete the receive posted above */
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#if 0
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_RECV] );
+#endif
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
+/*
+ * End of HPL_rollT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o
new file mode 100644
index 000000000..c40488766
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c
new file mode 100644
index 000000000..202611e7f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c
@@ -0,0 +1,303 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadN
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadN spreads the local array containing local pieces of U, so
+ * that on exit to this function,  a piece of  U  is contained in every
+ * process row. The array IPLEN contains the number of rows of U,  that
+ * should be spread on any given process row. This function also probes
+ * for the presence of the column panel PBCST. In case of success, this
+ * panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+ * mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies  the  local number of columns of U. N
+ *         must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that  IPLEN[i]  is the  total  number  of  rows of  U
+ *         in the processes before process IPMAP[i], with the convention
+ *         that IPLEN[nprow] is the total number of rows. In other words
+ *         IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+ *         should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype              type;
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;   /* only positions 0..SRCDIST */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 := largest power of two <= npm1,  mask := 2 * ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];   /* NOTE: not read on this (left) path */
+
+      do
+      {
+         mask ^= ip2;
+
+         if( ( mydist & mask ) == 0 )
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); 
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;   /* only positions >= SRCDIST */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 := largest power of two <= npm1,  mask := 2 * ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;
+      lgth    = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U to the right - offset the IPLEN, and IPMAP arrays
+ */
+      do
+      {
+         mask ^= ip2;
+
+         if( ( mydist & mask ) == 0 )
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow )
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
+/*
+ * End of HPL_spreadN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o
new file mode 100644
index 000000000..566eb66ab
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c
new file mode 100644
index 000000000..1adf93507
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c
@@ -0,0 +1,372 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadT
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadT spreads  the local array containing local pieces of U, so
+ * that on exit from this function, a piece of  U  is contained in every
+ * process row.  The array  IPLEN  contains the number of columns of U,
+ * that should be spread on any given process row.  This function  also
+ * probes for the presence of  the column panel  PBCST.  If  available,
+ * this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+ * probing mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the local number of rows of U. N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU is the local leading dimension of U (>= MAX(1,N)).
+ *         The enabled transfer code sends lbuf*N doubles: it assumes LDU == N.
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U, as a position in the IPMAP ordering.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         holds cumulative offsets:  IPLEN[i] is the number of  columns
+ *         of U in the processes before process IPMAP[i],  with the con-
+ *         vention that IPLEN[nprow] is the total number of columns.  In
+ *         other words IPLEN[i+1] - IPLEN[i] is the number of columns of
+ *         U that should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  ( type is only used by the disabled #if 0 code )
+ */
+#if 0
+   MPI_Datatype              type;
+#endif
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U  ( point-to-point messages within the column communicator )
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 <- largest power of 2 <= npm1;  mask <- 2*ip2 - 1 (bit ip2 and below) */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
+      lgth    = IPLEN[nprow];            /* not read in this branch */
+
+      do
+      {
+         mask ^= ip2;                    /* clear bit ip2 in mask */
+
+         if( ( mydist & mask ) == 0 )
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )        /* bit ip2 set: receive */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed equal to N: lbuf*N contiguous doubles, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* else send, if partner exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed equal to N: lbuf*N contiguous doubles, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
+/* ip2 <- largest power of 2 <= npm1;  mask <- 2*ip2 - 1 (bit ip2 and below) */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2;
+/*
+ * Spread to the right - offset the IPLEN and IPMAP arrays
+ */
+      lgth = IPLEN[SRCDIST+nprow];
+/*
+ * Spread U
+ */
+      do
+      {
+         mask ^= ip2;                    /* clear bit ip2 in mask */
+
+         if( ( mydist & mask ) == 0 )
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;
+
+               if( mydist & ip2 )        /* bit ip2 set: receive */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed equal to N: lbuf*N contiguous doubles, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* else send, if partner exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed equal to N: lbuf*N contiguous doubles, no MPI datatype
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+/* Once an MPI call fails, later MPI calls are skipped; abort is raised here */
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
+/*
+ * End of HPL_spreadT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o
new file mode 100644
index 000000000..710235018
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c
new file mode 100644
index 000000000..c14ef0fd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dmatgen
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_dmatgen
+( M, N, A, LDA, ISEED )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dmatgen fills the M-by-N matrix A with pseudo-random values,  one
+ * HPL_rand() draw per entry, visiting the entries column after column.
+ *
+ * The sequence is (re)started from ISEED on every call.  The generator
+ * is the linear congruential scheme  X(n+1) = (a * X(n) + c) mod m  of
+ * Knuth, The Art of Computer Programming, Vol. 2 (1973).
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input)                       const int
+ *         Number of rows of A. Nothing is generated when M <= 0.
+ *
+ * N       (input)                       const int
+ *         Number of columns of A. Nothing is generated when N <= 0.
+ *
+ * A       (output)                      double *
+ *         Array of dimension (LDA,N).  On exit, its leading M-by-N part
+ *         holds the generated coefficients.
+ *
+ * LDA     (input)                       const int
+ *         Leading dimension of A; LDA must be at least max(1,M).
+ *         The LDA - M trailing entries of each column are not written.
+ *
+ * ISEED   (input)                       const int
+ *         Seed of the random sequence; ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        mult[2], iadd[2], jseed[2],
+                              iran1[2], ia1[2], ic1[2];
+   int                        col, row;
+   const int                  gap = LDA - M;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( M < 1 || N < 1 ) return;
+/*
+ * Set up the multiplier, the increment and the seed, then start the sequence
+ */
+   mult[0] = HPL_MULT0; iadd[0] = HPL_IADD0; jseed[0] = ISEED;
+   mult[1] = HPL_MULT1; iadd[1] = HPL_IADD1; jseed[1] = 0;
+   HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 );
+   HPL_setran( 0, iran1 );
+   HPL_setran( 1, ia1 );
+   HPL_setran( 2, ic1 );
+/*
+ * Fill A one column at a time, then step over the LDA - M padding entries
+ */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ ) *A++ = HPL_rand();
+      A += gap;
+   }
+/*
+ * End of HPL_dmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o
new file mode 100644
index 000000000..a2ea27c62
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c
new file mode 100644
index 000000000..4d4dc4db5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_jumpit
+(
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM
+)
+#else
+void HPL_jumpit
+( MULT, IADD, IRANN, IRANM )
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_jumpit jumps in one step from the number X(n), encoded in IRANN,
+ * to the number X(m), encoded in IRANM on exit:
+ *
+ *    X(m) = A * X(n) + C,   A encoded in MULT,  C encoded in IADD.
+ *
+ * A and C depend on m and n; see HPL_xjumpm for how to initialize them.
+ *
+ * Arguments
+ * =========
+ *
+ * MULT    (local input)                 int *
+ *         Array of dimension 2: 16-lower and 15-higher bits of A.
+ *
+ * IADD    (local input)                 int *
+ *         Array of dimension 2: 16-lower and 15-higher bits of C.
+ *
+ * IRANN   (local input)                 int *
+ *         Array of dimension 2: 16-lower and 15-higher bits of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         Array of dimension 2.  On exit, it holds respectively the
+ *         16-lower and 15-higher bits of the encoding of X(m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                          prod[2];
+/* ..
+ * .. Executable Statements ..
+ */
+/* prod  = IRANN * MULT */
+   HPL_lmul( IRANN, MULT, prod );
+/* IRANM = prod  + IADD */
+   HPL_ladd( prod, IADD, IRANM );
+/* irand = IRANM, stored through HPL_setran( 0, ... ) */
+   HPL_setran( 0, IRANM );
+/*
+ * End of HPL_jumpit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o
new file mode 100644
index 000000000..65b616d11
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c
new file mode 100644
index 000000000..0d4e4c08c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c
@@ -0,0 +1,126 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_ladd
+(
+   int *                            J,
+   int *                            K,
+   int *                            I
+)
+#else
+void HPL_ladd
+( J, K, I )
+   int *                            J;
+   int *                            K;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ladd adds two long positive integers K and J modulo 2^64 (only the
+ * carry out of the most significant 16 bits is dropped) and  puts  the
+ * result into I. The long integers I, J, K are encoded on 64 bits using
+ * an array of 2 integers.  The 32-lower bits are stored in  the  first
+ * entry of each array, the 32-higher bits in the second entry.
+ *
+ * Arguments
+ * =========
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded result.  I may alias J or K.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables (K and J are split into 16-bit digits up front) ..
+ */
+   unsigned int        itmp0, itmp1;
+   unsigned int        ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16;
+   unsigned int        ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16;
+   unsigned int        jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16;
+   unsigned int        jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16;
+
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ *    K[1] K[0] K  I[0]  = (K[0]+J[0]) % 2^32
+ *    XXXX XXXX    carry = (K[0]+J[0]) / 2^32
+ *
+ * +  J[1] J[0] J  I[1] = K[1] + J[1] + carry
+ *    XXXX XXXX    I[1] = I[1] % 2^32
+ *    -------------
+ *    I[1] I[0]
+ *    XXXX XXXX I  (each X is one hex digit; sums are done per 16 bits)
+ */
+   itmp0 = ktmp0 + jtmp0;                  /* digit 0 sum; bit 16 = carry */
+   itmp1 = itmp0 >> 16;         I[0] = itmp0 - (itmp1 << 16 );
+   itmp1 += ktmp1 + jtmp1;      I[0] |= (itmp1 & 65535) << 16;
+   itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2;  /* digit 2 sum + carry-in */
+   I[1] = itmp0 - ((itmp0 >> 16 ) << 16);
+   itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3;  /* digit 3 sum + carry-in */
+   I[1] |= (itmp1 & 65535) << 16;          /* carry out of digit 3 is dropped */
+/*
+ * End of HPL_ladd
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o
new file mode 100644
index 000000000..2d0724592
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c
new file mode 100644
index 000000000..254b192f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_lmul
+(
+   int *                            K,
+   int *                            J,
+   int *                            I
+)
+#else
+void HPL_lmul
+( K, J, I )
+   int *                            K;
+   int *                            J;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_lmul multiplies two long positive integers K and J modulo 2^64 (the
+ * bits of the product above bit 63 are dropped) and puts the result into
+ * I.  The long integers I, J, K are encoded on 64 bits using an array of
+ * 2 integers.  The 32-lower bits are stored in the first entry  of  each
+ * array, the 32-higher bits in the second entry.  The modulo reduction is
+ * done inline with 16-bit masks and shifts rather than the % operator.
+ *
+ * Arguments
+ * =========
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded result.  I may alias K or J.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        r, c;
+   unsigned int               kk[4], jj[4], res[5];
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Multiplication is done 16 bits at a time. Multiplying two 16-bit
+ * integers yields a 32-bit result. The lower 16 bits of the result
+ * are kept in res[c], and the higher 16 bits are carried over into
+ * res[c+1], i.e. into the next 16-bit column of the product.
+ */
+   for (c = 0; c < 2; ++c) {              /* split K, J into 16-bit digits */
+     kk[2*c] = K[c] & 65535;
+     kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535;
+     jj[2*c] = J[c] & 65535;
+     jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535;
+   }
+
+   res[0] = 0;
+   for (c = 0; c < 4; ++c) {              /* column c of the product */
+     res[c+1] = (res[c] >> 16) & 65535;   /* move pending carry up a column */
+     res[c] &= 65535;
+     for (r = 0; r < c+1; ++r) {          /* all digit pairs with r+(c-r) == c */
+       res[c] = kk[r] * jj[c-r] + (res[c] & 65535);
+       res[c+1] += (res[c] >> 16) & 65535;
+     }
+   }
+
+   for (c = 0; c < 2; ++c)                /* repack res[0..3]; res[4] is unused */
+     I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535));
+/*
+ * End of HPL_lmul
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o
new file mode 100644
index 000000000..af6abfe4c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c
new file mode 100644
index 000000000..fe4e12f5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c
@@ -0,0 +1,94 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_rand( void )
+#else
+double HPL_rand()
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rand generates  the next number  in the  random  sequence.  This
+ * function  ensures  that this number lies in the interval (-0.5, 0.5].
+ *  
+ * The static array irand (owned by HPL_setran) holds the 2 integers that
+ * encode the current entry X(n) of the sequence. HPL_setran( 3, j ) copies
+ * it into j and advances irand to  X(n+1) = a * X(n) + c.  The value that
+ * is returned is  HPL_HALF - (2^32 * j[1] + j[0]) / d,  where the scaling
+ * d is built from HPL_POW16, HPL_DIVFAC and HPL_HALF, which are defined
+ * outside this file (presumably d = 2^64 - TODO confirm in the headers).
+ * The constants a and c should have been preliminarily stored in the arrays
+ * ias and ics as 2 pairs of integers.  The initialization of  ias,  ics
+ * and  irand  is performed by the function HPL_setran.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_setran( 3, j );
+/*
+ * return number between -0.5 and 0.5; each entry of j is rebuilt from its
+ * two 16-bit halves so that it is treated as an unsigned 32-bit quantity
+ */
+   return( HPL_HALF -
+           (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF +
+           (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF );
+/*
+ * End of HPL_rand
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o
new file mode 100644
index 000000000..99981cf0e
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c
new file mode 100644
index 000000000..1a3ca73aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c
@@ -0,0 +1,115 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int       ias[2], ics[2], irand[2];   /* encoded a, c and X(n) */
+
+#ifdef STDC_HEADERS
+void HPL_setran
+(
+   const int                        OPTION,
+   int *                            IRAN
+)
+#else
+void HPL_setran
+( OPTION, IRAN )
+   const int                        OPTION;
+   int *                            IRAN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_setran initializes  the random generator with the encoding of the
+ * first number X(0) in the sequence,  and the constants a and c used to
+ * compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+ * a and c are stored in the static variables  irand, ias and ics.  When
+ * OPTION is 0 (resp. 1 and 2),  irand (resp. ias and ics) is set to the
+ * values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+ * current irand, then irand = irand*ias + ics. Other OPTIONs do nothing.
+ *
+ * Arguments
+ * =========
+ *
+ * OPTION  (local input)                 const int
+ *         On entry, OPTION  is an integer that specifies the operations
+ *         to be performed on the random generator as specified above.
+ *
+ * IRAN    (local input/output)          int *
+ *         Array of dimension 2 holding the 32-lower and 32-higher bits
+ *         of an encoded number: input for OPTION 0-2, output for 3.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2];     /* scratch for irand * ias */
+/* ..
+ * .. Executable Statements ..
+ */
+   if(      OPTION == 3 )
+   {                                       /* return current value */
+      IRAN[0] = irand[0]; IRAN[1] = irand[1];
+      HPL_lmul( irand, ias, j );         /* j     = irand * ias;   */
+      HPL_ladd( j, ics, irand );         /* irand = j     + ics;   */
+   } 
+   else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; }
+   else if( OPTION == 1 ) { ias  [0] = IRAN[0]; ias  [1] = IRAN[1]; }
+   else if( OPTION == 2 ) { ics  [0] = IRAN[0]; ics  [1] = IRAN[1]; }
+/*
+ * End of HPL_setran
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o
new file mode 100644
index 000000000..5c8c2451b
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c
new file mode 100644
index 000000000..ae70bbc16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c
@@ -0,0 +1,158 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_xjumpm
+(
+   const int                        JUMPM,
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM,
+   int *                            IAM,
+   int *                            ICM
+)
+#else
+void HPL_xjumpm
+( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM )
+   const int                        JUMPM;
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+   int *                            IAM;
+   int *                            ICM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_xjumpm computes  the constants  A and C  to jump JUMPM numbers in
+ * the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+ * MULT and IADD  specify  how to jump from one entry in the sequence to
+ * the next.  A and C are built by JUMPM-1 successive compositions.
+ *
+ * Arguments
+ * =========
+ *
+ * JUMPM   (local input)                 const int
+ *         On entry,  JUMPM  specifies  the  number  of entries  in  the
+ *         sequence to jump over.  When JUMPM is less than or equal to 0,
+ *         A and C are not computed, IRANM is set to IRANN corresponding
+ *         to a jump of size zero.
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2,  that contains the
+ *         32-lower  and 32-higher bits of the constant  a  to jump from
+ *         X(n) to X(n+1) = a*X(n) + c in the random sequence.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2,  that contains the
+ *         32-lower  and 32-higher bits of the constant  c  to jump from
+ *         X(n) to X(n+1) = a*X(n) + c in the random sequence.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry, IRANN is an array of dimension 2, that contains the
+ *         32-lower and 32-higher bits of the encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.   On exit, this
+ *         array  contains respectively  the 32-lower and 32-higher bits
+ *         of the encoding of X(n+JUMPM).
+ *
+ * IAM     (local output)                int *
+ *         On entry, IAM is an array of dimension 2. On exit, when JUMPM
+ *         is  greater  than  zero,  this  array  contains  the  encoded
+ *         constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+ *         sequence. IAM(0:1)  contains  respectively  the  32-lower and
+ *         32-higher bits of this constant A.  When JUMPM is less than or
+ *         equal to zero, this array is not referenced.
+ *
+ * ICM     (local output)                int *
+ *         On entry, ICM is an array of dimension 2. On exit, when JUMPM
+ *         is  greater  than  zero,  this  array  contains  the  encoded
+ *         constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+ *         sequence. ICM(0:1)  contains  respectively  the  32-lower and
+ *         32-higher bits of this constant C.  When JUMPM is less than or
+ *         equal to zero, this array is not referenced.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2], k;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( JUMPM > 0 )
+   {
+      IAM[0] = MULT[0]; IAM[1] = MULT[1];   /* IAM   = MULT;          */
+      ICM[0] = IADD[0]; ICM[1] = IADD[1];   /* ICM   = IADD;          */
+      for( k = 1; k <= JUMPM-1; k++ )       /* compose one more step  */
+      {
+         HPL_lmul( IAM, MULT, j );          /* j     = IAM   * MULT;  */
+         IAM[0] = j[0]; IAM[1] = j[1];      /* IAM   = j;             */
+         HPL_lmul( ICM, MULT, j );          /* j     = ICM   * MULT;  */
+         HPL_ladd( IADD, j, ICM );          /* ICM   = IADD  + j;     */
+      }
+      HPL_lmul( IRANN, IAM, j );            /* j     = IRANN * IAM;   */
+      HPL_ladd( j, ICM, IRANM );            /* IRANM = j     + ICM;   */
+   }
+   else
+   {                                        /* IRANM = IRANN          */
+      IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1];
+   }
+/*
+ * End of HPL_xjumpm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o
new file mode 100644
index 000000000..0fbb4ec34
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c
new file mode 100644
index 000000000..2d129c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmatgen
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_pdmatgen
+( GRID, M, N, NB, A, LDA, ISEED )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmatgen generates (or regenerates) a parallel random matrix A.
+ *  
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * A       (local output)                double *
+ *         On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+ *         On exit, this array contains the coefficients of the randomly
+ *         generated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ISEED   (global input)                const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd [2], ia1  [2], ia2  [2], ia3  [2],
+                              ia4  [2], ia5  [2], ib1  [2], ib2  [2],
+                              ib3  [2], ic1  [2], ic2  [2], ic3  [2],
+                              ic4  [2], ic5  [2], iran1[2], iran2[2],
+                              iran3[2], iran4[2], itmp1[2], itmp2[2],
+                              itmp3[2], jseed[2], mult [2];
+   int                        ib, iblk, ik, jb, jblk, jk, jump1, jump2,
+                              jump3, jump4, jump5, jump6, jump7, lmb,
+                              lnb, mblks, mp, mycol, myrow, nblks,
+                              npcol, nprow, nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;   /* mult  = a          */
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;   /* iadd  = c          */
+   jseed[0] = ISEED;     jseed[1] = 0;           /* jseed = ISEED      */
+/*
+ * Generate an M by N matrix starting in process (0,0)
+ */
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );    /* mp: local rows     */
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );    /* nq: local columns  */
+
+   if( ( mp <= 0 ) || ( nq <= 0 ) ) return;      /* A is left as is    */
+/*
+ * Local number of blocks and size of the last one
+ */
+   mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB;
+   nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB;
+/*
+ * Compute multiplier/adder for various jumps in random sequence
+ */
+   jump1 = 1;  jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB;
+   jump5 = NB; jump6 = mycol;      jump7 = myrow * NB;
+
+   HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1,   ic1   );
+   HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2,   ic2   );
+   HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3,   ic3   );
+   HPL_xjumpm( jump4, ia3,  ic3,  iran1, itmp1, ia4,   ic4   );
+   HPL_xjumpm( jump5, ia3,  ic3,  iran1, itmp1, ia5,   ic5   );
+   HPL_xjumpm( jump6, ia5,  ic5,  iran1, itmp3, itmp1, itmp2 );
+   HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 );
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Save value of first number in sequence
+ */
+   ib1[0] = iran1[0]; ib1[1] = iran1[1];         /* ib1   = iran1      */
+   ib2[0] = iran1[0]; ib2[1] = iran1[1];         /* ib2   = iran1      */
+   ib3[0] = iran1[0]; ib3[1] = iran1[1];         /* ib3   = iran1      */
+
+   for( jblk = 0; jblk < nblks; jblk++ )
+   {
+      jb = ( jblk == nblks - 1 ? lnb : NB );     /* last block: lnb    */
+      for( jk = 0; jk < jb; jk++ )
+      {
+         for( iblk = 0; iblk < mblks; iblk++ )
+         {
+            ib = ( iblk == mblks - 1 ? lmb : NB );  /* last block: lmb */
+            for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand();
+            HPL_jumpit( ia2, ic2, ib1, iran2 );  /* uses jump2 pair    */
+            ib1[0] = iran2[0]; ib1[1] = iran2[1];   /* ib1 = iran2     */
+         }
+         A += LDA - mp;              /* mp entries written; next column */
+         HPL_jumpit( ia3, ic3, ib2, iran3 );     /* uses jump3 pair    */
+         ib1[0] = iran3[0]; ib1[1] = iran3[1];   /* ib1 = iran3        */
+         ib2[0] = iran3[0]; ib2[1] = iran3[1];   /* ib2 = iran3        */
+      }
+      HPL_jumpit( ia4, ic4, ib3, iran4 );        /* uses jump4 pair    */
+      ib1[0] = iran4[0]; ib1[1] = iran4[1];      /* ib1 = iran4        */
+      ib2[0] = iran4[0]; ib2[1] = iran4[1];      /* ib2 = iran4        */
+      ib3[0] = iran4[0]; ib3[1] = iran4[1];      /* ib3 = iran4        */
+   }
+/*
+ * End of HPL_pdmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o
new file mode 100644
index 000000000..1965382af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c
new file mode 100644
index 000000000..5e4050f48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c
@@ -0,0 +1,293 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int main
+(
+   int                        ARGC,
+   char                       * * ARGV
+)
+#else
+int main( ARGC, ARGV )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        ARGC;
+/*
+ * .. Array Arguments ..
+ */
+   char                       * * ARGV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * main is the main driver program for testing the HPL routines.
+ * This  program is  driven  by  a short data file named  "HPL.dat".
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        nval  [HPL_MAX_PARAM],
+                              nbval [HPL_MAX_PARAM],
+                              pval  [HPL_MAX_PARAM],
+                              qval  [HPL_MAX_PARAM],
+                              nbmval[HPL_MAX_PARAM],
+                              ndvval[HPL_MAX_PARAM],
+                              ndhval[HPL_MAX_PARAM];
+
+   HPL_T_FACT                 pfaval[HPL_MAX_PARAM],
+                              rfaval[HPL_MAX_PARAM];
+
+   HPL_T_TOP                  topval[HPL_MAX_PARAM];
+
+   HPL_T_grid                 grid;
+   HPL_T_palg                 algo;
+   HPL_T_test                 test;
+   int                        L1notran, Unotran, align, equil, in, inb,
+                              inbm, indh, indv, ipfa, ipq, irfa, itop,
+                              mycol, myrow, ns, nbs, nbms, ndhs, ndvs,
+                              npcol, npfs, npqs, nprow, nrfs, ntps, 
+                              rank, size, tswap;
+   HPL_T_ORDER                pmapping;
+   HPL_T_FACT                 rpfa;
+   HPL_T_SWAP                 fswap;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Init( &ARGC, &ARGV );
+#ifdef HPL_CALL_VSIPL
+   vsip_init((void*)0);
+#endif
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Read and check validity of test parameters from input file
+ *
+ * HPL Version 1.0, Linpack benchmark input file
+ * Your message here
+ * HPL.out      output file name (if any)
+ * 6            device out (6=stdout,7=stderr,file)
+ * 4            # of problems sizes (N)
+ * 29 30 34 35  Ns
+ * 4            # of NBs
+ * 1 2 3 4      NBs
+ * 0            PMAP process mapping (0=Row-,1=Column-major)
+ * 3            # of process grids (P x Q)
+ * 2 1 4        Ps
+ * 2 4 1        Qs
+ * 16.0         threshold
+ * 3            # of panel fact
+ * 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+ * 2            # of recursive stopping criterium
+ * 2 4          NBMINs (>= 1)
+ * 1            # of panels in recursion
+ * 2            NDIVs
+ * 3            # of recursive panel fact.
+ * 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+ * 1            # of broadcast
+ * 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ * 1            # of lookahead depth
+ * 0            DEPTHs (>=0)
+ * 2            SWAP (0=bin-exch,1=long,2=mix)
+ * 4            swapping threshold
+ * 0            L1 in (0=transposed,1=no-transposed) form
+ * 0            U  in (0=transposed,1=no-transposed) form
+ * 1            Equilibration (0=no,1=yes)
+ * 8            memory alignment in double (> 0)
+ */
+   HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval,
+               &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval,
+               &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran,
+               &Unotran, &equil, &align );
+/*
+ * Loop over different process grids - Define process grid. Go to bottom
+ * of process grid loop if this case does not use my process.
+ */
+   for( ipq = 0; ipq < npqs; ipq++ )
+   {
+      (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq],
+                            &grid );
+      (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol );
+
+      if( ( myrow < 0 ) || ( myrow >= nprow ) ||
+          ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs;
+
+      for( in = 0; in < ns; in++ )
+      {                            /* Loop over various problem sizes */
+       for( inb = 0; inb < nbs; inb++ )
+       {                        /* Loop over various blocking factors */
+        for( indh = 0; indh < ndhs; indh++ )
+        {                       /* Loop over various lookahead depths */
+         for( itop = 0; itop < ntps; itop++ )
+         {                  /* Loop over various broadcast topologies */
+          for( irfa = 0; irfa < nrfs; irfa++ )
+          {             /* Loop over various recursive factorizations */
+           for( ipfa = 0; ipfa < npfs; ipfa++ )
+           {                /* Loop over various panel factorizations */
+            for( inbm = 0; inbm < nbms; inbm++ )
+            {        /* Loop over various recursive stopping criteria */
+             for( indv = 0; indv < ndvs; indv++ )
+             {          /* Loop over various # of panels in recursion */
+/*
+ * Set up the algorithm parameters
+ */
+              algo.btopo = topval[itop]; algo.depth = ndhval[indh];
+              algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv];
+
+              algo.pfact = rpfa = pfaval[ipfa];  /* panel fact. first */
+
+              if( L1notran != 0 )        /* L1 in no-transposed form */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrN;
+                 else                           algo.pffun = HPL_pdpanrlN;
+
+                 algo.rfact = rpfa = rfaval[irfa];  /* rpfa is reused */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrN;
+                 else                           algo.rffun = HPL_pdrpanrlN;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN;
+                 else               algo.upfun = HPL_pdupdateNT;
+              }
+              else                       /* L1 in transposed form    */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrT;
+                 else                           algo.pffun = HPL_pdpanrlT;
+
+                 algo.rfact = rpfa = rfaval[irfa];  /* rpfa is reused */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrT;
+                 else                           algo.rffun = HPL_pdrpanrlT;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN;
+                 else               algo.upfun = HPL_pdupdateTT;
+              }
+
+              algo.fswap = fswap; algo.fsthr = tswap;
+              algo.equil = equil; algo.align = align;
+
+              HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] );
+
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+      (void) HPL_grid_exit( &grid );
+label_end_of_npqs: ;   /* goto target: HPL_grid_exit skipped if not in grid */
+   }
+/*
+ * Print ending messages, close output file, exit.
+ */
+   if( rank == 0 )
+   {
+      test.ktest = test.kpass + test.kfail + test.kskip;  /* total */
+#ifndef HPL_DETAILED_TIMING
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+#else /* detailed timing: separator only when residual checks are on */
+      if( test.thrsh > HPL_rzero )
+         HPL_fprintf( test.outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+#endif
+
+      HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest,
+                   "tests with the following results:" );
+      if( test.thrsh > HPL_rzero )
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed and passed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kfail,
+                      "tests completed and failed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+      else
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed without checking," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( test.outfp, "\nEnd of Tests.\n" );
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      if( ( test.outfp != stdout ) && ( test.outfp != stderr ) )
+         (void) fclose( test.outfp );
+   }
+#ifdef HPL_CALL_VSIPL
+   vsip_finalize((void*)0);
+#endif
+   MPI_Finalize();
+   exit( 0 );                        /* process terminates here       */
+
+   return( 0 );                      /* unreachable after exit()      */
+/*
+ * End of main
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o
new file mode 100644
index 000000000..f087c3d97
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c
new file mode 100644
index 000000000..4ede45be6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c
@@ -0,0 +1,1182 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdinfo
+(
+   HPL_T_test *                     TEST,
+   int *                            NS,
+   int *                            N,
+   int *                            NBS,
+   int *                            NB,
+   HPL_T_ORDER *                    PMAPPIN,
+   int *                            NPQS,
+   int *                            P,
+   int *                            Q,
+   int *                            NPFS,
+   HPL_T_FACT *                     PF,
+   int *                            NBMS,
+   int *                            NBM,
+   int *                            NDVS,
+   int *                            NDV,
+   int *                            NRFS,
+   HPL_T_FACT *                     RF,
+   int *                            NTPS,
+   HPL_T_TOP *                      TP,
+   int *                            NDHS,
+   int *                            DH,
+   HPL_T_SWAP *                     FSWAP,
+   int *                            TSWAP,
+   int *                            L1NOTRAN,
+   int *                            UNOTRAN,
+   int *                            EQUIL,
+   int *                            ALIGN
+)
+#else
+void HPL_pdinfo
+( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
+   HPL_T_test *                     TEST;
+   int *                            NS;
+   int *                            N;
+   int *                            NBS;
+   int *                            NB;
+   HPL_T_ORDER *                    PMAPPIN;
+   int *                            NPQS;
+   int *                            P;
+   int *                            Q;
+   int *                            NPFS;
+   HPL_T_FACT *                     PF;
+   int *                            NBMS;
+   int *                            NBM;
+   int *                            NDVS;
+   int *                            NDV;
+   int *                            NRFS;
+   HPL_T_FACT *                     RF;
+   int *                            NTPS;
+   HPL_T_TOP *                      TP;
+   int *                            NDHS;
+   int *                            DH;
+   HPL_T_SWAP *                     FSWAP;
+   int *                            TSWAP;
+   int *                            L1NOTRAN;
+   int *                            UNOTRAN;
+   int *                            EQUIL;
+   int *                            ALIGN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdinfo reads  the  startup  information for the various tests and
+ * transmits it to all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global output)               HPL_T_test *
+ *         On entry, TEST  points to a testing data structure.  On exit,
+ *         the fields of this data structure are initialized as follows:
+ *         TEST->outfp  specifies the output file where the results will
+ *         be printed.  It is only defined and used by  the process 0 of
+ *         the grid.  TEST->thrsh specifies the  threshold value for the
+ *         test ratio.  TEST->epsil is the relative machine precision of
+ *         the distributed computer.  Finally  the test counters, kfail,
+ *         kpass, kskip, ktest are initialized to zero.
+ *
+ * NS      (global output)               int *
+ *         On exit,  NS  specifies the number of different problem sizes
+ *         to be tested. NS is less than or equal to HPL_MAX_PARAM.
+ *
+ * N       (global output)               int *
+ *         On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+ *         the first NS entries of this array contain the  problem sizes
+ *         to run the code with.
+ *
+ * NBS     (global output)               int *
+ *         On exit,  NBS  specifies the number of different distribution
+ *         blocking factors to be tested. NBS must be less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NB      (global output)               int *
+ *         On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NBS entries of this array contain the values of the
+ *         various distribution blocking factors, to run the code with.
+ *
+ * PMAPPIN (global output)               HPL_T_ORDER *
+ *         On exit,  PMAPPIN  specifies the process mapping onto the no-
+ *         des of the  MPI machine configuration.  PMAPPIN  defaults  to
+ *         row-major ordering.
+ *
+ * NPQS    (global output)               int *
+ *         On exit, NPQS  specifies the  number of different values that
+ *         can be used for P and Q, i.e., the number of process grids to
+ *         run  the  code with.  NPQS must be  less  than  or  equal  to
+ *         HPL_MAX_PARAM.
+ *
+ * P       (global output)               int *
+ *         On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of P,
+ *         the number of process rows of the  NPQS grids to run the code
+ *         with.
+ *
+ * Q       (global output)               int *
+ *         On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of Q,
+ *         the number of process columns of the  NPQS  grids to  run the
+ *         code with.
+ *
+ * NPFS    (global output)               int *
+ *         On exit, NPFS  specifies the  number of different values that
+ *         can be used for PF : the panel factorization algorithm to run
+ *         the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * PF      (global output)               HPL_T_FACT *
+ *         On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NPFS  entries  of this array  contain  the various
+ *         panel factorization algorithms to run the code with.
+ *
+ * NBMS    (global output)               int *
+ *         On exit,  NBMS  specifies  the  number  of  various recursive
+ *         stopping criteria  to be tested.  NBMS  must be  less than or
+ *         equal to HPL_MAX_PARAM.
+ *
+ * NBM     (global output)               int *
+ *         On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NBMS entries of this array contain the values
+ *         of the various recursive stopping criteria to be tested.
+ *
+ * NDVS    (global output)               int *
+ *         On exit,  NDVS  specifies  the number  of various numbers  of
+ *         panels in recursion to be tested.  NDVS is less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NDV     (global output)               int *
+ *         On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDVS entries of this array contain the values
+ *         of the various numbers of panels in recursion to be tested.
+ *
+ * NRFS    (global output)               int *
+ *         On exit, NRFS  specifies the  number of different values that
+ *         can be used for RF : the recursive factorization algorithm to
+ *         be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * RF      (global output)               HPL_T_FACT *
+ *         On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NRFS  entries  of  this array contain  the various
+ *         recursive factorization algorithms to run the code with.
+ *
+ * NTPS    (global output)               int *
+ *         On exit, NTPS  specifies the  number of different values that
+ *         can be used for the  broadcast topologies  to be tested. NTPS
+ *         is less than or equal to HPL_MAX_PARAM.
+ *
+ * TP      (global output)               HPL_T_TOP *
+ *         On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the  first NTPS  entries of this  array  contain  the various
+ *         broadcast (along rows) topologies to run the code with.
+ *
+ * NDHS    (global output)               int *
+ *         On exit, NDHS  specifies the  number of different values that
+ *         can be used for the  lookahead depths to be  tested.  NDHS is
+ *         less than or equal to HPL_MAX_PARAM.
+ *
+ * DH      (global output)               int *
+ *         On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDHS entries of this array contain the values
+ *         of lookahead depths to run the code with.  Each value is  at
+ *         least 0, where 0 means no lookahead.
+ *
+ * FSWAP   (global output)               HPL_T_SWAP *
+ *         On exit, FSWAP specifies the swapping algorithm to be used in
+ *         all tests.
+ *
+ * TSWAP   (global output)               int *
+ *         On exit,  TSWAP  specifies the swapping threshold as a number
+ *         of columns when the mixed swapping algorithm was chosen.
+ *
+ * L1NOTRAN (global output)              int *
+ *         On exit, L1NOTRAN specifies whether the upper triangle of the
+ *         panels of columns  should  be stored  in  no-transposed  form
+ *         (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+ *
+ * UNOTRAN (global output)               int *
+ *         On exit, UNOTRAN  specifies whether the panels of rows should
+ *         be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+ *         form (UNOTRAN=0) during their broadcast.
+ *
+ * EQUIL   (global output)               int *
+ *         On exit,  EQUIL  specifies  whether  equilibration during the
+ *         swap-broadcast  of  the  panel of rows  should  be  performed
+ *         (EQUIL=1) or not (EQUIL=0).
+ *
+ * ALIGN   (global output)               int *
+ *         On exit,  ALIGN  specifies the alignment  of  the dynamically
+ *         allocated buffers in double precision words. ALIGN is greater
+ *         than zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   char                       file[HPL_LINE_MAX], line[HPL_LINE_MAX],
+                              auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
+   FILE                       * infp;
+   int                        * iwork = NULL;
+   char                       * lineptr;
+   int                        error=0, fid, i, j, lwork, maxp, nprocs,
+                              rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Initialize the TEST data structure with default values
+ */
+   TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
+   TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
+/*
+ * Process 0 reads the input data, broadcasts to other processes and
+ * writes needed information to TEST->outfp.
+ */
+   if( rank == 0 )
+   {
+/*
+ * Open file and skip data file header
+ */
+      if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
+      { 
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "cannot open file HPL.dat" );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) fgets( auth, HPL_LINE_MAX - 2, infp );
+/*
+ * Read name and unit number for summary output file
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", file );
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num  );
+      fid = atoi( num );
+      if     ( fid == 6 ) TEST->outfp = stdout;
+      else if( fid == 7 ) TEST->outfp = stderr;
+      else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
+                    file );
+         error = 1; goto label_error;
+      }
+/*
+ * Read and check the parameter values for the tests.
+ *
+ * Problem size (>=0) (N)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); 
+      (void) sscanf( line, "%s", num ); *NS = atoi( num );
+      if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
+                    "Number of values of N is less than 1 or greater than",
+                    HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( N[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of N less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Block size (>=1) (NB)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBS = atoi( num );
+      if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NB is less than 1 or",
+                    "greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NB[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", 
+                       "Value of NB less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Process grids, mapping, (>=1) (P, Q)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num );
+      *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
+      if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of grids is less",
+                    "than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( P[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of P less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( Q[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of Q less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Check for enough processes in machine configuration
+ */
+      maxp = 0;
+      for( i = 0; i < *NPQS; i++ )
+      { nprocs   = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
+      if( maxp > size )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Need at least %d processes for these tests", maxp );
+         error = 1; goto label_error;
+      }
+/*
+ * Checking threshold value (TEST->thrsh)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
+/*
+ * Panel factorization algorithm (PF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
+      if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "number of values of PFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) PF[ i ] = HPL_CROUT;
+         else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
+         else              PF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Recursive stopping criterion (>=1) (NBM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
+      if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NBMIN",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBMS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NBM[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NBMIN less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Number of panels in recursion (>=2) (NDV)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
+      if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NDIV",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDVS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NDV[ i ] = atoi( num ) ) < 2 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NDIV less than 2" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Recursive panel factorization (RF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
+      if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of RFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NRFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) RF[ i ] = HPL_CROUT;
+         else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
+         else              RF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Broadcast topology (TP) (0=1rg, 1=1rM, 2=2rg, 3=2rM, 4=Lng, 5=LnM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
+      if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of BCAST",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NTPS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) TP[ i ] = HPL_1RING;
+         else if( j == 1 ) TP[ i ] = HPL_1RING_M;
+         else if( j == 2 ) TP[ i ] = HPL_2RING;
+         else if( j == 3 ) TP[ i ] = HPL_2RING_M;
+         else if( j == 4 ) TP[ i ] = HPL_BLONG;
+         else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
+         else              TP[ i ] = HPL_1RING_M;
+      }
+/*
+ * Lookahead depth (>=0) (NDH)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
+      if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of DEPTH",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDHS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num );
+         lineptr += strlen( num ) + 1;
+         if( ( DH[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of DEPTH less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Swapping algorithm (0,1 or 2) (FSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); j = atoi( num );
+      if(      j == 0 ) *FSWAP = HPL_SWAP00;
+      else if( j == 1 ) *FSWAP = HPL_SWAP01;
+      else if( j == 2 ) *FSWAP = HPL_SW_MIX;
+      else              *FSWAP = HPL_SWAP01;
+/*
+ * Swapping threshold (>=0) (TSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
+      if( *TSWAP <= 0 ) *TSWAP = 0;
+/*
+ * L1 in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
+      if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; 
+/*
+ * U  in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
+      if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
+/*
+ * Equilibration (0=no, 1=yes)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
+      if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
+/*
+ * Memory alignment in double precision words (> 0) (ALIGN)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
+      if( *ALIGN <= 0 ) *ALIGN = 4;
+/*
+ * Close input file
+ */
+label_error:
+      if (infp != NULL)
+      	(void) fclose( infp );
+   }
+   else { TEST->outfp = NULL; }
+/*
+ * Check for error on reading input file
+ */
+   (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
+                          MPI_COMM_WORLD );
+   if( error )
+   {
+      if( rank == 0 )
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Illegal input in file HPL.dat. Exiting ..." );
+      MPI_Finalize();
+#ifdef HPL_CALL_VSIPL
+      (void) vsip_finalize( NULL );
+#endif
+      exit( 1 );
+   }
+/*
+ * Compute and broadcast machine epsilon
+ */
+   TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
+/*
+ * Pack information arrays and broadcast
+ */
+   (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
+                         MPI_COMM_WORLD );
+/*
+ * Broadcast array sizes
+ */
+   iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      iwork[ 0] = *NS;      iwork[ 1] = *NBS;
+      iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
+      iwork[ 3] = *NPQS;    iwork[ 4] = *NPFS;     iwork[ 5] = *NBMS;
+      iwork[ 6] = *NDVS;    iwork[ 7] = *NRFS;     iwork[ 8] = *NTPS;
+      iwork[ 9] = *NDHS;    iwork[10] = *TSWAP;    iwork[11] = *L1NOTRAN;
+      iwork[12] = *UNOTRAN; iwork[13] = *EQUIL;    iwork[14] = *ALIGN;
+   }
+   (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
+   if( rank != 0 )
+   {
+      *NS       = iwork[ 0]; *NBS   = iwork[ 1];
+      *PMAPPIN  = ( iwork[ 2] == 0 ?  HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
+      *NPQS     = iwork[ 3]; *NPFS  = iwork[ 4]; *NBMS     = iwork[ 5];
+      *NDVS     = iwork[ 6]; *NRFS  = iwork[ 7]; *NTPS     = iwork[ 8];
+      *NDHS     = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
+      *UNOTRAN  = iwork[12]; *EQUIL = iwork[13]; *ALIGN    = iwork[14];
+   }
+   if( iwork ) free( iwork );
+/*
+ * Pack information arrays and broadcast
+ */
+   lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + 
+           (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
+   
+   if (lwork < 0)
+	exit(EXIT_FAILURE); 
+
+
+   iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { iwork[j] = N [i]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { iwork[j] = NB[i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( PF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( RF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      TP[i] == HPL_1RING   ) iwork[j] = 0;
+         else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
+         else if( TP[i] == HPL_2RING   ) iwork[j] = 2;
+         else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
+         else if( TP[i] == HPL_BLONG   ) iwork[j] = 4;
+         else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
+
+      if(      *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
+      else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
+      else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
+      j++;
+   }
+   (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
+                         MPI_COMM_WORLD );
+   if ((rank != 0) && (iwork != NULL))
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { N [i] = iwork[j]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { NB[i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
+
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      iwork[j] == 0 ) TP[i] = HPL_1RING;
+         else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
+         else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
+         else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
+         else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
+         else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
+
+      if(      iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
+      else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
+      else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
+      j++;
+   
+      if( iwork ) free( iwork );
+   }
+/*
+ * regurgitate input
+ */
+   if( rank == 0 )
+   {
+      
+      if (TEST->outfp != NULL){
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "HPLinpack 2.3  --  High-Performance Linpack benchmark  --  ",
+          " December 2, 2018" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Written by A. Petitet and R. Clint Whaley,  ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Piotr Luszczek, ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Julien Langou, ",
+          "University of Colorado Denver");
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      HPL_fprintf( TEST->outfp, "\n%s\n",
+          "An explanation of the input/output parameters follows:" );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "T/V    : Wall time / encoded variant." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "N      : The order of the coefficient matrix A." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "NB     : The partitioning blocking factor." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "P      : The number of process rows." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "Q      : The number of process columns." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "Time   : Time in seconds to solve the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n\n",
+         "Gflops : Rate of execution for solving the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "The following parameter values will be used:" );
+/*
+ * Problem size
+ */
+      HPL_fprintf( TEST->outfp,       "\nN      :" );
+      for( i = 0; i < Mmin( 8, *NS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", N[i]  );
+      if( *NS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", N[i]  );
+         if( *NS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", N[i]  );
+         }
+      }
+/*
+ * Distribution blocking factor
+ */
+      HPL_fprintf( TEST->outfp,       "\nNB     :" );
+      for( i = 0; i < Mmin( 8, *NBS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NB[i] );
+      if( *NBS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NB[i] );
+         if( *NBS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
+         }
+      }
+/*
+ * Process mapping
+ */
+      HPL_fprintf( TEST->outfp,       "\nPMAP   :" );
+      if(      *PMAPPIN == HPL_ROW_MAJOR    )
+         HPL_fprintf( TEST->outfp, " Row-major process mapping" );
+      else if( *PMAPPIN == HPL_COLUMN_MAJOR )
+         HPL_fprintf( TEST->outfp, " Column-major process mapping" );
+/*
+ * Process grid
+ */
+      HPL_fprintf( TEST->outfp,       "\nP      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", P[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", P[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", P[i]  );
+         }
+      }
+      HPL_fprintf( TEST->outfp,       "\nQ      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", Q[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", Q[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", Q[i]  );
+         }
+      }
+/*
+ * Panel Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nPFACT  :" );
+      for( i = 0; i < Mmin( 8, *NPFS ); i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( PF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( PF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NPFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPFS ); i++ )
+         {
+            if(      PF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( PF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( PF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NPFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPFS; i++ )
+            {
+               if(      PF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( PF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( PF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Recursive stopping criterion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNBMIN  :" );
+      for( i = 0; i < Mmin( 8, *NBMS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NBM[i]  );
+      if( *NBMS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBMS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NBM[i]  );
+         if( *NBMS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBMS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NBM[i]  );
+         }
+      }
+/*
+ * Number of panels in recursion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNDIV   :" );
+      for( i = 0; i < Mmin( 8, *NDVS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NDV[i]  );
+      if( *NDVS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDVS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NDV[i]  );
+         if( *NDVS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDVS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NDV[i]  );
+         }
+      }
+/*
+ * Recursive Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nRFACT  :" );
+      for( i = 0; i < Mmin( 8, *NRFS ); i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( RF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( RF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NRFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NRFS ); i++ )
+         {
+            if(      RF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( RF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( RF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NRFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NRFS; i++ )
+            {
+               if(      RF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( RF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( RF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Broadcast topology
+ */
+      HPL_fprintf( TEST->outfp,       "\nBCAST  :" );
+      for( i = 0; i < Mmin( 8, *NTPS ); i++ )
+      {
+         if(      TP[i] == HPL_1RING   )
+            HPL_fprintf( TEST->outfp,       "   1ring " );
+         else if( TP[i] == HPL_1RING_M )
+            HPL_fprintf( TEST->outfp,       "  1ringM " );
+         else if( TP[i] == HPL_2RING   )
+            HPL_fprintf( TEST->outfp,       "   2ring " );
+         else if( TP[i] == HPL_2RING_M )
+            HPL_fprintf( TEST->outfp,       "  2ringM " );
+         else if( TP[i] == HPL_BLONG   )
+            HPL_fprintf( TEST->outfp,       "   Blong " );
+         else if( TP[i] == HPL_BLONG_M )
+            HPL_fprintf( TEST->outfp,       "  BlongM " );
+      }
+      if( *NTPS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NTPS ); i++ )
+         {
+            if(      TP[i] == HPL_1RING   )
+               HPL_fprintf( TEST->outfp,       "   1ring " );
+            else if( TP[i] == HPL_1RING_M )
+               HPL_fprintf( TEST->outfp,       "  1ringM " );
+            else if( TP[i] == HPL_2RING   )
+               HPL_fprintf( TEST->outfp,       "   2ring " );
+            else if( TP[i] == HPL_2RING_M )
+               HPL_fprintf( TEST->outfp,       "  2ringM " );
+            else if( TP[i] == HPL_BLONG   )
+               HPL_fprintf( TEST->outfp,       "   Blong " );
+            else if( TP[i] == HPL_BLONG_M )
+               HPL_fprintf( TEST->outfp,       "  BlongM " );
+         }
+         if( *NTPS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NTPS; i++ )
+            {
+               if(      TP[i] == HPL_1RING   )
+                  HPL_fprintf( TEST->outfp,       "   1ring " );
+               else if( TP[i] == HPL_1RING_M )
+                  HPL_fprintf( TEST->outfp,       "  1ringM " );
+               else if( TP[i] == HPL_2RING   )
+                  HPL_fprintf( TEST->outfp,       "   2ring " );
+               else if( TP[i] == HPL_2RING_M )
+                  HPL_fprintf( TEST->outfp,       "  2ringM " );
+               else if( TP[i] == HPL_BLONG   )
+                  HPL_fprintf( TEST->outfp,       "   Blong " );
+               else if( TP[i] == HPL_BLONG_M )
+                  HPL_fprintf( TEST->outfp,       "  BlongM " );
+            }
+         }
+      }
+/*
+ * Lookahead depths
+ */
+      HPL_fprintf( TEST->outfp,       "\nDEPTH  :" );
+      for( i = 0; i < Mmin( 8, *NDHS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", DH[i]  );
+      if( *NDHS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDHS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", DH[i]  );
+         if( *NDHS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDHS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", DH[i]  );
+         }
+      }
+/*
+ * Swapping algorithm
+ */
+      HPL_fprintf( TEST->outfp,       "\nSWAP   :" );
+      if(      *FSWAP == HPL_SWAP00 )
+         HPL_fprintf( TEST->outfp, " Binary-exchange" );
+      else if( *FSWAP == HPL_SWAP01 )
+         HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
+      else if( *FSWAP == HPL_SW_MIX )
+         HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
+/*
+ * L1 storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nL1     :" );
+      if(      *L1NOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * U  storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nU      :" );
+      if(      *UNOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * Equilibration
+ */
+      HPL_fprintf( TEST->outfp,       "\nEQUIL  :" );
+      if(      *EQUIL != 0 )
+         HPL_fprintf( TEST->outfp, " yes" );
+      else
+         HPL_fprintf( TEST->outfp, " no" );
+/*
+ * Alignment
+ */
+      HPL_fprintf( TEST->outfp,       "\nALIGN  : %d double precision words",
+                   *ALIGN );
+
+      HPL_fprintf( TEST->outfp, "\n\n" );
+/*
+ * For testing only
+ */
+      if( TEST->thrsh > HPL_rzero )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The matrix A is randomly generated for each test." );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The following scaled residual check will be computed:" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "      ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
+         HPL_fprintf( TEST->outfp, "%s %21.6e\n",
+            "- The relative machine precision (eps) is taken to be     ",
+            TEST->epsil );
+         HPL_fprintf( TEST->outfp, "%s   %11.1f\n\n",
+            "- Computational tests pass if scaled residuals are less than      ",
+            TEST->thrsh );
+       }
+     }
+   }
+/*
+ * End of HPL_pdinfo
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o
new file mode 100644
index 000000000..6926b4095
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c
new file mode 100644
index 000000000..73a62a7ff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c
@@ -0,0 +1,438 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtest
+(
+   HPL_T_test *                     TEST,
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        N,
+   const int                        NB
+)
+#else
+void HPL_pdtest
+( TEST, GRID, ALGO, N, NB )
+   HPL_T_test *                     TEST;
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        N;
+   const int                        NB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtest performs  one  test  given a set of parameters such as the
+ * process grid, the  problem size, the distribution blocking factor ...
+ * This function generates  the data, calls  and times the linear system
+ * solver,  checks  the  accuracy  of the  obtained vector solution  and
+ * writes this information to the file pointed to by TEST->outfp.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global input)                HPL_T_test *
+ *         On entry,  TEST  points  to a testing data structure:  outfp
+ *         specifies the output file where the results will be printed.
+ *         It is only defined and used by the process  0  of the  grid.
+ *         thrsh  specifies  the  threshold  value  for the test ratio.
+ *         Concretely, a test is declared "PASSED"  if and only if the
+ *         following inequality is satisfied:
+ *         ||Ax-b||_oo / ( epsil *
+ *                         ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+ *                          N )  < thrsh.
+ *         epsil  is the  relative machine precision of the distributed
+ *         computer. Finally the test counters, kfail, kpass, kskip and
+ *         ktest are updated as follows:  if the test passes,  kpass is
+ *         incremented by one;  if the test fails, kfail is incremented
+ *         by one; if the test is skipped, kskip is incremented by one.
+ *         ktest is left unchanged.
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters to be used for this test.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the order of the coefficient matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   double                     HPL_w[HPL_TIMING_N];
+#endif
+   HPL_T_pmat                 mat;
+   double                     wtime[1]; /* max wall time of the solve */
+   int                        info[3]; /* alloc-failed flag, myrow, mycol */
+   double                     Anorm1, AnormI, Gflops, Xnorm1, XnormI,
+                              BnormI, resid0, resid1;
+   double                     * Bptr;
+   void                       * vptr = NULL;
+   static int                 first=1; /* cleared only when thrsh <= 0 */
+   int                        ii, ip2, mycol, myrow, npcol, nprow, nq;
+   char                       ctop, cpfact, crfact;
+   time_t                     current_time_start, current_time_end;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mat.n  = N; mat.nb = NB; mat.info = 0;
+   mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow );
+   nq     = HPL_numroc( N, NB, NB, mycol, 0, npcol );
+   mat.nq = nq + 1; /* one extra local column, see below */
+/*
+ * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is
+ * N by N+1.  One column is added in every process column for the solve.
+ * The  result  however  is stored in a 1 x N vector replicated in every
+ * process row. In every process, A is lda * (nq+1), x is 1 * nq and the
+ * workspace is mp. 
+ *
+ * Ensure that lda is a multiple of ALIGN and not a power of 2
+ */
+   mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align;
+   do
+   {
+      ii = ( mat.ld += ALGO->align ); ip2 = 1;
+      while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } /* ip2 = 2^floor(log2(ld)) */
+   }
+   while( mat.ld == ip2 ); /* ld is a power of 2: bump it again */
+/*
+ * Allocate dynamic memory
+ */
+   vptr = (void*)malloc( ( (size_t)(ALGO->align) + 
+                           (size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
+                         sizeof(double) );
+   info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
+   (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
+                          GRID->all_comm ); /* any failure => info[0] != 0 */
+   if( info[0] != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest",
+                    "[%d,%d] %s", info[1], info[2],
+                    "Memory allocation failed for A, x and b. Skip." );
+      (TEST->kskip)++;
+      /* some processes might have succeeded with allocation */
+      if (vptr) free(vptr);
+      return;
+   }
+/*
+ * generate matrix and right-hand-side, [ A | b ] which is N by N+1.
+ */
+   mat.A  = (double *)HPL_PTR( vptr,
+                               ((size_t)(ALGO->align) * sizeof(double) ) );
+   mat.X  = Mptr( mat.A, 0, mat.nq, mat.ld );
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+#ifdef HPL_CALL_VSIPL
+   mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
+                                 (vsip_length)(mat.ld * mat.nq),
+                                 VSIP_MEM_NONE );
+#endif
+/*
+ * Solve linear system
+ */
+   HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm );
+   time( &current_time_start ); /* calendar stamps, only printed */
+   HPL_ptimer( 0 ); /* timer 0 brackets only HPL_pdgesv */
+   HPL_pdgesv( GRID, ALGO, &mat );
+   HPL_ptimer( 0 );
+   time( &current_time_end );
+#ifdef HPL_CALL_VSIPL
+   (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); 
+   vsip_blockdestroy_d( mat.block );
+#endif
+/*
+ * Gather max of all CPU and WALL clock timings and print timing results
+ */
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       1, 0, wtime );
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      if( first )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "T/V                N    NB     P     Q",
+                      "               Time                 Gflops" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         if( TEST->thrsh <= HPL_rzero ) first = 0;
+      }
+/*
+ * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve.
+ * Print WALL time
+ */
+      Gflops = ( ( (double)(N) /   1.0e+9 ) * 
+                 ( (double)(N) / wtime[0] ) ) * 
+                 ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) );
+/* One-character PFACT / RFACT / BCAST codes for the T/V column below. */
+      cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ?
+                   (char)('C') : (char)('R') ) );
+      crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? 
+                   (char)('C') : (char)('R') ) );
+
+      if(      ALGO->btopo == HPL_1RING   ) ctop = '0';
+      else if( ALGO->btopo == HPL_1RING_M ) ctop = '1';
+      else if( ALGO->btopo == HPL_2RING   ) ctop = '2';
+      else if( ALGO->btopo == HPL_2RING_M ) ctop = '3';
+      else if( ALGO->btopo == HPL_BLONG   ) ctop = '4';
+      else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5';
+
+      if( wtime[0] > HPL_rzero ) { /* Gflops divides by wtime[0] */
+         HPL_fprintf( TEST->outfp,
+             "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f    %19.4e\n",
+             ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ),
+             ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin,
+             N, NB, nprow, npcol, wtime[0], Gflops );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() end time   %s\n", ctime( &current_time_end ) );
+      }
+   }
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       HPL_TIMING_N, HPL_TIMING_BEG, HPL_w );
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V",
+                   "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" );
+/*
+ * Recursive panel factorization
+ */
+      if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time rfact . . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization
+ */
+      if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time pfact . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization (swap)
+ */
+      if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time mxswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] );
+/*
+ * Update
+ */
+      if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time update  . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] );
+/*
+ * Update (swap)
+ */
+      if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time laswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] );
+/*
+ * Upper triangular system solve
+ */
+      if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time up tr sv  . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] );
+
+      if( TEST->thrsh <= HPL_rzero )
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+   }
+#endif
+/*
+ * Quick return, if I am not interested in checking the computations
+ */
+   if( TEST->thrsh <= HPL_rzero )
+   { (TEST->kpass)++; if( vptr ) free( vptr ); return; }
+/*
+ * Check info returned by solve
+ */
+   if( mat.info != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", 
+                    "Error code returned by solve is", mat.info, "skip" );
+      (TEST->kskip)++;
+      if( vptr ) free( vptr ); return; /* return is unconditional */
+   }
+/*
+ * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x,
+ * and norm inf of b - A x. Display residual checks.
+ */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+   Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
+   AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
+/*
+ * Because x is distributed in process rows, switch the norms
+ */
+   XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 );
+   Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 );
+/*
+ * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to
+ * find the max (in the col). Then (3) broadcast along the rows so that every
+ * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5]
+ * for the entries of B, it is very likely that BnormI (<=,~) 0.5.
+ */
+   Bptr = Mptr( mat.A, 0, nq, mat.ld );
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){
+      if( mat.mp > 0 )
+      {
+         BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI );
+      }
+      else
+      {
+         BnormI = HPL_rzero;
+      }
+      (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max,
+                             GRID->col_comm );
+   }
+   (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE,
+                          HPL_indxg2p( N, NB, NB, 0, npcol ),
+                          GRID->row_comm );
+/*
+ * If I own b, compute ( b - A x ); else ( - A x ), or 0 when nq <= 0
+ */
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );
+   }
+   else if( nq > 0 )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 );
+   }
+   else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; }
+/*
+ * Reduce the distributed residual in process column 0
+ */
+   if( mat.mp > 0 )
+      (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0,
+                         GRID->row_comm );
+/*
+ * Compute || b - A x ||_oo
+ */
+   resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld );
+/*
+ * Computes and displays norms, residuals ...
+ */
+   if( N <= 0 )
+   {
+      resid1 = HPL_rzero;
+   }
+   else
+   {
+      resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) );
+   }
+/* All processes update the counters; only process (0,0) prints the result. */
+   if( resid1 < TEST->thrsh ) (TEST->kpass)++;
+   else                       (TEST->kfail)++;
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n",
+         "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1,
+         " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) );
+
+      if(resid1 >= TEST->thrsh ) 
+      {
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||Ax-b||_oo  . . . . . . . . . . . . . . . . . = ", resid0 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_1  . . . . . . . . . . . . . . . . . . . = ", Anorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_1  . . . . . . . . . . . . . . . . . . . = ", Xnorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI );
+      }
+   }
+   if( vptr ) free( vptr );
+/*
+ * End of HPL_pdtest
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o
new file mode 100644
index 000000000..2d3e0fcc4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c
new file mode 100644
index 000000000..202416079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c
@@ -0,0 +1,358 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int    HPL_ptimer_disabled;               /* != 0: calls ignored  */
+static double HPL_ptimer_cpusec   [HPL_NPTIMER]; /* accumulated cpu time */
+static double HPL_ptimer_cpustart [HPL_NPTIMER]; /* cpu time at start    */
+static double HPL_ptimer_wallsec  [HPL_NPTIMER]; /* accumulated wall time*/
+static double HPL_ptimer_wallstart[HPL_NPTIMER]; /* wall start, or flag  */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * HPL_ptimer_boot zeroes the cpu and wall accumulators of each of the
+ * HPL_NPTIMER timers,  flags every timer as not started,  and finally
+ * enables HPL_ptimer.
+ */
+   int                        k;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( k = 0; k < HPL_NPTIMER; k++ )
+   {
+      HPL_ptimer_wallsec  [k] = HPL_rzero;
+      HPL_ptimer_cpusec   [k] = HPL_rzero;
+      HPL_ptimer_wallstart[k] = HPL_PTIMER_STARTFLAG;
+      HPL_ptimer_cpustart [k] = HPL_PTIMER_STARTFLAG;
+   }
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int                  I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer  toggles the  cpu/wall "stopwatch" of index  I.  When the
+ * timer is idle,  the call records  the current wall and cpu times as
+ * its starting point. When the timer is running, the call stops it and
+ * adds the  elapsed  cpu and wall  times  (in seconds)  to the  totals
+ * kept for that timer.  HPL_NPTIMER independent timers are available.
+ * Calls are ignored while the facility is disabled,  see the routines
+ * HPL_ptimer_disable() and HPL_ptimer_enable().  HPL_ptimer_boot()  is
+ * expected to have been called first. The accumulated time is read by
+ *
+ * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I )
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to start/stop.  I is used
+ *         as is to index the timer arrays, without any range check.
+ *
+ * ---------------------------------------------------------------------
+ */
+   double                     t_cpu, t_wall;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_ptimer_disabled ) return;
+/*
+ * A start value different from  HPL_PTIMER_STARTFLAG  means the timer
+ * is running: stop it and accumulate the interval.  Otherwise,  start
+ * it.  Only the wall start value is used as the running indicator.
+ */
+   if( HPL_ptimer_wallstart[I] != HPL_PTIMER_STARTFLAG )
+   {
+      t_cpu  = HPL_ptimer_cputime ();
+      t_wall = HPL_ptimer_walltime();
+      HPL_ptimer_cpusec   [I] += t_cpu  - HPL_ptimer_cpustart [I];
+      HPL_ptimer_wallsec  [I] += t_wall - HPL_ptimer_wallstart[I];
+      HPL_ptimer_wallstart[I]  = HPL_PTIMER_STARTFLAG;
+   }
+   else
+   {
+      HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+      HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+   }
+/*
+ * End of HPL_ptimer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable clears the disabled flag: later calls to HPL_ptimer
+ * are honored again. Accumulated timings are left untouched.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable raises the disabled flag: later calls to HPL_ptimer
+ * return immediately. Accumulated timings are left untouched.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+/*
+ * End of HPL_ptimer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME          TMTYPE,
+   const int                  I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_PTIME          TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns the wall- or cpu- time accumulated so far
+ * in timer I,  i.e.  the sum of the start/stop intervals completed by
+ * HPL_ptimer for that timer.  An interval that is still running is not
+ * included, since the totals are only updated when a timer is stopped.
+ * The timer arrays are only read by this routine.
+ *
+ * If the requested clock reports  HPL_PTIMER_ERROR,  that  value  is
+ * returned instead of the accumulated time.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.  I is used as is
+ *         to index the timer arrays, without any range check.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Wall clock request:  the clock is sampled only  to find out whether
+ * it is available;  HPL_PTIMER_ERROR is handed back when it is not.
+ * Otherwise, the accumulated wall time of timer I is returned.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      return( ( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) ?
+              HPL_PTIMER_ERROR : HPL_ptimer_wallsec[I] );
+   }
+/*
+ * Any other TMTYPE is treated as a cpu time request, handled likewise
+ * with the cpu clock.
+ */
+   return( ( HPL_ptimer_cputime()  == HPL_PTIMER_ERROR ) ?
+           HPL_PTIMER_ERROR : HPL_ptimer_cpusec [I] );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm                   COMM,
+   const HPL_T_PTIME_OP       OPE,
+   const HPL_T_PTIME          TMTYPE,
+   const int                  N,
+   const int                  IBEG,
+   double                     * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int                  IBEG, N;
+   const HPL_T_PTIME_OP       OPE;
+   const HPL_T_PTIME          TMTYPE;
+   MPI_Comm                   COMM;
+   double                     * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine  combines the timing information stored on a scope
+ * of processes into the user TIMES array.  The timer is disabled while
+ * the routine runs and its previous state is restored on every exit.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)        MPI_Comm
+ *         The MPI communicator  identifying  the process  collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)              const HPL_T_PTIME_OP
+ *         On entry, OPE specifies what combine operation should be done
+ *         as follows:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME  get sum of times across processes.
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies which time is combined:
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * N       (global input)              const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)              const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)             double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         this array contains the requested timing information.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer is disabled for the combine operation - copy timing information
+ * into the user array.  If wall- or cpu-time is not available,  fill in
+ * TIMES with HPL_PTIMER_ERROR, restore HPL_ptimer_disabled, and return.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+      {
+         for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+         HPL_ptimer_disabled = tmpdis; return;
+      }
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i];
+   }
+   else
+   {
+      if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+      {
+         for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+         HPL_ptimer_disabled = tmpdis; return;
+      }
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i];
+   }
+/*
+ * Combine all nodes information, restore HPL_ptimer_disabled, and return
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+   if(      OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME  )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else   /* HPL_AMAX_PTIME, and the default for any other value */
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o
new file mode 100644
index 000000000..8c41fc9d1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+/* clock() flavor: cpu seconds elapsed since the first call. */
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;
+/* The first clock() sample is kept in t0 and used as the origin. */
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;
+   d = (double)(t1) / cps;
+   return( d );
+}
+
+#elif defined( HPL_USE_TIMES )
+/* times() flavor: user cpu seconds taken from tms_utime. */
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+/* The tick rate is queried on the first call and cached afterwards. */
+   if( ClockTick == HPL_rzero )
+      ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+/* Default flavor: user cpu seconds taken from getrusage() ru_utime. */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct rusage              ruse;
+
+   (void) getrusage( RUSAGE_SELF, &ruse );
+   return( (double)( ruse.ru_utime.tv_sec  ) +
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) );
+}
+
+/* Disabled fallback returning HPL_PTIMER_ERROR, kept for reference:
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o
new file mode 100644
index 000000000..0ed678ecb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+ 
+#if defined( HPL_USE_GETTIMEOFDAY )
+/* gettimeofday() flavor: seconds elapsed since the first call. */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   static long                sec0 = 0, usec0;
+   struct timeval             now;
+
+   (void) gettimeofday( &now, NULL );
+/* First call: remember the origin and report a zero elapsed time. */
+   if( sec0 == 0 )
+   {
+      sec0  = now.tv_sec;
+      usec0 = now.tv_usec;
+      return( HPL_rzero );
+   }
+
+   return( (double)( now.tv_sec - sec0 ) +
+           ( (double)( now.tv_usec-usec0 ) / 1000000.0 ) );
+}
+
+#else
+/* Default flavor: the value of MPI_Wtime() is returned as is. */
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   return( MPI_Wtime() );
+}
+
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o
new file mode 100644
index 000000000..b00e05dc8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int    HPL_timer_disabled;              /* != 0: calls ignored   */
+static double HPL_timer_cpusec   [HPL_NTIMER]; /* accumulated cpu time  */
+static double HPL_timer_cpustart [HPL_NTIMER]; /* cpu time at start     */
+static double HPL_timer_wallsec  [HPL_NTIMER]; /* accumulated wall time */
+static double HPL_timer_wallstart[HPL_NTIMER]; /* wall start, or flag   */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot  zeroes the cpu and wall accumulators of each of the
+ * HPL_NTIMER timers,  flags every timer as not started,  and  finally
+ * enables HPL_timer.
+ */
+   int                        k;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( k = 0; k < HPL_NTIMER; k++ )
+   {
+      HPL_timer_wallsec  [k] = HPL_rzero;
+      HPL_timer_cpusec   [k] = HPL_rzero;
+      HPL_timer_wallstart[k] = HPL_TIMER_STARTFLAG;
+      HPL_timer_cpustart [k] = HPL_TIMER_STARTFLAG;
+   }
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer  toggles the  cpu/wall  "stopwatch" of index  I.  When the
+ * timer is idle,  the call records  the current wall and cpu times as
+ * its starting point. When the timer is running, the call stops it and
+ * adds the  elapsed  cpu and wall  times  (in seconds)  to the  totals
+ * kept for that timer.  HPL_NTIMER  independent timers are available.
+ * Calls are ignored while the facility is disabled,  see the routines
+ * HPL_timer_disable()  and  HPL_timer_enable().  HPL_timer_boot()  is
+ * expected to have been called first. The accumulated time is read by
+ *
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to start/stop.  I is used
+ *         as is to index the timer arrays, without any range check.
+ *
+ * ---------------------------------------------------------------------
+ */
+   double                     t_cpu, t_wall;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_timer_disabled ) return;
+/*
+ * A start value  different from  HPL_TIMER_STARTFLAG  means the timer
+ * is running: stop it and accumulate the interval.  Otherwise,  start
+ * it.  Only the wall start value is used as the running indicator.
+ */
+   if( HPL_timer_wallstart[I] != HPL_TIMER_STARTFLAG )
+   {
+      t_cpu  = HPL_timer_cputime ();
+      t_wall = HPL_timer_walltime();
+      HPL_timer_cpusec   [I] += t_cpu  - HPL_timer_cpustart [I];
+      HPL_timer_wallsec  [I] += t_wall - HPL_timer_wallstart[I];
+      HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG;
+   }
+   else
+   {
+      HPL_timer_wallstart[I] = HPL_timer_walltime();
+      HPL_timer_cpustart [I] = HPL_timer_cputime ();
+   }
+/*
+ * End of HPL_timer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable clears the disabled flag:  later calls to HPL_timer
+ * are honored again. Accumulated timings are left untouched.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable raises the disabled flag:  later calls to HPL_timer
+ * return immediately. Accumulated timings are left untouched.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire  returns the wall- or cpu- time accumulated so far
+ * in timer I,  i.e.  the sum of the start/stop intervals completed by
+ * HPL_timer  for that timer.  An interval that is still running is not
+ * included, since the totals are only updated when a timer is stopped.
+ * The timer arrays are only read by this routine.
+ *
+ * If the requested clock reports  HPL_TIMER_ERROR,  that  value  is
+ * returned instead of the accumulated time.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_TIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_TIME : wall clock time is returned,
+ *            = HPL_CPU_TIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.  I is used as is
+ *         to index the timer arrays, without any range check.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Wall clock request:  the clock is sampled only  to find out whether
+ * it is available;  HPL_TIMER_ERROR  is handed back when it is not.
+ * Otherwise, the accumulated wall time of timer I is returned.
+ */
+   if( TMTYPE == HPL_WALL_TIME )
+   {
+      return( ( HPL_timer_walltime() == HPL_TIMER_ERROR ) ?
+              HPL_TIMER_ERROR : HPL_timer_wallsec[I] );
+   }
+/*
+ * Any other TMTYPE is treated as a cpu time request, handled likewise
+ * with the cpu clock.
+ */
+   return( ( HPL_timer_cputime()  == HPL_TIMER_ERROR ) ?
+           HPL_TIMER_ERROR : HPL_timer_cpusec [I] );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o
new file mode 100644
index 000000000..fe3540ab4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_cputime returns the cpu time.  If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* Processor seconds consumed since the first call, as measured by clock(). */
+   static double              ticks_per_sec = CLOCKS_PER_SEC;
+   static clock_t             origin = 0;
+   clock_t                    elapsed;
+ 
+/* The first call latches the origin, so it reports (about) zero. */
+   if( origin == 0 ) { origin = clock(); }
+   elapsed = clock() - origin;
+   return( (double)(elapsed) / ticks_per_sec );
+} 
+
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* User CPU seconds via times(); the tick rate is cached in a static. */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+ 
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );   /* return value unused; only tms_utime is read */
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE )  */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{  /* Seconds from the ru_utime field that getrusage(RUSAGE_SELF) fills in. */
+   struct rusage              usage;
+   struct timeval            *user = &usage.ru_utime;
+   (void) getrusage( RUSAGE_SELF, &usage );
+   return( (double)( user->tv_sec ) + ( (double)( user->tv_usec ) / 1000000.0 ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o
new file mode 100644
index 000000000..3b221b80d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+   static long                base_sec = 0, base_usec;
+   struct timeval             now;
+
+/* Sample once; the first sample becomes the origin and reads as zero. */
+   (void) gettimeofday( &now, NULL );
+   if( base_sec == 0 )
+   {
+      base_sec  = now.tv_sec;
+      base_usec = now.tv_usec;
+      return( HPL_rzero );
+   }
+/* The microsecond difference may be negative; the sum is still correct. */
+   return( (double)( now.tv_sec  - base_sec  ) +
+           ( (double)( now.tv_usec - base_usec ) / 1000000.0 ) );
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o
new file mode 100644
index 000000000..e73b5e8a6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am
new file mode 100644
index 000000000..452ea5f06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am
@@ -0,0 +1,13 @@
+## Builds the installable xhpl program from the sources listed below, linked against ../src/libhpl.a, with headers from $(top_srcdir)/include.
+AM_CPPFLAGS = -I$(top_srcdir)/include
+
+xhpl_LDADD = ../src/libhpl.a
+
+bin_PROGRAMS = xhpl
+
+xhpl_SOURCES =  \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in
new file mode 100644
index 000000000..034564545
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in
@@ -0,0 +1,698 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+bin_PROGRAMS = xhpl$(EXEEXT)
+subdir = testing
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \
+	matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \
+	matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \
+	matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \
+	ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \
+	ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \
+	ptimer/HPL_ptimer_cputime.$(OBJEXT) \
+	ptimer/HPL_ptimer_walltime.$(OBJEXT)
+xhpl_OBJECTS = $(am_xhpl_OBJECTS)
+xhpl_DEPENDENCIES = ../src/libhpl.a
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \
+	matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \
+	matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \
+	matgen/$(DEPDIR)/HPL_xjumpm.Po \
+	pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \
+	ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \
+	ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(xhpl_SOURCES)
+DIST_SOURCES = $(xhpl_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(top_srcdir)/include
+xhpl_LDADD = ../src/libhpl.a
+xhpl_SOURCES = \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu testing/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	      echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	      $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+matgen/$(am__dirstamp):
+	@$(MKDIR_P) matgen
+	@: > matgen/$(am__dirstamp)
+matgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) matgen/$(DEPDIR)
+	@: > matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen
+	@: > pmatgen/$(am__dirstamp)
+pmatgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen/$(DEPDIR)
+	@: > pmatgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \
+	pmatgen/$(DEPDIR)/$(am__dirstamp)
+ptest/$(am__dirstamp):
+	@$(MKDIR_P) ptest
+	@: > ptest/$(am__dirstamp)
+ptest/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptest/$(DEPDIR)
+	@: > ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptimer/$(am__dirstamp):
+	@$(MKDIR_P) ptimer
+	@: > ptimer/$(am__dirstamp)
+ptimer/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptimer/$(DEPDIR)
+	@: > ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+
+xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) 
+	@rm -f xhpl$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f matgen/*.$(OBJEXT)
+	-rm -f pmatgen/*.$(OBJEXT)
+	-rm -f ptest/*.$(OBJEXT)
+	-rm -f ptimer/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:  # runs the install target with INSTALL_STRIP_PROGRAM; also passes STRIPPROG when STRIP is non-empty
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:  # removes CONFIG_CLEAN_FILES, CONFIG_CLEAN_VPATH_FILES (only when srcdir is not .) and the dirstamp files; errors ignored
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f matgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f matgen/$(am__dirstamp)
+	-rm -f pmatgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pmatgen/$(am__dirstamp)
+	-rm -f ptest/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptest/$(am__dirstamp)
+	-rm -f ptimer/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptimer/$(am__dirstamp)
+
+
+maintainer-clean-generic:  # only prints the two warning lines below; this rule deletes nothing itself
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-am  # after distclean-am, removes the listed .Po dependency files and Makefile; rm errors ignored
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am  # after maintainer-clean-am, removes the listed .Po dependency files and Makefile; rm errors ignored
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-binPROGRAMS install-data install-data-am \
+	install-dvi install-dvi-am install-exec install-exec-am \
+	install-html install-html-am install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c
new file mode 100644
index 000000000..c14ef0fd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dmatgen
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_dmatgen
+( M, N, A, LDA, ISEED )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dmatgen generates (or regenerates) a random matrix A.
+ *  
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input)                       const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero; if M <= 0 the routine returns at once.
+ *
+ * N       (input)                       const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero; if N <= 0 the routine returns at once.
+ *
+ * A       (output)                      double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         this  array  contains   the   coefficients  of  the  randomly
+ *         generated matrix, written column by column with stride LDA.
+ *
+ * LDA     (input)                       const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,M).
+ *
+ * ISEED   (input)                       const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (incA = entries skipped between two columns of A)
+ */
+   int                        iadd[2], ia1[2], ic1[2], iran1[2],
+                              jseed[2], mult[2];
+   int                        i, incA = LDA - M, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/*
+ * Initialize the random sequence from ISEED via HPL_xjumpm / HPL_setran
+ */
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+
+   HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 );
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Generate an M by N matrix, one column of M entries at a time; incA
+ */
+   for( j = 0; j < N; A += incA, j++ )
+      for( i = 0; i < M; A++, i++ ) *A = HPL_rand();
+/*
+ * (= LDA - M) skips the unused tail of each column. End of HPL_dmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c
new file mode 100644
index 000000000..4d4dc4db5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_jumpit
+(
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM
+)
+#else
+void HPL_jumpit
+( MULT, IADD, IRANN, IRANM )
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_jumpit jumps in the random sequence from the number X(n) encoded
+ * in IRANN to the number X(m) encoded in IRANM,  using the constants A
+ * and C encoded in MULT and IADD:  X(m) = A * X(n) + C.  A and C depend
+ * on m and n; see HPL_xjumpm for how to initialize them.  The result is
+ * also passed on through HPL_setran( 0, IRANM ), i.e. irand = IRANM.
+ *
+ * Arguments
+ * =========
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2, that contains the
+ *         16-lower and 15-higher bits of the constant A.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2, that contains the
+ *         16-lower and 15-higher bits of the constant C.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry,  IRANN  is an array of dimension 2,  that contains 
+ *         the 16-lower and 15-higher bits of the encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.  On exit, this
+ *         array contains respectively the 16-lower and  15-higher bits
+ *         of the encoding of X(m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (j holds the intermediate product IRANN * MULT)
+ */
+   int                          j[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_lmul( IRANN, MULT, j );              /* j     = IRANN * MULT;  */
+   HPL_ladd( j, IADD, IRANM );              /* IRANM = j     + IADD;  */
+   HPL_setran( 0, IRANM );                  /* irand = IRANM          */
+/*
+ * End of HPL_jumpit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c
new file mode 100644
index 000000000..0d4e4c08c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c
@@ -0,0 +1,126 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_ladd
+(
+   int *                            J,
+   int *                            K,
+   int *                            I
+)
+#else
+void HPL_ladd
+( J, K, I )
+   int *                            J;
+   int *                            K;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ladd adds two long positive integers  K and J and puts the result
+ * into I.  The long integers  I, J, K  are encoded on 64 bits using  an
+ * array of 2 integers:  the 32-lower bits in the first entry,  the  32-
+ * higher bits in the second.  Carries propagate between the 16-bit
+ * pieces; only the carry out of the highest piece is dropped.
+ *
+ * Arguments
+ * =========
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (ktmp0-3, jtmp0-3: 16-bit pieces, low to high)
+ */
+   unsigned int        itmp0, itmp1;
+   unsigned int        ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16;
+   unsigned int        ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16;
+   unsigned int        jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16;
+   unsigned int        jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16;
+
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ *    K[1] K[0] K  I[0]  = (K[0]+J[0]) % 2^32
+ *    XXXX XXXX    carry = (K[0]+J[0]) / 2^32
+ *
+ * +  J[1] J[0] J  I[1] = K[1] + J[1] + carry
+ *    XXXX XXXX    I[1] = I[1] % 2^32
+ *    -------------
+ *    I[1] I[0]
+ *    0XXX XXXX I
+ */
+   itmp0 = ktmp0 + jtmp0;
+   itmp1 = itmp0 >> 16;         I[0] = itmp0 - (itmp1 << 16 );
+   itmp1 += ktmp1 + jtmp1;      I[0] |= (itmp1 & 65535) << 16;
+   itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2;
+   I[1] = itmp0 - ((itmp0 >> 16 ) << 16);
+   itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3;
+   I[1] |= (itmp1 & 65535) << 16;
+/*
+ * End of HPL_ladd
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c
new file mode 100644
index 000000000..254b192f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_lmul
+(
+   int *                            K,
+   int *                            J,
+   int *                            I
+)
+#else
+void HPL_lmul
+( K, J, I )
+   int *                            K;
+   int *                            J;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_lmul multiplies two long positive integers  K and J and puts the
+ * low 64 bits of the product into I  (higher bits are discarded).  The
+ * long integers  I, J, K  are encoded on 64 bits using  an array of  2
+ * integers:  the 32-lower bits are stored in the first entry  of  each
+ * array,  the 32-higher bits in the second entry.  For efficiency, the
+ * modulo operations are written inline as 16-bit masks and shifts.
+ *
+ * Arguments
+ * =========
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (kk, jj: 16-bit pieces of K, J; res: column sums)
+ */
+   int                        r, c;
+   unsigned int               kk[4], jj[4], res[5];
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * The operands are split into 16-bit pieces. Multiplying two 16-bit
+ * integers yields a 32-bit result. The lower 16 bits of each result
+ * are kept in res[c], and the higher 16 bits are carried over into
+ * res[c+1], the next column.
+ */
+   for (c = 0; c < 2; ++c) {
+     kk[2*c] = K[c] & 65535;
+     kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535;
+     jj[2*c] = J[c] & 65535;
+     jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535;
+   }
+
+   res[0] = 0;
+   for (c = 0; c < 4; ++c) {
+     res[c+1] = (res[c] >> 16) & 65535;
+     res[c] &= 65535;
+     for (r = 0; r < c+1; ++r) {
+       res[c] = kk[r] * jj[c-r] + (res[c] & 65535);
+       res[c+1] += (res[c] >> 16) & 65535;
+     }
+   }
+
+   for (c = 0; c < 2; ++c)
+     I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535));
+/*
+ * End of HPL_lmul
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c
new file mode 100644
index 000000000..fe4e12f5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c
@@ -0,0 +1,94 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_rand( void )
+#else
+double HPL_rand()
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rand generates  the next number  in the  random  sequence.  This
+ * function  ensures  that this number lies in the interval (-0.5, 0.5].
+ *  
+ * The static array irand contains the information (2 integers) required
+ * to generate the  next number  in the sequence  X(n).  This  number is
+ * computed as X(n) = HPL_HALF - (2^32*irand[1] + irand[0]) / d, where the
+ * constant d is the largest 64 bit positive unsigned integer. The array
+ * irand is then  updated  for the generation of the next number  X(n+1)
+ * in  the  random   sequence  as   follows  X(n+1) = a * X(n) + c.  The
+ * constants a and c should have been preliminarily stored in the arrays
+ * ias and ics as 2 pairs of integers.  The initialization of  ias,  ics
+ * and  irand  is performed by the function HPL_setran.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables .. (j: presumably filled by HPL_setran( 3, j ) - confirm)
+ */
+   int                        j[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_setran( 3, j );
+/*
+ * return number in (-0.5, 0.5]: HPL_HALF minus the scaled two-word value j
+ */
+   return( HPL_HALF -
+           (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF +
+           (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF );
+/*
+ * End of HPL_rand
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c
new file mode 100644
index 000000000..1a3ca73aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c
@@ -0,0 +1,115 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int       ias[2], ics[2], irand[2];
+
+#ifdef STDC_HEADERS
+void HPL_setran
+(
+   const int                        OPTION,
+   int *                            IRAN
+)
+#else
+void HPL_setran
+( OPTION, IRAN )
+   const int                        OPTION;
+   int *                            IRAN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_setran reads or writes the state of the random generator,  which
+ * is held  in three file-scope pairs of integers:  irand  (encoding of
+ * the current element X(n)),  ias and ics  (the constants  a and c  of
+ * the recurrence X(n+1) = a*X(n) + c).  OPTION = 0, 1 or 2 copies IRAN
+ * into  irand,  ias  or  ics  respectively.   OPTION = 3  copies irand
+ * into IRAN and then replaces irand by irand * ias + ics.    Any other
+ * value of OPTION leaves both the state and IRAN untouched.
+ *
+ * Arguments
+ * =========
+ *
+ * OPTION  (local input)                 const int
+ *         On entry, OPTION selects the operation to perform:  0, 1, 2
+ *         to set irand, ias, ics,  and 3 to fetch and advance irand.
+ *
+ * IRAN    (local input/output)          int *
+ *         On entry,  IRAN is an array of dimension 2.  It is read when
+ *         OPTION is 0, 1 or 2,  and overwritten when  OPTION  is 3.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        prod[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   switch( OPTION )
+   {
+      case 0 : irand[0] = IRAN[0]; irand[1] = IRAN[1]; break;
+      case 1 : ias  [0] = IRAN[0]; ias  [1] = IRAN[1]; break;
+      case 2 : ics  [0] = IRAN[0]; ics  [1] = IRAN[1]; break;
+      case 3 : IRAN[0] = irand[0]; IRAN[1] = irand[1]; /* current value */
+               HPL_lmul( irand, ias, prod );    /* prod  = irand * ias; */
+               HPL_ladd( prod, ics, irand ); break; /* irand = prod + ics; */
+   }                                   /* other OPTION values: no-op */
+/*
+ * End of HPL_setran
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c
new file mode 100644
index 000000000..ae70bbc16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c
@@ -0,0 +1,158 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_xjumpm
+(
+   const int                        JUMPM,
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM,
+   int *                            IAM,
+   int *                            ICM
+)
+#else
+void HPL_xjumpm
+( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM )
+   const int                        JUMPM;
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+   int *                            IAM;
+   int *                            ICM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_xjumpm builds  the constants  A and C  such that,  in the random
+ * sequence,  X(n+JUMPM) = A*X(n)+C,  and applies them  once  to IRANN.
+ * MULT and IADD  encode  the constants  a and c  of a single step of
+ * the sequence, X(n+1) = a*X(n)+c.
+ *
+ * Arguments
+ * =========
+ *
+ * JUMPM   (local input)                 const int
+ *         On entry,  JUMPM  specifies  the  number  of entries  in  the
+ *         sequence to jump over.  When  JUMPM  is less than or equal to
+ *         zero,  A and C are not computed  and  IRANM is set to IRANN,
+ *         which corresponds to a jump of size zero.
+ *
+ * MULT    (local input)                 int *
+ *         On entry,  MULT is an array of dimension 2  that encodes the
+ *         constant  a  used to go from X(n) to X(n+1) = a*X(n) + c  in
+ *         the random sequence.  It is only read when JUMPM > 0.
+ *
+ * IADD    (local input)                 int *
+ *         On entry,  IADD is an array of dimension 2  that encodes the
+ *         constant  c  used to go from X(n) to X(n+1) = a*X(n) + c  in
+ *         the random sequence.  It is only read when JUMPM > 0.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry, IRANN is an array of dimension 2, that contains the
+ *         encoding of X(n), the element to jump from.
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.   On exit, this
+ *         array contains the encoding of X(n+JUMPM),  i.e.  a copy of
+ *         IRANN when JUMPM is less than or equal to zero.
+ *
+ * IAM     (local output)                int *
+ *         On entry, IAM is an array of dimension 2. On exit, when JUMPM
+ *         is greater than zero,  this array  contains  the encoding of
+ *         the constant  A  to jump  from  X(n)  to  X(n+JUMPM):  it is
+ *         first set to MULT and then multiplied by MULT  JUMPM-1 times
+ *         with HPL_lmul.   When  JUMPM  is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ICM     (local output)                int *
+ *         On entry, ICM is an array of dimension 2. On exit, when JUMPM
+ *         is greater than zero,  this array  contains  the encoding of
+ *         the constant  C  to jump  from  X(n)  to  X(n+JUMPM):  it is
+ *         first set to IADD  and then replaced by  IADD + ICM * MULT
+ *         JUMPM-1 times.   When  JUMPM  is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        tmp[2], left;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( JUMPM <= 0 )
+   {                                        /* IRANM = IRANN          */
+      IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1];
+      return;
+   }
+/*
+ * Start from a jump of one entry, then extend it one entry at a time
+ */
+   IAM[0] = MULT[0]; IAM[1] = MULT[1];      /* IAM   = MULT;          */
+   ICM[0] = IADD[0]; ICM[1] = IADD[1];      /* ICM   = IADD;          */
+   for( left = JUMPM - 1; left > 0; left-- )
+   {
+      HPL_lmul( IAM, MULT, tmp );           /* tmp   = IAM   * MULT;  */
+      IAM[0] = tmp[0]; IAM[1] = tmp[1];     /* IAM   = tmp;           */
+      HPL_lmul( ICM, MULT, tmp ); HPL_ladd( IADD, tmp, ICM );
+   }
+   HPL_lmul( IRANN, IAM, tmp );             /* tmp   = IRANN * IAM;   */
+   HPL_ladd( tmp, ICM, IRANM );             /* IRANM = tmp   + ICM;   */
+/*
+ * End of HPL_xjumpm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o
new file mode 100644
index 000000000..f2887d460
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o
new file mode 100644
index 000000000..65b616d11
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o
new file mode 100644
index 000000000..cb47dddff
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o
new file mode 100644
index 000000000..10dc8eedf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o
new file mode 100644
index 000000000..dd2332dee
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o
new file mode 100644
index 000000000..dd58ebfaf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o
new file mode 100644
index 000000000..e740f38d1
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#  Tools, flags and paths used below are expected to come from Make.inc.
+# ######################################################################
+#  Headers every object of this directory is rebuilt against
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_matgen.h
+#
+## Object files ########################################################
+#
+HPL_matobj       = \
+   HPL_dmatgen.o          HPL_ladd.o             HPL_lmul.o             \
+   HPL_xjumpm.o           HPL_jumpit.o           HPL_rand.o             \
+   HPL_setran.o
+#
+## Targets #############################################################
+.PHONY  : all lib clean
+all     : lib
+#  (all, lib and clean are declared phony above: they name no file)
+lib     : lib.grd
+#  lib.grd is a stamp file touched once the objects have been archived
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#  One explicit rule per object; the sources live in the parent directory
+HPL_dmatgen.o          : ../HPL_dmatgen.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dmatgen.c
+HPL_ladd.o             : ../HPL_ladd.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ladd.c
+HPL_lmul.o             : ../HPL_lmul.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_lmul.c
+HPL_xjumpm.o           : ../HPL_xjumpm.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_xjumpm.c
+HPL_jumpit.o           : ../HPL_jumpit.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_jumpit.c
+HPL_rand.o             : ../HPL_rand.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rand.c
+HPL_setran.o           : ../HPL_setran.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_setran.c
+#
+# ######################################################################
+#  Remove the objects and the lib.grd stamp built in this directory
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
new file mode 100644
index 000000000..2d129c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmatgen
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_pdmatgen
+( GRID, M, N, NB, A, LDA, ISEED )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmatgen generates (or regenerates) a parallel random matrix A.
+ * It returns without writing A when Mnumroc yields mp <= 0 or nq <= 0.
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * A       (local output)                double *
+ *         On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+ *         On exit, this array contains the coefficients of the randomly
+ *         generated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ISEED   (global input)                const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd [2], ia1  [2], ia2  [2], ia3  [2],
+                              ia4  [2], ia5  [2], ib1  [2], ib2  [2],
+                              ib3  [2], ic1  [2], ic2  [2], ic3  [2],
+                              ic4  [2], ic5  [2], iran1[2], iran2[2],
+                              iran3[2], iran4[2], itmp1[2], itmp2[2],
+                              itmp3[2], jseed[2], mult [2];
+   int                        ib, iblk, ik, jb, jblk, jk, jump1, jump2,
+                              jump3, jump4, jump5, jump6, jump7, lmb,
+                              lnb, mblks, mp, mycol, myrow, nblks,
+                              npcol, nprow, nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+/*
+ * mp x nq entries are generated below (sizes set by the Mnumroc macro)
+ */
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( ( mp <= 0 ) || ( nq <= 0 ) ) return;
+/*
+ * Number of local NB-blocks (rounded up) and size of the last block
+ */
+   mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB;
+   nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB;
+/*
+ * Compute multiplier/adder for various jumps in random sequence
+ */
+   jump1 = 1;  jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB;
+   jump5 = NB; jump6 = mycol;      jump7 = myrow * NB;
+
+   HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1,   ic1   ); /* step of 1           */
+   HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2,   ic2   ); /* step of nprow*NB    */
+   HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3,   ic3   ); /* step of M           */
+   HPL_xjumpm( jump4, ia3,  ic3,  iran1, itmp1, ia4,   ic4   ); /* npcol*NB steps of M */
+   HPL_xjumpm( jump5, ia3,  ic3,  iran1, itmp1, ia5,   ic5   ); /* NB steps of M       */
+   HPL_xjumpm( jump6, ia5,  ic5,  iran1, itmp3, itmp1, itmp2 ); /* mycol steps of NB*M */
+   HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); /* then myrow*NB more  */
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); /* X(0), a, c */
+/*
+ * ib1, ib2 and ib3 all start from the first number of the local sequence
+ */
+   ib1[0] = iran1[0]; ib1[1] = iran1[1];
+   ib2[0] = iran1[0]; ib2[1] = iran1[1];
+   ib3[0] = iran1[0]; ib3[1] = iran1[1];
+
+   for( jblk = 0; jblk < nblks; jblk++ )
+   {
+      jb = ( jblk == nblks - 1 ? lnb : NB );  /* columns in this block */
+      for( jk = 0; jk < jb; jk++ )
+      {
+         for( iblk = 0; iblk < mblks; iblk++ )
+         {
+            ib = ( iblk == mblks - 1 ? lmb : NB ); /* rows in this block */
+            for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand();
+            HPL_jumpit( ia2, ic2, ib1, iran2 ); /* presumably jumps ib1 by (ia2,ic2) into iran2; HPL_jumpit not shown here -- TODO confirm */
+            ib1[0] = iran2[0]; ib1[1] = iran2[1];
+         }
+         A += LDA - mp;        /* mp entries written: go to next column */
+         HPL_jumpit( ia3, ic3, ib2, iran3 );
+         ib1[0] = iran3[0]; ib1[1] = iran3[1];
+         ib2[0] = iran3[0]; ib2[1] = iran3[1];
+      }
+      HPL_jumpit( ia4, ic4, ib3, iran4 );
+      ib1[0] = iran4[0]; ib1[1] = iran4[1];
+      ib2[0] = iran4[0]; ib2[1] = iran4[1];
+      ib3[0] = iran4[0]; ib3[1] = iran4[1];
+   }
+/*
+ * End of HPL_pdmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o
new file mode 100644
index 000000000..3e89d607f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+#
+HPL_pmaobj       = \
+   HPL_pdmatgen.o
+#
+## Targets #############################################################
+#  all, lib and clean name actions, not files, so they are kept phony.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+#  lib.grd is touched after archiving, i.e. it acts as the stamp file.
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#  $< is the first prerequisite, i.e. ../HPL_pdmatgen.c.
+HPL_pdmatgen.o         : ../HPL_pdmatgen.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+#  Removes only the objects and the stamp file of this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat
new file mode 100644
index 000000000..47aee883e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat
@@ -0,0 +1,31 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+4            # of problems sizes (N)
+29 30 34 35  Ns
+4            # of NBs
+1 2 3 4      NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+3            # of process grids (P x Q)
+2 1 4        Ps
+2 4 1        Qs
+16.0         threshold
+3            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+2            # of recursive stopping criterium
+2 4          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+3            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+0            DEPTHs (>=0)
+2            SWAP (0=bin-exch,1=long,2=mix)
+64           swapping threshold
+0            L1 in (0=transposed,1=no-transposed) form
+0            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c
new file mode 100644
index 000000000..5e4050f48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c
@@ -0,0 +1,293 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int main
+(
+   int                        ARGC,
+   char                       * * ARGV
+)
+#else
+int main( ARGC, ARGV )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        ARGC;
+/*
+ * .. Array Arguments ..
+ */
+   char                       * * ARGV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * main is the main driver program for testing the HPL routines.
+ * This  program is  driven  by  a short data file named  "HPL.dat".
+ * HPL_pdtest is then run for each parameter combination on each grid.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        nval  [HPL_MAX_PARAM],
+                              nbval [HPL_MAX_PARAM],
+                              pval  [HPL_MAX_PARAM],
+                              qval  [HPL_MAX_PARAM],
+                              nbmval[HPL_MAX_PARAM],
+                              ndvval[HPL_MAX_PARAM],
+                              ndhval[HPL_MAX_PARAM];
+
+   HPL_T_FACT                 pfaval[HPL_MAX_PARAM],
+                              rfaval[HPL_MAX_PARAM];
+
+   HPL_T_TOP                  topval[HPL_MAX_PARAM];
+
+   HPL_T_grid                 grid;
+   HPL_T_palg                 algo;
+   HPL_T_test                 test;
+   int                        L1notran, Unotran, align, equil, in, inb,
+                              inbm, indh, indv, ipfa, ipq, irfa, itop,
+                              mycol, myrow, ns, nbs, nbms, ndhs, ndvs,
+                              npcol, npfs, npqs, nprow, nrfs, ntps, 
+                              rank, size, tswap;
+   HPL_T_ORDER                pmapping;
+   HPL_T_FACT                 rpfa;
+   HPL_T_SWAP                 fswap;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Init( &ARGC, &ARGV );
+#ifdef HPL_CALL_VSIPL
+   vsip_init((void*)0);
+#endif
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Read and check validity of test parameters from input file
+ * Example layout of that file (the values shown are illustrative):
+ * HPL Version 1.0, Linpack benchmark input file
+ * Your message here
+ * HPL.out      output file name (if any)
+ * 6            device out (6=stdout,7=stderr,file)
+ * 4            # of problems sizes (N)
+ * 29 30 34 35  Ns
+ * 4            # of NBs
+ * 1 2 3 4      NBs
+ * 0            PMAP process mapping (0=Row-,1=Column-major)
+ * 3            # of process grids (P x Q)
+ * 2 1 4        Ps
+ * 2 4 1        Qs
+ * 16.0         threshold
+ * 3            # of panel fact
+ * 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+ * 2            # of recursive stopping criterium
+ * 2 4          NBMINs (>= 1)
+ * 1            # of panels in recursion
+ * 2            NDIVs
+ * 3            # of recursive panel fact.
+ * 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+ * 1            # of broadcast
+ * 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ * 1            # of lookahead depth
+ * 0            DEPTHs (>=0)
+ * 2            SWAP (0=bin-exch,1=long,2=mix)
+ * 4            swapping threshold
+ * 0            L1 in (0=transposed,1=no-transposed) form
+ * 0            U  in (0=transposed,1=no-transposed) form
+ * 1            Equilibration (0=no,1=yes)
+ * 8            memory alignment in double (> 0)
+ */
+   HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval,
+               &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval,
+               &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran,
+               &Unotran, &equil, &align );
+/*
+ * Loop over different process grids - Define process grid. Go to bottom
+ * of process grid loop if this case does not use my process.
+ */
+   for( ipq = 0; ipq < npqs; ipq++ )
+   {
+      (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq],
+                            &grid );
+      (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol );
+
+      if( ( myrow < 0 ) || ( myrow >= nprow ) ||
+          ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs;
+
+      for( in = 0; in < ns; in++ )
+      {                            /* Loop over various problem sizes */
+       for( inb = 0; inb < nbs; inb++ )
+       {                        /* Loop over various blocking factors */
+        for( indh = 0; indh < ndhs; indh++ )
+        {                       /* Loop over various lookahead depths */
+         for( itop = 0; itop < ntps; itop++ )
+         {                  /* Loop over various broadcast topologies */
+          for( irfa = 0; irfa < nrfs; irfa++ )
+          {             /* Loop over various recursive factorizations */
+           for( ipfa = 0; ipfa < npfs; ipfa++ )
+           {                /* Loop over various panel factorizations */
+            for( inbm = 0; inbm < nbms; inbm++ )
+            {        /* Loop over various recursive stopping criteria */
+             for( indv = 0; indv < ndvs; indv++ )
+             {          /* Loop over various # of panels in recursion */
+/*
+ * Set up the algorithm parameters
+ */
+              algo.btopo = topval[itop]; algo.depth = ndhval[indh];
+              algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv];
+              /* rpfa: scratch copy, panel variant now, recursive below */
+              algo.pfact = rpfa = pfaval[ipfa];
+
+              if( L1notran != 0 )
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrN;
+                 else                           algo.pffun = HPL_pdpanrlN;
+
+                 algo.rfact = rpfa = rfaval[irfa];
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrN;
+                 else                           algo.rffun = HPL_pdrpanrlN;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN;
+                 else               algo.upfun = HPL_pdupdateNT;
+              }
+              else
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrT;
+                 else                           algo.pffun = HPL_pdpanrlT;
+
+                 algo.rfact = rpfa = rfaval[irfa];
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrT;
+                 else                           algo.rffun = HPL_pdrpanrlT;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN;
+                 else               algo.upfun = HPL_pdupdateTT;
+              }
+
+              algo.fswap = fswap; algo.fsthr = tswap;
+              algo.equil = equil; algo.align = align;
+
+              HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] );
+
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+      (void) HPL_grid_exit( &grid );
+label_end_of_npqs: ;
+   }
+/*
+ * Print ending messages, close output file, exit.
+ */
+   if( rank == 0 )
+   {
+      test.ktest = test.kpass + test.kfail + test.kskip;
+#ifndef HPL_DETAILED_TIMING
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+#else
+      if( test.thrsh > HPL_rzero )
+         HPL_fprintf( test.outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+#endif
+
+      HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest,
+                   "tests with the following results:" );
+      if( test.thrsh > HPL_rzero )
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed and passed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kfail,
+                      "tests completed and failed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+      else
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed without checking," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( test.outfp, "\nEnd of Tests.\n" );
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      if( ( test.outfp != stdout ) && ( test.outfp != stderr ) )
+         (void) fclose( test.outfp );
+   }
+#ifdef HPL_CALL_VSIPL
+   vsip_finalize((void*)0);
+#endif
+   MPI_Finalize();
+   exit( 0 );
+   /* Not reached: exit() above does not return to the caller. */
+   return( 0 );
+/*
+ * End of main
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c
new file mode 100644
index 000000000..4ede45be6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c
@@ -0,0 +1,1182 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdinfo
+(
+   HPL_T_test *                     TEST,
+   int *                            NS,
+   int *                            N,
+   int *                            NBS,
+   int *                            NB,
+   HPL_T_ORDER *                    PMAPPIN,
+   int *                            NPQS,
+   int *                            P,
+   int *                            Q,
+   int *                            NPFS,
+   HPL_T_FACT *                     PF,
+   int *                            NBMS,
+   int *                            NBM,
+   int *                            NDVS,
+   int *                            NDV,
+   int *                            NRFS,
+   HPL_T_FACT *                     RF,
+   int *                            NTPS,
+   HPL_T_TOP *                      TP,
+   int *                            NDHS,
+   int *                            DH,
+   HPL_T_SWAP *                     FSWAP,
+   int *                            TSWAP,
+   int *                            L1NOTRAN,
+   int *                            UNOTRAN,
+   int *                            EQUIL,
+   int *                            ALIGN
+)
+#else
+void HPL_pdinfo
+( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
+   HPL_T_test *                     TEST;
+   int *                            NS;
+   int *                            N;
+   int *                            NBS;
+   int *                            NB;
+   HPL_T_ORDER *                    PMAPPIN;
+   int *                            NPQS;
+   int *                            P;
+   int *                            Q;
+   int *                            NPFS;
+   HPL_T_FACT *                     PF;
+   int *                            NBMS;
+   int *                            NBM;
+   int *                            NDVS;
+   int *                            NDV;
+   int *                            NRFS;
+   HPL_T_FACT *                     RF;
+   int *                            NTPS;
+   HPL_T_TOP *                      TP;
+   int *                            NDHS;
+   int *                            DH;
+   HPL_T_SWAP *                     FSWAP;
+   int *                            TSWAP;
+   int *                            L1NOTRAN;
+   int *                            UNOTRAN;
+   int *                            EQUIL;
+   int *                            ALIGN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdinfo reads  the  startup  information for the various tests and
+ * transmits it to all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global output)               HPL_T_test *
+ *         On entry, TEST  points to a testing data structure.  On exit,
+ *         the fields of this data structure are initialized as follows:
+ *         TEST->outfp  specifies the output file where the results will
+ *         be printed.  It is only defined and used by  the process 0 of
+ *         the grid.  TEST->thrsh specifies the  threshold value for the
+ *         test ratio.  TEST->epsil is the relative machine precision of
+ *         the distributed computer.  Finally  the test counters, kfail,
+ *         kpass, kskip, ktest are initialized to zero.
+ *
+ * NS      (global output)               int *
+ *         On exit,  NS  specifies the number of different problem sizes
+ *         to be tested. NS is less than or equal to HPL_MAX_PARAM.
+ *
+ * N       (global output)               int *
+ *         On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+ *         the first NS entries of this array contain the  problem sizes
+ *         to run the code with.
+ *
+ * NBS     (global output)               int *
+ *         On exit,  NBS  specifies the number of different distribution
+ *         blocking factors to be tested. NBS must be less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NB      (global output)               int *
+ *         On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NBS entries of this array contain the values of the
+ *         various distribution blocking factors, to run the code with.
+ *
+ * PMAPPIN (global output)               HPL_T_ORDER *
+ *         On exit,  PMAPPIN  specifies the process mapping onto the no-
+ *         des of the  MPI machine configuration.  PMAPPIN  defaults  to
+ *         row-major ordering.
+ *
+ * NPQS    (global output)               int *
+ *         On exit, NPQS  specifies the  number of different values that
+ *         can be used for P and Q, i.e., the number of process grids to
+ *         run  the  code with.  NPQS must be  less  than  or  equal  to
+ *         HPL_MAX_PARAM.
+ *
+ * P       (global output)               int *
+ *         On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of P,
+ *         the number of process rows of the  NPQS grids to run the code
+ *         with.
+ *
+ * Q       (global output)               int *
+ *         On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of Q,
+ *         the number of process columns of the  NPQS  grids to  run the
+ *         code with.
+ *
+ * NPFS    (global output)               int *
+ *         On exit, NPFS  specifies the  number of different values that
+ *         can be used for PF : the panel factorization algorithm to run
+ *         the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * PF      (global output)               HPL_T_FACT *
+ *         On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NPFS  entries  of this array  contain  the various
+ *         panel factorization algorithms to run the code with.
+ *
+ * NBMS    (global output)               int *
+ *         On exit,  NBMS  specifies  the  number  of  various recursive
+ *         stopping criteria  to be tested.  NBMS  must be  less than or
+ *         equal to HPL_MAX_PARAM.
+ *
+ * NBM     (global output)               int *
+ *         On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NBMS entries of this array contain the values
+ *         of the various recursive stopping criteria to be tested.
+ *
+ * NDVS    (global output)               int *
+ *         On exit,  NDVS  specifies  the number  of various numbers  of
+ *         panels in recursion to be tested.  NDVS is less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NDV     (global output)               int *
+ *         On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDVS entries of this array contain the values
+ *         of the various numbers of panels in recursion to be tested.
+ *
+ * NRFS    (global output)               int *
+ *         On exit, NRFS  specifies the  number of different values that
+ *         can be used for RF : the recursive factorization algorithm to
+ *         be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * RF      (global output)               HPL_T_FACT *
+ *         On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NRFS  entries  of  this array contain  the various
+ *         recursive factorization algorithms to run the code with.
+ *
+ * NTPS    (global output)               int *
+ *         On exit, NTPS  specifies the  number of different values that
+ *         can be used for the  broadcast topologies  to be tested. NTPS
+ *         is less than or equal to HPL_MAX_PARAM.
+ *
+ * TP      (global output)               HPL_T_TOP *
+ *         On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the  first NTPS  entries of this  array  contain  the various
+ *         broadcast (along rows) topologies to run the code with.
+ *
+ * NDHS    (global output)               int *
+ *         On exit, NDHS  specifies the  number of different values that
+ *         can be used for the  lookahead depths to be  tested.  NDHS is
+ *         less than or equal to HPL_MAX_PARAM.
+ *
+ * DH      (global output)               int *
+ *         On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDHS entries of this array contain the values
+ *         of lookahead depths to run the code with. Each value must be
+ *         greater than or equal to 0, where 0 means no lookahead.
+ *
+ * FSWAP   (global output)               HPL_T_SWAP *
+ *         On exit, FSWAP specifies the swapping algorithm to be used in
+ *         all tests.
+ *
+ * TSWAP   (global output)               int *
+ *         On exit,  TSWAP  specifies the swapping threshold as a number
+ *         of columns when the mixed swapping algorithm was chosen.
+ *
+ * L1NOTRAN (global output)              int *
+ *         On exit, L1NOTRAN specifies whether the upper triangle of the
+ *         panels of columns  should  be stored  in  no-transposed  form
+ *         (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+ *
+ * UNOTRAN (global output)               int *
+ *         On exit, UNOTRAN  specifies whether the panels of rows should
+ *         be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+ *         form (UNOTRAN=0) during their broadcast.
+ *
+ * EQUIL   (global output)               int *
+ *         On exit,  EQUIL  specifies  whether  equilibration during the
+ *         swap-broadcast  of  the  panel of rows  should  be  performed
+ *         (EQUIL=1) or not (EQUIL=0).
+ *
+ * ALIGN   (global output)               int *
+ *         On exit,  ALIGN  specifies the alignment  of  the dynamically
+ *         allocated buffers in double precision words. ALIGN is greater
+ *         than zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   char                       file[HPL_LINE_MAX], line[HPL_LINE_MAX],
+                              auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
+   FILE                       * infp;
+   int                        * iwork = NULL;
+   char                       * lineptr;
+   int                        error=0, fid, i, j, lwork, maxp, nprocs,
+                              rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Initialize the TEST data structure with default values
+ */
+   TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
+   TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
+/*
+ * Process 0 reads the input data, broadcasts to other processes and
+ * writes needed information to TEST->outfp.
+ */
+   if( rank == 0 )
+   {
+/*
+ * Open file and skip data file header
+ */
+      if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
+      { 
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "cannot open file HPL.dat" );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) fgets( auth, HPL_LINE_MAX - 2, infp );
+/*
+ * Read name and unit number for summary output file
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", file );
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num  );
+      fid = atoi( num );
+      if     ( fid == 6 ) TEST->outfp = stdout;
+      else if( fid == 7 ) TEST->outfp = stderr;
+      else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
+                    file );
+         error = 1; goto label_error;
+      }
+/*
+ * Read and check the parameter values for the tests.
+ *
+ * Problem size (>=0) (N)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); 
+      (void) sscanf( line, "%s", num ); *NS = atoi( num );
+      if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
+                    "Number of values of N is less than 1 or greater than",
+                    HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( N[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of N less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Block size (>=1) (NB)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBS = atoi( num );
+      if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NB is less than 1 or",
+                    "greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NB[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", 
+                       "Value of NB less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Process grids, mapping, (>=1) (P, Q)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num );
+      *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
+      if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of grids is less",
+                    "than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( P[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of P less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( Q[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of Q less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Check for enough processes in machine configuration
+ */
+      maxp = 0;
+      for( i = 0; i < *NPQS; i++ )
+      { nprocs   = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
+      if( maxp > size )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Need at least %d processes for these tests", maxp );
+         error = 1; goto label_error;
+      }
+/*
+ * Checking threshold value (TEST->thrsh)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
+/*
+ * Panel factorization algorithm (PF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
+      if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "number of values of PFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) PF[ i ] = HPL_CROUT;
+         else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
+         else              PF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Recursive stopping criterion (>=1) (NBM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
+      if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NBMIN",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBMS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NBM[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NBMIN less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Number of panels in recursion (>=2) (NDV)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
+      if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NDIV",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDVS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NDV[ i ] = atoi( num ) ) < 2 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NDIV less than 2" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Recursive panel factorization (RF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
+      if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of RFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NRFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) RF[ i ] = HPL_CROUT;
+         else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
+         else              RF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Broadcast topology (TP) (0=1ring, 1=1ringM, 2=2ring, 3=2ringM, 4=Blong, 5=BlongM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
+      if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of BCAST",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NTPS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) TP[ i ] = HPL_1RING;
+         else if( j == 1 ) TP[ i ] = HPL_1RING_M;
+         else if( j == 2 ) TP[ i ] = HPL_2RING;
+         else if( j == 3 ) TP[ i ] = HPL_2RING_M;
+         else if( j == 4 ) TP[ i ] = HPL_BLONG;
+         else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
+         else              TP[ i ] = HPL_1RING_M;
+      }
+/*
+ * Lookahead depth (>=0) (NDH)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
+      if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of DEPTH",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDHS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num );
+         lineptr += strlen( num ) + 1;
+         if( ( DH[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of DEPTH less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Swapping algorithm (0,1 or 2) (FSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); j = atoi( num );
+      if(      j == 0 ) *FSWAP = HPL_SWAP00;
+      else if( j == 1 ) *FSWAP = HPL_SWAP01;
+      else if( j == 2 ) *FSWAP = HPL_SW_MIX;
+      else              *FSWAP = HPL_SWAP01;
+/*
+ * Swapping threshold (>=0) (TSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
+      if( *TSWAP <= 0 ) *TSWAP = 0;
+/*
+ * L1 in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
+      if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; 
+/*
+ * U  in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
+      if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
+/*
+ * Equilibration (0=no, 1=yes)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
+      if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
+/*
+ * Memory alignment in double precision words (> 0) (ALIGN)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
+      if( *ALIGN <= 0 ) *ALIGN = 4;
+/*
+ * Close input file
+ */
+label_error:
+      if (infp != NULL)
+      	(void) fclose( infp );
+   }
+   else { TEST->outfp = NULL; }
+/*
+ * Check for error on reading input file
+ */
+   (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
+                          MPI_COMM_WORLD );
+   if( error )
+   {
+      if( rank == 0 )
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Illegal input in file HPL.dat. Exiting ..." );
+      MPI_Finalize();
+#ifdef HPL_CALL_VSIPL
+      (void) vsip_finalize( NULL );
+#endif
+      exit( 1 );
+   }
+/*
+ * Compute and broadcast machine epsilon
+ */
+   TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
+/*
+ * Broadcast the checking threshold value (TEST->thrsh)
+ */
+   (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
+                         MPI_COMM_WORLD );
+/*
+ * Broadcast array sizes
+ */
+   iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      iwork[ 0] = *NS;      iwork[ 1] = *NBS;
+      iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
+      iwork[ 3] = *NPQS;    iwork[ 4] = *NPFS;     iwork[ 5] = *NBMS;
+      iwork[ 6] = *NDVS;    iwork[ 7] = *NRFS;     iwork[ 8] = *NTPS;
+      iwork[ 9] = *NDHS;    iwork[10] = *TSWAP;    iwork[11] = *L1NOTRAN;
+      iwork[12] = *UNOTRAN; iwork[13] = *EQUIL;    iwork[14] = *ALIGN;
+   }
+   (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
+   if( rank != 0 )
+   {
+      *NS       = iwork[ 0]; *NBS   = iwork[ 1];
+      *PMAPPIN  = ( iwork[ 2] == 0 ?  HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
+      *NPQS     = iwork[ 3]; *NPFS  = iwork[ 4]; *NBMS     = iwork[ 5];
+      *NDVS     = iwork[ 6]; *NRFS  = iwork[ 7]; *NTPS     = iwork[ 8];
+      *NDHS     = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
+      *UNOTRAN  = iwork[12]; *EQUIL = iwork[13]; *ALIGN    = iwork[14];
+   }
+   if( iwork ) free( iwork );
+/*
+ * Pack information arrays and broadcast
+ */
+   lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + 
+           (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
+   
+   if (lwork < 0)
+	exit(EXIT_FAILURE); 
+
+
+   iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { iwork[j] = N [i]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { iwork[j] = NB[i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( PF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( RF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      TP[i] == HPL_1RING   ) iwork[j] = 0;
+         else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
+         else if( TP[i] == HPL_2RING   ) iwork[j] = 2;
+         else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
+         else if( TP[i] == HPL_BLONG   ) iwork[j] = 4;
+         else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
+
+      if(      *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
+      else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
+      else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
+      j++;
+   }
+   (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
+                         MPI_COMM_WORLD );
+   if ((rank != 0) && (iwork != NULL))
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { N [i] = iwork[j]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { NB[i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
+
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      iwork[j] == 0 ) TP[i] = HPL_1RING;
+         else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
+         else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
+         else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
+         else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
+         else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
+
+      if(      iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
+      else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
+      else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
+      j++;
+   
+      if( iwork ) free( iwork );
+   }
+/*
+ * regurgitate input
+ */
+   if( rank == 0 )
+   {
+      
+      if (TEST->outfp != NULL){
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "HPLinpack 2.3  --  High-Performance Linpack benchmark  --  ",
+          " December 2, 2018" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Written by A. Petitet and R. Clint Whaley,  ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Piotr Luszczek, ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Julien Langou, ",
+          "University of Colorado Denver");
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      HPL_fprintf( TEST->outfp, "\n%s\n",
+          "An explanation of the input/output parameters follows:" );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "T/V    : Wall time / encoded variant." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "N      : The order of the coefficient matrix A." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "NB     : The partitioning blocking factor." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "P      : The number of process rows." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "Q      : The number of process columns." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "Time   : Time in seconds to solve the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n\n",
+         "Gflops : Rate of execution for solving the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "The following parameter values will be used:" );
+/*
+ * Problem size
+ */
+      HPL_fprintf( TEST->outfp,       "\nN      :" );
+      for( i = 0; i < Mmin( 8, *NS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", N[i]  );
+      if( *NS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", N[i]  );
+         if( *NS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", N[i]  );
+         }
+      }
+/*
+ * Distribution blocking factor
+ */
+      HPL_fprintf( TEST->outfp,       "\nNB     :" );
+      for( i = 0; i < Mmin( 8, *NBS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NB[i] );
+      if( *NBS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NB[i] );
+         if( *NBS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
+         }
+      }
+/*
+ * Process mapping
+ */
+      HPL_fprintf( TEST->outfp,       "\nPMAP   :" );
+      if(      *PMAPPIN == HPL_ROW_MAJOR    )
+         HPL_fprintf( TEST->outfp, " Row-major process mapping" );
+      else if( *PMAPPIN == HPL_COLUMN_MAJOR )
+         HPL_fprintf( TEST->outfp, " Column-major process mapping" );
+/*
+ * Process grid
+ */
+      HPL_fprintf( TEST->outfp,       "\nP      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", P[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", P[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", P[i]  );
+         }
+      }
+      HPL_fprintf( TEST->outfp,       "\nQ      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", Q[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", Q[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", Q[i]  );
+         }
+      }
+/*
+ * Panel Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nPFACT  :" );
+      for( i = 0; i < Mmin( 8, *NPFS ); i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( PF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( PF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NPFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPFS ); i++ )
+         {
+            if(      PF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( PF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( PF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NPFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPFS; i++ )
+            {
+               if(      PF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( PF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( PF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Recursive stopping criterion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNBMIN  :" );
+      for( i = 0; i < Mmin( 8, *NBMS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NBM[i]  );
+      if( *NBMS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBMS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NBM[i]  );
+         if( *NBMS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBMS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NBM[i]  );
+         }
+      }
+/*
+ * Number of panels in recursion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNDIV   :" );
+      for( i = 0; i < Mmin( 8, *NDVS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NDV[i]  );
+      if( *NDVS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDVS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NDV[i]  );
+         if( *NDVS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDVS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NDV[i]  );
+         }
+      }
+/*
+ * Recursive Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nRFACT  :" );
+      for( i = 0; i < Mmin( 8, *NRFS ); i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( RF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( RF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NRFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NRFS ); i++ )
+         {
+            if(      RF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( RF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( RF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NRFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NRFS; i++ )
+            {
+               if(      RF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( RF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( RF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Broadcast topology
+ */
+      HPL_fprintf( TEST->outfp,       "\nBCAST  :" );
+      for( i = 0; i < Mmin( 8, *NTPS ); i++ )
+      {
+         if(      TP[i] == HPL_1RING   )
+            HPL_fprintf( TEST->outfp,       "   1ring " );
+         else if( TP[i] == HPL_1RING_M )
+            HPL_fprintf( TEST->outfp,       "  1ringM " );
+         else if( TP[i] == HPL_2RING   )
+            HPL_fprintf( TEST->outfp,       "   2ring " );
+         else if( TP[i] == HPL_2RING_M )
+            HPL_fprintf( TEST->outfp,       "  2ringM " );
+         else if( TP[i] == HPL_BLONG   )
+            HPL_fprintf( TEST->outfp,       "   Blong " );
+         else if( TP[i] == HPL_BLONG_M )
+            HPL_fprintf( TEST->outfp,       "  BlongM " );
+      }
+      if( *NTPS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NTPS ); i++ )
+         {
+            if(      TP[i] == HPL_1RING   )
+               HPL_fprintf( TEST->outfp,       "   1ring " );
+            else if( TP[i] == HPL_1RING_M )
+               HPL_fprintf( TEST->outfp,       "  1ringM " );
+            else if( TP[i] == HPL_2RING   )
+               HPL_fprintf( TEST->outfp,       "   2ring " );
+            else if( TP[i] == HPL_2RING_M )
+               HPL_fprintf( TEST->outfp,       "  2ringM " );
+            else if( TP[i] == HPL_BLONG   )
+               HPL_fprintf( TEST->outfp,       "   Blong " );
+            else if( TP[i] == HPL_BLONG_M )
+               HPL_fprintf( TEST->outfp,       "  BlongM " );
+         }
+         if( *NTPS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NTPS; i++ )
+            {
+               if(      TP[i] == HPL_1RING   )
+                  HPL_fprintf( TEST->outfp,       "   1ring " );
+               else if( TP[i] == HPL_1RING_M )
+                  HPL_fprintf( TEST->outfp,       "  1ringM " );
+               else if( TP[i] == HPL_2RING   )
+                  HPL_fprintf( TEST->outfp,       "   2ring " );
+               else if( TP[i] == HPL_2RING_M )
+                  HPL_fprintf( TEST->outfp,       "  2ringM " );
+               else if( TP[i] == HPL_BLONG   )
+                  HPL_fprintf( TEST->outfp,       "   Blong " );
+               else if( TP[i] == HPL_BLONG_M )
+                  HPL_fprintf( TEST->outfp,       "  BlongM " );
+            }
+         }
+      }
+/*
+ * Lookahead depths
+ */
+      HPL_fprintf( TEST->outfp,       "\nDEPTH  :" );
+      for( i = 0; i < Mmin( 8, *NDHS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", DH[i]  );
+      if( *NDHS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDHS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", DH[i]  );
+         if( *NDHS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDHS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", DH[i]  );
+         }
+      }
+/*
+ * Swapping algorithm
+ */
+      HPL_fprintf( TEST->outfp,       "\nSWAP   :" );
+      if(      *FSWAP == HPL_SWAP00 )
+         HPL_fprintf( TEST->outfp, " Binary-exchange" );
+      else if( *FSWAP == HPL_SWAP01 )
+         HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
+      else if( *FSWAP == HPL_SW_MIX )
+         HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
+/*
+ * L1 storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nL1     :" );
+      if(      *L1NOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * U  storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nU      :" );
+      if(      *UNOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * Equilibration
+ */
+      HPL_fprintf( TEST->outfp,       "\nEQUIL  :" );
+      if(      *EQUIL != 0 )
+         HPL_fprintf( TEST->outfp, " yes" );
+      else
+         HPL_fprintf( TEST->outfp, " no" );
+/*
+ * Alignment
+ */
+      HPL_fprintf( TEST->outfp,       "\nALIGN  : %d double precision words",
+                   *ALIGN );
+
+      HPL_fprintf( TEST->outfp, "\n\n" );
+/*
+ * For testing only
+ */
+      if( TEST->thrsh > HPL_rzero )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The matrix A is randomly generated for each test." );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The following scaled residual check will be computed:" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "      ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
+         HPL_fprintf( TEST->outfp, "%s %21.6e\n",
+            "- The relative machine precision (eps) is taken to be     ",
+            TEST->epsil );
+         HPL_fprintf( TEST->outfp, "%s   %11.1f\n\n",
+            "- Computational tests pass if scaled residuals are less than      ",
+            TEST->thrsh );
+       }
+     }
+   }
+/*
+ * End of HPL_pdinfo
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c
new file mode 100644
index 000000000..73a62a7ff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c
@@ -0,0 +1,438 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtest
+(
+   HPL_T_test *                     TEST,
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        N,
+   const int                        NB
+)
+#else
+void HPL_pdtest
+( TEST, GRID, ALGO, N, NB )
+   HPL_T_test *                     TEST;
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        N;
+   const int                        NB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtest performs  one  test  given a set of parameters such as the
+ * process grid, the  problem size, the distribution blocking factor ...
+ * This function generates  the data, calls  and times the linear system
+ * solver,  checks  the  accuracy  of the  obtained vector solution  and
+ * writes this information to the file pointed to by TEST->outfp.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global input)                HPL_T_test *
+ *         On entry,  TEST  points  to a testing data structure:  outfp
+ *         specifies the output file where the results will be printed.
+ *         It is only defined and used by the process  0  of the  grid.
+ *         thrsh  specifies  the  threshold  value  for the test ratio.
+ *         Concretely, a test is declared "PASSED"  if and only if the
+ *         following inequality is satisfied:
+ *         ||Ax-b||_oo / ( epsil *
+ *                         ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+ *                          N )  < thrsh.
+ *         epsil  is the  relative machine precision of the distributed
+ *         computer. Finally the test counters, kfail, kpass, kskip and
+ *         ktest are updated as follows:  if the test passes,  kpass is
+ *         incremented by one;  if the test fails, kfail is incremented
+ *         by one; if the test is skipped, kskip is incremented by one.
+ *         ktest is left unchanged.
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters to be used for this test.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the order of the coefficient matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   double                     HPL_w[HPL_TIMING_N];
+#endif
+   HPL_T_pmat                 mat;
+   double                     wtime[1];
+   int                        info[3];  /* alloc-failed flag, myrow, mycol */
+   double                     Anorm1, AnormI, Gflops, Xnorm1, XnormI,
+                              BnormI, resid0, resid1;
+   double                     * Bptr;
+   void                       * vptr = NULL;
+   static int                 first=1;  /* nonzero: print the table header */
+   int                        ii, ip2, mycol, myrow, npcol, nprow, nq;
+   char                       ctop, cpfact, crfact;
+   time_t                     current_time_start, current_time_end;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mat.n  = N; mat.nb = NB; mat.info = 0;
+   mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow );
+   nq     = HPL_numroc( N, NB, NB, mycol, 0, npcol );
+   mat.nq = nq + 1;                     /* one extra local column, see below */
+/*
+ * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is
+ * N by N+1.  One column is added in every process column for the solve.
+ * The  result  however  is stored in a 1 x N vector replicated in every
+ * process row. In every process, A is lda * (nq+1), x is 1 * nq and the
+ * workspace is mp.
+ * Ensure that lda is a multiple of ALIGN and not a power of 2:  ip2  is
+ * the largest power of 2 <= lda; ALIGN is added again while they match.
+ */
+   mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align;
+   do
+   {
+      ii = ( mat.ld += ALGO->align ); ip2 = 1;
+      while( ii > 1 ) { ii >>= 1; ip2 <<= 1; }
+   }
+   while( mat.ld == ip2 );
+/*
+ * Allocate dynamic memory: ALIGN + (lda+1) * (nq+1) doubles in one block
+ */
+   vptr = (void*)malloc( ( (size_t)(ALGO->align) + 
+                           (size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
+                         sizeof(double) );
+   info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
+   (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
+                          GRID->all_comm );  /* combine info with HPL_max */
+   if( info[0] != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest",
+                    "[%d,%d] %s", info[1], info[2],
+                    "Memory allocation failed for A, x and b. Skip." );
+      (TEST->kskip)++;
+      /* some processes might have succeeded with allocation */
+      if (vptr) free(vptr);
+      return;
+   }
+/*
+ * generate matrix and right-hand-side, [ A | b ] which is N by N+1.
+ */
+   mat.A  = (double *)HPL_PTR( vptr,
+                               ((size_t)(ALGO->align) * sizeof(double) ) );
+   mat.X  = Mptr( mat.A, 0, mat.nq, mat.ld );
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+#ifdef HPL_CALL_VSIPL
+   mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
+                                 (vsip_length)(mat.ld * mat.nq),
+                                 VSIP_MEM_NONE );
+#endif
+/*
+ * Solve linear system; timer 0 and the time() stamps bracket HPL_pdgesv
+ */
+   HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm );
+   time( &current_time_start );
+   HPL_ptimer( 0 );
+   HPL_pdgesv( GRID, ALGO, &mat );
+   HPL_ptimer( 0 );
+   time( &current_time_end );
+#ifdef HPL_CALL_VSIPL
+   (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); 
+   vsip_blockdestroy_d( mat.block );
+#endif
+/*
+ * Gather max of all CPU and WALL clock timings and print timing results
+ */
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       1, 0, wtime );
+   /* Only the process at grid position (0,0) writes to TEST->outfp */
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      if( first )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "T/V                N    NB     P     Q",
+                      "               Time                 Gflops" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         if( TEST->thrsh <= HPL_rzero ) first = 0;
+      }
+/*
+ * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve.
+ * Gflops = ( 2/3 N^3 + 3/2 N^2 ) / ( 1.0e+9 * wtime ).  Print WALL time
+ */
+      Gflops = ( ( (double)(N) /   1.0e+9 ) * 
+                 ( (double)(N) / wtime[0] ) ) * 
+                 ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) );
+      /* One-letter codes ('L', 'C' or 'R') for ALGO->pfact and ALGO->rfact */
+      cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ?
+                   (char)('C') : (char)('R') ) );
+      crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? 
+                   (char)('C') : (char)('R') ) );
+      /* Encode ALGO->btopo as a single digit, '0' through '5' */
+      if(      ALGO->btopo == HPL_1RING   ) ctop = '0';
+      else if( ALGO->btopo == HPL_1RING_M ) ctop = '1';
+      else if( ALGO->btopo == HPL_2RING   ) ctop = '2';
+      else if( ALGO->btopo == HPL_2RING_M ) ctop = '3';
+      else if( ALGO->btopo == HPL_BLONG   ) ctop = '4';
+      else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5';
+      /* ctime() text already ends in '\n': a blank line follows each stamp */
+      if( wtime[0] > HPL_rzero ) {
+         HPL_fprintf( TEST->outfp,
+             "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f    %19.4e\n",
+             ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ),
+             ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin,
+             N, NB, nprow, npcol, wtime[0], Gflops );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() end time   %s\n", ctime( &current_time_end ) );
+      }
+   }
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       HPL_TIMING_N, HPL_TIMING_BEG, HPL_w );
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V",
+                   "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" );
+/*
+ * Recursive panel factorization
+ */
+      if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time rfact . . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization
+ */
+      if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time pfact . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization (swap)
+ */
+      if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time mxswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] );
+/*
+ * Update
+ */
+      if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time update  . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] );
+/*
+ * Update (swap)
+ */
+      if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time laswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] );
+/*
+ * Upper triangular system solve
+ */
+      if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time up tr sv  . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] );
+
+      if( TEST->thrsh <= HPL_rzero )
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+   }
+#endif
+/*
+ * Quick return (counted in kpass) when thrsh <= 0: no residual check
+ */
+   if( TEST->thrsh <= HPL_rzero )
+   { (TEST->kpass)++; if( vptr ) free( vptr ); return; }
+/*
+ * Check info returned by solve
+ */
+   if( mat.info != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", 
+                    "Error code returned by solve is", mat.info, "skip" );
+      (TEST->kskip)++;
+      if( vptr ) free( vptr ); return;
+   }
+/*
+ * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x,
+ * and norm inf of b - A x. Display residual checks.
+ */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+   Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
+   AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
+/*
+ * Because x is distributed in process rows, switch the norms
+ */
+   XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 );
+   Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 );
+/*
+ * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to
+ * find the max (in the col). Then (3) broadcast along the rows so that every
+ * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5]
+ * for the entries of B, it is very likely that BnormI (<=,~) 0.5.
+ */
+   Bptr = Mptr( mat.A, 0, nq, mat.ld );
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){
+      if( mat.mp > 0 )
+      {
+         BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI );
+      }
+      else
+      {
+         BnormI = HPL_rzero;
+      }
+      (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max,
+                             GRID->col_comm );
+   }
+   (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE,
+                          HPL_indxg2p( N, NB, NB, 0, npcol ),
+                          GRID->row_comm );
+/*
+ * If I own b, compute ( b - A x ) and ( - A x ) otherwise
+ */
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );
+   }
+   else if( nq > 0 )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 );
+   }
+   else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } /* nq <= 0 */
+/*
+ * Reduce the distributed residual in process column 0
+ */
+   if( mat.mp > 0 )
+      (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0,
+                         GRID->row_comm );
+/*
+ * Compute || b - A x ||_oo
+ */
+   resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld );
+/*
+ * Computes and displays norms, residuals ...
+ */
+   if( N <= 0 )
+   {
+      resid1 = HPL_rzero;
+   }
+   else
+   {
+      resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) );
+   }
+   /* The counters are updated by every caller; only (0,0) prints below */
+   if( resid1 < TEST->thrsh ) (TEST->kpass)++;
+   else                       (TEST->kfail)++;
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n",
+         "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1,
+         " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) );
+
+      if(resid1 >= TEST->thrsh ) 
+      {
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||Ax-b||_oo  . . . . . . . . . . . . . . . . . = ", resid0 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_1  . . . . . . . . . . . . . . . . . . . = ", Anorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_1  . . . . . . . . . . . . . . . . . . . = ", Xnorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI );
+      }
+   }
+   if( vptr ) free( vptr );
+/*
+ * End of HPL_pdtest
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o
new file mode 100644
index 000000000..2f493afd2
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o
new file mode 100644
index 000000000..619588240
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o
new file mode 100644
index 000000000..3460f7cbd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Names not set here (INCdir, BINdir, HPLlib, CC, ...) are expected from Make.inc.
+# ######################################################################
+# Headers listed as prerequisites of every object file built below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h   \
+   $(INCdir)/hpl_gesv.h   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h  \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+   $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+#
+xhpl             = $(BINdir)/xhpl
+#
+## Object files ########################################################
+#
+HPL_pteobj       = \
+   HPL_pddriver.o         HPL_pdinfo.o           HPL_pdtest.o
+#
+## Targets #############################################################
+# all, dexe and clean name actions, not files; dexe.grd is the stamp file.
+all     : dexe
+.PHONY  : all dexe clean
+dexe    : dexe.grd
+# Copy ../HPL.dat into $(BINdir), the directory that receives xhpl.
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Link xhpl, bring HPL.dat up to date, then touch the stamp file.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Objects are compiled here from the sources in the parent directory.
+HPL_pddriver.o         : ../HPL_pddriver.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pddriver.c
+HPL_pdinfo.o           : ../HPL_pdinfo.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdinfo.c
+HPL_pdtest.o           : ../HPL_pdtest.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtest.c
+#
+# ######################################################################
+# Remove the object and stamp files created in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/dexe.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/dexe.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c
new file mode 100644
index 000000000..202416079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c
@@ -0,0 +1,358 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int         HPL_ptimer_disabled;  /* nonzero: HPL_ptimer calls are ignored */
+static double      HPL_ptimer_cpusec   [HPL_NPTIMER],  /* accumulated cpu  seconds */
+                   HPL_ptimer_cpustart [HPL_NPTIMER],  /* cpu  reading at last start */
+                   HPL_ptimer_wallsec  [HPL_NPTIMER],  /* accumulated wall seconds */
+                   HPL_ptimer_wallstart[HPL_NPTIMER];  /* wall reading at last start, or HPL_PTIMER_STARTFLAG */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * Zero every timer, mark it as not started, and enable HPL_ptimer.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        timer;
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+   for( timer = 0; timer < HPL_NPTIMER; timer++ )
+   {
+      HPL_ptimer_wallsec  [timer] = HPL_ptimer_cpusec[timer] = HPL_rzero;
+      HPL_ptimer_wallstart[timer] = HPL_PTIMER_STARTFLAG;
+      HPL_ptimer_cpustart [timer] = HPL_PTIMER_STARTFLAG;
+   }
+/*
+ * End of HPL_ptimer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer is a  "stopwatch"  style cpu/wall timer in seconds.  There
+ * are HPL_NPTIMER independent timers.  For a given timer,  a first call
+ * starts it and the next call stops it, adding the elapsed cpu and wall
+ * time to the totals accumulated for that timer.  Calls are ignored while
+ * the timers are disabled,  see HPL_ptimer_disable(),  which makes sure
+ * that a section of code does not affect the timings  even if it calls
+ * routines containing HPL_ptimer calls;  HPL_ptimer_enable()  turns the
+ * timers back on.  The accumulated value of a timer is retrieved with
+ *  
+ * t0 = HPL_ptimer_inquire( TMTYPE, I )
+ *  
+ * with TMTYPE = HPL_WALL_PTIME or HPL_CPU_PTIME.  HPL_ptimer_boot() must
+ * be called before any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I  specifies the timer to start/stop.  I is used as
+ *         an index in arrays of HPL_NPTIMER entries and is not checked.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_ptimer_disabled ) return;
+/*
+ * A running timer is stopped: the cpu and wall intervals are added to
+ * its totals and the timer is marked as not started again.
+ */
+   if( HPL_ptimer_wallstart[I] != HPL_PTIMER_STARTFLAG )
+   {
+      HPL_ptimer_cpusec   [I] += HPL_ptimer_cputime () - HPL_ptimer_cpustart [I];
+      HPL_ptimer_wallsec  [I] += HPL_ptimer_walltime() - HPL_ptimer_wallstart[I];
+      HPL_ptimer_wallstart[I]  = HPL_PTIMER_STARTFLAG;
+      return;
+   }
+/*
+ * Otherwise the timer is started by recording the current clock readings
+ */
+   HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+   HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+/*
+ * End of HPL_ptimer
+ */
+} 
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable clears the disabled flag,  so that subsequent calls
+ * to HPL_ptimer are honoured again.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_enable
+ */
+} 
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable  sets the disabled flag,  so that subsequent calls
+ * to HPL_ptimer return without starting or stopping any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+/*
+ * End of HPL_ptimer_disable
+ */
+} 
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME          TMTYPE,
+   const int                  I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_PTIME          TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns the wall- or cpu- time accumulated so far in
+ * timer I,  or HPL_PTIMER_ERROR when the requested clock reports an error.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     probe;
+   const double               * total;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Probe the clock selected by TMTYPE (CPU time is the default)
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      probe = HPL_ptimer_walltime();
+      total = HPL_ptimer_wallsec;
+   }
+   else
+   {
+      probe = HPL_ptimer_cputime();
+      total = HPL_ptimer_cpusec;
+   }
+/*
+ * If that clock is not available on this machine, return HPL_PTIMER_ERROR
+ */
+   if( probe == HPL_PTIMER_ERROR ) return( HPL_PTIMER_ERROR );
+   return( total[I] );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm                   COMM,
+   const HPL_T_PTIME_OP       OPE,
+   const HPL_T_PTIME          TMTYPE,
+   const int                  N,
+   const int                  IBEG,
+   double                     * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int                  IBEG, N;
+   const HPL_T_PTIME_OP       OPE;
+   const HPL_T_PTIME          TMTYPE;
+   MPI_Comm                   COMM;
+   double                     * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine  combines the timing information stored on a scope
+ * of processes into the user TIMES array.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)        MPI_Comm
+ *         The MPI communicator  identifying  the process  collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)              const HPL_T_PTIME_OP
+ *         On entry, OPE specifies what combine operation should be done
+ *         as follows:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME  get sum of times across processes.
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * N       (global input)              const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)              const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)             double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         TIMES[i] is the combined time of timer IBEG+i, i = 0..N-1.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer is disabled for the combine operation - copy timing information
+ * into the user times array.  If wall- or cpu-time is not available on
+ * this machine, fill TIMES with HPL_PTIMER_ERROR, restore the flag, return.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+      { for( i = 0; i < N; i++ ) { TIMES[i] = HPL_PTIMER_ERROR; }
+        HPL_ptimer_disabled = tmpdis; return;                           }
+      else
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; }
+   }
+   else
+   {
+      if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+      { for( i = 0; i < N; i++ ) { TIMES[i] = HPL_PTIMER_ERROR; }
+        HPL_ptimer_disabled = tmpdis; return;                          }
+      else
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; }
+   }
+/*
+ * Clamp negative times to zero, combine across COMM, restore the flag
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+
+   if(      OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME  )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else  /* HPL_AMAX_PTIME, which is also the default for any other OPE */
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/* clock() variant: seconds of processor time since the origin reading, */
+/* which is taken by the first call that finds the origin still unset.  */
+   static double              ticks_per_sec = CLOCKS_PER_SEC;
+   static clock_t             origin = 0;
+   clock_t                    elapsed;
+
+   if( origin == 0 ) origin = clock();
+   elapsed = clock() - origin;
+   return( (double)(elapsed) / ticks_per_sec );
+}
+ 
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/* times() variant: tms_utime over the cached sysconf(_SC_CLK_TCK) rate. */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+ 
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/* getrusage() variant: ru_utime of RUSAGE_SELF converted to seconds. */
+   struct rusage              usage;
+   (void) getrusage( RUSAGE_SELF, &usage );
+   return( ( (double)( usage.ru_utime.tv_usec ) / 1000000.0 ) +
+           (double)( usage.ru_utime.tv_sec ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/ 
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+ 
+#if defined( HPL_USE_GETTIMEOFDAY )
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+/*
+ * Wall-clock seconds elapsed since the origin;  the call that finds the
+ * origin unset records the gettimeofday() reading and returns zero.
+ */
+   static long                sec0 = 0, usec0;
+   struct timeval             now;
+   double                     dsec, dusec;
+
+   (void) gettimeofday( &now, NULL );
+   if( sec0 == 0 )
+   { sec0 = now.tv_sec; usec0 = now.tv_usec; return( HPL_rzero ); }
+   dsec  = (double)( now.tv_sec  - sec0  );
+   dusec = (double)( now.tv_usec - usec0 );
+   return( dsec + ( dusec / 1000000.0 ) );
+}
+
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{  /* default when HPL_USE_GETTIMEOFDAY is not defined: use MPI_Wtime() */
+   return( MPI_Wtime() );
+}
+ 
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o
new file mode 100644
index 000000000..da32dc1af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o
new file mode 100644
index 000000000..0ed678ecb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o
new file mode 100644
index 000000000..b00e05dc8
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers whose modification forces every object below to be recompiled.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+#
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp file: touched after the objects are archived in HPLlib.
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+HPL_ptimer.o           : ../HPL_ptimer.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer.c
+HPL_ptimer_cputime.o   : ../HPL_ptimer_cputime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_cputime.c
+HPL_ptimer_walltime.o  : ../HPL_ptimer_walltime.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_walltime.c
+#
+# ######################################################################
+# Remove the objects and stamp files created in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int                    HPL_timer_disabled;  /* nonzero => HPL_timer() returns at once */
+static double                 HPL_timer_cpusec   [HPL_NTIMER],  /* accumulated cpu  seconds per timer */
+                              HPL_timer_cpustart [HPL_NTIMER],  /* cpu  stamp taken when a timer starts */
+                              HPL_timer_wallsec  [HPL_NTIMER],  /* accumulated wall seconds per timer */
+                              HPL_timer_wallstart[HPL_NTIMER];  /* wall stamp at start; HPL_TIMER_STARTFLAG while stopped */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot zeroes the accumulated cpu and wall time of every timer,
+ * marks every timer as not running, and turns HPL_timer back on.
+ */
+   int                        k;
+
+   for( k = 0; k < HPL_NTIMER; k++ )
+   {
+      HPL_timer_wallsec  [k] = HPL_rzero;
+      HPL_timer_cpusec   [k] = HPL_rzero;
+      HPL_timer_wallstart[k] = HPL_TIMER_STARTFLAG;
+      HPL_timer_cpustart [k] = HPL_TIMER_STARTFLAG;
+   }
+/*
+ * Re-enable HPL_timer (undoes any earlier HPL_timer_disable)
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer is a cpu/wall "stopwatch" counting seconds, with room for
+ * HPL_NTIMER independent timers.  Calls made with the same index I act
+ * in turn:  the first one starts timer I,  the next one stops it and
+ * adds the elapsed cpu and wall time to the totals kept for I,  and so
+ * on.  HPL_timer_disable() makes every call to this routine a no-op, so
+ * that a section of code can be kept out of the timings even if it calls
+ * routines containing HPL_timer calls;  HPL_timer_enable() reverts that.
+ * The current value of a timer is retrieved by calling
+ *  
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *  
+ * To initialize  the  timer functionality,  HPL_timer_boot()  must have
+ * been called prior to any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.  I is used
+ *         unchecked as an index into arrays of HPL_NTIMER entries.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_timer_disabled ) return;
+/*
+ * Timer I is running: stop it, add the interval to both totals, and
+ * flag it as idle again
+ */
+   if( HPL_timer_wallstart[I] != HPL_TIMER_STARTFLAG )
+   {
+      HPL_timer_cpusec   [I] += HPL_timer_cputime () - HPL_timer_cpustart [I];
+      HPL_timer_wallsec  [I] += HPL_timer_walltime() - HPL_timer_wallstart[I];
+      HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG;
+      return;
+   }
+/*
+ * Otherwise timer I is idle: start it by recording both time stamps
+ */
+   HPL_timer_wallstart[I] = HPL_timer_walltime();
+   HPL_timer_cpustart [I] = HPL_timer_cputime ();
+/*
+ * End of HPL_timer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable clears the disable flag,  so that subsequent calls to
+ * HPL_timer start and stop timers again.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable raises the disable flag, so that subsequent calls to
+ * HPL_timer return at once without touching any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire returns the wall- or cpu- seconds that timer I has
+ * accumulated over its completed start/stop intervals.  An interval
+ * that is still open (timer started, not yet stopped) is not counted.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_TIME
+ *         On entry, TMTYPE specifies which total is returned:
+ *            = HPL_WALL_TIME : wall clock time is returned,
+ *            = anything else : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.
+ *
+ * Return value
+ * ============
+ *
+ * The selected total,  or HPL_TIMER_ERROR when the corresponding clock
+ * routine itself returns HPL_TIMER_ERROR.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Wall clock total.  The clock routine is probed first: if wall time is
+ * not available on this machine, HPL_TIMER_ERROR is returned instead
+ */
+   if( TMTYPE == HPL_WALL_TIME )
+   {
+      if( HPL_timer_walltime() == HPL_TIMER_ERROR )
+         return( HPL_TIMER_ERROR );
+      return( HPL_timer_wallsec[I] );
+   }
+/*
+ * Any other TMTYPE selects the CPU total,  guarded by the same probe on
+ * HPL_timer_cputime()
+ */
+   if( HPL_timer_cputime()  == HPL_TIMER_ERROR )
+      return( HPL_TIMER_ERROR );
+   return( HPL_timer_cpusec [I] );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_cputime returns the cpu time.  If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* origin: clock() value latched on the first call (0 = not latched yet) */
+   static clock_t             origin = 0;
+   static double              ticks_per_sec = CLOCKS_PER_SEC;
+   clock_t                    elapsed;
+
+   if( origin == 0 ) origin = clock();
+   elapsed = clock() - origin;
+/* processor time since the latched origin, converted from ticks to seconds */
+   return( (double)(elapsed) / ticks_per_sec );
+}
+
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;  /* clock ticks per second */
+
+/* sysconf() is queried while ClockTick is still zero; the rate is then reused */
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );  /* user time, in seconds */
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE )  */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct rusage              usage;   /* filled in by getrusage() below */
+   (void) getrusage( RUSAGE_SELF, &usage );
+   return( ( (double)( usage.ru_utime.tv_usec ) / 1000000.0 ) +  /* user time only */
+           (double)( usage.ru_utime.tv_sec  ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+/* First call: latch the time of day as the origin and return zero;    */
+/* later calls: return the seconds elapsed since that origin.          */
+   static long                origin_sec = 0, origin_usec;
+   struct timeval             now;
+
+   (void) gettimeofday( &now, NULL );
+   if( origin_sec == 0 )
+   {
+      origin_sec  = now.tv_sec;
+      origin_usec = now.tv_usec;
+      return( HPL_rzero );
+   }
+   return( (double)( now.tv_sec - origin_sec ) +
+           ( (double)( now.tv_usec - origin_usec ) / 1000000.0 ) );
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o
new file mode 100644
index 000000000..874d5ee26
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o
new file mode 100644
index 000000000..3b221b80d
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o
new file mode 100644
index 000000000..4ec1ce1b4
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc
new file mode 120000
index 000000000..8547ec814
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc
@@ -0,0 +1 @@
+/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers every object in this directory is rebuilt against.
+INCdep           = $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+#
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_timobj       = HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o
+#
+## Targets #############################################################
+# all and lib name no file, hence .PHONY; lib.grd is the stamp of the archive step.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+#
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# Each object is compiled from its first prerequisite, the source one directory up.
+HPL_timer.o            : ../HPL_timer.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_timer_cputime.o    : ../HPL_timer_cputime.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_timer_walltime.o   : ../HPL_timer_walltime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # clean names no file; a stray file "clean" must not mask it
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg
new file mode 100755
index 000000000..9af78f844
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg
new file mode 100755
index 000000000..73e4391cf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg
new file mode 100755
index 000000000..23795f8b9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg
new file mode 100755
index 000000000..c294e0d07
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg
new file mode 100755
index 000000000..f37187f13
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html
new file mode 100755
index 000000000..49a4bd318
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_abort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_abort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_abort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_abort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html
new file mode 100755
index 000000000..591cdd596
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_all_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_all_reduce</B> All reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_all_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_all_reduce</B>
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html
new file mode 100755
index 000000000..86ae426ad
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html
@@ -0,0 +1,41 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_barrier HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_barrier</B> Barrier operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_barrier(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_barrier</B>
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html
new file mode 100755
index 000000000..079325ed7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html
@@ -0,0 +1,46 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bcast</B> Perform the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bcast(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bcast</B>
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html
new file mode 100755
index 000000000..0f9a9e1ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html
@@ -0,0 +1,37 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_binit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_binit</B> Initialize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_binit(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_binit</B>
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html
new file mode 100755
index 000000000..6e24b2c2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_broadcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_broadcast</B> Broadcast operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_broadcast(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_broadcast</B>
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html
new file mode 100755
index 000000000..f1dd51e7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bwait HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bwait</B> Finalize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bwait(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bwait</B>
+waits  for  the  row  broadcast  of  the current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html
new file mode 100755
index 000000000..4b98963ac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html
@@ -0,0 +1,42 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_copyL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_copyL</B> Copy the current panel into a contiguous workspace.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_copyL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_copyL</B>
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html
new file mode 100755
index 000000000..c34d0b2e8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_daxpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_daxpy</B> y := y + alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_daxpy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_daxpy</B>
+scales the vector x by alpha and adds it to y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html
new file mode 100755
index 000000000..2a4a485b5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dcopy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dcopy</B> y := x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dcopy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dcopy</B>
+copies the vector x into the vector y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html
new file mode 100755
index 000000000..667c0ff01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemm</B> C := alpha * op(A) * op(B) + beta * C.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSA</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSB</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>C</CODE>,
+<CODE>const int</CODE>
+<CODE>LDC</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemm</B>
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+</PRE>
+<PRE>
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+</PRE>
+<PRE>
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least  zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+</PRE>
+<PRE>
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+</PRE>
+<PRE>
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+</PRE>
+<PRE>
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dtrsm.html">HPL_dtrsm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html
new file mode 100755
index 000000000..d5921a9b2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html
@@ -0,0 +1,146 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemv</B> y := beta * y + alpha * op(A) * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemv</B>
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When  BETA   is
+        supplied as zero then  Y  need not be set on input.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        Before entry with BETA non-zero, the incremented array Y must
+        contain the vector  y.  On exit,  Y  is  overwritten  by  the
+        updated vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html
new file mode 100755
index 000000000..e4ea948ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html
@@ -0,0 +1,124 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dger HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dger</B> A := alpha * x * y^T + A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dger(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dger</B>
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html
new file mode 100755
index 000000000..b64d34e0c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlacpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlacpy</B> B := A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlacpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlacpy</B>
+copies an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlatcpy.html">HPL_dlatcpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html
new file mode 100755
index 000000000..cb87a90ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlamch(</CODE>
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlamch</B>
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>References</H1>
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html
new file mode 100755
index 000000000..ce276e257
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlange(</CODE>
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaprnt.html">HPL_dlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html
new file mode 100755
index 000000000..f589ee2bb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaprnt</B> Print the matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaprnt(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaprnt</B>
+prints to standard error an M-by-N matrix A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+</PRE>
+<PRE>
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html
new file mode 100755
index 000000000..8e36cf6c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp00N</B> performs a series of row interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp00N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp00N</B>
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html
new file mode 100755
index 000000000..aa8861d10
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01N</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01N</B>
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html
new file mode 100755
index 000000000..9697471c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html
@@ -0,0 +1,110 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01T</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01T</B>
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html
new file mode 100755
index 000000000..d4e1a0cf8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html
@@ -0,0 +1,107 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp02N</B> pack rows of A into columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp02N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>W0</CODE>,
+<CODE>double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp02N</B>
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html
new file mode 100755
index 000000000..f5c4127b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03N</B> copy rows of W into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03N</B>
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html
new file mode 100755
index 000000000..010175313
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03T</B> copy columns of W into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03T</B>
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html
new file mode 100755
index 000000000..bb6cab0a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html
@@ -0,0 +1,131 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04N</B> copy rows of U in A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04N(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04N</B>
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which rows of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html
new file mode 100755
index 000000000..0209a3689
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html
@@ -0,0 +1,132 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04T</B> copy columns of U in rows of A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04T(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04T</B>
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of columns of W that  will
+        be copied into U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which columns of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html
new file mode 100755
index 000000000..f428b7354
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05N</B> copy rows of U into A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05N</B>
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html
new file mode 100755
index 000000000..fffb9f320
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05T</B> copy columns of U into rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05T</B>
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html
new file mode 100755
index 000000000..f28ab48c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06N</B> swap rows of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06N</B>
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html
new file mode 100755
index 000000000..86032a9f4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06T</B> swap columns of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06T</B>
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html
new file mode 100755
index 000000000..84403ca79
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html
@@ -0,0 +1,77 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp10N</B> performs a series of column interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp10N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp10N</B>
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry, IPIV is an array of size N holding the pivot indexes.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html
new file mode 100755
index 000000000..fa1cca5d9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlatcpy</B> B := A^T
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlatcpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlatcpy</B>
+copies the transpose of an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlacpy.html">HPL_dlacpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html
new file mode 100755
index 000000000..c3361f32d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocmax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocmax</B> finds the maximum entry in matrix column.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocmax(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocmax</B>
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html
new file mode 100755
index 000000000..b5c4b74a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpN</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpN</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html
new file mode 100755
index 000000000..d31361543
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpT</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpT</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html
new file mode 100755
index 000000000..7886da146
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dmatgen</B> random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dmatgen(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dmatgen</B>
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html
new file mode 100755
index 000000000..c13427f44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dscal HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dscal</B> x = alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dscal(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dscal</B>
+scales the vector x by alpha.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html
new file mode 100755
index 000000000..cae6980a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dswap HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dswap</B> y &lt;-&gt; x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dswap(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dswap</B>
+swaps the vectors x and y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html
new file mode 100755
index 000000000..3d60e597f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html
@@ -0,0 +1,168 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsm</B> B := A^{-1} * B  or  B := B * A^{-1}.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsm</B>
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies  the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:
+           TRANS==HplNoTrans    : op( A ) = A,
+           TRANS==HplTrans      : op( A ) = A^T,
+           TRANS==HplConjTrans  : op( A ) = A^T.
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+</PRE>
+<PRE>
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemm.html">HPL_dgemm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html
new file mode 100755
index 000000000..3e4703529
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html
@@ -0,0 +1,136 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsv</B> x := A^{-1} x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsv</B>
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html
new file mode 100755
index 000000000..d64ecab99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html
@@ -0,0 +1,115 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_equil HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_equil</B> Equilibrate U and forward the column panel L.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_equil(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_equil</B>
+equilibrates  the  local  pieces  of U, so that on exit to
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+</PRE>
+<PRE>
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+</PRE>
+<PRE>
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0..NPROW), IPMAPM1[IPMAP[i]] = i.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html
new file mode 100755
index 000000000..d62b2c871
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_fprintf HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_fprintf</B> fprintf + fflush wrapper.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_fprintf(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_fprintf</B>
+is a wrapper around fprintf flushing the output stream.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html
new file mode 100755
index 000000000..b42f315c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html
@@ -0,0 +1,39 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_exit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_exit</B> Exit process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_exit(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_exit</B>
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html
new file mode 100755
index 000000000..47f63672d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_info HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_info</B> Retrieve grid information.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_info(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>int *</CODE>
+<CODE>NPROW</CODE>,
+<CODE>int *</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>MYROW</CODE>,
+<CODE>int *</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_info</B>
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html
new file mode 100755
index 000000000..0bec56e6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_init</B> Create a process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_init(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_init</B>
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+</PRE>
+<PRE>
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+</PRE>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html
new file mode 100755
index 000000000..f16b296f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html
@@ -0,0 +1,68 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_idamax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_idamax</B> 1st k s.t. |x_k| = max_i(|x_i|).
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_idamax(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_idamax</B>
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html
new file mode 100755
index 000000000..a3eb758da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html
@@ -0,0 +1,71 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2l</B> Map a global index into a local one.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2l(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2l</B>
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html
new file mode 100755
index 000000000..d9fa00436
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2lp</B> Map a global index into a local index and a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_indxg2lp(</CODE>
+<CODE>int *</CODE>
+<CODE>IL</CODE>,
+<CODE>int *</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2lp</B>
+computes the local index of a matrix entry pointed to by
+the global  index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+</PRE>
+<PRE>
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+</PRE>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html
new file mode 100755
index 000000000..0068dede3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2p HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2p</B> Map a global index into a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2p(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2p</B>
+computes the process coordinate  which possesses the entry
+of a matrix specified by a global index IG.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html
new file mode 100755
index 000000000..216e98057
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxl2g HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxl2g</B> Map an index-process pair into a global index.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxl2g(</CODE>
+<CODE>const int</CODE>
+<CODE>IL</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxl2g</B>
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html
new file mode 100755
index 000000000..34feff72c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html
@@ -0,0 +1,155 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_infog2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_infog2l</B> global to local index translation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_infog2l(</CODE>
+<CODE>int</CODE>
+<CODE>I</CODE>,
+<CODE>int</CODE>
+<CODE>J</CODE>,
+<CODE>const int</CODE>
+<CODE>IMB</CODE>,
+<CODE>const int</CODE>
+<CODE>MB</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>RSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>CSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>II</CODE>,
+<CODE>int *</CODE>
+<CODE>JJ</CODE>,
+<CODE>int *</CODE>
+<CODE>PROW</CODE>,
+<CODE>int *</CODE>
+<CODE>PCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_infog2l</B>
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed to by I, J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+</PRE>
+<PRE>
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+</PRE>
+<PRE>
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+</PRE>
+<PRE>
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+</PRE>
+<PRE>
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+</PRE>
+<PRE>
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+</PRE>
+<PRE>
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+</PRE>
+<PRE>
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+</PRE>
+<PRE>
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+</PRE>
+<PRE>
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html
new file mode 100755
index 000000000..be87a1f53
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_jumpit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_jumpit</B> jump into the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_jumpit(</CODE>
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_jumpit</B>
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+
+<H1>Arguments</H1>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html
new file mode 100755
index 000000000..0c42d80d8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ladd HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ladd</B> Adds two long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ladd(</CODE>
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ladd</B>
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+
+<H1>Arguments</H1>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html
new file mode 100755
index 000000000..8ef70cba5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_lmul HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_lmul</B> multiplies 2 long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_lmul(</CODE>
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_lmul</B>
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the intrinsic modulo function
+is inlined.
+
+<H1>Arguments</H1>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html
new file mode 100755
index 000000000..da271fc19
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_logsort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_logsort</B> Sort the processes in logarithmic order.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_logsort(</CODE>
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>,
+<CODE>const int</CODE>
+<CODE>ICURROC</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_logsort</B>
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain  the logarithmic sorted processes id with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words, the largest pieces
+of U will be sent a minimal number of times.
+
+<H1>Arguments</H1>
+<PRE>
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+</PRE>
+<PRE>
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+</PRE>
+<PRE>
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        this array contains the logarithmic mapping of the processes. In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROCS)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html
new file mode 100755
index 000000000..7cf0b0670
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_max HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_max</B> Combine (max) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_max(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_max</B>
+combines (max) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html
new file mode 100755
index 000000000..9c109c338
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_min HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_min</B> Combine (min) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_min(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_min</B>
+combines (min) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html
new file mode 100755
index 000000000..fa617cac3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numroc HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numroc</B> Compute the local number of rows/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numroc(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numroc</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html
new file mode 100755
index 000000000..c1037a193
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numrocI HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numrocI</B> Compute the local number of rows/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numrocI(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>I</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numrocI</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix entry.
+        I must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html
new file mode 100755
index 000000000..89aacbd9f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pabort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pabort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pabort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pabort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_pwarn.html">HPL_pwarn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html
new file mode 100755
index 000000000..1e8f8106c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html
@@ -0,0 +1,59 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_packL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_packL</B> Form the MPI structure for the row ring broadcasts.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_packL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>INDEX</CODE>,
+<CODE>const int</CODE>
+<CODE>LEN</CODE>,
+<CODE>const int</CODE>
+<CODE>IBUF</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_packL</B>
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+</PRE>
+<PRE>
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+</PRE>
+<PRE>
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html
new file mode 100755
index 000000000..adcc02e00
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html
@@ -0,0 +1,27 @@
+<HTML>
+<HEAD>
+<TITLE>main HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>main</B> HPL main timing program.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>main();</CODE>
+
+<H1>Description</H1>
+<B>main</B>
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+
+<H1>See Also</H1>
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html
new file mode 100755
index 000000000..f51cee5d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdfact HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdfact</B> recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdfact(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdfact</B>
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows to vary the
+recursive stopping criterion in terms of the number of columns in the
+panel, and  NDIV  allows to specify the number of subpanels each panel
+should be divided into.  Usually a value of 2 will be chosen. Finally
+PFACT is a function pointer specifying the  non-recursive  algorithm
+to be used on at most NBMIN columns. One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html
new file mode 100755
index 000000000..ebb9c18e4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html
@@ -0,0 +1,56 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv</B> Solve A x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdtrsv.html">HPL_pdtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html
new file mode 100755
index 000000000..c137975d4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv0</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv0(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv0</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html
new file mode 100755
index 000000000..1a19edc05
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html
@@ -0,0 +1,62 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK1</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK1(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK1</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html
new file mode 100755
index 000000000..f2a9a25f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK2</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK2(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK2</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html
new file mode 100755
index 000000000..94a7f78c0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html
@@ -0,0 +1,252 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdinfo HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdinfo</B> Read input parameter file.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdinfo(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>int *</CODE>
+<CODE>NS</CODE>,
+<CODE>int *</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>NBS</CODE>,
+<CODE>int *</CODE>
+<CODE>NB</CODE>,
+<CODE>HPL_T_ORDER *</CODE>
+<CODE>PMAPPIN</CODE>,
+<CODE>int *</CODE>
+<CODE>NPQS</CODE>,
+<CODE>int *</CODE>
+<CODE>P</CODE>,
+<CODE>int *</CODE>
+<CODE>Q</CODE>,
+<CODE>int *</CODE>
+<CODE>NPFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>PF</CODE>,
+<CODE>int *</CODE>
+<CODE>NBMS</CODE>,
+<CODE>int *</CODE>
+<CODE>NBM</CODE>,
+<CODE>int *</CODE>
+<CODE>NDVS</CODE>,
+<CODE>int *</CODE>
+<CODE>NDV</CODE>,
+<CODE>int *</CODE>
+<CODE>NRFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>RF</CODE>,
+<CODE>int *</CODE>
+<CODE>NTPS</CODE>,
+<CODE>HPL_T_TOP *</CODE>
+<CODE>TP</CODE>,
+<CODE>int *</CODE>
+<CODE>NDHS</CODE>,
+<CODE>int *</CODE>
+<CODE>DH</CODE>,
+<CODE>HPL_T_SWAP *</CODE>
+<CODE>FSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>TSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>L1NOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>UNOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>EQUIL</CODE>,
+<CODE>int *</CODE>
+<CODE>ALIGN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdinfo</B>
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies the threshold  value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+</PRE>
+<PRE>
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+</PRE>
+<PRE>
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+</PRE>
+<PRE>
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+</PRE>
+<PRE>
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+</PRE>
+<PRE>
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+</PRE>
+<PRE>
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+</PRE>
+<PRE>
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+</PRE>
+<PRE>
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+</PRE>
+<PRE>
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+</PRE>
+<PRE>
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Such a value is
+        either 0 (no-lookahead) or greater than zero.
+</PRE>
+<PRE>
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+</PRE>
+<PRE>
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+</PRE>
+<PRE>
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+</PRE>
+<PRE>
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+</PRE>
+<PRE>
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+</PRE>
+<PRE>
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html
new file mode 100755
index 000000000..c1b51370a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlamch(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlamch</B>
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps),  the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin)- base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax)  - (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+<PRE>
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html
new file mode 100755
index 000000000..0d1affc3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlange(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaprnt.html">HPL_pdlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html
new file mode 100755
index 000000000..0ce810db0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html
@@ -0,0 +1,94 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaprnt</B> Print a distributed matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaprnt(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>IAROW</CODE>,
+<CODE>const int</CODE>
+<CODE>IACOL</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaprnt</B>
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+</PRE>
+<PRE>
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+</PRE>
+<PRE>
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html
new file mode 100755
index 000000000..07279fdb0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html
new file mode 100755
index 000000000..08b8ea770
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html
new file mode 100755
index 000000000..2d4772fda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadN.html">HPL_spreadN</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollN.html">HPL_rollN</A>,
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html
new file mode 100755
index 000000000..f6a5d8c4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadT.html">HPL_spreadT</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollT.html">HPL_rollT</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html
new file mode 100755
index 000000000..28fb95509
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmatgen</B> Parallel random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmatgen(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmatgen</B>
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html
new file mode 100755
index 000000000..c11d2b2da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html
@@ -0,0 +1,96 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmxswp</B> swaps and broadcasts the pivot row.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmxswp(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmxswp</B>
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html
new file mode 100755
index 000000000..663d2e266
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrN</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html
new file mode 100755
index 000000000..0e1490430
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrT</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html
new file mode 100755
index 000000000..cb78fa4be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_disp</B> Deallocate a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_disp(</CODE>
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_disp</B>
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html
new file mode 100755
index 000000000..d33e5e400
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_free</B> Deallocate the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_free(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_free</B>
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html
new file mode 100755
index 000000000..2d105354f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_init</B> Initialize the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_init(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_init</B>
+initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html
new file mode 100755
index 000000000..1b3029ecb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_new</B> Create a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_new(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_new</B>
+creates and initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html
new file mode 100755
index 000000000..386815fd2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllN</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html
new file mode 100755
index 000000000..04307e823
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllT</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html
new file mode 100755
index 000000000..8d705c63c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlN</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlN</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html
new file mode 100755
index 000000000..af458e7a1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlT</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlT</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop  is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html
new file mode 100755
index 000000000..9169c48cc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrN</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrN</B>
+recursively  factorizes  a panel of columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html
new file mode 100755
index 000000000..cc9047c3c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrT</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrT</B>
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html
new file mode 100755
index 000000000..bf16e6009
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllN</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllN</B>
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html
new file mode 100755
index 000000000..9904fb326
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllT</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html
new file mode 100755
index 000000000..9758c0722
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlN</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlN</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html
new file mode 100755
index 000000000..ed48a815d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlT</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html
new file mode 100755
index 000000000..1c11c34d7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtest HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtest</B> Perform one test.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtest(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtest</B>
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  &lt; thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+</PRE>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html
new file mode 100755
index 000000000..0bb182dc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html
@@ -0,0 +1,64 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtrsv</B> Solve triu( A ) x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtrsv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>AMAT</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtrsv</B>
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns  implemented  in terms of  synchronous communication point to
+point primitives.  The  lookahead of depth 1 is used to minimize  the
+critical path. This entire operation is essentially ``latency'' bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html
new file mode 100755
index 000000000..b77cddbce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNN</B>
+broadcasts (forwards) the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html
new file mode 100755
index 000000000..4ecb1f687
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNT</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html
new file mode 100755
index 000000000..ae735bf84
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTN</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html
new file mode 100755
index 000000000..7c69f8828
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTT</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html
new file mode 100755
index 000000000..9312eb4eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_perm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_perm</B> Combine 2 index arrays - Generate the permutation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_perm(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_perm</B>
+combines  two  index  arrays and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU. Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutations  producing  the same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+
+<H1>Arguments</H1>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+</PRE>
+<PRE>
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+</PRE>
+<PRE>
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutations, that  should be applied  in increasing order to
+        permute the underlying array U in place.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html
new file mode 100755
index 000000000..e6deb3d93
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pipid HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pipid</B> Simplify the pivot vector.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pipid(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pipid</B>
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html
new file mode 100755
index 000000000..f3dbbcdea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html
@@ -0,0 +1,187 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx0</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx0(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>LLEN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx0</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA &lt; N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after the other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would be rather cumbersome
+to figure it out on the fly during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html
new file mode 100755
index 000000000..0a49ede0b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html
@@ -0,0 +1,130 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx1</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx1(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>PERMU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx1</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+</PRE>
+<PRE>
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+<PRE>
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html
new file mode 100755
index 000000000..fbfd6be2f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx10 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx10</B> Compute the logarithmic maps for the spreading.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx10(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx10</B>
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html
new file mode 100755
index 000000000..8bedc3016
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html
@@ -0,0 +1,54 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pnum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pnum</B> Rank determination.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pnum(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pnum</B>
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html
new file mode 100755
index 000000000..abef45946
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ptimer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be disabled  by calling HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html
new file mode 100755
index 000000000..cffd863b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_cputime</B>
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html
new file mode 100755
index 000000000..a509897f1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html
new file mode 100755
index 000000000..221d23982
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pwarn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pwarn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pwarn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pwarn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pabort.html">HPL_pabort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html
new file mode 100755
index 000000000..5aef6669c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html
@@ -0,0 +1,40 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rand HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rand</B> random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_rand();</CODE>
+
+<H1>Description</H1>
+<B>HPL_rand</B>
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html
new file mode 100755
index 000000000..afcb570c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_recv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_recv</B> Receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_recv(</CODE>
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>SRC</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_recv</B>
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html
new file mode 100755
index 000000000..026435ed6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html
@@ -0,0 +1,75 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_reduce</B> Reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_reduce</B>
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array, so in all
+processes but the accumulating process the original data is corrupted.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE specifies the type of the buffer's operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html
new file mode 100755
index 000000000..1e1a49068
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollN</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollN</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html
new file mode 100755
index 000000000..a6ac29336
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollT</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollT</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html
new file mode 100755
index 000000000..6f5b5880c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sdrv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sdrv</B> Send and receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_sdrv(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>int</CODE>
+<CODE>PARTNER</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sdrv</B>
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages  of  length  less than  or  equal to zero  are not sent  nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+</PRE>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+</PRE>
+<PRE>
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_recv.html">HPL_recv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html
new file mode 100755
index 000000000..05dcb7e6d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_send HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_send</B> Send a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_send(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>DEST</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_send</B>
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_recv.html">HPL_recv</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html
new file mode 100755
index 000000000..44f37e35e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html
@@ -0,0 +1,52 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_setran HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_setran</B> Manage the random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_setran(</CODE>
+<CODE>const int</CODE>
+<CODE>OPTION</CODE>,
+<CODE>int *</CODE>
+<CODE>IRAN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_setran</B>
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2),  irand (resp. ias and ics) is set to the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+
+<H1>Arguments</H1>
+<PRE>
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+</PRE>
+<PRE>
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html
new file mode 100755
index 000000000..f0d8f8938
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadN</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadN</B>
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html
new file mode 100755
index 000000000..cec561646
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadT</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadT</B>
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html
new file mode 100755
index 000000000..be785b99e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html
@@ -0,0 +1,61 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sum</B> Combine (sum) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_sum(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sum</B>
+combines (sum) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html
new file mode 100755
index 000000000..8e6a79803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_timer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can  be  disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html
new file mode 100755
index 000000000..0fa9b6575
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_cputime</B>
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html
new file mode 100755
index 000000000..92588e49f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html
new file mode 100755
index 000000000..773df9ae0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_warn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_warn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_warn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_warn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html
new file mode 100755
index 000000000..794ae3a8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_xjumpm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_xjumpm</B> Compute constants to jump in the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_xjumpm(</CODE>
+<CODE>const int</CODE>
+<CODE>JUMPM</CODE>,
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>,
+<CODE>int *</CODE>
+<CODE>IAM</CODE>,
+<CODE>int *</CODE>
+<CODE>ICM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_xjumpm</B>
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+
+<H1>Arguments</H1>
+<PRE>
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over. When JUMPM is less than or equal to zero,
+        A and C are not computed, IRANM is set to IRANN corresponding
+        to a jump of size zero.
+</PRE>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+</PRE>
+<PRE>
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When JUMPM is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+<PRE>
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When JUMPM is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html
new file mode 100755
index 000000000..9b1d7222e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html
@@ -0,0 +1,299 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Algorithm</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Algorithm</H2>
+
+<STRONG>
+This  page provides  a high-level description of the algorithm used in
+this package. As indicated below,  HPL  contains in fact many possible
+variants for various operations.  Defaults could have been chosen,  or
+even  variants  could  be selected  during  the execution.  Due to the
+performance requirements,  it was  decided  to leave the user with the
+opportunity of choosing,  so that an "optimal" set of parameters could
+easily be experimentally determined for a given machine configuration.
+From a numerical accuracy point of view, <STRONG>all</STRONG> possible
+combinations are rigorously equivalent  to each other  even though the
+result may slightly differ (bit-wise).
+</STRONG><BR><BR>
+
+<UL>
+<LI><A HREF="algorithm.html#main">Main Algorithm</A>
+<LI><A HREF="algorithm.html#pfact">Panel Factorization</A>
+<LI><A HREF="algorithm.html#bcast">Panel Broadcast</A>
+<LI><A HREF="algorithm.html#look_ahead">Look-ahead</A>
+<LI><A HREF="algorithm.html#update">Update</A>
+<LI><A HREF="algorithm.html#trsv">Backward Substitution</A>
+<LI><A HREF="algorithm.html#check">Checking the Solution</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="main">Main Algorithm</A></H3>
+
+This  software  package  solves  a linear system  of order n:  A x = b by
+first  computing  the  LU  factorization with row partial pivoting of the
+n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular
+factor L is applied to b as the factorization progresses, the solution  x
+is obtained  by  solving  the upper triangular system U x = y.  The lower
+triangular  matrix  L  is left unpivoted  and  the array of pivots is not
+returned.<BR><BR>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>
+The  data  is distributed onto a two-dimensional P-by-Q grid of processes
+according  to  the  block-cyclic  scheme  to ensure  "good"  load balance
+as well as  the scalability  of the algorithm.  The  n-by-n+1 coefficient
+matrix is  first  logically partitioned into  nb-by-nb  blocks,  that are
+cyclically "dealt" onto the  P-by-Q  process grid.  This is done  in both
+dimensions of the matrix.</TD>
+<TD ALIGN=CENTER><IMG SRC = "mat2.jpg" BORDER=0 HEIGHT=165 WIDTH=340></TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER><IMG SRC ="main.jpg" BORDER=0 HEIGHT=165 WIDTH=165></TD>
+<TD ALIGN=LEFT>
+The  right-looking  variant  has been chosen for the main loop of the  LU
+factorization.  This  means that at each iteration of the loop a panel of
+nb columns is factorized,  and  the  trailing submatrix is updated.  Note
+that this computation is  thus  logically partitioned with the same block
+size nb that was used for the data distribution.</TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization</A></H3>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=10>
+<TR>
+<TD ALIGN=LEFT>
+At  a given iteration  of the main loop,  and  because of  the  cartesian 
+property of the distribution scheme,  each panel factorization  occurs in
+one column of processes.   This  particular part of the computation  lies
+on the critical path of  the overall algorithm.  The user is  offered the
+choice of three  (Crout, left- and right-looking)  matrix-multiply  based 
+recursive variants. The software also allows the user  to choose  in  how
+many  sub-panels  the current panel  should be divided  into  during  the
+recursion.  Furthermore,  one  can also  select at run-time the recursion
+stopping criterion in terms of the number  of  columns left to factorize.
+When this  threshold is reached,  the sub-panel will  then be  factorized
+using one of the three Crout, left- or right-looking matrix-vector  based 
+variants. Finally, for each panel column the pivot search, the associated
+swap  and broadcast  operation  of  the pivot row  are combined  into one 
+single communication step.  A   binary-exchange  (leave-on-all) reduction
+performs these three operations at once.</TD>
+<TD ALIGN=CENTER><IMG SRC = "pfact.jpg" BORDER=0 HEIGHT=300 WIDTH=160></TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="bcast">Panel Broadcast</A></H3>
+
+Once  the panel factorization has been computed,  this  panel  of columns
+is  broadcast  to the other process columns.   There  are  many  possible 
+broadcast  algorithms  and  the  software currently offers  6 variants to 
+choose from.  These variants are described below assuming  that process 0
+is the source of the broadcast for convenience. "->" means "sends to".
+<UL>
+<LI><STRONG>Increasing-ring</STRONG>:  0 -> 1;  1 -> 2; 2 -> 3 and so on.
+This algorithm is the classic one;  it has  the caveat that process 1 has
+to send a message.
+<CENTER>
+<IMG SRC="1ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-ring (modified)</STRONG>:  0 -> 1;  0 -> 2; 2 -> 3
+and so on. Process 0 sends two messages and process 1  only  receives one
+message. This algorithm is almost always better, if not the best.
+<CENTER>
+<IMG SRC="1rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring</STRONG>:  The Q processes are divided into
+two parts: 0 -> 1 and 0 -> Q/2;  Then processes 1  and Q/2 act as sources
+of two rings:  1 -> 2, Q/2 -> Q/2+1;  2 -> 3,  Q/2+1 -> Q/2+2  and so on.
+This  algorithm has the advantage  of reducing the time by which the last
+process  will  receive  the  panel  at  the  cost  of process 0 sending 2
+messages.
+<CENTER>
+<IMG SRC="2ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring (modified)</STRONG>:  As  one  may  expect,
+first 0 -> 1,  then  the  Q-1  processes  left are divided into two equal
+parts: 0 -> 2 and 0 -> Q/2;  Processes  2 and Q/2  act then as sources of
+two rings:  2 -> 3,  Q/2 -> Q/2+1;  3 -> 4,  Q/2+1 -> Q/2+2   and so on.
+This algorithm is probably  the most serious competitor to the increasing
+ring modified variant.
+<CENTER>
+<IMG SRC="2rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Long  (bandwidth  reducing)</STRONG>:  as   opposed   to  the
+previous  variants,  this  algorithm  and  its follower  synchronize  all 
+processes involved in the operation. The message is chopped into  Q equal
+pieces that are scattered  across the Q processes. 
+<CENTER>
+<IMG SRC="spread.jpg">
+</CENTER>
+The pieces are then rolled in Q-1 steps.  The scatter phase uses a binary
+tree and the rolling phase exclusively uses mutual message exchanges.  In
+odd steps 0 <-> 1,  2 <-> 3, 4 <-> 5 and so on;  in even steps Q-1 <-> 0,
+1 <-> 2, 3 <-> 4, 5 <-> 6 and so on.
+<CENTER>
+<IMG SRC="roll.jpg">
+</CENTER>
+More messages are exchanged, however the total volume of communication is
+independent of Q, making this algorithm  particularly suitable for  large
+messages.  This algorithm  becomes  competitive  when the nodes are "very 
+fast" and the network (comparatively) "very slow".<BR><BR>
+
+<LI><STRONG>Long (bandwidth reducing modified)</STRONG>:  same  as above,
+except that 0 -> 1 first,  and then the Long variant is used on processes
+0,2,3,4 .. Q-1.<BR><BR>
+<CENTER>
+<IMG SRC="spreadM.jpg">
+<IMG SRC="rollM.jpg">
+</CENTER>
+
+</UL>
+
+The rings variants are distinguished by a probe mechanism  that activates
+them.  In other words,  a process involved in the broadcast and different
+from  the source asynchronously  probes for the message to receive.  When
+the  message  is  available  the broadcast proceeds,  and  otherwise  the
+function returns.  This allows to interleave the broadcast operation with
+the update phase. This contributes to reduce the idle time spent by those
+processes waiting for the factorized panel.  This  mechanism is necessary
+to accommodate various computation/communication performance ratios.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="look_ahead">Look-ahead</A></H3>
+
+Once the panel has been broadcast or say during this broadcast operation,
+the trailing submatrix is updated  using the last panel in the look-ahead
+pipe: as mentioned before,  the panel factorization  lies on the critical
+path,  which  means  that when the kth panel has been factorized and then 
+broadcast, the next most urgent task to complete is the factorization and
+broadcast of the k+1 th panel.  This technique  is  often  referred to as
+"look-ahead" or "send-ahead" in the literature.  This  package  allows to
+select various "depth" of look-ahead.  By  convention,  a  depth  of zero
+corresponds to no lookahead,  in which case  the  trailing  submatrix  is
+updated by the panel currently broadcast.  Look-ahead consumes some extra
+memory  to  essentially  keep  all the panels of columns currently in the
+look-ahead pipe.  A look-ahead  of depth 1 (maybe 2) is likely to achieve
+the best performance gain.<BR><BR> 
+<HR NOSHADE>
+
+<H3><A NAME="update">Update</A></H3>
+
+The update of the trailing submatrix by the last panel in the  look-ahead
+pipe is made of two phases. First, the pivots must be applied to form the
+current row panel U. U should then be solved by the upper triangle of the
+column panel. U finally needs to be broadcast to each process row so that
+the  local  rank-nb  update  can take place.  We choose  to  combine  the
+swapping and broadcast of  U  at the cost of  replicating the solve.  Two
+algorithms are available for this communication operation.
+<UL>
+<LI><STRONG>Binary-exchange</STRONG>:  this is a modified variant  of the
+binary-exchange (leave on all) reduction operation.  Every process column
+performs the same operation.  The algorithm essentially works as follows.
+It pretends to reduce the row panel U, but at the beginning the only valid
+copy is owned by the current process row.  The  other process  rows  will
+contribute rows of A they own that should be copied in U and replace them
+with rows that were originally in the current process row.  The  complete
+operation is performed in log(P) steps.  For the sake of simplicity, let us
+assume that  P  is a power of two.  At step k,  process row p exchanges a 
+message with process row p+2^k.  There are  essentially two cases. First,
+one of those two process rows  has received  U  in  a previous step.  The
+exchange occurs.  One process  swaps  its  local rows of  A into U.  Both
+processes copy in  U remote rows of A. Second, none of those process rows
+has received U,  the exchange occurs, and both processes simply add those
+remote rows  to  the list  they have accumulated so far.  At each step, a 
+message  of  the size of  U  is exchanged by at least one pair of process
+rows.<BR><BR>
+
+<LI><STRONG>Long</STRONG>:   this  is   a   bandwidth   reducing  variant
+accomplishing the same task. The row panel is first spread (using a tree)
+among the process rows with respect to the pivot array. This is a scatter
+(V variant for MPI users).  Locally,  every process row  then swaps these
+rows  with  the  rows  of A it owns and that belong to U.  These  buffers
+are then rolled  (P-1 steps) to finish the broadcast of U.  Every process
+row permutes U and proceeds with the computational part of the update.  A
+couple  of  notes:   process  rows  are  logarithmically   sorted  before
+spreading,  so  that  processes  receiving the largest number of rows are
+first in the tree.  This makes  the communication volume optimal for this
+phase. Finally, before rolling and after the local swap, an equilibration
+phase occurs during  which the local pieces of  U  are  uniformly  spread
+across  the process rows.  A tree-based algorithm is used. This operation
+is necessary to keep the rolling phase optimal  even  when the pivot rows
+are  not  equally distributed  in  process rows.  This  algorithm  has  a 
+complexity  in  terms  of communication volume that solely depends on the 
+size of U.  In particular,  the number of process rows  only  impacts the
+number of messages exchanged.  It  will  thus  outperform   the  previous
+variant for large problems on large machine configurations.<BR><BR>
+
+</UL>
+
+The user can select any of the two variants above.  In addition, a mix is
+possible as well.  The  "binary-exchange"  algorithm will be used when  U
+contains at most a certain number of columns. Choosing at least the block
+size  nb as the threshold value is clearly recommended when look-ahead is
+on.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="trsv">Backward Substitution</A></H3>
+
+The factorization has just now ended, the back-substitution remains to be
+done.  For this,  we  choose  a look-ahead  of  depth  one  variant.  The
+right-hand-side  is  forwarded  in  process  rows  in  a  decreasing-ring 
+fashion,  so that  we solve Q * nb entries at a time.  At each step, this
+shrinking piece of the right-hand-side is updated. The process just above
+the one owning the current diagonal block of the matrix  A  updates first 
+its last nb piece of x,  forwards it to the previous process column, then
+broadcasts it in the process column in a decreasing-ring fashion as well.
+The solution is then updated and sent to the previous process column. The
+solution of the linear system is left replicated in every process row.<BR><BR>
+<HR NOSHADE>
+ 
+<H3><A NAME="check">Checking the Solution</A></H3>
+
+To verify the result obtained,  the input matrix  and right-hand side are
+regenerated.  The  normwise  backward  error  (see formula below) is then
+computed.  A solution  is  considered  as "numerically correct" when this
+quantity  is  less  than  a  threshold  value of the order of 1.0. In the
+expression   below,  eps  is  the  relative  (distributed-memory) machine
+precision.
+
+<UL>
+<LI>|| Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n )
+</UL>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif
new file mode 100755
index 000000000..6508c806f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html
new file mode 100755
index 000000000..934282c81
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html
@@ -0,0 +1,66 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Copyright and Licensing Terms</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Copyright Notice and Licensing Terms</H2>
+
+Redistribution  and  use in  source and binary forms, with or without
+modification, are  permitted provided  that the following  conditions
+are met:                                                             
+<OL>
+<LI>Redistributions  of  source code  must retain the above copyright
+notice, this list of conditions and the following disclaimer.        
+<LI>Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions,  and the following disclaimer in the
+documentation and/or other materials provided with the distribution. 
+<LI>All  advertising  materials mentioning  features  or  use of this
+software must display  the  following  acknowledgement:  This product
+includes   software   developed   at  the  University  of  Tennessee,
+Knoxville, Innovative Computing Laboratory.             
+<LI>The name of the  University, the name of the  Laboratory,  or the
+names  of  its  contributors  may  not  be used to endorse or promote
+products  derived   from   this  software  without  specific  written
+permission.                                                          
+</OL>
+                                                                      
+<H3>Disclaimer</H3>
+
+THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html
new file mode 100755
index 000000000..152188041
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html
@@ -0,0 +1,304 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Documentation</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Documentation</H2>
+
+The  HPL software distribution comes  with a set of text files explaining
+how to install,  run and tune the software. These files reside in the top
+level  directory  and their names are in upper case.  To  a large extent,
+this page reproduces them. In addition,  man- and HTML-pages are provided
+for every routine in the package. To access the man pages,  one  must add
+hpl/man  to one's MANPATH environment variable.  The  HTML  pages  can be 
+accessed on this site,  or by pointing your browser to your local hpl/www
+directory. Finally,  the source code has been heavily documented. Despite
+all the other documentation efforts, the  source  code remains  the  most
+trustworthy  and truthful piece of information about what goes on in HPL.
+<BR><BR>
+
+<H3>HPL Functions HTML Pages</H3>
+
+<STRONG>Computational Kernels Wrappers</STRONG> When calling the Fortran
+77 BLAS interface, these C functions make it possible to confine the C to Fortran
+77 interface  issues  to  a small  subset of routines.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_idamax.html">         HPL_idamax</A>
+<LI><A HREF = "HPL_dscal.html">          HPL_dscal</A>
+<LI><A HREF = "HPL_dswap.html">          HPL_dswap</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dcopy.html">          HPL_dcopy</A>
+<LI><A HREF = "HPL_daxpy.html">          HPL_daxpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemv.html">          HPL_dgemv</A>
+<LI><A HREF = "HPL_dger.html">           HPL_dger</A>
+<LI><A HREF = "HPL_dtrsv.html">          HPL_dtrsv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemm.html">          HPL_dgemm</A>
+<LI><A HREF = "HPL_dtrsm.html">          HPL_dtrsm</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Local Auxiliaries</STRONG> Basic functionality, local swap functions.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_fprintf.html">        HPL_fprintf</A>
+<LI><A HREF = "HPL_warn.html">           HPL_warn</A>
+<LI><A HREF = "HPL_abort.html">          HPL_abort</A>
+<LI><A HREF = "HPL_dlaprnt.html">        HPL_dlaprnt</A>
+<LI><A HREF = "HPL_dlamch.html">         HPL_dlamch</A>
+<LI><A HREF = "HPL_dlacpy.html">         HPL_dlacpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlange.html">         HPL_dlange</A>
+<LI><A HREF = "HPL_dlatcpy.html">        HPL_dlatcpy</A>
+<LI><A HREF = "HPL_dlaswp00N.html">      HPL_dlaswp00N</A>
+<LI><A HREF = "HPL_dlaswp01N.html">      HPL_dlaswp01N</A>
+<LI><A HREF = "HPL_dlaswp02N.html">      HPL_dlaswp02N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp03N.html">      HPL_dlaswp03N</A>
+<LI><A HREF = "HPL_dlaswp04N.html">      HPL_dlaswp04N</A>
+<LI><A HREF = "HPL_dlaswp05N.html">      HPL_dlaswp05N</A>
+<LI><A HREF = "HPL_dlaswp06N.html">      HPL_dlaswp06N</A>
+<LI><A HREF = "HPL_dlaswp10N.html">      HPL_dlaswp10N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp01T.html">      HPL_dlaswp01T</A>
+<LI><A HREF = "HPL_dlaswp03T.html">      HPL_dlaswp03T</A>
+<LI><A HREF = "HPL_dlaswp04T.html">      HPL_dlaswp04T</A>
+<LI><A HREF = "HPL_dlaswp05T.html">      HPL_dlaswp05T</A>
+<LI><A HREF = "HPL_dlaswp06T.html">      HPL_dlaswp06T</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Parallel Auxiliaries</STRONG> Index computations, parallel basic
+functionality.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_indxg2l.html">        HPL_indxg2l</A>
+<LI><A HREF = "HPL_indxg2lp.html">       HPL_indxg2lp</A>
+<LI><A HREF = "HPL_indxg2p.html">        HPL_indxg2p</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_indxl2g.html">        HPL_indxl2g</A>
+<LI><A HREF = "HPL_infog2l.html">        HPL_infog2l</A>
+<LI><A HREF = "HPL_numroc.html">         HPL_numroc</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_numrocI.html">        HPL_numrocI</A>
+<LI><A HREF = "HPL_pwarn.html">          HPL_pwarn</A>
+<LI><A HREF = "HPL_pabort.html">         HPL_pabort</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdlaprnt.html">       HPL_pdlaprnt</A>
+<LI><A HREF = "HPL_pdlamch.html">        HPL_pdlamch</A>
+<LI><A HREF = "HPL_pdlange.html">        HPL_pdlange</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Grid Management</STRONG>  Most of these routines have a direct
+MPI equivalent.  On new systems,  when the entire MPI functionality is
+not yet readily available, these functions are particularly convenient
+since they rely on a minimal  subset of the MPI standard.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_grid_exit.html">      HPL_grid_exit</A>
+<LI><A HREF = "HPL_grid_info.html">      HPL_grid_info</A>
+<LI><A HREF = "HPL_grid_init.html">      HPL_grid_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_all_reduce.html">     HPL_all_reduce</A>
+<LI><A HREF = "HPL_barrier.html">        HPL_barrier</A>
+<LI><A HREF = "HPL_broadcast.html">      HPL_broadcast</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_reduce.html">         HPL_reduce</A>
+<LI><A HREF = "HPL_max.html">            HPL_max</A>
+<LI><A HREF = "HPL_min.html">            HPL_min</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pnum.html">           HPL_pnum</A>
+<LI><A HREF = "HPL_sum.html">            HPL_sum</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Management</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdpanel_disp.html">   HPL_pdpanel_disp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_free.html">   HPL_pdpanel_free</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_init.html">   HPL_pdpanel_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_new.html">    HPL_pdpanel_new</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Factorization</STRONG> Recursive (matrix-multiply based) and
+(matrix-vector based) panel factorization.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_dlocmax.html">        HPL_dlocmax</A>
+<LI><A HREF = "HPL_dlocswpN.html">       HPL_dlocswpN</A>
+<LI><A HREF = "HPL_dlocswpT.html">       HPL_dlocswpT</A>
+<LI><A HREF = "HPL_pdmxswp.html">        HPL_pdmxswp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpancrN.html">       HPL_pdpancrN</A>
+<LI><A HREF = "HPL_pdpancrT.html">       HPL_pdpancrT</A>
+<LI><A HREF = "HPL_pdrpancrN.html">      HPL_pdrpancrN</A>
+<LI><A HREF = "HPL_pdrpancrT.html">      HPL_pdrpancrT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanllN.html">       HPL_pdpanllN</A>
+<LI><A HREF = "HPL_pdpanllT.html">       HPL_pdpanllT</A>
+<LI><A HREF = "HPL_pdrpanllN.html">      HPL_pdrpanllN</A>
+<LI><A HREF = "HPL_pdrpanllT.html">      HPL_pdrpanllT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanrlN.html">       HPL_pdpanrlN</A>
+<LI><A HREF = "HPL_pdpanrlT.html">       HPL_pdpanrlT</A>
+<LI><A HREF = "HPL_pdrpanrlN.html">      HPL_pdrpanrlN</A>
+<LI><A HREF = "HPL_pdrpanrlT.html">      HPL_pdrpanrlT</A>
+<LI><A HREF = "HPL_pdfact.html">         HPL_pdfact</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Broadcast</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_bcast.html">          HPL_bcast</A>
+<LI><A HREF = "HPL_binit.html">          HPL_binit</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_bwait.html">          HPL_bwait</A>
+<LI><A HREF = "HPL_copyL.html">          HPL_copyL</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_packL.html">          HPL_packL</A>
+<LI><A HREF = "HPL_recv.html">           HPL_recv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_sdrv.html">           HPL_sdrv</A>
+<LI><A HREF = "HPL_send.html">           HPL_send</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Update</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_perm.html">           HPL_perm</A>
+<LI><A HREF = "HPL_pipid.html">          HPL_pipid</A>
+<LI><A HREF = "HPL_plindx0.html">        HPL_plindx0</A>
+<LI><A HREF = "HPL_plindx1.html">        HPL_plindx1</A>
+<LI><A HREF = "HPL_plindx10.html">       HPL_plindx10</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_equil.html">          HPL_equil</A>
+<LI><A HREF = "HPL_pdlaswp00N.html">     HPL_pdlaswp00N</A>
+<LI><A HREF = "HPL_pdlaswp01N.html">     HPL_pdlaswp01N</A>
+<LI><A HREF = "HPL_pdlaswp00T.html">     HPL_pdlaswp00T</A>
+<LI><A HREF = "HPL_pdlaswp01T.html">     HPL_pdlaswp01T</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_logsort.html">        HPL_logsort</A>
+<LI><A HREF = "HPL_rollN.html">          HPL_rollN</A>
+<LI><A HREF = "HPL_rollT.html">          HPL_rollT</A>
+<LI><A HREF = "HPL_spreadN.html">        HPL_spreadN</A>
+<LI><A HREF = "HPL_spreadT.html">        HPL_spreadT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdupdateNN.html">     HPL_pdupdateNN</A>
+<LI><A HREF = "HPL_pdupdateNT.html">     HPL_pdupdateNT</A>
+<LI><A HREF = "HPL_pdupdateTN.html">     HPL_pdupdateTN</A>
+<LI><A HREF = "HPL_pdupdateTT.html">     HPL_pdupdateTT</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Factorization / Look-ahead</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdgesv.html">         HPL_pdgesv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesv0.html">        HPL_pdgesv0</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK1.html">       HPL_pdgesvK1</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK2.html">       HPL_pdgesvK2</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Backward Substitution</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdtrsv.html">         HPL_pdtrsv</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Matrix generation</STRONG> A C version of the ScaLAPACK random
+matrix generator with less functionality though.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_setran.html">         HPL_setran</A>
+<LI><A HREF = "HPL_rand.html">           HPL_rand</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_jumpit.html">         HPL_jumpit</A>
+<LI><A HREF = "HPL_xjumpm.html">         HPL_xjumpm</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ladd.html">           HPL_ladd</A>
+<LI><A HREF = "HPL_lmul.html">           HPL_lmul</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dmatgen.html">        HPL_dmatgen</A>
+<LI><A HREF = "HPL_pdmatgen.html">       HPL_pdmatgen</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Timers</STRONG> Sequential and parallel timing utilities.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_timer.html">          HPL_timer</A>
+<LI><A HREF = "HPL_ptimer.html">         HPL_ptimer</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_timer_cputime.html">  HPL_timer_cputime</A>
+<LI><A HREF = "HPL_timer_walltime.html"> HPL_timer_walltime</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ptimer_cputime.html"> HPL_ptimer_cputime</A>
+<LI><A HREF = "HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Testing / Timing Driver</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pddriver.html">       HPL_pddriver</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdinfo.html">         HPL_pdinfo</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdtest.html">         HPL_pdtest</A>
+</UL></TD></TR></TABLE>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html
new file mode 100755
index 000000000..24275d2dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html
@@ -0,0 +1,116 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Errata-Bugs</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Errata - Bugs</H2>
+
+<H3>Issues fixed in Version 2.1, October 26th, 2012</H3>
+
+The output now reports exact time stamps before and after the
+execution of the solver function pdgesv().  This  could allow
+for  accurate  accounting  of  running  time  for data center
+management  purposes,   for  example  when   reporting  power
+consumption.  This  is  important  for  the Green500 project.<BR><BR>
+
+Fixed an out-of-bounds access to arrays  in the HPL_spreadN()
+and  HPL_spreadT()  functions.  This  may cause  segmentation
+fault signals. It was reported by Stephen Whalen from Cray.<BR><BR>
+
+<H3>Issues fixed in Version 2.0, September 10th, 2008</H3>
+
+Gregory Bauer  found  a  problem  size  corresponding  to the
+periodicity of the pseudo-random matrix generator used in the
+HPL  timing  program. This causes  the  LU  factorization  to
+detect the singularity of the input matrix as it should have.<BR><BR>
+
+A problem size of 2^17 = 131072 causes columns 14 modulo 2^14
+(i.e. 16384)  (starting from 0)  to be bitwise identical on a
+homogeneous platform.  Every problem size  being a power of 2
+and larger than  2^15  will  feature a similar problem if one
+searches far enough in the columns of the square input matrix.<BR><BR>
+
+The pseudo-random  generator  uses  the  linear  congruential
+algorithm:  X(n+1) = (a * X(n) + c) mod m as described in the
+Art of Computer  Programming, Knuth 1973,  Vol. 2. In the HPL
+case, m is set to 2^31.<BR><BR>
+
+It is very important  to realize that this issue is a problem
+of  the  testing  part  of the  HPL software.  The  numerical
+properties  of the  algorithms  used in the factorization and
+the solve should not be questioned because of this.  In fact,
+this is just the opposite: the factorization demonstrated the
+weakness of the testing part of the software by detecting the
+singularity of the input matrix.<BR><BR>
+
+This issue of  the testing program  is not easy to fix.  This
+pseudo-random  generator  has  very useful properties despite
+this.  It is  thus currently recommended to HPL users willing
+to test matrices of size larger than  2^15  to not use powers
+of two.<BR><BR>
+
+This  issue  has  been fixed by  changing  the  pseudo-random
+matrix  generator.   Now the  periodicity of the generator is
+2^64.<BR><BR>
+
+<H3>Issues fixed in Version 1.0b, December 15th, 2004</H3>
+
+When the matrix size is such that one needs  more  than 16 GB
+per  MPI  rank,  the  intermediate  calculation  (mat.ld+1) *
+mat.nq in  HPL_pdtest.c  ends up  overflowing  because  it is
+done using  32-bit arithmetic.   This issue has been fixed by
+typecasting to size_t; Thanks to John Baron.<BR><BR>
+
+<H3>Issues fixed in Version 1.0a, January 20th, 2004</H3>
+
+The  MPI  process  grid numbering scheme defaults now to row-
+major ordering. This option can now be selected at run time.<BR><BR>
+
+The  inlined  assembly  timer  routine  that  was causing the
+compilation to fail when using gcc version  3.3 and above has
+been removed from the package.<BR><BR>
+
+Various building problems on the T3E have been fixed;  Thanks
+to Edward Anderson.<BR><BR>
+
+<H3>Issues fixed in Version 1.0, September 27th, 2000</H3>
+
+Due to a couple of errors spotted in the  VSIPL  port  of the
+software,  the  distribution  contained  in  the  tar file of
+September 9th, 2000 had been updated on September 27th,  2000
+with a corrected  distribution.  <STRONG>These  problems were
+not affecting in any way possible the  BLAS  version  of  the
+software.</STRONG>  If you are using  the  VSIPL port of HPL,
+and  want  to  make  sure  you are  indeed  using  the latest
+corrected version, please  check  the  date  contained in the
+file HPL.build.log contained in the main directory.<BR><BR>
+
+
+
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html
new file mode 100755
index 000000000..ad853e760
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html
@@ -0,0 +1,126 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Frequently Asked Questions</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Frequently Asked Questions</H2>
+
+<UL>
+<LI><A HREF="faqs.html#pbsize">What problem size N should I run ?</A>
+<LI><A HREF="faqs.html#blsize">What block size NB should I use ?</A>
+<LI><A HREF="faqs.html#grid">What process grid ratio P x Q should I use ?</A>
+<LI><A HREF="faqs.html#1node">What about the one processor case ?</A>
+<LI><A HREF="faqs.html#options">Why so many options in HPL.dat ?</A>
+<LI><A HREF="faqs.html#outperf">Can HPL be outperformed ?</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="pbsize">What problem size N should I run ?</A></H3>
+
+In order  to find out  the  best performance   of  your  system,  the
+largest   problem size  fitting in memory is what you should aim for.
+The  amount  of  memory  used  by  HPL is essentially the size of the 
+coefficient matrix.  So for example, if you have 4 nodes  with 256 MB
+of memory on each, this corresponds to 1 GB total, i.e., 134 M double
+precision  (8  bytes)  elements. The  square  root  of that number is
+11585.  One  definitely needs to leave some memory for the OS as well
+as for other things, so a problem size of 10000 is likely to fit.  As
+a rule of thumb, 80 % of the  total amount of memory is a good guess.
+If the problem size you pick is too large,  swapping will occur,  and
+the performance will drop.  If multiple processes are spawned on each
+node  (say  you have 2 processors  per  node),  what  counts  is  the
+available amount of memory to each process.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="blsize">What block size NB should I use ?</A></H3>
+
+HPL  uses  the block size NB for the data distribution as well as for
+the  computational  granularity.  From  a data distribution  point of
+view,  the smaller NB,  the better  the load balance.  You definitely
+want  to stay away  from very large values of NB.  From a computation
+point of view,  a too small value of NB  may  limit the computational
+performance by a large factor because almost no data reuse will occur
+in the highest level of the memory hierarchy. The  number of messages
+will  also  increase.  Efficient  matrix-multiply  routines are often 
+internally  blocked.  Small  multiples  of  this  blocking factor are 
+likely to be good block sizes for HPL. The bottom line is that "good"
+block sizes are almost always in the [32 .. 256] interval.  The  best
+values depend on the computation / communication performance ratio of
+your system. To a much less extent, the problem size matters as well.
+Say for example,  you empirically found that 44 was a good block size
+with respect to performance.  88 or 132  are likely  to give slightly 
+better results  for large problem sizes because of a slightly  higher
+flop rate.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="grid">What process grid ratio P x Q should I use ?</A></H3>
+
+This  depends  on  the  physical  interconnection  network  you have.
+Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3].
+In  other  words,  P  and  Q  should  be approximately equal,  with Q 
+slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5,  3 x 4, 4 x 4,
+4 x 6, 5 x 6, 4 x 8 ...  If  you  are  running  on  a simple Ethernet 
+network,  there  is  only one wire through which all the messages are
+exchanged. On  such a network, the performance and scalability of HPL
+is strongly limited  and very flat process grids are likely to be the
+best choices: 1 x 4, 1 x 8, 2 x 4 ...<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="1node">What about the one processor case ?</A></H3>
+
+HPL  has  been  designed  to  perform well for large problem sizes on
+hundreds  of  nodes and more.  The software works on one node and for
+large problem sizes, one  can usually achieve pretty good performance
+on a single processor as well.  For small problem sizes  however, the
+overhead  due  to  message-passing,  local  indexing and so on can be 
+significant.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="options">Why so many options in HPL.dat ?</A></H3>
+
+There are quite a few reasons. First off, these options are useful to
+determine what matters and what does not on your system. Second,  HPL
+is often used in the context  of early evaluation of new systems.  In
+such a case, everything is usually not quite working right, and it is
+convenient  to be able  to vary these parameters without recompiling.
+Finally,  every system has its own peculiarities and one is likely to
+be  willing  to  empirically determine the best set of parameters. In
+any   case,  one  can  always  follow  the  advice  provided  in  the
+<A HREF = "tuning.html">tuning  section</A> of this  document and not
+worry about the complexity of the input file.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="outperf">Can HPL be Outperformed ?</A></H3>
+
+Certainly.   There  is  always  room  for  performance  improvements.
+Specific knowledge about  a  particular system  is always a source of
+performance   gains.  Even  from  a generic  point  of  view,  better
+algorithms  or  more  efficient  formulation  of the classic ones are
+potential winners.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html
new file mode 100755
index 000000000..a3a53abfe
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>
+HPL - A Portable Implementation of the High-Performance
+Linpack Benchmark for Distributed-Memory Computers
+</TITLE>
+</HEAD>
+ 
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<HR NOSHADE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER>
+<H3>HPL - A Portable Implementation of the High-Performance Linpack
+Benchmark for Distributed-Memory Computers</H3>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<A HREF     = "http://icl.cs.utk.edu">
+<IMG SRC    = 2-273x48.jpg
+ALT         = "ICL - UTK Computer Science Department"
+BORDER      = 0
+HEIGHT      = 48
+WIDTH       = 273></A>
+</TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>Version 2.3</TD>
+<TD ALIGN=CENTER>
+<A HREF     = "http://www.cs.utk.edu/~petitet">A. Petitet</A>,
+<A HREF     = "http://www.cs.utk.edu/~rwhaley">R. C. Whaley</A>,
+<A HREF     = "http://www.netlib.org/utk/people/JackDongarra">J. Dongarra</A>,
+<A HREF     = "mailto:cleary1@llnl.gov">A. Cleary</A>
+</TD>
+<TD ALIGN=CENTER>December 2, 2018</TD>
+<TD ALIGN=RIGHT>
+<A HREF="http://www.netlib.org/master_counts2.html#benchmark/hpl"># Accesses</A>
+</TD>
+</TR>
+</TABLE>
+<HR NOSHADE><BR> 
+
+<STRONG>HPL</STRONG> is  a software  package  that solves  a (random)
+dense  linear  system  in  double  precision   (64  bits)  arithmetic
+on  distributed-memory  computers.    It  can  thus  be  regarded  as
+a portable as well as  freely available  implementation  of the  High
+Performance Computing Linpack Benchmark.<BR><BR>
+
+The <STRONG>algorithm</STRONG> used  by HPL  can be summarized by the
+following keywords:  Two-dimensional  block-cyclic data  distribution
+- Right-looking variant  of  the  LU  factorization  with row partial
+pivoting  featuring  multiple  look-ahead depths  -  Recursive  panel
+factorization  with  pivot  search  and  column  broadcast combined -
+Various  virtual  panel  broadcast topologies  -  bandwidth  reducing
+swap-broadcast  algorithm -  backward  substitution  with  look-ahead
+of depth 1.<BR><BR>
+
+The  HPL package  provides  a testing and timing program  to quantify
+the  <STRONG>accuracy</STRONG> of  the obtained solution  as  well as
+the time it took to compute it. The best <STRONG>performance</STRONG>
+achievable by this software on your system depends on a large variety
+of factors.  Nonetheless,  with some restrictive assumptions  on  the
+interconnection  network,   the  algorithm  described  here  and  its
+attached implementation  are <STRONG>scalable</STRONG>  in  the sense
+that their parallel efficiency is maintained  constant  with  respect
+to the per processor memory usage.<BR><BR>
+
+The HPL software package <STRONG>requires</STRONG>  the  availability
+on your system of an implementation of the  Message Passing Interface
+<STRONG>MPI</STRONG> (1.1 compliant).
+An implementation of <STRONG>either</STRONG> the Basic Linear Algebra
+Subprograms   <STRONG>BLAS  or</STRONG>   the   Vector  Signal  Image
+Processing Library <STRONG>VSIPL</STRONG> is also needed.
+Machine-specific as well as generic implementations of
+<A HREF = "links.html#mpi_libs">MPI</A>, the
+<A HREF = "links.html#blas_libs">BLAS</A> and
+<A HREF = "links.html#vsip_libs">VSIPL</A> are available  for a large
+variety of systems.<BR><BR>
+
+<STRONG>Acknowledgements</STRONG>: This work was  supported  in  part
+by  a  grant  from  the  Department  of  Energy's   Lawrence
+Livermore National Laboratory  and  Los  Alamos  National  Laboratory
+as   part  of  the   ASCI  Projects   contract  numbers  B503962  and
+12187-001-00 4R.
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+
+<ADDRESS>
+Innovative Computing Laboratory<BR>
+last revised December 2, 2018<BR>
+</ADDRESS>
+
+<PRE>
+#########################################################################
+
+file    <a href="hpl-2.3.tar.gz">hpl-2.3.tar.gz</a>
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    <a href="hpl-2.2.tar.gz">hpl-2.2.tar.gz</a>
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    <a href="hpl-2.1.tar.gz">hpl-2.1.tar.gz</a>
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    <a href="hpl-2.0.tar.gz">hpl-2.0.tar.gz</a>
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    <a href="hpl.tgz">hpl.tgz</a>
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004<BR>
+
+#########################################################################
+
+file    <a href="hpl_qs22-2008-11-30.patch">hpl_qs22-2008-11-30.patch</a>
+for     Implementation of the High-Performance Linpack benchmark for IBM
+,       QS22 systems with PowerXCell 8i processors. The file is a patch
+,       for HPL 1.0a.
+by      IBM
+
+file    <a href="IBM_LICENSE.TXT">IBM_LICENSE.TXT</a>
+for     IBM Copyright notice for QS22 HPL
+by      IBM
+
+file    <a href="IBM_README.txt">IBM_README.txt</a>
+for     README for IBM QS22 HPL
+by      IBM
+Updated: November 30, 2008
+
+
+#########################################################################
+</PRE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html
new file mode 100755
index 000000000..da2639e99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Related Links</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Related Links</H2>
+
+<STRONG>The  list  of links below contains some relevant material to this
+work.  This  list  is provided  for illustrative purposes, and  should be
+regarded  as  an initial starting point  for the interested reader.  This
+list is by no means meant to be exhaustive.</STRONG><BR><BR>
+
+<H3><A NAME="mpi_libs">Message Passing Interface (MPI)</A></H3>
+
+MPI  is  a  library  specification  for  message-passing,  proposed  as a
+standard  by  a  broadly  based committee  of  vendors, implementors, and
+users.  Machine-specific (optimized)  as  well  as  freely available  MPI
+libraries  are  available  for  a large  variety of systems.  Browse  the
+<A HREF = "http://www.mcs.anl.gov/mpi">Message  Passing  Interface  (MPI)
+standard web page</A> for more information.<BR><BR>
+
+<H3><A NAME="blas_libs">Basic Linear Algebra Subroutines (BLAS)</A></H3>
+
+The  <A HREF = "http://www.netlib.org/blas">BLAS</A>  are   high  quality
+"building  block"   routines  for  performing  basic  vector  and  matrix 
+operations.  A  lot  of  "BLAS-related"  information can be found at this 
+site.  In  particular,  a  reference implementation  is  available.  This
+reference   implementation  is  <STRONG>not  optimized</STRONG>  for  any
+system, and  it is therefore <STRONG>not  recommended</STRONG>  to use it
+for  benchmarking  purposes.
+However, <A HREF = "http://www.netlib.org/blas/faq.html">machine-specific
+optimized  BLAS  libraries</A> are  available  for  a variety of computer
+systems.   For  further  details,    please  contact  your  local  vendor
+representative.  Alternatively,  one  may  also consider  using automatic
+code  generators such as <A HREF="http://www.netlib.org/atlas">ATLAS</A>.
+This  tool  automatically   generates   a  complete   and  optimized BLAS
+library for a large variety of modern systems.<BR><BR>
+
+<H3><A NAME="vsip_libs">Vector Signal Image Processing Library (VSIPL)</A></H3>
+
+<A HREF = "http://www.vsipl.org">VSIPL</A>  is  an API defined by an open
+standard  comprised of  embedded signal and image processing hardware and
+software  vendors,  academia,  users,  and  government  labs.  A  lot  of
+"VSIPL-related"  information can be found at this site.  In particular, a
+reference implementation is available.  Machine-specific  optimized VSIPL
+libraries are available  for a variety of computer systems.  For  further
+details, please contact your local vendor representative.<BR><BR>
+
+<H3>TOP 500 List</H3>
+
+The  <A HREF  = "http://www.netlib.org/benchmark/top500.html">TOP 500</A>
+is  an  ordered list of the 500 most powerful computer systems worldwide.
+Computers   are   ranked  in  this  list  by  their  performance  on  the 
+<A HREF = "http://www.netlib.org/benchmark/top500/lists/linpack.html">
+LINPACK Benchmark</A>.<BR><BR>
+
+<H3>Parallel Dense Linear Algebra Software Libraries</H3>
+
+Browse the <A HREF="http://www.netlib.org">Netlib software repository</A>
+or  the <A HREF="http://www.nhse.org">National HPCC Software Exchange</A>
+to find a large collection of freely available linear algebra libraries.
+<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg
new file mode 100755
index 000000000..df62edd33
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg
new file mode 100755
index 000000000..25afdc44c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg
new file mode 100755
index 000000000..33a7e55cb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html
new file mode 100755
index 000000000..95c6db176
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html
@@ -0,0 +1,276 @@
+<HTML>
+<HEAD>
+<TITLE>HPL References</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL References</H2>
+
+<STRONG>
+The list of references below contains some relevant published material
+to this work.  This list  is  provided  for illustrative purposes, and
+should be regarded  as an initial  starting point  for the  interested
+reader. This list is by no means meant to be exhaustive.
+</STRONG><BR><BR>
+
+The references have been sorted in five categories and chronologically
+listed within each category. The five categories are
+<UL>
+<LI><A HREF="references.html#Linpack_Benchmark">Linpack Benchmark</A>
+<LI><A HREF="references.html#parallel_LUfact">Parallel  LU Factorization</A>
+<LI><A HREF="references.html#recursiv_LUfact">Recursive LU Factorization</A>
+<LI><A HREF="references.html#parallel_matmul">Parallel Matrix Multiply</A>
+<LI><A HREF="references.html#parallel_trsolv">Parallel Triangular Solve</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="Linpack_Benchmark">Linpack Benchmark</A></H3>
+
+<UL>
+
+<! - 1979 ----------------------------------------------------------- !>
+<LI><I>LINPACK Users Guide</I>, J. Dongarra, J. Bunch, C. Moler and
+G. W. Stewart, SIAM, Philadelphia, PA, 1979.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Performance of Various Computers Using Standard Linear Equations
+Software</I>, J. Dongarra, Technical Report CS-89-85, University of 
+Tennessee, 1989. (An updated version of this report can be found at
+<A HREF="http://www.netlib.org/benchmark/performance.ps">
+http://www.netlib.org/benchmark/performance.ps</A>).
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Towards Peak Parallel LINPACK Performance on 400 Transputers</I>,
+R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991.
+
+<LI><I>Massively Parallel LINPACK Benchmark on the Intel Touchstone 
+DELTA and iPSC/860 Systems</I>, R. van de Geijn, 1991 Annual Users
+Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX,
+1991.
+
+<LI><I>The LINPACK Benchmark on the AP 1000</I>, R. Brent, Frontiers,
+1992, pp. 128-135, McLean, VA, 1992.
+
+<! - 1993 ----------------------------------------------------------- !>
+<LI><I>Implementation of BLAS Level 3 and LINPACK Benchmark on the
+AP1000</I>, R. Brent and P. Strazdins, Fujitsu Scientific and Technical
+Journal, Vol. 5, No. 1, pp. 61-70, 1993.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>LU Factorization and the LINPACK Benchmark on the Intel
+Paragon</I>, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia
+Technical Report, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Massively Parallel Distributed Computing: World's First 281
+Gigaflop Supercomputer</I>, J. Bolen, A. Davis, B. Dazey, S. Gupta,
+G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi,
+S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen,
+L. Shuler, from Sandia National Laboratories, Proceedings of the Intel
+Supercomputer Users Group 1995.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>High Performance Software on Intel Pentium Pro Processors or
+Micro-Ops to TeraFLOPS</I>, B. Greer and G. Henry, Proceedings of the
+SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society
+Press - ISBN: 0-89791-985-8, San Jose, CA, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_LUfact">Parallel LU Factorization</A></H3>
+
+<UL>
+
+<! - 1986 ----------------------------------------------------------- !>
+<LI><I>Communication Complexity of the Gaussian Elimination Algorithm
+on Multiprocessors</I>, Y. Saad, Linear Algebra and Its Applications,
+Vol. 77, pp. 315-340, 1986.
+
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>LU Factorization Algorithms on Distributed-Memory Multiprocessor
+Architectures</I>, G. Geist and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 639-649, 1988.
+ 
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Parallel LU Decomposition on a Transputer Network</I>, 
+R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences,
+Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384,
+pp. 61-77, 1989.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>The Distributed Solution of Linear Systems Using the Torus-Wrap
+Data Mapping</I>, C. Ashcraft, ECA-TR-147, Boeing Computer Services,
+Seattle, WA, 1990.
+
+<LI><I>Experiments with Multicomputer LU-Decomposition</I>, E. van de
+Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>A Taxonomy of Distributed Dense LU Factorization Methods</I>,
+C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>The Torus-Wrap Mapping for Dense Matrix Calculations on Massively
+Parallel Computers</I>, B. Hendrickson and D. Womble, SIAM Journal on
+Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994.
+
+<LI><I>Scalability Issues in the Design of a Library for Dense Linear
+Algebra</I>, J. Dongarra, R. van de Geijn and D. Walker, Journal of
+Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Matrix Factorization using Distributed Panels on the Fujitsu
+AP1000</I>, P. Strazdins, Proceedings of the IEEE First International
+Conference on Algorithms And Architectures for Parallel Processing
+ICA3PP-95, Brisbane, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>The Design and Implementation of the ScaLAPACK LU, QR, and
+Cholesky Factorization Routines</I>, J. Choi, J. Dongarra, S. Ostrouchov,
+A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5,
+pp. 173-184, 1996.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="recursiv_LUfact">Recursive LU Factorization</A></H3>
+
+<UL>
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Locality of Reference in LU Decomposition with partial
+pivoting</I>, S. Toledo, SIAM Journal on Matrix Analysis and Applications, Vol. 18,
+No. 4, 1997.
+
+<LI><I>Recursion Leads to Automatic Variable Blocking for Dense 
+Linear-Algebra Algorithms</I>, F. Gustavson, IBM Journal of Research
+and Development, Vol. 41, No. 6, pp. 737-755, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_matmul">Parallel Matrix Multiply</A></H3>
+
+<UL>
+
+<! - 1987 ----------------------------------------------------------- !>
+<LI><I>Matrix Algorithms on a Hypercube I: Matrix Multiplication</I>,
+G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>Basic Matrix Subprograms for Distributed-Memory Systems</I>,
+A. Elster, Proceedings of the Fifth Distributed-Memory Computing
+Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990.
+ 
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>The Parallelization of Level 2 and 3 BLAS Operations on
+Distributed-Memory Machines</I>, M. Aboelaze, N. Chrisochoides
+and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette,
+IN, 1991.
+
+<! - 1992 ----------------------------------------------------------- !>
+<LI><I>The Multicomputer Toolbox Approach to Concurrent BLAS and LACS</I>,
+R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the
+Scalable High Performance Computing Conference SHPCC-92, IEEE Computer
+Society Press, 1992.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>A High Performance Matrix Multiplication Algorithm on a
+Distributed-Memory Parallel Computer, Using Overlapped Communication</I>,
+R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and
+Development, Vol. 38, No. 6, pp. 673-681, 1994.
+
+<LI><I>PUMMA: Parallel Universal Matrix Multiplication Algorithms on
+Distributed-Memory Concurrent Computers</I>, J. Choi, J. Dongarra and
+D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7,
+pp. 543-570, 1994.
+
+<LI><I>Matrix Multiplication on the Intel Touchstone DELTA</I>,
+S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency:
+Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994.
+ 
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>A Three-Dimensional Approach to Parallel Matrix Multiplication</I>,
+R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal
+of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>A High Performance Parallel Strassen Implementation</I>,
+B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6,
+No. 1, pp. 3-12, 1996.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Parallel Implementation of BLAS: General Techniques for Level
+3 BLAS</I>, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and
+R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9,
+pp. 837-857, 1997.
+
+<LI><I>A Poly-Algorithm for Parallel Dense Matrix Multiplication on
+Two-Dimensional Process Grid Topologies</I>, J. Li, R. Falgout and
+A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5,
+pp. 345-389, 1997.
+
+<LI><I>SUMMA: Scalable Universal Matrix Multiplication Algorithm</I>,
+R. van de Geijn and J. Watts, Concurrency: Practice and Experience,
+Vol. 9, No. 4, pp. 255-274, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_trsolv">Parallel Triangular Solve</A></H3>
+
+<UL>
+ 
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>Parallel Solution of Triangular Systems on Distributed-Memory
+Multiprocessors</I>, M. Heath and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 558-588, 1988.
+
+<LI><I>A Parallel Triangular Solver for a Distributed-Memory
+Multiprocessor</I>, G. Li and T. Coleman, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>A New Method for Solving Triangular Systems on Distributed-Memory
+Message-Passing Multiprocessor</I>, G. Li and T. Coleman, SIAM Journal
+on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396,
+1989.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Parallel Triangular System Solving on a Mesh Network of
+Transputers</I>, R. Bisseling and J. van der Vorst, SIAM Journal
+on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html
new file mode 100755
index 000000000..9a7d8b8af
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html
@@ -0,0 +1,243 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Results</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR><TD ALIGN=LEFT VALIGN=LEFT>
+<IMG SRC    = "aprunner.gif" BORDER=0 HEIGHT=160 WIDTH=220>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<H2>HPL Performance Results</H2>
+
+<STRONG>
+The performance achieved by this software package  on a few machine
+configurations is shown below.  These results are only provided for
+illustrative  purposes.  By the time you read this,  those  systems
+have changed,  they may not even exist anymore  and  one can surely
+not exactly reproduce  the state  in which these machines were when
+those measurements have been obtained.  To obtain  accurate figures
+on your system, it is absolutely necessary to
+<A HREF = "software.html">download the software</A> and run it there.
+</STRONG>
+</TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "results.html#AMD_K7000">Athlon 4-nodes cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#I550p3000">Intel PIII 8-duals cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#compaq000">Compaq 64 nodes AlphaServer SC</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="AMD_K7000">4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs
+Switched - 2 NICs per node (channel bonding)</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.2 RedHat (Kernel 2.2.14)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPIch 1.2.1                            </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD><TD>09 / 00                                </TD></TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>1 x 4</TH>
+<TD ALIGN=CENTER> 1.28</TD>
+<TD ALIGN=CENTER> 1.73</TD>
+<TD ALIGN=CENTER> 1.89</TD>
+<TD ALIGN=CENTER> 1.95</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 2</TH>
+<TD ALIGN=CENTER> 1.17</TD>
+<TD ALIGN=CENTER> 1.68</TD>
+<TD ALIGN=CENTER> 1.88</TD>
+<TD ALIGN=CENTER> 1.93</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 1</TH>
+<TD ALIGN=CENTER> 0.81</TD>
+<TD ALIGN=CENTER> 1.43</TD>
+<TD ALIGN=CENTER> 1.70</TD>
+<TD ALIGN=CENTER> 1.80</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 4 nodes.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="I550p3000">8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.1 RedHat (Kernel 2.2.15)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPI GM (Version 1.2.3)                 </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF="http://icl.cs.utk.edu">UTK / ICL</A> - Torc cluster - 09 / 00</TD>
+</TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+<TH ALIGN=CENTER>15000</TH>
+<TH ALIGN=CENTER>20000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 4</TH>
+<TD ALIGN=CENTER> 1.76</TD>
+<TD ALIGN=CENTER> 2.32</TD>
+<TD ALIGN=CENTER> 2.51</TD>
+<TD ALIGN=CENTER> 2.58</TD>
+<TD ALIGN=CENTER> 2.72</TD>
+<TD ALIGN=CENTER> 2.73</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 4</TH>
+<TD ALIGN=CENTER> 2.27</TD>
+<TD ALIGN=CENTER> 3.94</TD>
+<TD ALIGN=CENTER> 4.46</TD>
+<TD ALIGN=CENTER> 4.68</TD>
+<TD ALIGN=CENTER> 5.00</TD>
+<TD ALIGN=CENTER> 5.16</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="compaq000">Compaq 64 nodes (4 ev67 667 Mhz processors per node)
+AlphaServer SC</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Tru64 Version 5               </TD></TR>
+<TR><TD>C compiler </TD><TD>cc Version 6.1                </TD></TR>
+<TR><TD>C flags    </TD><TD>-arch host -tune host -std -O5</TD></TR>
+<TR><TD>MPI        </TD><TD>-lmpi -lelan                  </TD></TR>
+<TR><TD>BLAS       </TD><TD>CXML                          </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF = "http://www.nccs.gov/">ORNL / NCCS</A>
+ - falcon - 09 / 00</TD></TR>
+</TABLE><P>
+</CENTER>
+
+In the table below, each row corresponds to a given number of cpus (or
+processors) and nodes.  The first row for example is denoted by 1 / 1,
+i.e.,  1 cpu / 1 node.  Rmax is given in Gflops, and the value of Nmax
+in fact corresponds to  351 Mb per cpu for all machine configurations.<BR><BR>
+
+<CENTER>
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER>    CPUS / NODES     </TH>
+<TH ALIGN=CENTER>       GRID          </TH>
+<TH ALIGN=CENTER>      N 1/2          </TH>
+<TH ALIGN=CENTER>       Nmax          </TH>
+<TH ALIGN=CENTER>    Rmax (Gflops)    </TH>
+<TH ALIGN=CENTER> Parallel Efficiency </TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   1 / 1    </TH>
+<TH ALIGN=CENTER>   1 x 1    </TH>
+<TD ALIGN=CENTER>     150    </TD>
+<TD ALIGN=CENTER>    6625    </TD>
+<TD ALIGN=CENTER>   1.136    </TD>
+<TD ALIGN=CENTER>   1.000    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   4 / 1    </TH>
+<TH ALIGN=CENTER>   2 x 2    </TH>
+<TD ALIGN=CENTER>     800    </TD>
+<TD ALIGN=CENTER>   13250    </TD>
+<TD ALIGN=CENTER>   4.360    </TD>
+<TD ALIGN=CENTER>   0.960    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  16 / 4    </TH>
+<TH ALIGN=CENTER>   4 x 4    </TH>
+<TD ALIGN=CENTER>    2300    </TD>
+<TD ALIGN=CENTER>   26500    </TD>
+<TD ALIGN=CENTER>   17.00    </TD>
+<TD ALIGN=CENTER>   0.935    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  64 / 16   </TH>
+<TH ALIGN=CENTER>   8 x 8    </TH>
+<TD ALIGN=CENTER>    5700    </TD>
+<TD ALIGN=CENTER>   53000    </TD>
+<TD ALIGN=CENTER>   67.50    </TD>
+<TD ALIGN=CENTER>   0.928    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER> 256 / 64   </TH>
+<TH ALIGN=CENTER>  16 x 16   </TH>
+<TD ALIGN=CENTER>   14000    </TD>
+<TD ALIGN=CENTER>  106000    </TD>
+<TD ALIGN=CENTER>   263.6    </TD>
+<TD ALIGN=CENTER>   0.906    </TD>
+</TR>
+</TABLE><P>
+</CENTER> 
+For Rmax shown in the table, the  parallel efficiency  per  cpu has been
+computed using the performance achieved by  HPL on 1 cpu.  That is fair,
+since the CXML matrix multiply routine was achieving at best 1.24 Gflops
+for large matrix operands on one cpu, it would have been difficult for a
+sequential  Linpack  benchmark  implementation to achieve much more than
+1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb
+per cpu for Nmax),  HPL  scales almost linearly as it should.
+
+<BR><BR>
+The authors acknowledge the use  of the Oak Ridge National Laboratory
+Compaq computer, funded by the Department of Energy's Office
+of Science and Energy Efficiency programs.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg
new file mode 100755
index 000000000..88d2c56af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg
new file mode 100755
index 000000000..0d7f076fd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html
new file mode 100755
index 000000000..00bb1a27e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html
@@ -0,0 +1,200 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Scalability Analysis</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Scalability Analysis</H2>
+
+The <A HREF = "scalability.html#model">machine model</A> used for the
+analysis is first described.  This crude model is then used  to first
+estimate  the  parallel running time  of  the various phases  of  the 
+algorithm namely
+<UL>
+<LI><A HREF="scalability.html#pfact">panel factorization and broadcast</A>,
+<LI><A HREF="scalability.html#updat">trailing submatrix update</A>, 
+<LI><A HREF="scalability.html#backs">backward substitution</A>. 
+</UL>
+Finally <A HREF="scalability.html#total">the  parallel efficiency</A>
+of the entire algorithm is estimated according to this machine model.
+We show that for a given set of parameters HPL is <STRONG>scalable</STRONG>
+not  only  with respect to the amount of computation,  but  also with
+respect to the communication volume.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME = "model">The Machine Model</A></H3>
+
+Distributed-memory computers consist of processors that are connected
+using  a message passing interconnection network.  Each processor has
+its own memory called the local memory,  which  is accessible only to
+that processor.  As the time to access a remote memory is longer than
+the time to access a local one,  such computers are often referred to
+as Non-Uniform Memory Access (NUMA) machines.<BR><BR>
+
+The interconnection network  of our machine model is static,  meaning
+that   it   consists  of  point-to-point  communication  links  among
+processors.  This  type  of  network  is also referred to as a direct
+network as opposed to dynamic networks.  The  latter  are constructed 
+from switches and communication links.  These links  are  dynamically
+connected  to one another by the switching elements to establish,  at
+run time, the paths between processors' memories.<BR><BR>
+ 
+The  interconnection  network  of the two-dimensional  machine  model
+considered here is a static,  fully  connected physical topology.  It
+is also assumed  that  processors  can be treated  equally  in  terms
+of  local performance  and  that  the  communication rate between two
+processors depends on the processors considered.<BR><BR>
+
+Our model assumes  that  a processor can send or receive data on only
+one of its communication ports at a time  (assuming  it has more than
+one). In the literature,  this  assumption is also referred to as the
+one-port communication model.<BR><BR>
+ 
+The time spent to communicate  a message between two given processors
+is called the communication time Tc.   In  our machine model,  Tc  is
+approximated  by  a  linear  function  of  the  number  L  of  double
+precision (64-bit)  items communicated.  Tc is the sum of the time to
+prepare the message for transmission (alpha) and the time  (beta * L)
+taken  by the message of length  L  to traverse  the network  to  its 
+destination, i.e.,<BR><BR>
+<CENTER>
+Tc = alpha + beta L.<BR><BR>
+</CENTER>
+
+Finally,   the   model  assumes  that  the  communication  links  are
+bi-directional,  that is,  the time  for two processors  to send each 
+other a message of length L is also Tc.  A processor  can send and/or
+receive  a message on only one of  its communication links at a time.
+In particular, a processor can send a message while receiving another
+message from the processor it is sending to at the same time.<BR><BR>
+ 
+Since this document is only concerned with regular local dense linear
+algebra  operations,  the time taken to perform  one  floating  point 
+operation  is  assumed  to  be  summarized by  three constants  gam1, 
+gam2 and gam3.  These quantities are flop rate approximations of the
+vector-vector,  matrix-vector  and matrix-matrix operations for  each
+processor.  This  very  crude approximation summarizes all  the steps
+performed  by a processor  to achieve such a computation.  Obviously,
+such a model neglects all the phenomena  occurring  in  the processor
+components,  such as cache misses, pipeline startups, memory load  or
+store, floating point arithmetic and so on,  that  may  influence the
+value  of  these  constants  as  a function  of the  problem size for
+example.<BR><BR>
+ 
+Similarly,  the model  does  not make any assumption on the amount of
+physical memory per node.  It  is  assumed that if a process has been
+spawned on  a processor,  one  has  ensured  that  enough  memory  was
+available  on that processor. In other words, swapping will not occur
+during the modeled computation.<BR><BR>
+ 
+<STRONG>
+This  machine  model  is  a very crude approximation that is designed
+specifically  to  illustrate  the cost of the dominant factors of our
+particular case.<BR><BR>
+</STRONG>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization and Broadcast</A></H3>
+
+Let us consider an  M-by-N  panel distributed over a P-process column.
+Because  of the recursive formulation of the panel factorization,  it
+is  reasonable to consider  that  the floating point operations  will
+be performed at matrix-matrix multiply "speed".  For  every column in
+the panel a binary-exchange is performed on 2*N data items. When this
+panel is broadcast,  what  matters  is the time that the next process
+column  will  spend  in this  communication operation.  Assuming  one
+chooses the <A HREF="algorithm.html#bcast">increasing-ring (modified)
+variant</A>,  only  one  message needs to be taken into account.  The
+execution  time  of the panel factorization and broadcast can thus be
+approximated by:<BR><BR>
+<CENTER>
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) +
+alpha + beta M N / P.<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="updat">Trailing Submatrix Update</A></H3>
+
+Let us consider the  update  phase  of an  N-by-N  trailing  submatrix
+distributed on a P-by-Q process grid.  From  a computational point of
+view one has to (triangular) solve N right-hand-sides  and  perform a 
+local rank-NB update of this trailing submatrix. Assuming one chooses
+the <A HREF="algorithm.html#update">long variant</A>,  the  execution
+time of the update operation can be approximated by:<BR><BR>
+<CENTER>
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) +
+alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.<BR><BR>
+</CENTER>
+The constant "3" in front of the "beta" term is obtained  by counting
+one for the (logarithmic) spread phase and two for the rolling phase;
+In the case of bi-directional links  this constant 3 should therefore
+be only a 2.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="backs">Backward Substitution</A></H3>
+
+The number of floating point operations performed during the backward
+substitution is given by  N^2 / (P*Q).  Because of the lookahead, the
+communication cost  can be approximated at each step  by two messages
+of length NB, i.e.,  the time  to  communicate  the NB-piece  of  the 
+solution vector from one diagonal block of the matrix to another.  It
+follows that the execution time of the backward substitution  can  be
+approximated by:<BR><BR>
+<CENTER>
+Tbacks( N, NB ) = gam2 N^2  / (P Q) + N ( alpha / NB + 2 beta ).<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="total">Putting it All Together</A></H3>
+
+The total execution time of the algorithm described above is given by<BR><BR>
+<CENTER>
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] +
+Tbacks( N, NB ).<BR><BR>
+</CENTER>
+That is, by considering only the dominant term in alpha, beta and
+gam3:<BR><BR>
+<CENTER>
+Thpl = 2 gam3 N^3  / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) +
+alpha N ((NB + 1) log(P) + P) / NB.<BR><BR>
+</CENTER>
+The serial execution time is given by Tser = 2 gam3 N^3  / 3. If we
+define the parallel efficiency  E  as the ratio  Tser / ( P Q Thpl ), we
+obtain:<BR><BR>
+<CENTER>
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) +
+3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).<BR><BR>
+</CENTER>
+This  last equality  shows  that when the memory usage per  processor
+N^2 / (P Q)  is maintained  constant, the parallel efficiency  slowly
+decreases  only  because of the alpha term.  The communication volume
+(the beta term) however remains constant.  Due to these results,  HPL
+is said to be <STRONG>scalable</STRONG> not only with respect  to the
+amount of computation,  but also  with  respect  to the communication
+volume.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html
new file mode 100755
index 000000000..34d82b2b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Software</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Software</H2>
+
+<H3>Download and Installation</H3>
+
+<OL>
+<LI>Download    the  <A HREF="hpl-2.3.tar.gz">tar-gzipped  file</A>,
+issue  then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar"  and  this
+should create an  hpl-2.3  directory  containing  the  distribution.
+We call this directory the top level directory.
+
+<LI>Create a  file  Make.&#60arch&#62  in  the  top-level directory.
+For  this purpose,  you  may  want  to  re-use  one contained in the 
+setup  directory.  This Make.&#60arch&#62 file  essentially contains
+the compilers, libraries, and their paths to be used on your system. 
+
+<LI>Type  "make arch=&#60arch&#62". This should create an executable
+in the bin/&#60arch&#62 directory called xhpl.  For example,  on our
+Linux  PII  cluster,  I create  a file called Make.Linux_PII in  the
+top-level  directory.  Then,  I  type  "make  arch=Linux_PII".  This
+creates  the executable file bin/Linux_PII/xhpl. 
+
+<LI>Quick check:  run  a few  tests  (assuming  you have 4 nodes for
+interactive use)  by  issuing  the  following  commands from the top
+level  directory:  "cd bin/&#60arch&#62 ;  mpirun -np 4 xhpl".  This
+should produce quite a bit of meaningful output on the screen.
+ 
+<LI>Most  of  the  performance parameters can be tuned, by modifying
+the input file bin/&#60arch&#62/HPL.dat. See the
+<A HREF = "tuning.html">tuning page</A>  or  the  TUNING file in the
+top-level directory.
+</OL>
+<HR NOSHADE>
+
+<H3>Compile Time Options</H3>
+
+At  the  end  of  the "model" Make.&#60arch&#62,  the  user is given
+the  opportunity  to override  some default  compile options of this
+software. The list of these options and their meaning is:<BR><BR>
+
+<CENTER>
+<TABLE WIDTH=80% BORDER>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_COPY_L</STRONG></TD>
+<TD ALIGN=LEFT>force the copy of the panel L before bcast</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_CBLAS</STRONG></TD>
+<TD ALIGN=LEFT>call the BLAS C interface</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_VSIPL</STRONG></TD>
+<TD ALIGN=LEFT>call the vsip library</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_DETAILED_TIMING</STRONG></TD>
+<TD ALIGN=LEFT>enable detailed timers</TD></TR>
+</TABLE><P>
+</CENTER>
+
+The user must choose between either the  BLAS  Fortran 77 interface,
+or the BLAS C interface, or the  VSIPL  library depending  on  which
+computational kernels are available on his system. Only one of these
+options should be selected.  If  you  choose  the  BLAS  Fortran  77
+interface,  it is necessary  to fill out  the machine-specific  C to
+Fortran 77 interface section of the  Make.&#60arch&#62  file.  To do
+this,  please  refer  to the Make.&#60arch&#62 examples contained in
+the setup directory.<BR><BR>
+
+By default HPL will:
+<UL>
+<LI>not copy L before broadcast,
+<LI>call the BLAS Fortran 77 interface,
+<LI>not display detailed timing information.
+</UL>
+
+As an example,  suppose one wants this software to copy the panel of
+columns  into  a contiguous buffer  before broadcasting.  It  should
+be  more efficient  to let  the software create the appropriate  MPI
+user-defined data type  since this may avoid the data copy.  So,  it
+is a strange idea,  but one insists.  To achieve this  one would add
+-DHPL_COPY_L  to  the definition of  HPL_OPTS at the end of the file
+Make.&#60arch&#62.  Issue   then  a  "make clean arch=&#60arch&#62 ; 
+make build arch=&#60arch&#62"  and  the executable  will be re-build
+with that feature in.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg
new file mode 100755
index 000000000..56c255a3f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg
new file mode 100755
index 000000000..433e4c077
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html
new file mode 100755
index 000000000..fbbf17fb7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html
@@ -0,0 +1,476 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Tuning</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Tuning</H2>
+
+After  having built the executable hpl/bin/&#60arch&#62/xhpl,
+one may want to modify the input data file HPL.dat. This file
+should  reside  in  the  same  directory  as  the  executable
+hpl/bin/&#60arch&#62/xhpl.   An example   HPL.dat   file   is 
+provided by default. This file contains information about the
+problem sizes, machine configuration,  and algorithm features
+to be used by the executable.  It is  31  lines long. All the
+selected  parameters  will be printed in the output generated
+by the executable.<BR><BR>
+
+We first describe the meaning of each line of this input file
+below.  Finally,  <A HREF="tuning.html#tips">a   few   useful 
+experimental guide lines</A>  to set up the file are given at
+the end of this page.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="desc">Description of the HPL.dat File</A></H3>
+
+<STRONG>Line 1</STRONG>:  (unused) Typically  one  would  use
+this line for its own good.  For example,  it  could  be used
+to summarize the content of the input file.  By  default this 
+line reads:
+<TT><PRE>
+HPL Linpack benchmark input file
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 2</STRONG>:  (unused) same as line 1. By default
+this line reads:
+<TT><PRE>
+Innovative Computing Laboratory, University of Tennessee
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 3</STRONG>:  the  user  can   choose  where  the
+output  should  be  redirected to.  In the case of a file,  a
+name  is necessary, and this is  the line  where one wants to 
+specify it.  Only the first name on this line is significant.
+By default, the line reads:
+<TT><PRE>
+HPL.out  output file name (if any)
+</PRE></TT>
+ 
+This  means  that if  one chooses to redirect the output to a
+file, the file will be called "HPL.out". The rest of the line
+is unused, and this space can hold some informative comment on
+the meaning of this line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 4</STRONG>: This line specifies where the output
+should go.  The  line  is  formatted,  it  must  begin with a 
+positive integer,  the rest is insignificant. 3 choices  are
+possible  for  the  positive integer, 6 means that the output
+will go to the standard output, 7 means  that the  output will
+go to the standard error.  Any  other integer means that  the
+output should be redirected to a file,  which  name has  been
+specified  in the line above. This line by default reads:
+<TT><PRE>
+6        device out (6=stdout,7=stderr,file)
+</PRE></TT>
+which  means  that  the  output generated  by  the executable
+should be redirected to the standard output.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 5</STRONG>: This  line  specifies  the number of
+problem sizes to be executed. This number should be less than
+or equal to 20.  The first  integer is significant,  the rest
+is ignored. If the line reads:
+<TT><PRE>
+3        # of problems sizes (N)
+</PRE></TT>
+this  means  that  the user is willing to run 3 problem sizes
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 6</STRONG>: This line specifies the problem sizes
+one wants to run.  Assuming  the  line  above  started with 3,
+the  3  first positive  integers  are significant, the rest is
+ignored. For example:
+<TT><PRE>
+3000 6000 10000    Ns
+</PRE></TT>
+means that one wants xhpl to run 3 (specified in line 5)
+problem sizes, namely 3000, 6000 and 10000.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 7</STRONG>: This line  specifies  the number  of
+block sizes to be run.  This  number should be less than  or
+equal to 20.  The first integer  is significant,  the rest is
+ignored. If the line reads:
+<TT><PRE>
+5        # of NBs
+</PRE></TT>
+this means that the user is willing to use 5 block sizes that
+will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 8</STRONG>:  This line specifies the block sizes
+one  wants  to run.  Assuming  the  line above started with 5,
+the  5  first positive integers  are  significant, the rest is 
+ignored. For example:
+<TT><PRE>
+80 100 120 140 160 NBs
+</PRE></TT>
+means  that  one  wants  xhpl  to use 5 (specified in line 7)
+block sizes, namely 80, 100, 120, 140 and 160.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 9</STRONG>:  This  line specifies  how  the  MPI
+processes  should be mapped  onto the nodes of your platform.
+There are currently two possible mappings,  namely  row-  and
+column-major. This feature is mainly useful  when these nodes
+are themselves multi-processor computers. A row-major mapping
+is recommended.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 10</STRONG>: This line specifies  the  number of
+process grids to be run.  This  number  should be less than
+or equal to 20. The first integer is significant, the rest is
+ignored. If the line reads:
+<TT><PRE>
+2        # of process grids (P x Q)
+</PRE></TT>
+this  means  that you are willing to try 2 process grid sizes 
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 11-12</STRONG>:  These  two  lines  specify  the  
+number of process rows  and  columns of each grid you want to
+run on.  Assuming the line above (10)  started with 2,  the 2
+first  positive integers of those two lines  are significant,
+the rest  is ignored. For example:
+<TT><PRE>
+1 2          Ps
+6 8          Qs
+</PRE></TT>
+means that one wants to run  xhpl  on  2  process grids (line
+10), namely 1-by-6 and 2-by-8. Note: In  this example,  it is
+required then  to  start  xhpl  on  at  least  16  nodes (max
+of Pi-by-Qi).  The runs on the two grids will be consecutive.
+If one was starting xhpl on more than 16 nodes, say 52,  only
+6 would be used for the first grid (1x6)  and  then 16  (2x8)
+would  be used for the second grid. The fact that you started
+the MPI job on 52 nodes, will not make  HPL  use all of them.
+In this example,  only 16 would be used.  If one wants to run 
+xhpl  with  52  processes  one needs  to specify a grid of 52
+processes, for example the following lines would do the job:
+<TT><PRE>
+4  2         Ps
+13 8         Qs
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 13</STRONG>: This line specifies  the  threshold
+to which the residuals should be compared with. The residuals
+should be of order 1, but are  in practice slightly less than
+this, typically 0.001.  This  line  is made of a real number,
+the rest is not significant. For example:
+<TT><PRE>
+16.0         threshold
+</PRE></TT>
+In practice,  a value of  16.0  will  cover  most cases.  For
+various reasons,  it  is possible  that some of the residuals
+become slightly larger, say for example 35.6.  xhpl will flag
+those runs  as  failed,  however  they  can be  considered as
+correct. A run should be considered as failed if the residual
+is a few orders of magnitude bigger than 1 for example 10^6 or
+more. Note:  if one was  to specify  a threshold of  0.0, all
+tests  would be flagged  as failed, even though the answer is
+likely  to  be  correct.  It is allowed to specify a negative 
+value for this threshold,  in which case  the checks  will be 
+by-passed,  no matter what the threshold value is, as soon as
+it  is  negative.  This  feature  allows  to  save  time when 
+performing a lot of experiments,  say for instance during the
+tuning phase. Example:
+<TT><PRE>
+-16.0        threshold
+</PRE></TT>
+ 
+<HR NOSHADE>
+The remaining lines allow one to specify algorithmic features.
+xhpl  will  run  all  possible combinations of those for each
+problem  size,  block size, process grid combination. This is
+handy  when one looks for an "optimal" set of parameters.  To
+understand  a little bit better, let us say first a few words
+about  the algorithm implemented in HPL. Basically this is  a
+right-looking  version  with  row-partial pivoting. The panel
+factorization is matrix-matrix operation based and recursive,
+dividing the panel into  NDIV  subpanels  at each step.  This
+part  of  the   panel   factorization  is  denoted  below  by
+"recursive  panel  fact.  (RFACT)".  The recursion stops when
+the  current panel  is made of less  than or equal  to  NBMIN
+columns. At that point, xhpl uses a  matrix-vector  operation
+based  factorization  denoted   below  by  "PFACTs".  Classic
+recursion  would  then  use  NDIV=2,   NBMIN=1.   There   are
+essentially   3   numerically  equivalent  LU   factorization 
+algorithm  variants  (left-looking, Crout and right-looking).
+In HPL, one can choose  every one of those for the  RFACT, as
+well as the PFACT.  The following lines of HPL.dat allow  you
+to set those parameters.<BR><BR>
+<STRONG>Lines 14-21: (Example 1)</STRONG>
+<TT><PRE>
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+ 
+This  example  would  try all variants of PFACT, 4 values for
+NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+and 4, and all variants for RFACT.<BR><BR>
+<STRONG>Lines 14-21: (Example 2)</STRONG>
+<TT><PRE>
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+This example  would  try  2  variants  of  PFACT namely right
+looking and left looking, 2 values for NBMIN, namely 4 and 8,
+1 value for NDIV namely 2, and one variant for RFACT.<BR><BR>
+ 
+<HR NOSHADE>
+In the  main loop  of the algorithm,  the  current  panel  of
+column  is broadcast  in process rows  using  a virtual  ring
+topology. HPL offers various choices and one most likely wants
+to use the increasing ring modified encoded as 1. 3 and 4 are
+also good choices.<BR><BR>
+<STRONG>Lines 22-23: (Example 1)</STRONG>
+<TT><PRE>
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause HPL  to broadcast the current panel using the
+increasing ring modified topology.<BR><BR>
+<STRONG>Lines 22-23: (Example 2)</STRONG>
+<TT><PRE>
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause  HPL to broadcast the current panel using the
+increasing   ring  virtual  topology  and  the  long  message
+algorithm.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Lines 24-25</STRONG> allow to specify  the look-ahead
+depth used by HPL.  A depth of 0  means  that  the next panel
+is  factorized  after  the  update  by  the  current panel is
+completely finished.   A  depth of  1  means  that  the  next
+panel  is  immediately  factorized  after being updated.  The 
+update  by  the  current panel is then finished. A depth of k
+means that the k next panels are factorized immediately after
+being updated.  The  update  by  the  current  panel  is then 
+finished.  It  turns out that a depth of 1  seems to give the
+best results,  but  may need a large problem size  before one
+can  see  the performance  gain. So use 1, if you do not know
+better,  otherwise  you  may want  to  try 0.  Look-ahead  of
+depths 3  and  larger  will  probably  not  give  you  better
+results.<BR><BR>
+<STRONG>Lines 24-25: (Example 1):</STRONG>
+<TT><PRE>
+1       # of lookahead depth
+1       DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depth 1.<BR><BR>
+<STRONG>Lines 24-25: (Example 2):</STRONG>
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depths 0 and 1.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Lines 26-27</STRONG>  allow  to  specify  the  swapping
+algorithm  used  by  HPL for  all tests.  There  are  currently
+two  swapping  algorithms   available,  one  based  on  "binary
+exchange"  and  the   other  one   based  on   a  "spread-roll"
+procedure  (also  called   "long"  below).  For  large  problem
+sizes, this last one is likely to be more efficient.  The  user
+can also choose to mix both variants, that is "binary-exchange"
+for a number of columns less  than a threshold value,  and then
+the  "spread-roll" algorithm.  This  threshold  value  is  then 
+specified on Line 27.<BR><BR>
+<STRONG>Lines 26-27: (Example 1):</STRONG>
+<TT><PRE>
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm.  Note  that a threshold  is specified  in
+that example but not used by HPL.<BR><BR>
+<STRONG>Lines 26-27: (Example 2):</STRONG>
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm  as  soon as there is more than 60 columns
+in the row panel. Otherwise, the "binary-exchange"  algorithm
+will be used instead.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 28</STRONG>  allows  to specify whether the upper
+triangle  of  the  panel  of  columns  should   be  stored  in
+no-transposed  or transposed form. Example:
+<TT><PRE>
+0            L1 in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 29</STRONG> allows  to specify whether the panel 
+of rows  U  should be stored in  no-transposed  or transposed 
+form. Example:
+<TT><PRE>
+0            U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 30</STRONG> enables / disables the equilibration 
+phase. This option  will not be used unless you selected 1 or
+2 in Line 26. Example:
+<TT><PRE>
+1            Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 31</STRONG> allows  to  specify the alignment in
+memory for the memory  space  allocated  by  HPL.  On  modern
+machines, one probably wants to use  4,  8  or 16.  This  may 
+result in a tiny amount of memory wasted. Example:
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+
+<HR NOSHADE>
+<H3><A NAME="tips">Guide Lines</A></H3>
+
+<OL>
+<LI>Figure  out  a  good block size  for  the matrix multiply
+routine.  The best method  is to try a few out. If you happen
+to know  the block size  used  by the matrix-matrix  multiply
+routine,  a  small  multiple of that block size will do fine.
+This particular topic is discussed in the
+<A HREF="faqs.html#blsize">FAQs</A> section.<BR><BR>
+
+<LI>The process mapping  should  not matter  if  the nodes of
+your platform are single processor computers.  If these nodes
+are multi-processors, a row-major mapping is recommended.<BR><BR>
+
+<LI>HPL likes "square" or slightly flat process grids. Unless
+you  are using  a very small process grid, stay away from the 
+1-by-Q and P-by-1 process grids. This particular topic is also
+discussed in the <A HREF="faqs.html#grid">FAQs</A> section.<BR><BR>
+
+<LI>Panel factorization  parameters:  a  good  start  are the
+following for the lines 14-21:
+<TT><PRE>
+1       # of panel fact
+1       PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+
+<LI>Broadcast parameters: at this time it is far from obvious
+to me what the best setting is,  so I would probably try them
+all.  If  I  had  to guess  I would probably  start  with the 
+following for the lines 22-23:
+<TT><PRE>
+2       # of broadcast
+1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+The best broadcast  depends on your problem size and hardware
+performance. My take is that 4 or 5  may be  competitive  for
+machines  featuring  very  fast nodes  comparatively  to  the 
+network.<BR><BR>
+
+<LI>Look-ahead depth: as mentioned above 0 or 1 are likely to 
+be the best choices.  This also  depends  on the problem size
+and machine configuration, so I would try "no look-ahead (0)"
+and "look-ahead of depth 1 (1)". That is for lines 24-25:
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+
+<LI>Swapping: one can select only one of the three algorithms
+in the input file. Theoretically, mix (2) should win, however
+long (1) might just be good enough. The  difference should be
+small between those two assuming  a swapping threshold of the 
+order of the block size (NB) selected. If  this  threshold is
+very large, HPL will use bin_exch (0) most of the time and if
+it  is  very  small  (< NB) long (1)  will always be used. In 
+short  and  assuming  the  block size (NB)  used is say 60, I 
+would choose for the lines 26-27:
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold 
+</PRE></TT>
+I would also try the long variant.  For  a very  small number 
+of processes  in every column of the process grid  (say < 4),
+very little performance difference should be observable.<BR><BR>
+
+<LI>Local storage: I do not think Line 28 matters.  Pick 0 in
+doubt. Line 29 is more important.  It controls  how the panel
+of rows should be stored. No doubt 0 is better. The caveat is
+that in that case the matrix-multiply function is called with
+( Notrans, Trans, ... ), that is C := C - A B^T.   Unless the 
+computational  kernel  you are using  has  a very poor  (with
+respect to performance) implementation of that case,  and  is
+much more efficient with  ( Notrans, Notrans, ... ) just pick
+0 as well.  So, my choice:
+<TT><PRE>
+0       L1 in (0=transposed,1=no-transposed) form
+0       U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<LI>Equilibration: It  is hard to tell  whether equilibration
+should always be performed or not. Not knowing much about the
+random matrix generated  and because the overhead is so small
+compared to the possible gain, I turn it on all the time.
+<TT><PRE>
+1       Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<LI>For alignment, 4 should be plenty,  but just to be safe,
+one may want to pick 8 instead.
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+</OL>
+ 
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat
new file mode 100644
index 000000000..a015f8ba5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat
@@ -0,0 +1,32 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+1            # of problems sizes (N)
+24576 12288 Ns
+1             # of NBs
+3072 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256  960 512 768 1152         NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+1            # of process grids (P x Q)
+1            Ps
+1            Qs
+16.0         threshold
+1            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+1            # of recursive stopping criterium
+2 8          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+1            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0 2          BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+1 0          DEPTHs (>=0)
+1            SWAP (0=bin-exch,1=long,2=mix)
+192          swapping threshold
+1            L1 in (0=transposed,1=no-transposed) form
+1            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat
new file mode 100644
index 000000000..19a956783
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat
@@ -0,0 +1,32 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+2            # of problems sizes (N)
+24576 24576 12288 Ns
+1             # of NBs
+2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256  960 512 768 1152         NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+1            # of process grids (P x Q)
+1            Ps
+1            Qs
+16.0         threshold
+1            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+1            # of recursive stopping criterium
+2 8          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+1            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0 2          BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+1 0          DEPTHs (>=0)
+1            SWAP (0=bin-exch,1=long,2=mix)
+192          swapping threshold
+1            L1 in (0=transposed,1=no-transposed) form
+1            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat
new file mode 100644
index 000000000..f84b54155
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat
@@ -0,0 +1,32 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+1            # of problems sizes (N)
+24576 12288 Ns
+2             # of NBs
+2048 2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256  960 512 768 1152         NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+1            # of process grids (P x Q)
+1            Ps
+2            Qs
+16.0         threshold
+1            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+1            # of recursive stopping criterium
+2 8          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+1            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0 2          BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+1 0          DEPTHs (>=0)
+1            SWAP (0=bin-exch,1=long,2=mix)
+192          swapping threshold
+1            L1 in (0=transposed,1=no-transposed) form
+1            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS
new file mode 100644
index 000000000..b08e25180
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS
@@ -0,0 +1,6 @@
+Antoine Petitet
+Clint Whaley rcwhaley@lsu.edu
+Jack Dongarra dongarra@icl.utk.edu
+Andy Cleary
+Piotr Luszczek luszczek@icl.utk.edu
+Julien Langou Julien.Langou@ucdenver.edu
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS
new file mode 100644
index 000000000..08d694014
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS
@@ -0,0 +1,9 @@
+==============================================================
+ List of the known problems with the HPL software
+
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+
+==============================================================
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now a  run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 15/12/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15;  Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 not to use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost surely
+ 1/2; we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel, and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL
new file mode 100644
index 000000000..fec266c49
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL
@@ -0,0 +1,81 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ 1) Retrieve the tar file, then
+
+    gunzip hpl.tgz; tar -xvf hpl.tar
+
+ this  will create an  hpl  directory,  that we call below the
+ top-level directory.
+
+ 2) Create a file Make.<arch> in the  top-level directory. For
+ this purpose,  you  may  want  to re-use one contained in the
+ setup directory. This file essentially contains the compilers
+ and libraries with their paths to be used.
+
+ 3) Type "make arch=<arch>". This  should create an executable
+ in the bin/<arch> directory called xhpl.
+
+ For example, on our Linux PII cluster, I create a file called
+ Make.Linux_PII in the top-level directory. Then, I type
+    "make arch=Linux_PII" 
+ This creates the executable file bin/Linux_PII/xhpl.
+
+ 4) Quick check: run a few tests:
+
+    cd bin/<arch>
+    mpirun -np 4 xhpl
+
+ 5) Tuning: Most of the performance  parameters can be tuned,
+ by modifying the input file bin/HPL.dat. See the file TUNING
+ in the top-level directory.
+
+==============================================================
+
+ Compile time options:  At the end of the "model" Make.<arch>,
+ ---------------------  the  user  is given the opportunity to
+ compile the software with some specific compile options.  The
+ list of these options and their meanings is:
+
+    -DHPL_COPY_L
+       force the copy of the panel L before bcast;
+
+    -DHPL_CALL_CBLAS
+       call the cblas interface;
+
+    -DHPL_CALL_VSIPL
+       call the vsip  library;
+
+    -DHPL_DETAILED_TIMING
+       enables detail timers;
+
+ The  user  must  choose  between  either  the BLAS Fortran 77
+ interface,  or the  BLAS  C  interface,  or the VSIPL library
+ depending on which computational kernels are available on his
+ system. Only one of these options should be selected.  If you
+ choose the BLAS Fortran 77 interface, it is necessary to fill
+ out the machine-specific C to Fortran 77 interface section of
+ the  Make.<arch>  file.  To  do this,  please  refer  to  the 
+ Make.<arch> examples contained in the setup directory.
+
+ By default HPL will:
+    *) not copy L before broadcast,
+    *) call the BLAS Fortran 77 interface,
+    *) not display detailed timing information.
+
+ As an example,  suppose  one wants  HPL  to copy the panel of
+ columns  into  a  contiguous buffer  before broadcasting.  In
+ theory,  it  would be more efficient to let  HPL  create  the
+ appropriate  MPI  user-defined data type since this may avoid 
+ the data copy. So, it is a strange idea, but one insists.  To
+ achieve this one would add -DHPL_COPY_L  to the definition of
+ HPL_OPTS  at the end of the file  Make.<arch>.  Issue  then a
+ "make clean arch=<arch>; make build arch=<arch>" and the xhpl
+ executable will be rebuilt with that feature enabled.
+==============================================================
+ 
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64
new file mode 100644
index 000000000..2b55e694f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64
@@ -0,0 +1,244 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Command aliases for basic file operations; some carry default flags (-fs, -p, -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Platform name: defined once, with no trailing blank, and exported to child processes.
+ARCH         = intel64
+export ARCH
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+# TOPdir is where this is being built: by default the directory make is running in.
+TOPdir       = $(CURDIR)
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# NOTE(review): ONEAPI_ROOT is not set in this file; presumably it comes from the environment.
+OneAPIdir    = $(ONEAPI_ROOT)
+MPdir        = $(OneAPIdir)/mpi/latest/
+MPinc        = -I$(MPdir)/include/
+MPlib        = -lmpi #$(MPdir)/lib/release/libmpi.so
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAlib lists -ldgemm from $(TOPdir)/src/dpcpp/ ahead of the MKL libraries; it also carries a -I flag.
+LAdir        = $(OneAPIdir)/mkl/latest/lib/intel64/
+LAinc        = -I$(OneAPIdir)/mkl/latest/include/intel64/
+LAlib 	     = -L$(TOPdir)/src/dpcpp/ -ldgemm  -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -I$(TOPdir)/src/dpcpp/
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# NOTE(review): -DAdd__ is listed above as the f2c convention, not the default (-DAdd_) - confirm.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES covers only the HPL include directories; LAinc and MPinc are commented out of it.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) #$(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+# -DASYOUGO              enable timing information as you go (nonintrusive)
+# -DASYOUGO2             slightly intrusive timing information
+# -DASYOUGO2_DISPLAY     display detailed DGEMM information
+# -DENDEARLY             end the problem early  
+# -DFASTSWAP             insert to use DLASWP instead of HPL code
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# This file selects the cblas interface (-DHPL_CALL_CBLAS) rather than the Fortran 77 default.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS gathers the F77/C interface defines, the HPL options and the include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT holds only $(HPL_DEFS); CCFLAGS adds -O3, unrolling, warning and -fopenmp flags to it.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Default link step: mpiicpc driving icpx with -fsycl; either backend switch set to ON replaces it with $(CC).
+LINKER       = mpiicpc -cxx=icpx -fsycl
+LINKFLAGS    = $(CCFLAGS) -lmkl_sycl -lmkl_core -lmkl_cdft_core -lmkl_gf_ilp64 -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_intel_ilp64 -lmkl_intel_lp64 -lmkl_rt -lmkl_sequential -lmkl_tbb_thread
+ifeq ($(USE_NVIDIA_BACKEND),ON)
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+endif
+ifeq ($(USE_AMD_BACKEND),ON)
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+endif
+# Archive tool settings; RANLIB is set to echo, so no real ranlib program is named here.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+# Redefines MAKE: every $(MAKE) expansion runs "make" with VERBOSE=1, arch and TOPdir passed explicitly.
+# ----------------------------------------------------------------------
+MAKE = make VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top
new file mode 100644
index 000000000..c9980518c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top
@@ -0,0 +1,238 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+arch             = UNKNOWN
+# Load the platform settings from Make.<arch>; callers select the file by passing arch=<name>.
+include Make.$(arch)
+#
+## build ###############################################################
+# Run the sub-make in every source directory; "&&" keeps $(MAKE) from running in this directory if $(CD) fails.
+build_src        :
+	( $(CD) src/auxil/$(arch) &&       $(MAKE) )
+	( $(CD) src/blas/$(arch) &&        $(MAKE) )
+	( $(CD) src/comm/$(arch) &&        $(MAKE) )
+	( $(CD) src/grid/$(arch) &&        $(MAKE) )
+	( $(CD) src/panel/$(arch) &&       $(MAKE) )
+	( $(CD) src/pauxil/$(arch) &&      $(MAKE) )
+	( $(CD) src/pfact/$(arch) &&       $(MAKE) )
+	( $(CD) src/pgesv/$(arch) &&       $(MAKE) )
+	( $(CD) src/dpcpp/ &&              $(MAKE) )
+# Run the sub-make in every per-arch testing directory; "&&" stops a failed $(CD) from building in the wrong place.
+build_tst        :
+	( $(CD) testing/matgen/$(arch) &&  $(MAKE) )
+	( $(CD) testing/timer/$(arch) &&   $(MAKE) )
+	( $(CD) testing/pmatgen/$(arch) && $(MAKE) )
+	( $(CD) testing/ptimer/$(arch) &&  $(MAKE) )
+	( $(CD) testing/ptest/$(arch) &&   $(MAKE) )
+#( SPMS_make_cd`' testing/test/$(arch);      SPMS_make_make`' )
+#
+## startup #############################################################
+# Create include/$(arch), lib, lib/$(arch), bin and bin/$(arch); the leading "-" ignores a failing $(MKDIR).
+startup_dir      :
+	- $(MKDIR) include/$(arch)
+	- $(MKDIR) lib
+	- $(MKDIR) lib/$(arch)
+	- $(MKDIR) bin
+	- $(MKDIR) bin/$(arch)
+# Invoke the "leaf" target of this file once per source component, passing the component path as le=.
+startup_src      :
+	- $(MAKE) -f Make.top leaf le=src/auxil       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/blas        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/comm        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/grid        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/panel       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pauxil      arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pfact       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pgesv       arch=$(arch)
+# Invoke the "leaf" target of this file once per testing component, passing the component path as le=.
+startup_tst      :
+	- $(MAKE) -f Make.top leaf le=testing/matgen  arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/timer   arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/ptimer  arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/ptest   arch=$(arch)
+#- SPMS_make_make`' -f Make.top leaf le=testing/test    arch=$(arch)
+#
+## refresh #############################################################
+# Install (via $(CP)) each makes/Make.<component> file as the Makefile of the matching per-arch source directory.
+refresh_src      :
+	- $(CP) makes/Make.auxil    src/auxil/$(arch)/Makefile
+	- $(CP) makes/Make.blas     src/blas/$(arch)/Makefile
+	- $(CP) makes/Make.comm     src/comm/$(arch)/Makefile
+	- $(CP) makes/Make.grid     src/grid/$(arch)/Makefile
+	- $(CP) makes/Make.panel    src/panel/$(arch)/Makefile
+	- $(CP) makes/Make.pauxil   src/pauxil/$(arch)/Makefile
+	- $(CP) makes/Make.pfact    src/pfact/$(arch)/Makefile
+	- $(CP) makes/Make.pgesv    src/pgesv/$(arch)/Makefile
+# Install (via $(CP)) each makes/Make.<component> file as the Makefile of the matching per-arch testing directory.
+refresh_tst      :
+	- $(CP) makes/Make.matgen   testing/matgen/$(arch)/Makefile
+	- $(CP) makes/Make.timer    testing/timer/$(arch)/Makefile
+	- $(CP) makes/Make.pmatgen  testing/pmatgen/$(arch)/Makefile
+	- $(CP) makes/Make.ptimer   testing/ptimer/$(arch)/Makefile
+	- $(CP) makes/Make.ptest    testing/ptest/$(arch)/Makefile
+#- SPMS_make_cp`' makes/Make.test     testing/test/$(arch)/Makefile
+#
+## clean ###############################################################
+# Run "clean" in every source directory; "&&" keeps a missing directory from cleaning the current one instead.
+clean_src        :
+	- ( $(CD) src/auxil/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/blas/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/comm/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/grid/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/panel/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/pauxil/$(arch) &&     $(MAKE) clean )
+	- ( $(CD) src/pfact/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/pgesv/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/dpcpp/ &&             $(MAKE) clean )
+# Run "clean" in every per-arch testing directory; "&&" skips the sub-make when $(CD) fails.
+clean_tst        :
+	- ( $(CD) testing/matgen/$(arch) &&  $(MAKE) clean )
+	- ( $(CD) testing/timer/$(arch) &&   $(MAKE) clean )
+	- ( $(CD) testing/pmatgen/$(arch) && $(MAKE) clean )
+	- ( $(CD) testing/ptimer/$(arch) &&  $(MAKE) clean )
+	- ( $(CD) testing/ptest/$(arch) &&   $(MAKE) clean )
+#- ( SPMS_make_cd`' testing/test/$(arch);     SPMS_make_make`' clean )
+#
+## clean_arch ##########################################################
+# Remove each per-arch source directory, then run "clean" in src/dpcpp (only if the $(CD) succeeds).
+clean_arch_src   :
+	- $(RM) -r src/auxil/$(arch)
+	- $(RM) -r src/blas/$(arch)
+	- $(RM) -r src/comm/$(arch)
+	- $(RM) -r src/grid/$(arch)
+	- $(RM) -r src/panel/$(arch)
+	- $(RM) -r src/pauxil/$(arch)
+	- $(RM) -r src/pfact/$(arch)
+	- $(RM) -r src/pgesv/$(arch)
+	- ( $(CD) src/dpcpp &&       $(MAKE) clean )
+# Remove (via $(RM) -r) the per-arch directory of every testing component; "-" ignores failures.
+clean_arch_tst   :
+	- $(RM) -r testing/matgen/$(arch)
+	- $(RM) -r testing/timer/$(arch)
+	- $(RM) -r testing/pmatgen/$(arch)
+	- $(RM) -r testing/ptimer/$(arch)
+	- $(RM) -r testing/ptest/$(arch)
+#- SPMS_make_rm`' -r testing/test/$(arch)
+#
+## clean_arch_all ######################################################
+# Run clean_arch_src and clean_arch_tst, then remove the per-arch bin/, include/ and lib/ directories.
+clean_arch_all   :
+	- $(MAKE) -f Make.top clean_arch_src arch=$(arch)
+	- $(MAKE) -f Make.top clean_arch_tst arch=$(arch)
+	- $(RM) -r bin/$(arch) include/$(arch) lib/$(arch)
+#
+## clean_guard #########################################################
+# Delete the *.grd files of every source component; "&&" keeps $(RM) from running here if $(CD) fails.
+clean_guard_src  :
+	- ( $(CD) src/auxil/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/blas/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/comm/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/grid/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/panel/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/pauxil/$(arch) &&    $(RM) *.grd )
+	- ( $(CD) src/pfact/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/pgesv/$(arch) &&     $(RM) *.grd )
+# Delete the *.grd files of every testing component; "&&" keeps $(RM) from running here if $(CD) fails.
+clean_guard_tst  :
+	- ( $(CD) testing/matgen/$(arch) &&  $(RM) *.grd )
+	- ( $(CD) testing/timer/$(arch) &&   $(RM) *.grd )
+	- ( $(CD) testing/pmatgen/$(arch) && $(RM) *.grd )
+	- ( $(CD) testing/ptimer/$(arch) &&  $(RM) *.grd )
+	- ( $(CD) testing/ptest/$(arch) &&   $(RM) *.grd )
+#- ( SPMS_make_cd`' testing/test/$(arch);    SPMS_make_rm`' *.grd )
+#
+## misc ################################################################
+# Create $(le)/$(arch) and link $(TOPdir)/Make.$(arch) into it as Make.inc; "&&" skips a step whose $(CD) failed.
+leaf             :
+	- ( $(CD) $(le) && $(MKDIR) $(arch) )
+	- ( $(CD) $(le)/$(arch) && \
+            $(LN_S) $(TOPdir)/Make.$(arch) Make.inc )
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile
new file mode 100644
index 000000000..7ab3d9c54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile
@@ -0,0 +1,134 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# Recipes run under /bin/sh.
+SHELL            = /bin/sh
+# arch is forwarded to Make.top by every rule below. NOTE(review): lowercase "make" is never referenced in this file.
+arch             = intel64
+make             = 'make VERBOSE=1'
+#
+## Targets #############################################################
+#
+all              : install
+# Targets here are commands, not files; install needs startup, refresh, build in that order even under -j.
+.PHONY           : all install startup refresh build clean clean_arch clean_arch_all clean_guard
+.NOTPARALLEL     :
+install          : startup refresh build
+# Run the Make.top startup_* targets, then the same two refresh_* targets that "refresh" runs.
+startup          :
+	$(MAKE) -f Make.top startup_dir     arch=$(arch)
+	$(MAKE) -f Make.top startup_src     arch=$(arch)
+	$(MAKE) -f Make.top startup_tst     arch=$(arch)
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+# Re-run the Make.top refresh_src and refresh_tst targets for the selected arch.
+refresh          :
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+# Build the sources first, then the tests, by delegating to Make.top.
+build            :
+	$(MAKE) -f Make.top build_src       arch=$(arch)
+	$(MAKE) -f Make.top build_tst       arch=$(arch)
+# Delegate to the Make.top clean_src and clean_tst targets for the selected arch.
+clean            :
+	$(MAKE) -f Make.top clean_src       arch=$(arch)
+	$(MAKE) -f Make.top clean_tst       arch=$(arch)
+# Delegate to the Make.top clean_arch_src and clean_arch_tst targets for the selected arch.
+clean_arch       :
+	$(MAKE) -f Make.top clean_arch_src  arch=$(arch)
+	$(MAKE) -f Make.top clean_arch_tst  arch=$(arch)
+# Delegate to the Make.top target of the same name for the selected arch.
+clean_arch_all   :
+	$(MAKE) -f Make.top clean_arch_all  arch=$(arch)
+# Delegate to the Make.top clean_guard_src and clean_guard_tst targets for the selected arch.
+clean_guard      :
+	$(MAKE) -f Make.top clean_guard_src arch=$(arch)
+	$(MAKE) -f Make.top clean_guard_tst arch=$(arch)
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am
new file mode 100644
index 000000000..1ad8c1b17
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src testing
+## Recurse into src and testing; add the top-level include directory to the preprocessor flags.
+AM_CPPFLAGS = -I$(top_srcdir)/include
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in
new file mode 100644
index 000000000..76f0e2dd6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in
@@ -0,0 +1,772 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(am__DIST_COMMON)
+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+ configure.lineno config.status.lineno
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	cscope distdir distdir-am dist dist-all distcheck
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+CSCOPE = cscope
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+	$(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \
+	INSTALL NEWS README THANKS TODO compile config.guess \
+	config.sub depcomp install-sh missing
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+am__remove_distdir = \
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
+am__post_remove_distdir = $(am__remove_distdir)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+DIST_ARCHIVES = $(distdir).tar.gz
+GZIP_ENV = --best
+DIST_TARGETS = dist-gzip
+distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
+distcleancheck_listfiles = find . -type f -print
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = src testing
+AM_CPPFLAGS = -I$(top_srcdir)/include
+all: all-recursive
+
+.SUFFIXES:
+am--refresh: Makefile
+	@:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \
+	      $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    echo ' $(SHELL) ./config.status'; \
+	    $(SHELL) ./config.status;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	$(SHELL) ./config.status --recheck
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	$(am__cd) $(srcdir) && $(AUTOCONF)
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+$(am__aclocal_m4_deps):
+# If hplconfig.h is missing, drop the stamp file and remake it so the header is regenerated.
+include/hplconfig.h: include/stamp-h1
+	@test -f $@ || rm -f include/stamp-h1
+	@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1
+
+include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status
+	@rm -f include/stamp-h1
+	cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h
+$(top_srcdir)/include/hplconfig.h.in:  $(am__configure_deps) 
+	($(am__cd) $(top_srcdir) && $(AUTOHEADER))
+	rm -f include/stamp-h1
+	touch $@
+# Delete the config header and its stamp file; '-' ignores rm failures.
+distclean-hdr:
+	-rm -f include/hplconfig.h include/stamp-h1
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscope: cscope.files
+	test ! -s cscope.files \
+	  || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
+clean-cscope:
+	-rm -f cscope.files
+cscope.files: clean-cscope cscopelist
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+# Delete tag/ID database files and cscope output files; '-' ignores rm failures.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+	-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
+# distdir first requires $(BUILT_SOURCES), then runs distdir-am in a sub-make.
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	$(am__remove_distdir)
+	test -d "$(distdir)" || mkdir "$(distdir)"
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+	-test -n "$(am__skip_mode_fix)" \
+	|| find "$(distdir)" -type d ! -perm -755 \
+		-exec chmod u+rwx,go+rx {} \; -o \
+	  ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+	|| chmod -R a+r "$(distdir)"
+dist-gzip: distdir
+	tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
+	$(am__post_remove_distdir)
+
+dist-bzip2: distdir
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__post_remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
+	$(am__post_remove_distdir)
+
+dist-xz: distdir
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
+	$(am__post_remove_distdir)
+
+dist-tarZ: distdir
+	@echo WARNING: "Support for distribution archives compressed with" \
+		       "legacy program 'compress' is deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
+	$(am__post_remove_distdir)
+
+dist-shar: distdir
+	@echo WARNING: "Support for shar distribution archives is" \
+	               "deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
+	$(am__post_remove_distdir)
+
+dist-zip: distdir
+	-rm -f $(distdir).zip
+	zip -rq $(distdir).zip $(distdir)
+	$(am__post_remove_distdir)
+
+dist dist-all:
+	$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
+	$(am__post_remove_distdir)
+
+# This target untars the dist file and tries a VPATH configuration.  Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+	case '$(DIST_ARCHIVES)' in \
+	*.tar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
+	*.tar.bz2*) \
+	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
+	*.tar.xz*) \
+	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
+	*.tar.Z*) \
+	  uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
+	*.shar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
+	*.zip*) \
+	  unzip $(distdir).zip ;;\
+	esac
+	chmod -R a-w $(distdir)
+	chmod u+w $(distdir)
+	mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
+	chmod a-w $(distdir)
+	test -d $(distdir)/_build || exit 0; \
+	dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
+	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
+	  && am__cwd=`pwd` \
+	  && $(am__cd) $(distdir)/_build/sub \
+	  && ../../configure \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
+	    $(DISTCHECK_CONFIGURE_FLAGS) \
+	    --srcdir=../.. --prefix="$$dc_install_base" \
+	  && $(MAKE) $(AM_MAKEFLAGS) \
+	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
+	  && $(MAKE) $(AM_MAKEFLAGS) check \
+	  && $(MAKE) $(AM_MAKEFLAGS) install \
+	  && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+	  && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+	  && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
+	        distuninstallcheck \
+	  && chmod -R a-w "$$dc_install_base" \
+	  && ({ \
+	       (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
+	            distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
+	      } || { rm -rf "$$dc_destdir"; exit 1; }) \
+	  && rm -rf "$$dc_destdir" \
+	  && $(MAKE) $(AM_MAKEFLAGS) dist \
+	  && rm -rf $(DIST_ARCHIVES) \
+	  && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
+	  && cd "$$am__cwd" \
+	  || exit 1
+	$(am__post_remove_distdir)
+	@(echo "$(distdir) archives ready for distribution: "; \
+	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
+	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
+distuninstallcheck:
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
+	   || { echo "ERROR: files left after uninstall:" ; \
+	        if test -n "$(DESTDIR)"; then \
+	          echo "  (check DESTDIR support)"; \
+	        fi ; \
+	        $(distuninstallcheck_listfiles) ; \
+	        exit 1; } >&2
+distcleancheck: distclean
+	@if test '$(srcdir)' = . ; then \
+	  echo "ERROR: distcleancheck can only run from a VPATH build" ; \
+	  exit 1 ; \
+	fi
+	@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
+	  || { echo "ERROR: files left in build directory after distclean:" ; \
+	       $(distcleancheck_listfiles) ; \
+	       exit 1; } >&2
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+# After all-am, build install-exec-am and install-data-am in a single sub-make.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+# Only prints a notice that this target is meant for maintainers.
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -rf $(top_srcdir)/autom4te.cache
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
+	am--refresh check check-am clean clean-cscope clean-generic \
+	cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \
+	dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \
+	distcheck distclean distclean-generic distclean-hdr \
+	distclean-tags distcleancheck distdir distuninstallcheck dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now  a run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 15/12/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15;  Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 to not use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost surely
+ 1/2; we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README
new file mode 100644
index 000000000..c3f79a877
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README
@@ -0,0 +1,32 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ HPL is a software package that solves a (random) dense linear
+ system  in   double  precision  (64   bits)   arithmetic   on 
+ distributed-memory  computers.   It can thus be regarded as a
+ portable as well as  freely  available implementation  of the
+ High Performance Computing Linpack Benchmark.
+
+ The  HPL  software  package requires the availability on your
+ system of an implementation of the  Message Passing Interface
+ MPI  (1.1 compliant).  An  implementation of either the Basic
+ Linear Algebra Subprograms  BLAS  or the  Vector Signal Image
+ Processing Library VSIPL is also needed.  Machine-specific as
+ well as generic implementations of MPI, the  BLAS  and  VSIPL
+ are available for a large variety of systems.
+
+ Install See the file INSTALL in this directory.
+ -------
+
+ Tuning  See the file TUNING in this directory.
+ ------
+
+ Bugs  Known  problems and bugs with this release are documen-
+ ----  ted in the file hpl/BUGS.
+
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS
new file mode 100644
index 000000000..1c5641ce4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS
@@ -0,0 +1 @@
+This software was improved with contribution of external developers.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING
new file mode 100644
index 000000000..24707f1fc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING
@@ -0,0 +1,419 @@
+==============================================================
+ Performance Tuning and setting up the input data file HPL.dat
+ 
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+
+ After  having  built  the executable hpl/bin/<arch>/xhpl, one
+ may want to modify the input  data  file  HPL.dat.  This file
+ should  reside  in  the  same  directory  as  the  executable 
+ hpl/bin/<arch>/xhpl.  An example  HPL.dat file is provided by
+ default.  This  file  contains  information about the problem
+ sizes,  machine configuration,  and  algorithm features to be
+ used by the executable. It is 30 lines long. All the selected
+ parameters  will  be  printed  in the output generated by the
+ executable.
+
+ At the end of this file,  there are a couple of  experimental
+ guidelines that you may find useful.
+
+==============================================================
+ File HPL.dat (description):
+
+ Line 1: (unused) Typically  one  would  use this line for its 
+ own good. For example, it could be used to summarize the con-
+ tent of the input file. By default this line reads:
+ 
+ HPL Linpack benchmark input file
+ 
+ Line 2: (unused) same as line 1. By default this line reads:
+ 
+ Innovative Computing Laboratory, University of Tennessee
+ 
+ Line 3: the  user  can  choose where the output should be re-
+ directed to.  In the case of a file, a name is necessary, and
+ this  is  the  line  where one wants to specify it.  Only the
+ first name on this line is  significant.  By default, the li-
+ ne reads:
+ 
+ HPL.out  output file name (if any)
+ 
+ This  means  that if  one chooses to redirect the output to a
+ file, the file will be called "HPL.out". The rest of the line
+ is unused, and this space can hold an informative comment on
+ the meaning of this line.
+ 
+ Line 4: This line specifies  where the  output should go. The
+ line is formatted, it must be a positive integer, the rest is
+ insignificant.  3 choices are possible for the positive inte-
+ ger:  6 means that the output will go to the standard output,
+ 7 means that the output will go to the standard error. Any o-
+ ther  integer  means  that  the  output  should be redirected
+ to a file,  whose  name has been specified in the line above.
+ This line by default reads:
+ 
+ 6        device out (6=stdout,7=stderr,file)
+ 
+ which  means  that  the  output generated  by  the executable
+ should be redirected to the standard output.
+ 
+ Line 5: This line specifies the number of problem sizes to be
+ executed. This number should be less than or equal to 20. The
+ first  integer  is  significant,  the rest is ignored. If the 
+ line reads:
+ 
+ 3        # of problems sizes (N)
+ 
+ this  means  that  the user is willing to run 3 problem sizes
+ that will be specified in the next line.
+ 
+ Line 6:  This  line  specifies the problem sizes one wants to 
+ run.  Assuming  the  line  above  started with 3, the 3 first
+ positive  integers  are significant, the rest is ignored. For
+ example:
+ 
+ 3000 6000 10000    Ns
+ 
+ means that one wants xhpl to run 3 (specified in line 5) pro-
+ blem sizes, namely 3000, 6000 and 10000.
+ 
+ Line 7: This line  specifies  the number of block sizes to be
+ run.  This  number  should  be  less  than  or  equal to  20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+ 
+ 5        # of NBs
+ 
+ this means that the user is willing to use 5 block sizes that
+ will be specified in the next line.
+ 
+ Line 8: This line specifies the block sizes one wants to run.
+ Assuming  the line above started with 5, the 5 first positive
+ integers are significant, the rest is ignored. For example:
+ 
+ 80 100 120 140 160 NBs
+ 
+ means  that  one  wants  xhpl  to use 5 (specified in line 7)
+ block sizes, namely 80, 100, 120, 140 and 160.
+
+ Line 9 specifies how the  MPI processes should be mapped onto
+ the nodes of your platform.  There are currently two possible
+ mappings, namely row- and column-major. This feature is main-
+ ly  useful  when these nodes  are  themselves multi-processor
+ computers. A row-major mapping is recommended.
+ 
+ Line 10: This line specifies  the  number of process grids to
+ be run.  This  number  should  be  less than or  equal to 20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+
+ 2        # of process grids (P x Q)
+ 
+ this  means  that you are willing to try 2 process grid sizes 
+ that will be specified in the next line.
+ 
+ Line 11-12: These  two  lines specify  the  number of process
+ rows  and  columns of each grid you want to run on.  Assuming
+ the line above (10) started with 2,  the 2 first positive in-
+ tegers of those two lines are significant,  the rest is igno-
+ red. For example:
+ 
+ 1 2          Ps
+ 6 8          Qs
+ 
+ means  that one wants to run  xhpl  on  2 process grids (line
+ 10), namely 1 by 6 and 2 by 8.  Note:  In this example, it is
+ required then to start xhpl on at least 16 nodes  (max of P_i
+ xQ_i). The runs on the two grids will be consecutive.  If one
+ was starting xhpl on more than 16 nodes, say 52, only 6 would
+ be used for the first grid  (1x6) and then 16  (2x8) would be
+ used for the second grid.  The fact  that you started the MPI 
+ job on 52 nodes,  will not make HPL use all of them.  In this 
+ example, only 16 would be used. If one wants to run xhpl with
+ 52 processes one needs to specify a grid of 52 processes, for
+ example the following lines would do the job:
+ 
+ 4  2         Ps
+ 13 8         Qs
+ 
+ Line 13: This  line  specifies  the  threshold  the residuals
+ should be compared to.  The  residuals  should be of order 1,
+ but are in practice slightly less than this, typically 0.001.
+ This  line  is  made of a real number, the rest is insignifi-
+ cant. For example:
+ 
+ 16.0         threshold
+
+ In practice,  a value of 16.0 will cover most cases.  For va-
+ rious reasons,  it is possible that some of the residuals be-
+ come slightly larger, say for example 35.6.  xhpl  will  flag
+ those runs as failed,  however they can be considered as cor-
+ rect.  A run can be considered as failed if the residual is a
+ few orders of magnitude bigger than 1 for example 10^6 or mo-
+ re. Note: if one was to specify a threshold of 0.0, all tests
+ would be flagged  as failed, even though the answer is likely
+ to be correct.  It is allowed to specify a negative value for
+ this threshold,  in  which case the checks will be by-passed,
+ no matter what the value is, as soon as it is negative.  This
+ feature  allows to save time when performing a lot of experi-
+ ments, say for instance during the tuning phase. Example:
+ 
+ -16.0        threshold
+ 
+ The remaining lines allow one to specify algorithmic features.
+ xhpl  will  run  all  possible combinations of those for each
+ problem  size,  block size, process grid combination. This is
+ handy  when one looks for an "optimal" set of parameters.  To
+ understand a little bit better, let us first say a few words
+ about  the algorithm implemented in HPL. Basically this is  a
+ right-looking  version  with  row-partial pivoting. The panel
+ factorization is matrix-matrix operation based and recursive,
+ dividing the panel into  NDIV  subpanels  at each step.  This
+ part  of  the  panel   factorization   is  denoted  below  by
+ "recursive panel fact. (RFACT)". The recursion stops when the
+ current panel is made of less than or equal to NBMIN columns.
+ At  that  point,  xhpl  uses  a matrix-vector operation based
+ factorization denoted below by  "PFACTs".  Classic  recursion
+ would then use  NDIV=2,  NBMIN=1.  There  are  essentially  3
+ numerically  equivalent  LU  factorization algorithm variants
+ (left-looking, Crout  and  right-looking).  In  HPL,  one can 
+ choose  every one  of those  for the  RFACT,  as well as  the
+ PFACT. The following lines of HPL.dat allow you to set  those
+ parameters.
+ 
+ Lines 14-21: (Example 1)
+ 3       # of panel fact
+ 0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+ 4       # of recursive stopping criterium
+ 1 2 4 8 NBMINs (>= 1)
+ 3       # of panels in recursion
+ 2 3 4   NDIVs
+ 3       # of recursive panel fact.
+ 0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This  example  would  try all variants of PFACT, 4 values for
+ NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+ and 4, and all variants for RFACT.  Lines 14-21: (Example 2)
+
+ 2       # of panel fact
+ 2 0     PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This example would try  2 variants of PFACT namely right loo-
+ king and left looking, 2 values for NBMIN, namely 4 and 8,  1
+ value for NDIV namely 2, and one variant for RFACT.
+ 
+ In the  main loop of the algorithm,  the current panel of co-
+ lumns is broadcast in process rows using  a virtual  ring to-
+ pology. HPL offers various choices, and one most likely wants
+ to use the increasing ring modified encoded as 1.  4  is also
+ a good choice. Lines 22-23: (Example 1):
+
+ 1       # of broadcast
+ 1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause HPL  to broadcast the current panel using the
+ increasing ring modified topology. Lines 22-23: (Example 2):
+ 
+ 2       # of broadcast
+ 0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause  HPL to broadcast the current panel using the
+ increasing ring virtual topology and the long message algori-
+ thm.
+ 
+ Lines 24-25  allow  to  specify  the look-ahead depth used by
+ HPL. A depth of 0 means that the next panel is factorized af-
+ ter the update by the current panel is completely finished. A
+ depth of 1 means that the next panel is factorized immediate-
+ ly after being updated.  The  update by the current  panel is
+ then finished.  A depth of k means that the k next panels are
+ factorized immediately after being updated. The update by the
+ current  panel is then finished. It turns out that a depth of
+ 1  seems  to give the best results, but may need a large pro-
+ blem size  before one can see the performance gain. So use 1,
+ if you do not know better,  otherwise  you may want to try 0.
+ Look-ahead of depths 2  and larger will probably not give you
+ better results.  Lines 24-25: (Example 1):
+ 
+ 1       # of lookahead depth
+ 1       DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depth 1.
+ Lines 24-25: (Example 2):
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depths 0 and 1.
+
+ Lines 26-27  allow to specify  the swapping algorithm used by
+ HPL for all tests.  There  are  currently  two swapping algo-
+ rithms  available,  one  based  on "binary exchange"  and the
+ other one based on a  "spread-roll"  procedure  (also  called
+ "long" below). For large problem sizes, this last one is like-
+ ly to be more efficient. The user can also choose to mix both
+ variants, that is "binary-exchange"  for  a number of columns
+ less  than a threshold value, and then the  "spread-roll" al-
+ gorithm.  This threshold  value is then specified on Line 27.
+ Lines 26-27: (Example 1):
+
+ 1       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping algorithm.  Note  that a threshold  is specified in that
+ example but not used by HPL. Lines 26-27: (Example 2):
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping  algorithm  as soon as there are more than 60 columns in
+ the row panel.  Otherwise,  the  "binary-exchange"  algorithm
+ will be used instead.
+
+ Line 28  allows  to specify whether the upper triangle of the
+ panel  of  columns  should  be  stored  in  no-transposed  or
+ transposed form. Example:
+
+ 0            L1 in (0=transposed,1=no-transposed) form
+
+ Line 29 allows to specify whether the panel of rows  U should
+ be stored in no-transposed or transposed form. Example:
+ 
+ 0            U  in (0=transposed,1=no-transposed) form
+
+ Line 30 enables/disables the equilibration phase. This option
+ will not be used unless you selected 1 or 2 in Line 26. Ex:
+
+ 1            Equilibration (0=no,1=yes)
+
+
+ Line 31  allows  to  specify  the alignment in memory for the
+ memory space allocated by HPL. On modern machines, one proba-
+ bly wants to use 4, 8 or 16. This may result in a tiny amount
+ of memory wasted. Example:
+ 
+ 4       memory alignment in double (> 0)
+
+==============================================================
+ Guide lines:
+
+ 1) Figure  out  a  good  block  size  for  the  matrix-matrix 
+ multiply routine. The best method is to try a few out. If you
+ happen  to know  the block size  used  by  the  matrix-matrix 
+ multiply routine, a small multiple of that block size will do
+ fine.
+
+ HPL  uses the block size NB for the data distribution as well
+ as  for   the  computational   granularity.   From   a   data 
+ distribution point of view,  the smaller  NB,  the better the
+ load balance.  You  definitely  want  to stay away  from very
+ large values of NB.  From a computation point of view,  a too
+ small value of  NB may limit the computational performance by
+ a large factor because almost no data reuse will occur in the
+ highest level of the memory hierarchy. The number of messages
+ will also increase.  Efficient  matrix-multiply  routines are 
+ often internally blocked.  Small multiples  of  this blocking
+ factor are likely to be good block sizes for HPL.  The bottom
+ line  is  that  "good"  block sizes  are almost always in the
+ [32..256] interval. The best values depend on the computation
+ / communication performance ratio of your system.  To  a much
+ lesser extent,  the problem size  matters  as well.  Say  for
+ example,  you empirically found that 44 was a good block size
+ with respect to performance.  88 or 132  are likely  to  give
+ slightly better  results for large problem sizes because of a
+ slightly higher flop rate.
+
+ 2)  The process mapping  should  not matter  if  the nodes of
+ your platform are single processor computers.  If these nodes
+ are multi-processors, a row-major mapping is recommended.
+
+ 3) HPL likes "square" or slightly flat process grids.  Unless
+ you  are using  a very small process grid, stay away from the 
+ 1-by-Q and P-by-1 process grids.
+
+ 4) Panel factorization parameters:  a good start are the fol-
+ lowing for the lines 14-21:
+
+ 1       # of panel fact
+ 1       PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+
+ 5) Broadcast parameters: at this time, it is far from obvious
+ to me what the best setting is,  so I would probably try them
+ all. If I had to guess I would probably start with the follo-
+ wing for the lines 22-23:
+ 
+ 2       # of broadcast
+ 1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+ The best broadcast depends  on your problem size and hardware
+ performance. My take is that 4 or 5  may be  competitive  for
+ machines  featuring  very  fast nodes  comparatively  to  the 
+ network.
+
+ 6) Look-ahead depth: as mentioned above  0 or 1 are likely to 
+ be the best choices.  This also  depends  on the problem size
+ and machine configuration, so I would try "no look-ahead (0)"
+ and "look-ahead of depth 1 (1)". That is for lines 24-25:
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+
+ 7) Swapping:  one  can select only one of the three algorithms
+ in the input file. Theoretically, mix (2) should win, however
+ long (1) might just be good enough. The  difference should be
+ small between those two assuming  a swapping threshold of the 
+ order of the block size (NB) selected. If  this  threshold is
+ very large, HPL will use bin_exch (0) most of the time and if
+ it  is  very  small  (< NB) long (1)  will always be used. In 
+ short  and  assuming  the  block size (NB)  used is say 60, I 
+ would choose for the lines 26-27:
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold 
+
+ I would also try the long variant.  For  a very  small number 
+ of processes  in every column of the process grid  (say < 4),
+ very little performance difference should be observable.
+
+ 8) Local storage:  I do not think Line 28 matters.  Pick 0 in
+ doubt.  Line 29 is more important.  It controls how the panel
+ of rows should be stored. No doubt 0 is better. The caveat is
+ that in that case the matrix-multiply function is called with
+ ( Notrans, Trans, ... ), that is C := C - A B^T.  Unless  the
+ computational  kernel  you  are  using  has a very poor (with
+ respect to performance)  implementation  of that case, and is
+ much more efficient with  ( Notrans, Notrans, ... ) just pick
+ 0 as well. So, my choice: 
+
+ 0       L1 in (0=transposed,1=no-transposed) form
+ 0       U  in (0=transposed,1=no-transposed) form
+
+ 9) Equilibration:  It  is hard to tell  whether equilibration
+ should always be performed or not. Not knowing much about the
+ random matrix generated and because the overhead is so small
+ compared to the possible gain, I turn it on all the time.
+
+ 1       Equilibration (0=no,1=yes)
+
+ 10) For  alignment, 4 should be plenty,  but just to be safe,
+ one may want to pick 8 instead.
+
+ 8       memory alignment in double (> 0)
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4
new file mode 100644
index 000000000..4072a950f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4
@@ -0,0 +1,90 @@
+dnl HPL_BLAS([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]): find a BLAS that provides dgemm_ and AC_SUBST BLAS_LIBS.
+AC_DEFUN([HPL_BLAS], [
+dnl Candidates are tried in table order; the first one that links wins.
+AC_PREREQ([2.69])
+dnl hpl_blas_ok stays "no" until one candidate links successfully.
+hpl_blas_ok=no
+
+dnl FIXME: add --with-blas="<library spec>"
+dnl LIBS is replaced for each probe and restored from current_LIBS afterwards.
+current_LIBS="$LIBS"
+dnl Candidate table: nameN is the display name, routN the symbol, libsN the link flags.
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+for hpl_i in 1 2 3 4 5 6 7 8 9 10;
+do
+if test x"$hpl_blas_ok" = xno; then
+  name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`"
+  rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`"
+  libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`"
+  AC_MSG_CHECKING([for [$]rout in [$]name])
+dnl Link the probe against this candidate's flags only, then restore LIBS.
+  LIBS="[$]libs"
+  AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"])
+  LIBS="$current_LIBS"
+
+  AC_MSG_RESULT([$hpl_blas_ok])
+fi
+done
+rm -f hplvars.txt
+
+if test x"$hpl_blas_ok" = xno; then
+dnl Last resort: plain -lopenblas without -lm.  AC_CHECK_LIB prints its own
+dnl "checking for ..." and result text, so wrapping it in AC_MSG_CHECKING and
+dnl AC_MSG_RESULT would produce a nested, garbled message.
+AC_CHECK_LIB([openblas], [dgemm_], [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"])
+dnl
+fi
+
+AC_SUBST([BLAS_LIBS])
+dnl The assignment in the else branch keeps it non-empty when no ACTION-IF-NOT-FOUND is given.
+# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$hpl_blas_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])
+        :
+else
+        hpl_blas_ok=no
+        $2
+fi
+
+])dnl HPL_BLAS
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4
new file mode 100644
index 000000000..56c6bd753
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4
@@ -0,0 +1,1308 @@
+# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
+You have another version of autoconf.  It may work, but is not guaranteed to.
+If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically 'autoreconf'.])])
+
+# ===========================================================================
+#      https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]])
+#
+# DESCRIPTION
+#
+#   This macro tries to find out how to compile C programs that use MPI
+#   (Message Passing Interface), a standard API for parallel process
+#   communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to
+#   be used instead of the standard macro AC_PROG_CC and will replace the
+#   standard variable CC with the found compiler.
+#
+#   MPI-WANTED-TEST is used to test whether MPI is actually wanted by the
+#   user. If MPI-WANTED_TEST is omitted or if it succeeds, the macro will
+#   try to find out how to use MPI, if it fails, the macro will call
+#   AC_PROG_CC to find a standard C compiler instead.
+#
+#   When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found
+#   (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If
+#   ACTION-IF-FOUND is not set, the macro will define HAVE_MPI.
+#
+#   The following example demonstrates usage of the macro:
+#
+#     # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found.
+#     # If --with-mpi=yes is used, try to find MPI and fail if it isn't found.
+#     # If --with-mpi=no is used, use a standard C compiler instead.
+#     AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi],
+#         [compile with MPI (parallelization) support. If none is found,
+#         MPI is not used. Default: auto])
+#     ],,[with_mpi=auto])
+#     #
+#     AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[
+#       use_mpi=no
+#       if test x"$with_mpi" = xyes; then
+#         AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.])
+#       else
+#         AC_MSG_WARN([No MPI compiler found, won't use MPI.])
+#       fi
+#     ])
+#
+# LICENSE
+#
+#   Copyright (c) 2010,2011 Olaf Lenz <olenz@icp.uni-stuttgart.de>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_PROG_CC_MPI], [
+AC_PREREQ(2.50)
+
+# Check for compiler
+# Needs to be split off into an extra macro to ensure right expansion
+# order.
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])])
+
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno],
+  [ _ax_prog_cc_mpi_mpi_found=no ],
+  [
+    AC_LANG_PUSH([C])
+    # Test whether MPI_Init links: first with LIBS as is, then with -lmpi,
+    # then with -lmpich.  On success LIBS keeps the flag that worked.
+    # AC_SEARCH_LIBS is not used here because it caches its outcome and
+    # thus disallows corresponding calls in the other AX_PROG_*_MPI macros.
+    for lib in NONE mpi mpich; do
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        AC_MSG_CHECKING([for function MPI_Init])
+      else
+        AC_MSG_CHECKING([for function MPI_Init in -l$lib])
+        LIBS="-l$lib $LIBS"
+      fi
+      AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])],
+        [ _ax_prog_cc_mpi_mpi_found=yes ],
+        [ _ax_prog_cc_mpi_mpi_found=no ])
+      AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found)
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break;
+      fi
+      LIBS=$save_LIBS
+    done
+
+    # Check for header
+    AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+      AC_MSG_CHECKING([for mpi.h])
+      AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <mpi.h>])],
+        [ AC_MSG_RESULT(yes)],
+        [ AC_MSG_RESULT(no)
+         _ax_prog_cc_mpi_mpi_found=no
+      ])
+    ])
+    AC_LANG_POP([C])
+])
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+        ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2])
+        :
+],[
+        $3
+        :
+])
+
+])dnl AX_PROG_CC_MPI
+
+dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI (the
+dnl main macro is split in two to ensure the right expansion order).  It sets
+dnl _ax_prog_cc_mpi_mpi_wanted and, when MPI is wanted, chooses CC.
+dnl
+dnl Known MPI C compilers:
+dnl  mpicc
+dnl  mpixlc_r
+dnl  mpixlc
+dnl  hcc
+dnl  mpxlc_r
+dnl  mpxlc
+dnl  sxmpicc  NEC SX
+dnl  mpifcc   Fujitsu
+dnl  mpgcc
+dnl  mpcc
+dnl  cmpicc
+dnl  cc
+dnl MPICC is taken as CC only when CC is unset and MPICC is set; otherwise the tool list below is searched.
+AC_DEFUN([_AX_PROG_CC_MPI], [
+  AC_ARG_VAR(MPICC,[MPI C compiler command])
+  ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[
+    AC_MSG_CHECKING([whether to compile using MPI])
+    if $1; then
+      _ax_prog_cc_mpi_mpi_wanted=yes
+    else
+      _ax_prog_cc_mpi_mpi_wanted=no
+    fi
+    AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted)
+  ])
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    if test -z "$CC" && test -n "$MPICC"; then
+      CC="$MPICC"
+    else
+      AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc])
+    fi
+  fi
+  AC_PROG_CC
+])dnl _AX_PROG_CC_MPI
+
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_AUTOMAKE_VERSION(VERSION)
+# ----------------------------
+# Automake X.Y traces this macro to ensure aclocal.m4 has been
+# generated from the m4 files accompanying Automake X.Y.  It sets
+# am__api_version to '1.16'.  (Private: do not call outside this file.)
+AC_DEFUN([AM_AUTOMAKE_VERSION],
+[am__api_version='1.16'
+dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
+dnl require some minimum version.  Point them to the right macro.
+m4_if([$1], [1.16.1], [],
+      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
+])
+
+# _AM_AUTOCONF_VERSION(VERSION)
+# -----------------------------
+# aclocal traces this macro to find the Autoconf version; it expands
+# to nothing.  This is a private macro too.  Using m4_define simplifies
+# the logic in aclocal, which can simply ignore this definition.
+m4_define([_AM_AUTOCONF_VERSION], [])
+
+# AM_SET_CURRENT_AUTOMAKE_VERSION
+# -------------------------------
+# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
+# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
+[AM_AUTOMAKE_VERSION([1.16.1])dnl
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
+
+# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
+# $ac_aux_dir to '$srcdir/foo'.  In other projects, it is set to
+# '$srcdir', '$srcdir/..', or '$srcdir/../..'.
+#
+# Of course, Automake must honor this variable whenever it calls a
+# tool from the auxiliary directory.  The problem is that $srcdir (and
+# therefore $ac_aux_dir as well) can be either absolute or relative,
+# depending on how configure is run.  This is pretty annoying, since
+# it makes $ac_aux_dir quite unusable in subdirectories: in the top
+# source directory, any form will work fine, but in subdirectories a
+# relative path needs to be adjusted first.
+#
+# $ac_aux_dir/missing
+#    fails when called from a subdirectory if $ac_aux_dir is relative
+# $top_srcdir/$ac_aux_dir/missing
+#    fails if $ac_aux_dir is absolute,
+#    fails when called from a subdirectory in a VPATH build with
+#          a relative $ac_aux_dir
+#
+# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
+# are both prefixed by $srcdir.  In an in-source build this is usually
+# harmless because $srcdir is '.', but things will break when you
+# start a VPATH build or use an absolute $srcdir.
+#
+# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
+# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
+#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
+# and then we would define $MISSING as
+#   MISSING="\${SHELL} $am_aux_dir/missing"
+# This will work as long as MISSING is not called from configure, because
+# unfortunately $(top_srcdir) has no meaning in configure.
+# However there are other variables, like CC, which are often used in
+# configure, and could therefore not use this "fixed" $ac_aux_dir.
+#
+# Another solution, used here, is to always expand $ac_aux_dir to an
+# absolute path, stored in $am_aux_dir.  The drawback is that absolute paths
+# prevent a configured tree from being moved without reconfiguration.
+
+AC_DEFUN([AM_AUX_DIR_EXPAND],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+])
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional: NAME_TRUE is empty and NAME_FALSE is '#' when SHELL-CONDITION succeeds, and the reverse otherwise.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ([2.52])dnl
+ m4_if([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+       [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+
+# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be
+# written in clear, in which case automake, when reading aclocal.m4,
+# will think it sees a *use*, and therefore will trigger all its
+# C support machinery.  Also note that it means that autoscan, seeing
+# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
+
+
+# _AM_DEPENDENCIES(NAME)
+# ----------------------
+# See how the compiler implements dependency checking.
+# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ".
+# We try a few techniques and use that to set a single cache variable.
+#
+# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
+# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
+# dependency, and given that the user is not expected to run this macro,
+# just rely on AC_PROG_CC.
+AC_DEFUN([_AM_DEPENDENCIES],
+[AC_REQUIRE([AM_SET_DEPDIR])dnl
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
+AC_REQUIRE([AM_MAKE_INCLUDE])dnl
+AC_REQUIRE([AM_DEP_TRACK])dnl
+dnl Pick the compiler command to probe; OBJC, OBJCXX and GCJ get a fixed depmode list.
+m4_if([$1], [CC],   [depcc="$CC"   am_compiler_list=],
+      [$1], [CXX],  [depcc="$CXX"  am_compiler_list=],
+      [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
+      [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'],
+      [$1], [UPC],  [depcc="$UPC"  am_compiler_list=],
+      [$1], [GCJ],  [depcc="$GCJ"  am_compiler_list='gcc3 gcc'],
+                    [depcc="$$1"   am_compiler_list=])
+
+AC_CACHE_CHECK([dependency style of $depcc],
+               [am_cv_$1_dependencies_compiler_type],
+[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_$1_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
+  fi
+  am__universal=false
+  m4_case([$1], [CC],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac],
+    [CXX],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac])
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_$1_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_$1_dependencies_compiler_type=none
+fi
+])
+AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
+AM_CONDITIONAL([am__fastdep$1], [
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
+])
+
+
+# AM_SET_DEPDIR
+# -------------
+# Choose a directory name for dependency files: DEPDIR is substituted as
+# "${am__leading_dot}deps".  This macro is AC_REQUIREd in _AM_DEPENDENCIES.
+AC_DEFUN([AM_SET_DEPDIR],
+[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
+])
+
+# AM_DEP_TRACK
+# ------------
+# Declare the --enable/--disable-dependency-tracking options and, unless tracking is disabled, set am_depcomp, AMDEPBACKSLASH and am__nodep.
+AC_DEFUN([AM_DEP_TRACK],
+[AC_ARG_ENABLE([dependency-tracking], [dnl
+AS_HELP_STRING(
+  [--enable-dependency-tracking],
+  [do not reject slow dependency extractors])
+AS_HELP_STRING(
+  [--disable-dependency-tracking],
+  [speeds up one-time build])])
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
+AC_SUBST([AMDEPBACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
+])
+
+# Generate code to set up dependency tracking.              -*- Autoconf -*-
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_OUTPUT_DEPENDENCY_COMMANDS
+# ------------------------------
+AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
+[{
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  AS_CASE([$CONFIG_FILES],
+          [*\'*], [eval set x "$CONFIG_FILES"],
+          [*], [set x $CONFIG_FILES])
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all seds we know understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`AS_DIRNAME(["$am_mf"])`
+    am_filepart=`AS_BASENAME(["$am_mf"])`
+    AM_RUN_LOG([cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles]) || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).])
+  fi
+  AS_UNSET([am_dirpart])
+  AS_UNSET([am_filepart])
+  AS_UNSET([am_mf])
+  AS_UNSET([am_rc])
+  rm -f conftest-deps.mk
+}
+])# _AM_OUTPUT_DEPENDENCY_COMMANDS
+
+
+# AM_OUTPUT_DEPENDENCY_COMMANDS
+# -----------------------------
+# This macro should only be invoked once -- use via AC_REQUIRE.
+#
+# This code is only required when automatic dependency tracking is enabled.
+# It registers a 'depfiles' config command that runs _AM_OUTPUT_DEPENDENCY_COMMANDS unless $AMDEP_TRUE is non-empty; AMDEP_TRUE and MAKE (default: make) are handed to that command.
+# This creates each '.Po' and '.Plo' makefile fragment that we'll need in order to bootstrap the dependency handling code.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
+[AC_CONFIG_COMMANDS([depfiles],
+     [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
+     [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
+
+# Do all the work for Automake.                             -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This macro actually does too much.  Some checks are only needed if
+# your package does certain things.  But this isn't really a big deal.
+
+dnl Redefine AC_PROG_CC as its previous definition followed by _AM_PROG_CC_C_O, so that the latter is invoked automatically.
+m4_define([AC_PROG_CC],
+m4_defn([AC_PROG_CC])
+[_AM_PROG_CC_C_O
+])
+
+# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
+# AM_INIT_AUTOMAKE([OPTIONS])
+# -----------------------------------------------
+# The call with PACKAGE and VERSION arguments is the old style
+# call (pre autoconf-2.50), which is being phased out; it is still
+# accepted but diagnosed as obsolete.  PACKAGE and VERSION should now
+# be passed to AC_INIT and removed from the call to AM_INIT_AUTOMAKE.
+# In the new style, OPTIONS is a space-separated list of Automake
+# options, PACKAGE and VERSION are taken from AC_PACKAGE_TARNAME and
+# AC_PACKAGE_VERSION, and it is a fatal error if AC_INIT did not
+# define the package name and version.
+AC_DEFUN([AM_INIT_AUTOMAKE],
+[AC_PREREQ([2.65])dnl
+dnl Autoconf wants to disallow AM_ names.  We explicitly allow
+dnl the ones we care about.
+m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
+AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
+AC_REQUIRE([AC_PROG_INSTALL])dnl
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+AC_SUBST([CYGPATH_W])
+
+# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls.
+m4_ifval([$2],
+[AC_DIAGNOSE([obsolete],
+             [$0: two- and three-arguments forms are deprecated.])
+m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
+ AC_SUBST([PACKAGE], [$1])dnl
+ AC_SUBST([VERSION], [$2])],
+[_AM_SET_OPTIONS([$1])dnl
+dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
+m4_if(
+  m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]),
+  [ok:ok],,
+  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
+ AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
+ AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
+
+_AM_IF_OPTION([no-define],,
+[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package])
+ AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl
+
+# Some tools Automake needs.
+AC_REQUIRE([AM_SANITY_CHECK])dnl
+AC_REQUIRE([AC_ARG_PROGRAM])dnl
+AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}])
+AM_MISSING_PROG([AUTOCONF], [autoconf])
+AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}])
+AM_MISSING_PROG([AUTOHEADER], [autoheader])
+AM_MISSING_PROG([MAKEINFO], [makeinfo])
+AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([AC_PROG_MAKE_SET])dnl
+AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+	      [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+			     [_AM_PROG_TAR([v7])])])
+_AM_IF_OPTION([no-dependencies],,
+[AC_PROVIDE_IFELSE([AC_PROG_CC],
+		  [_AM_DEPENDENCIES([CC])],
+		  [m4_define([AC_PROG_CC],
+			     m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_CXX],
+		  [_AM_DEPENDENCIES([CXX])],
+		  [m4_define([AC_PROG_CXX],
+			     m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJC],
+		  [_AM_DEPENDENCIES([OBJC])],
+		  [m4_define([AC_PROG_OBJC],
+			     m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
+		  [_AM_DEPENDENCIES([OBJCXX])],
+		  [m4_define([AC_PROG_OBJCXX],
+			     m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
+])
+AC_REQUIRE([AM_SILENT_RULES])dnl
+dnl The testsuite driver may need to know about EXEEXT, so add the
+dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen.  This
+dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
+AC_CONFIG_COMMANDS_PRE(dnl
+[m4_provide_if([_AM_COMPILER_EXEEXT],
+  [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
+  fi
+fi
+dnl The trailing newline in this macro's definition is deliberate, for
+dnl backward compatibility and to allow trailing 'dnl'-style comments
+dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
+])
+
+dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion: its
+dnl definition is extended to also provide _AM_COMPILER_EXEEXT.  Do not add the conditional right here, as _AC_COMPILER_EXEEXT may be further
+dnl mangled by Autoconf and run in a shell conditional statement.
+m4_define([_AC_COMPILER_EXEEXT],
+m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
+
+# When config.status generates a header, we must update the stamp-h file.
+# This file resides in the same directory as the config header
+# that is generated.  The stamp files are numbered (stamp-h1, stamp-h2, ...) by the 1-based position of the header in $config_headers, to have different names.
+
+# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
+# loop where config.status creates the headers, so we can generate
+# our stamp files there.  Its single argument is the header being created.
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Define $install_sh, unless it is already set, and AC_SUBST it.  The value runs install-sh from $am_aux_dir via ${SHELL}; the path is single-quoted when $am_aux_dir contains a space or a tab.
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+AC_SUBST([install_sh])])
+
+# Copyright (C) 2003-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# Probe whether the file system accepts names that start with a dot
+# (MS-DOS, for instance, does not) by trying to create directory .tst.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+# am__leading_dot is '.' when .tst could be created, '_' otherwise.
+AS_IF([test -d .tst],
+      [am__leading_dot=.],
+      [am__leading_dot=_])
+# The probe directory is removed again before the result is substituted.
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Check to see how 'make' treats includes.	            -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAKE_INCLUDE()
+# -----------------
+# Check whether make has an 'include' directive that can support all the idioms we need for our automatic dependency tracking code.
+# Sets and substitutes am__include ('include', '.include', or '#' when neither style works) and am__quote ('"' for the BSD style, empty otherwise); both AC_SUBST calls belong to the macro body.
+AC_DEFUN([AM_MAKE_INCLUDE],
+[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
+  AS_CASE([$?:`cat confinc.out 2>/dev/null`],
+      ['0:this is the am__doit target'],
+      [AS_CASE([$s],
+          [BSD], [am__include='.include' am__quote='"'],
+          [am__include='include' am__quote=''])])
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+AC_MSG_RESULT([${_am_result}])
+AC_SUBST([am__include])dnl
+AC_SUBST([am__quote])])
+
+# Fake the existence of programs that GNU maintainers use.  -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MISSING_PROG(NAME, PROGRAM) -- unless NAME is already set, set it to PROGRAM prefixed with $am_missing_run; then AC_SUBST it.
+# ------------------------------
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far (as ${SHELL} running $am_aux_dir/missing) and test if it is modern enough, i.e. whether it accepts --is-lightweight.
+# If it is, set am_missing_run to use it, otherwise, to nothing (and emit a warning).
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling.                     -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME) -- expand to _AM_OPTION_ followed by NAME with every character outside a-z, A-Z, 0-9 and _ replaced by an underscore.
+# -----------------------
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME.  Presently that only means defining the m4 macro named by _AM_MANGLE_OPTION(NAME) to 1, as a flag for this option.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a whitespace-separated list of Automake options; _AM_SET_OPTION is applied to each of them.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Expand to IF-SET if the flag macro of OPTION (see _AM_MANGLE_OPTION) is set according to m4_ifset, to IF-NOT-SET otherwise.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC to automatically call this.
+# The result is cached in am_cv_prog_cc_c_o; when it is not 'yes', CC is prefixed with the 'compile' script from $am_aux_dir.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility: AM_PROG_CC_C_O merely requires AC_PROG_CC, which this file redefines to invoke _AM_PROG_CC_C_O.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND in a subshell with its stdout and stderr sent to the log, save the exit status in ac_status, log it, and yield that same status.
+# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane.    -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK -- reject unsafe directory names and verify that a newly created file is not older than $srcdir/configure.
+# ---------------
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+	&& test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Enable less verbose build rules; with the default set to DEFAULT ("yes" being less verbose, "no" or empty being verbose).
+# The --enable-silent-rules and --disable-silent-rules options override DEFAULT.  Substitutes AM_V, AM_DEFAULT_V, AM_DEFAULT_VERBOSITY and AM_BACKSLASH.
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in "make install-strip" (INSTALL_STRIP_PROGRAM is set to '$(install_sh) -c -s'), and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro has an empty definition here; it is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE; forwards all of its arguments to it.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of 'v7', 'ustar', or 'pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+# For 'ustar' and 'pax' the tool that worked is cached in am_cv_prog_tar_FORMAT.
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection); the value is then 'unknown'.
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+# Finally, read the package's own acinclude.m4.
+m4_include([acinclude.m4])
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile
new file mode 100755
index 000000000..99e50524b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile
@@ -0,0 +1,348 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+# Conversion type used by func_file_conv; empty until first determined there.
+file_conv=
+
+# func_file_conv build_file lazy
+# Store in $file the $host form of the $build file name BUILD_FILE.
+# Only Windows hosts are supported, and only absolute names that are
+# not UNC names are touched.  LAZY is a comma separated list of
+# conversion types (mingw, cygwin, wine) for which nothing is done.
+func_file_conv ()
+{
+  file=$1
+
+  # Relative and UNC names are passed through unchanged.
+  case $file in
+    / | /[!/]*) ;;
+    *) return 0 ;;
+  esac
+
+  # Work out the conversion type once; later calls reuse $file_conv.
+  if test -z "$file_conv"; then
+    case `uname -s` in
+      MINGW*)  file_conv=mingw ;;
+      CYGWIN*) file_conv=cygwin ;;
+      *)       file_conv=wine ;;
+    esac
+  fi
+
+  # Convert, unless the conversion type in effect is listed in LAZY.
+  case $file_conv/,$2, in
+    *,$file_conv,*)
+      # Listed in LAZY: leave $file alone.
+      ;;
+    mingw/*)
+      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+      ;;
+    cygwin/*)
+      file=`cygpath -m "$file" || echo "$file"`
+      ;;
+    wine/*)
+      file=`winepath -w "$file" || echo "$file"`
+      ;;
+  esac
+}
+
+# func_cl_dashL linkdir
+# Add LINKDIR, converted by func_file_conv, to the cl library search
+# path: append it to $lib_path and to $linker_opts as -LIBPATH.
+func_cl_dashL ()
+{
+  func_file_conv "$1"
+  case $lib_path in
+    '') lib_path=$file ;;
+    *)  lib_path="$lib_path;$file" ;;
+  esac
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl: search the ';'-separated
+# directories of $lib_path and $LIB and leave the result in $lib.
+func_cl_dashl ()
+{
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    # The ';' separator is only needed to split the list above.
+    IFS=$save_IFS
+    # Per directory, try NAME.dll.lib (only when $shared is true),
+    # then NAME.lib, then libNAME.a.
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      lib=$dir/$lib.dll.lib
+    elif test -f "$dir/$lib.lib"; then
+      lib=$dir/$lib.lib
+    elif test -f "$dir/lib$lib.a"; then
+      lib=$dir/lib$lib.a
+    else
+      # Nothing usable here, try the next directory.
+      continue
+    fi
+    found=yes
+    break
+  done
+  IFS=$save_IFS
+
+  # Not found anywhere: fall back to NAME.lib.
+  test "$found" = yes || lib=$lib.lib
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl: rewrite the arguments one by one, then exec the resulting command (this function does not return).
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:
+  linker_opts=
+  for arg
+  do
+    if test -n "$eat"; then
+      eat=
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"
+	      shift
+	      ;;
+	    *)
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  eat=1
+	  func_cl_dashL "$2"
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  shared=false
+	  ;;
+	-Wl,*)
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    shift
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  exit 1
+}
+
+eat=  # non-empty while the next argument has already been consumed as an operand
+
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
+  icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac
+
+ofile=  # operand of '-o' when it names a .o or .obj file
+cfile=  # most recent '.c' argument seen
+
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2
+	    ;;
+	  *)
+	    set x "$@" -o "$2"
+	    shift  # drop the dummy "x" that kept "$@" from being read as set options
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  shift  # discard the argument just examined; kept arguments were re-appended to "$@"
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+
+# Object file the compiler is expected to create: the source name minus
+# any leading directory or drive letter, with '.c' replaced by '.o'.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Take a lock directory named after the expected object file, since that
+# is what matters with a parallel build.  '[/\\:.-]' are mapped to '_' so
+# the lock never gets the same name as the .o file itself.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+# Retry once per second until mkdir succeeds.
+until mkdir "$lockdir" >/dev/null 2>&1; do
+  sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile, keeping its status for our own exit code.
+"$@"
+ret=$?
+
+# Rename the first existing candidate (NAME.o, then NAME.obj) to $ofile.
+for made in "$cofile" "${cofile}bj"; do
+  if test -f "$made"; then
+    test "$made" = "$ofile" || mv "$made" "$ofile"
+    break
+  fi
+done
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess
new file mode 100755
index 000000000..256083a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess
@@ -0,0 +1,1476 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+#
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`  # $0 with everything up to the last '/' removed
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Every branch exits or breaks, so only $1 is examined.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Stop parsing; "-" stays in "$@" and is rejected by the check below.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+if test $# != 0; then  # any argument still left in "$@" is an error
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15  # exit with status 1 on signals 1, 2 and 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.  The snippet below is run
+# with eval "$set_cc_for_build" and empties itself, so later evals do nothing.
+# It keeps $CC_FOR_BUILD if set, else uses the deprecated $HOST_CC (the
+# historical name of CC_FOR_BUILD), else $CC, else the first of cc gcc c89 c99
+# that compiles a one-line file, else the placeholder no_compiler_found.
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > "$dummy.c" ;
+	for c in cc gcc c89 c99 ; do
+	  if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+	     CC_FOR_BUILD="$c"; break ;
+	  fi ;
+	done ;
+	if test x"$CC_FOR_BUILD" = x ; then
+	  CC_FOR_BUILD=no_compiler_found ;
+	fi
+	;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+	PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+case "$UNAME_SYSTEM" in
+Linux|GNU|GNU/*)
+	# Start from glibc; this default stands whenever the preprocessor probe
+	# below yields no LIBC= line (e.g. no compiler).  We could probably try harder.
+	LIBC=gnu
+
+	eval "$set_cc_for_build"
+	cat <<-EOF > "$dummy.c"
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`"  # run the LIBC=... line that survives preprocessing
+
+	# If ldd exists, use it to detect musl libc.
+	if command -v ldd >/dev/null && \
+		ldd --version 2>&1 | grep -q ^musl
+	then
+	    LIBC=musl
+	fi
+	;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format.  This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name.  We always set it to "unknown".
+	sysctl="sysctl -n hw.machine_arch"
+	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
+	    "/sbin/$sysctl" 2>/dev/null || \
+	    "/usr/sbin/$sysctl" 2>/dev/null || \
+	    echo unknown)`
+	case "$UNAME_MACHINE_ARCH" in
+	    armeb) machine=armeb-unknown ;;
+	    arm*) machine=arm-unknown ;;
+	    sh3el) machine=shl-unknown ;;
+	    sh3eb) machine=sh-unknown ;;
+	    sh5el) machine=sh5le-unknown ;;
+	    earmv*)
+		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
+		machine="${arch}${endian}"-unknown
+		;;
+	    *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
+	esac
+	# The Operating System including object format, if it has switched
+	# to ELF recently (or will in the future) and ABI.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		os=netbsdelf
+		;;
+	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+		eval "$set_cc_for_build"
+		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+			| grep -q __ELF__
+		then
+		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+		    # Return netbsd for either.  FIX?
+		    os=netbsd
+		else
+		    os=netbsdelf
+		fi
+		;;
+	    *)
+		os=netbsd
+		;;
+	esac
+	# Determine ABI tags.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
+		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
+		;;
+	esac
+	# The OS release
+	# Debian GNU/NetBSD machines have a different userland, and
+	# thus, need a distinct triplet. However, they do not need
+	# kernel version information, so it can be replaced with a
+	# suitable tag, in the style of linux-gnu.
+	case "$UNAME_VERSION" in
+	    Debian*)
+		release='-gnu'
+		;;
+	    *)
+		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
+		;;
+	esac
+	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+	# contains redundant information, the shorter form:
+	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+	echo "$machine-${os}${release}${abi}"
+	exit ;;
+    *:Bitrig:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
+	exit ;;
+    *:OpenBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
+	exit ;;
+    *:LibertyBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
+	exit ;;
+    *:MidnightBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
+	exit ;;
+    *:ekkoBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
+	exit ;;
+    *:SolidBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
+	exit ;;
+    macppc:MirBSD:*:*)
+	echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:MirBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:Sortix:*:*)
+	echo "$UNAME_MACHINE"-unknown-sortix
+	exit ;;
+    *:Redox:*:*)
+	echo "$UNAME_MACHINE"-unknown-redox
+	exit ;;
+    mips:OSF1:*.*)
+        echo mips-dec-osf1
+        exit ;;
+    alpha:OSF1:*:*)
+	case $UNAME_RELEASE in
+	*4.0)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+		;;
+	*5.*)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		;;
+	esac
+	# According to Compaq, /usr/sbin/psrinfo has been available on
+	# OSF/1 and Tru64 systems produced since 1995.  I hope that
+	# covers most systems running today.  This code pipes the CPU
+	# types through head -n 1, so we only detect the type of CPU 0.
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case "$ALPHA_CPU_TYPE" in
+	    "EV4 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "EV4.5 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "LCA4 (21066/21068)")
+		UNAME_MACHINE=alpha ;;
+	    "EV5 (21164)")
+		UNAME_MACHINE=alphaev5 ;;
+	    "EV5.6 (21164A)")
+		UNAME_MACHINE=alphaev56 ;;
+	    "EV5.6 (21164PC)")
+		UNAME_MACHINE=alphapca56 ;;
+	    "EV5.7 (21164PC)")
+		UNAME_MACHINE=alphapca57 ;;
+	    "EV6 (21264)")
+		UNAME_MACHINE=alphaev6 ;;
+	    "EV6.7 (21264A)")
+		UNAME_MACHINE=alphaev67 ;;
+	    "EV6.8CB (21264C)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8AL (21264B)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8CX (21264D)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.9A (21264/EV69A)")
+		UNAME_MACHINE=alphaev69 ;;
+	    "EV7 (21364)")
+		UNAME_MACHINE=alphaev7 ;;
+	    "EV7.9 (21364A)")
+		UNAME_MACHINE=alphaev79 ;;
+	esac
+	# A Pn.n version is a patched version.
+	# A Vn.n version is a released version.
+	# A Tn.n version is a released field test version.
+	# A Xn.n version is an unreleased experimental baselevel.
+	# 1.2 uses "1.2" for uname -r.
+	echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`"
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
+    Amiga*:UNIX_System_V:4.0:*)
+	echo m68k-unknown-sysv4
+	exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-amigaos
+	exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-morphos
+	exit ;;
+    *:OS/390:*:*)
+	echo i370-ibm-openedition
+	exit ;;
+    *:z/VM:*:*)
+	echo s390-ibm-zvmoe
+	exit ;;
+    *:OS400:*:*)
+	echo powerpc-ibm-os400
+	exit ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+	echo arm-acorn-riscix"$UNAME_RELEASE"
+	exit ;;
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
+	echo arm-unknown-riscos
+	exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+	echo hppa1.1-hitachi-hiuxmpp
+	exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+	if test "`(/bin/universe) 2>/dev/null`" = att ; then
+		echo pyramid-pyramid-sysv3
+	else
+		echo pyramid-pyramid-bsd
+	fi
+	exit ;;
+    NILE*:*:*:dcosx)
+	echo pyramid-pyramid-svr4
+	exit ;;
+    DRS?6000:unix:4.0:6*)
+	echo sparc-icl-nx6
+	exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+	case `/usr/bin/uname -p` in
+	    sparc) echo sparc-icl-nx7; exit ;;
+	esac ;;
+    s390x:SunOS:*:*)
+	echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4H:SunOS:5.*:*)
+	echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+	echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux"$UNAME_RELEASE"
+	exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+	eval "$set_cc_for_build"
+	SUN_ARCH=i386
+	# If there is a compiler, see if it is configured for 64-bit objects.
+	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+	# This test works for both compilers.
+	if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		SUN_ARCH=x86_64
+	    fi
+	fi
+	echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:6*:*)
+	# According to config.sub, this is the proper way to canonicalize
+	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+	# it's likely to be more like Solaris than SunOS4.
+	echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:*:*)
+	case "`/usr/bin/arch -k`" in
+	    Series*|S4*)
+		UNAME_RELEASE=`uname -v`
+		;;
+	esac
+	# Japanese Language versions have a version number like `4.1.3-JL'.
+	echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`"
+	exit ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos"$UNAME_RELEASE"
+	exit ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos"$UNAME_RELEASE"
+		;;
+	    sun4)
+		echo sparc-sun-sunos"$UNAME_RELEASE"
+		;;
+	esac
+	exit ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos"$UNAME_RELEASE"
+	exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+	echo m68k-milan-mint"$UNAME_RELEASE"
+	exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+	echo m68k-hades-mint"$UNAME_RELEASE"
+	exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+	echo m68k-unknown-mint"$UNAME_RELEASE"
+	exit ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix"$UNAME_RELEASE"
+	exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
+	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
+	    { echo "$SYSTEM_NAME"; exit; }
+	echo mips-mips-riscos"$UNAME_RELEASE"
+	exit ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit ;;
+    AViiON:dgux:*:*)
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+	then
+	    if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \
+	       [ "$TARGET_BINARY_INTERFACE"x = x ]
+	    then
+		echo m88k-dg-dgux"$UNAME_RELEASE"
+	    else
+		echo m88k-dg-dguxbcs"$UNAME_RELEASE"
+	    fi
+	else
+	    echo i586-dg-dgux"$UNAME_RELEASE"
+	fi
+	exit ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`"
+	exit ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval "$set_cc_for_build"
+		sed 's/^		//' << EOF > "$dummy.c"
+		#include <sys/systemcfg.h>
+
+		main()
+			{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+			}
+EOF
+		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit ;;
+    *:AIX:*:[4567])
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	if [ -x /usr/bin/lslpp ] ; then
+		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
+			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
+	echo romp-ibm-bsd4.4
+	exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+	echo romp-ibm-bsd"$UNAME_RELEASE"   # 4.3 with uname added to
+	exit ;;                             # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	case "$UNAME_MACHINE" in
+	    9000/31?)            HP_ARCH=m68000 ;;
+	    9000/[34]??)         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "$sc_cpu_version" in
+		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "$sc_kernel_bits" in
+			  32) HP_ARCH=hppa2.0n ;;
+			  64) HP_ARCH=hppa2.0w ;;
+			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
+			esac ;;
+		    esac
+		fi
+		if [ "$HP_ARCH" = "" ]; then
+		    eval "$set_cc_for_build"
+		    sed 's/^		//' << EOF > "$dummy.c"
+
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
+
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
+
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
+EOF
+		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	if [ "$HP_ARCH" = hppa2.0w ]
+	then
+	    eval "$set_cc_for_build"
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
+	    then
+		HP_ARCH=hppa2.0w
+	    else
+		HP_ARCH=hppa64
+	    fi
+	fi
+	echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
+	exit ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux"$HPUX_REV"
+	exit ;;
+    3050*:HI-UX:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+	     results, however.  */
+	  if (CPU_IS_PA_RISC (cpu))
+	    {
+	      switch (cpu)
+		{
+		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+		  default: puts ("hppa-hitachi-hiuxwe2"); break;
+		}
+	    }
+	  else if (CPU_IS_HP_MC68K (cpu))
+	    puts ("m68k-hitachi-hiuxwe2");
+	  else puts ("unknown-hitachi-hiuxwe2");
+	  exit (0);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
+		{ echo "$SYSTEM_NAME"; exit; }
+	echo unknown-hitachi-hiuxwe2
+	exit ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
+	echo hppa1.1-hp-bsd
+	exit ;;
+    9000/8??:4.3bsd:*:*)
+	echo hppa1.0-hp-bsd
+	exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+	echo hppa1.0-hp-mpeix
+	exit ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
+	echo hppa1.1-hp-osf
+	exit ;;
+    hp8??:OSF1:*:*)
+	echo hppa1.0-hp-osf
+	exit ;;
+    i*86:OSF1:*:*)
+	if [ -x /usr/sbin/sysversion ] ; then
+	    echo "$UNAME_MACHINE"-unknown-osf1mk
+	else
+	    echo "$UNAME_MACHINE"-unknown-osf1
+	fi
+	exit ;;
+    parisc*:Lites*:*:*)
+	echo hppa1.1-hp-lites
+	exit ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+	echo c1-convex-bsd
+	exit ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+	echo c34-convex-bsd
+	exit ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+	echo c38-convex-bsd
+	exit ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+	echo c4-convex-bsd
+	exit ;;
+    CRAY*Y-MP:*:*:*)
+	echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*[A-Z]90:*:*:*)
+	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
+	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+	      -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*TS:*:*:*)
+	echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*T3E:*:*:*)
+	echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*SV1:*:*:*)
+	echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    *:UNICOS/mp:*:*)
+	echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    5000:UNIX_System_V:4.*:*)
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+	echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
+	exit ;;
+    sparc*:BSD/OS:*:*)
+	echo sparc-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:BSD/OS:*:*)
+	echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:FreeBSD:*:*)
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case "$UNAME_PROCESSOR" in
+	    amd64)
+		UNAME_PROCESSOR=x86_64 ;;
+	    i386)
+		UNAME_PROCESSOR=i586 ;;
+	esac
+	echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    i*:CYGWIN*:*)
+	echo "$UNAME_MACHINE"-pc-cygwin
+	exit ;;
+    *:MINGW64*:*)
+	echo "$UNAME_MACHINE"-pc-mingw64
+	exit ;;
+    *:MINGW*:*)
+	echo "$UNAME_MACHINE"-pc-mingw32
+	exit ;;
+    *:MSYS*:*)
+	echo "$UNAME_MACHINE"-pc-msys
+	exit ;;
+    i*:PW*:*)
+	echo "$UNAME_MACHINE"-pc-pw32
+	exit ;;
+    *:Interix*:*)
+	case "$UNAME_MACHINE" in
+	    x86)
+		echo i586-pc-interix"$UNAME_RELEASE"
+		exit ;;
+	    authenticamd | genuineintel | EM64T)
+		echo x86_64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	    IA64)
+		echo ia64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	esac ;;
+    i*:UWIN*:*)
+	echo "$UNAME_MACHINE"-pc-uwin
+	exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+	echo x86_64-unknown-cygwin
+	exit ;;
+    prep*:SunOS:5.*:*)
+	echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    *:GNU:*:*)
+	# the GNU system
+	echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`"
+	exit ;;
+    *:GNU/*:*:*)
+	# other systems with GNU libc and userland
+	echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
+	exit ;;
+    i*86:Minix:*:*)
+	echo "$UNAME_MACHINE"-pc-minix
+	exit ;;
+    aarch64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    alpha:Linux:*:*)
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac
+	objdump --private-headers /bin/sh | grep -q ld.so.1
+	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arm*:Linux:*:*)
+	eval "$set_cc_for_build"
+	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+	    | grep -q __ARM_EABI__
+	then
+	    echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	else
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
+	    else
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
+	    fi
+	fi
+	exit ;;
+    avr32*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    cris:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    crisv32:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    e2k:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    frv:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    hexagon:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    ia64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    k1om:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m32r*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m68*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#undef CPU
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=${UNAME_MACHINE}el
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=${UNAME_MACHINE}
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
+	test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
+	;;
+    mips64el:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    openrisc*:Linux:*:*)
+	echo or1k-unknown-linux-"$LIBC"
+	exit ;;
+    or32:Linux:*:* | or1k*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    padre:Linux:*:*)
+	echo sparc-unknown-linux-"$LIBC"
+	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-"$LIBC"
+	exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+	# Look for CPU level
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
+	  PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
+	  *)    echo hppa-unknown-linux-"$LIBC" ;;
+	esac
+	exit ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-"$LIBC"
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-"$LIBC"
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-"$LIBC"
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-"$LIBC"
+	exit ;;
+    riscv32:Linux:*:* | riscv64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+	echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
+	exit ;;
+    sh64*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sh*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    tile*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    vax:Linux:*:*)
+	echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
+	exit ;;
+    x86_64:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    xtensa*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:DYNIX/ptx:4*:*)
+	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+	# earlier versions are messed up and put the nodename in both
+	# sysname and nodename.
+	echo i386-sequent-sysv4
+	exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
+	# I just have to hope.  -- rms.
+	# Use sysv4.2uw... so that sysv4* matches it.
+	echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
+	exit ;;
+    i*86:OS/2:*:*)
+	# If we were able to find `uname', then EMX Unix compatibility
+	# is probably installed.
+	echo "$UNAME_MACHINE"-pc-os2-emx
+	exit ;;
+    i*86:XTS-300:*:STOP)
+	echo "$UNAME_MACHINE"-unknown-stop
+	exit ;;
+    i*86:atheos:*:*)
+	echo "$UNAME_MACHINE"-unknown-atheos
+	exit ;;
+    i*86:syllable:*:*)
+	echo "$UNAME_MACHINE"-pc-syllable
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+	echo i386-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    i*86:*DOS:*:*)
+	echo "$UNAME_MACHINE"-pc-msdosdjgpp
+	exit ;;
+    i*86:*:4.*:*)
+	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
+	fi
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6: refine the CPU from the "Machine" line of "uname -X".
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac	# any other "Machine" value leaves UNAME_MACHINE unchanged
+	echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"	# release, system and version are concatenated without separators
+	exit ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv32
+	fi
+	exit ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configure will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+	  echo i860-unknown-sysv"$UNAME_RELEASE"  # Unknown i860-SVR4
+	fi
+	exit ;;
+    mini*:CTIX:SYS*5:*)
+	# "miniframe"
+	echo m68010-convergent-sysv
+	exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+	echo m68k-convergent-sysv
+	exit ;;
+    M680?0:D-NIX:5.3:*)
+	echo m68k-diab-dnix
+	exit ;;
+    M68*:*:R3V[5678]*:*)
+	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+	OS_REL=''
+	test -r /etc/.relid \
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+	OS_REL='.3'
+	test -r /etc/.relid \
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+	echo m68k-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    mc68030:UNIX_System_V:4.*:*)
+	echo m68k-atari-sysv4
+	exit ;;
+    TSUNAMI:LynxOS:2.*:*)
+	echo sparc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    rs6000:LynxOS:2.*:*)
+	echo rs6000-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+	echo powerpc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    SM[BE]S:UNIX_SV:*:*)
+	echo mips-dde-sysv"$UNAME_RELEASE"
+	exit ;;
+    RM*:ReliantUNIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    RM*:SINIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    *:SINIX-*:*:*)
+	if uname -p 2>/dev/null >/dev/null ; then
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		echo "$UNAME_MACHINE"-sni-sysv4
+	else
+		echo ns32k-sni-sysv
+	fi
+	exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
+    *:UNIX_System_V:4*:FTX*)
+	# From Gerald Hewes <hewes@openmarket.com>.
+	# How about differentiating between stratus architectures? -djm
+	echo hppa1.1-stratus-sysv4
+	exit ;;
+    *:*:*:FTX*)
+	# From seanf@swdc.stratus.com.
+	echo i860-stratus-sysv4
+	exit ;;
+    i*86:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo "$UNAME_MACHINE"-stratus-vos
+	exit ;;
+    *:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo hppa1.1-stratus-vos
+	exit ;;
+    mc68*:A/UX:*:*)
+	echo m68k-apple-aux"$UNAME_RELEASE"
+	exit ;;
+    news*:NEWS-OS:6*:*)
+	echo mips-sony-newsos6
+	exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+	if [ -d /usr/nec ]; then
+		echo mips-nec-sysv"$UNAME_RELEASE"
+	else
+		echo mips-unknown-sysv"$UNAME_RELEASE"
+	fi
+	exit ;;
+    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
+	echo powerpc-be-beos
+	exit ;;
+    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
+	echo powerpc-apple-beos
+	exit ;;
+    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
+	echo i586-pc-beos
+	exit ;;
+    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
+	echo i586-pc-haiku
+	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-ACE:SUPER-UX:*:*)
+	echo sxace-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Rhapsody:*:*)
+	echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Darwin:*:*)
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	eval "$set_cc_for_build"
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
+	    if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_64BIT_ARCH >/dev/null
+		then
+		    case $UNAME_PROCESSOR in
+			i386) UNAME_PROCESSOR=x86_64 ;;
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		    esac
+		fi
+		# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
+		if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_PPC >/dev/null
+		then
+		    UNAME_PROCESSOR=powerpc
+		fi
+	    fi
+	elif test "$UNAME_PROCESSOR" = i386 ; then
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
+	    # that puts up a graphical alert prompting to install
+	    # developer tools.  Any system running Mac OS X 10.7 or
+	    # later (Darwin 11 and later) is required to have a 64-bit
+	    # processor. This is not true of the ARM version of Darwin
+	    # that Apple uses in portable devices.
+	    UNAME_PROCESSOR=x86_64
+	fi
+	echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
+	exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = x86; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
+	exit ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit ;;
+    NEO-*:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSE-*:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSR-*:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSV-*:NONSTOP_KERNEL:*:*)
+	echo nsv-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSX-*:NONSTOP_KERNEL:*:*)
+	echo nsx-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit ;;
+    DS/*:UNIX_System_V:*:*)
+	echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
+	exit ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = 386; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo "$UNAME_MACHINE"-unknown-plan9
+	exit ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit ;;
+    SEI:*:*:SEIUX)
+	echo mips-sei-seiux"$UNAME_RELEASE"
+	exit ;;
+    *:DragonFly:*:*)
+	echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "$UNAME_MACHINE" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`"
+	exit ;;
+    i*86:rdos:*:*)
+	echo "$UNAME_MACHINE"-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo "$UNAME_MACHINE"-pc-aros
+	exit ;;
+    x86_64:VMkernel:*:*)
+	echo "$UNAME_MACHINE"-unknown-esx
+	exit ;;
+    amd64:Isilon\ OneFS:*:*)
+	echo x86_64-unknown-onefs
+	exit ;;
+esac
+
+echo "$0: unable to guess system type" >&2	# reached only when no case arm above printed a triplet and exited
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM" in
+    mips:Linux | mips64:Linux)
+	# The MIPS GNU/Linux arm above preprocesses a probe file with $CC_FOR_BUILD; getting here means that produced no CPU, so hint at a missing compiler.
+	cat >&2 <<EOF
+
+NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
+the system type. Please install a C compiler and try again.
+EOF
+	;;
+esac
+
+cat >&2 <<EOF
+
+This script (version $timestamp), has failed to recognize the
+operating system you are using. If your script is old, overwrite *all*
+copies of config.guess and config.sub with the latest versions from:
+
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+and
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+If $0 has already been updated, send the following data and any
+information you think might be pertinent to config-patches@gnu.org to
+provide the necessary information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = "$UNAME_MACHINE"
+UNAME_RELEASE = "$UNAME_RELEASE"
+UNAME_SYSTEM  = "$UNAME_SYSTEM"
+UNAME_VERSION = "$UNAME_VERSION"
+EOF
+
+exit 1	# non-zero status: the report above went to stderr and no triplet was written to stdout
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub
new file mode 100755
index 000000000..9ccf09a7a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub
@@ -0,0 +1,1801 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`	# invocation name with everything up to the last "/" removed
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+
+Canonicalize a configuration name.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."	# text printed for --help / -h
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."	# text printed for --version / -v
+
+help="
+Try \`$me --help' for more information."	# begins with a newline; appended to the usage-error messages below
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help"
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo "$1"
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in	# exactly one argument (the configuration name) must remain after option parsing
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;	# the expected case: fall through and canonicalize $1
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into basic_machine (CPU-COMPANY) and os (-OS or -KERNEL-OS, if any).
+# Here we must recognize all the valid KERNEL-OS combinations; maybe_os is the last two dash-separated words of $1.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
+  kopensolaris*-gnu* | cloudabi*-eabi* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os	# two-word KERNEL-OS: keep both words as the os part
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`	# everything before those two words
+    ;;
+  android-linux)
+    os=-linux-android	# swap the two words into KERNEL-OS order
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown	# company is forced to "unknown"
+    ;;
+  *)
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`	# drop the last dash-separated word, if there is one
+    if [ "$basic_machine" != "$1" ]
+    then os=`echo "$1" | sed 's/.*-/-/'`	# something was dropped: that last word (with its dash) is the os
+    else os=; fi	# $1 contains no dash: there is no os part
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis | -knuth | -cray | -microblaze*)
+		os=	# the last word was a machine or manufacturer, not an OS
+		basic_machine=$1	# so the whole argument is the machine part
+		;;
+	-bluegene*)
+		os=-cnk
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=	# same treatment as the manufacturer list above
+		basic_machine=$1
+		;;
+	-scout)
+		;;	# matched only to leave os and basic_machine exactly as split
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos	# any suffix after "chorusos" is dropped
+		basic_machine=$1
+		;;
+	-chorusrdb)
+		os=-chorusrdb
+		basic_machine=$1
+		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`	# replace everything after "86-" with "pc"
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)
+		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`	# rewrite "sco3.2.N" as "sco3.2vN"
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5v6*)
+		# Keep the os as given; only the machine part is rewritten.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2	# any sco spelling not matched by an earlier arm
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)
+		os=-isc2.2	# bare "isc" gets version 2.2
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
+	-lynx*)
+		os=-lynxos
+		;;
+	-ptx*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'`	# replace everything after "86-" with "sequent"
+		;;
+	-psos*)
+		os=-psos
+		;;
+	-mint | -mint[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| aarch64 | aarch64_be \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arceb \
+	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+	| avr | avr32 \
+	| ba \
+	| be32 | be64 \
+	| bfin \
+	| c4x | c8051 | clipper \
+	| d10v | d30v | dlx | dsp16xx \
+	| e2k | epiphany \
+	| fido | fr30 | frv | ft32 \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
+	| i370 | i860 | i960 | ia16 | ia64 \
+	| ip2k | iq2000 \
+	| k1om \
+	| le32 | le64 \
+	| lm32 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64octeon | mips64octeonel \
+	| mips64orion | mips64orionel \
+	| mips64r5900 | mips64r5900el \
+	| mips64vr | mips64vrel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa32r6 | mipsisa32r6el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64r6 | mipsisa64r6el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipsr5900 | mipsr5900el \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| moxie \
+	| mt \
+	| msp430 \
+	| nds32 | nds32le | nds32be \
+	| nios | nios2 | nios2eb | nios2el \
+	| ns16k | ns32k \
+	| open8 | or1k | or1knd | or32 \
+	| pdp10 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
+	| pru \
+	| pyramid \
+	| riscv32 | riscv64 \
+	| rl78 | rx \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+	| ubicom32 \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+	| visium \
+	| wasm32 \
+	| x86 | xc16x | xstormy16 | xtensa \
+	| z8k | z80)
+		basic_machine=$basic_machine-unknown
+		;;
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	leon|leon[3-9])
+		basic_machine=sparc-$basic_machine
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65)
+		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| aarch64-* | aarch64_be-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* | avr32-* \
+	| ba-* \
+	| be32-* | be64-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
+	| c8051-* | clipper-* | craynv-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| e2k-* | elxsi-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
+	| i*86-* | i860-* | i960-* | ia16-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| k1om-* \
+	| le32-* | le64-* \
+	| lm32-* \
+	| m32c-* | m32r-* | m32rle-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+	| microblaze-* | microblazeel-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64octeon-* | mips64octeonel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64r5900-* | mips64r5900el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa32r6-* | mipsisa32r6el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64r6-* | mipsisa64r6el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipsr5900-* | mipsr5900el-* \
+	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
+	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
+	| nios-* | nios2-* | nios2eb-* | nios2el-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
+	| or1k*-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+	| pru-* \
+	| pyramid-* \
+	| riscv32-* | riscv64-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
+	| tron-* \
+	| ubicom32-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
+	| visium-* \
+	| wasm32-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
+	| xstormy16-* | xtensa*-* \
+	| ymp-* \
+	| z8k-* | z80-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-pc
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aros)
+		basic_machine=i386-pc
+		os=-aros
+		;;
+	asmjs)
+		basic_machine=asmjs-unknown
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	bluegene*)
+		basic_machine=powerpc-ibm
+		os=-cnk
+		;;
+	c54x-*)
+		basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	cegcc)
+		basic_machine=arm-unknown
+		os=-cegcc
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16 | cr16-*)
+		basic_machine=cr16-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	dicos)
+		basic_machine=i686-pc
+		os=-dicos
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2*)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	e500v[12])
+		basic_machine=powerpc-unknown
+		os=$os"spe"
+		;;
+	e500v[12]-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=$os"spe"
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+	i*86v32)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	leon-*|leon[3-9]-*)
+		basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'`
+		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	microblaze*)
+		basic_machine=microblaze-xilinx
+		;;
+	mingw64)
+		basic_machine=x86_64-pc
+		os=-mingw64
+		;;
+	mingw32)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mips3*-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	moxiebox)
+		basic_machine=moxie-unknown
+		os=-moxiebox
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	ms1-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'`
+		;;
+	msys)
+		basic_machine=i686-pc
+		os=-msys
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next)
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	nsv-tandem)
+		basic_machine=nsv-tandem
+		;;
+	nsx-tandem)
+		basic_machine=nsx-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	openrisc | openrisc-*)
+		basic_machine=or32-unknown
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc | ppcbe)	basic_machine=powerpc-unknown
+		;;
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rdos | rdos64)
+		basic_machine=x86_64-pc
+		os=-rdos
+		;;
+	rdos32)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
+	simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tile*)
+		basic_machine=$basic_machine-unknown
+		os=-linux-gnu
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	x64)
+		basic_machine=x86_64-pc
+		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'`
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+
+# Here we handle the default manufacturer of certain CPU types.  It is in
+# some cases the only manufacturer, in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in	# rewrites $basic_machine in place when an alias matches
+	*-digital*)	# "digital" and anything after it is replaced by "dec"
+		basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)	# "commodore" and anything after it is replaced by "cbm"
+		basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'`
+		;;
+	*)	# every other name is left unchanged
+		;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]	# an OS was given: canonicalize it or reject it
+then
+case $os in
+	# First match some system type aliases that might get confused
+	# with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-auroraux)
+		os=-auroraux
+		;;
+	-solaris1 | -solaris1.*)
+		os=`echo "$os" | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo "$os" | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# es1800 is here to avoid being matched by es* (a different OS)
+	-es1800*)
+		os=-ose
+		;;
+	# Now accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST end in a * to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+	      | -sym* | -kopensolaris* | -plan9* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* | -aros* | -cloudabi* | -sortix* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \
+	      | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
+	      | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \
+	      | -midnightbsd*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo "$os" | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -xray | -os68k* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)
+		os=`echo "$os" | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)
+		os=`echo "$os" | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+	-os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2)
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo "$os" | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+	-tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-dicos*)
+		os=-dicos
+		;;
+	-pikeos*)
+		# Until real need of OS specific support for
+		# particular features comes up, bare metal
+		# configurations are quite functional.
+		case $basic_machine in
+		    arm*)
+			os=-eabi
+			;;
+		    *)
+			os=-elf
+			;;
+		esac
+		;;
+	-nacl*)
+		;;
+	-ios)
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo "$os" | sed 's/[^-]*-//'`
+		echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else	# no OS was given: pick a default from the machine name below
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or put another way, the most popular os provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+	score-*)
+		os=-elf
+		;;
+	spu-*)
+		os=-elf
+		;;
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	c8051-*)
+		os=-elf
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
+		;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+	mep-*)
+		os=-elf
+		;;
+	mips*-cisco)
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	pru-*)
+		os=-elf
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-ibm)
+		os=-aix
+		;;
+	*-knuth)
+		os=-mmixware
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next)
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)
+		os=-none
+		;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer.  We pick the logical manufacturer.
+vendor=unknown	# default, kept when no OS pattern below matches
+case $basic_machine in
+	*-unknown)	# only names whose manufacturer is still "unknown" are adjusted
+		case $os in
+			-riscix*)
+				vendor=acorn
+				;;
+			-sunos*)
+				vendor=sun
+				;;
+			-cnk*|-aix*)
+				vendor=ibm
+				;;
+			-beos*)
+				vendor=be
+				;;
+			-hpux*)
+				vendor=hp
+				;;
+			-mpeix*)
+				vendor=hp
+				;;
+			-hiux*)
+				vendor=hitachi
+				;;
+			-unos*)
+				vendor=crds
+				;;
+			-dgux*)
+				vendor=dg
+				;;
+			-luna*)
+				vendor=omron
+				;;
+			-genix*)
+				vendor=ns
+				;;
+			-mvs* | -opened*)
+				vendor=ibm
+				;;
+			-os400*)
+				vendor=ibm
+				;;
+			-ptx*)
+				vendor=sequent
+				;;
+			-tpf*)
+				vendor=ibm
+				;;
+			-vxsim* | -vxworks* | -windiss*)
+				vendor=wrs
+				;;
+			-aux*)
+				vendor=apple
+				;;
+			-hms*)
+				vendor=hitachi
+				;;
+			-mpw* | -macos*)
+				vendor=apple
+				;;
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+				vendor=atari
+				;;
+			-vos*)
+				vendor=stratus
+				;;
+		esac
+		basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"`	# replaces the first "unknown" with the chosen vendor
+		;;
+esac
+
+echo "$basic_machine$os"	# print the machine part immediately followed by the OS part
+exit	# no status given: exits with the status of the echo above
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure
new file mode 100755
index 000000000..ed0b4faa0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure
@@ -0,0 +1,6161 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69 for hpl 2.3.
+#
+# Report bugs to <hpl@icl.utk.edu>.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :  # zsh that accepts `emulate sh' (tried in a subshell first)
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else  # ZSH_VERSION is unset or `emulate sh' failed: try posix mode instead
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then  # printf reproduces the test string unchanged
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else  # neither test above succeeded: fall back to /usr/ucb/echo or expr
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else  # last resort: print through expr, one line at a time
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body  # exported because the command below runs it in a child sh
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then  # only probe when PATH_SEPARATOR is not already set
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {  # sh is found with a ';'-separated PATH ...
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }  # ... and ';' is chosen only if the ':'-separated PATH fails
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;  # $0 contains a directory separator: use it as is
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry stands for the current directory
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break  # stop at the first readable match
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then  # still not a regular file: report on stderr and give up
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '  # the three prompt strings are reset to fixed values
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C  # LC_ALL and LANGUAGE are set to C and exported
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH  # the unset is tried in a subshell first
+
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255  # plain exit: the as_fn_exit helper is not defined yet at this point
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then  # no shell preselected: test this one, then search for another
+  as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+  as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+  if (eval "$as_required") 2>/dev/null; then :  # run the required checks in a subshell of the current shell
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :  # current shell passes both check sets: nothing to do
+
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  as_found=:
+  case $as_dir in #(
+	 /*)
+	   for as_base in sh bash ksh sh5; do
+	     # Try only shells that exist, to save several forks.
+	     as_shell=$as_dir/$as_base
+	     if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		    { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+		   if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  break 2
+fi
+fi
+	   done;;
+       esac
+  as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+	      { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+      if test "x$CONFIG_SHELL" != x; then :  # a candidate shell was selected: re-execute this script under it
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+    if test x$as_have_required = xno; then :  # no shell passed the required checks: explain and exit 1
+  $as_echo "$0: This script requires a shell more modern than all"
+  $as_echo "$0: the shells that I found on your system."
+  if test x${ZSH_VERSION+set} = xset ; then
+    $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu
+$0: about your system, including any error possibly output
+$0: before this message. Then install a modern shell, or
+$0: manually run the script under such a shell if you do
+$0: have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we have already done so
+  # by this point; make sure we do not try again and fall into an
+  # infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec, as that changes $0 and causes all sorts of problems
+  # (the dirname of $0 is no longer the place where we might find the
+  # original, and so on; Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='hpl'
+PACKAGE_TARNAME='hpl'
+PACKAGE_VERSION='2.3'
+PACKAGE_STRING='hpl 2.3'
+PACKAGE_BUGREPORT='hpl@icl.utk.edu'
+PACKAGE_URL=''
+
+ac_unique_file="include/hpl.h"
+# Default headers, factored out for use by most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+EGREP
+GREP
+CPP
+BLAS_LIBS
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__include
+DEPDIR
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+RANLIB
+OBJEXT
+EXEEXT
+CPPFLAGS
+LDFLAGS
+CFLAGS
+ac_ct_CC
+CC
+MPICC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL
+am__quote'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_dependency_tracking
+enable_silent_rules
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+MPICC
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_myself" : 'X\(//\)[^/]' \| \
+	 X"$as_myself" : 'X\(//\)$' \| \
+	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures hpl 2.3 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking ...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/hpl]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then  # some form of --help was requested: print the package-specific part
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of hpl 2.3:";;  # title line only for the short/recursive forms
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-dependency-tracking
+                          do not reject slow dependency extractors
+  --disable-dependency-tracking
+                          speeds up one-time build
+  --enable-silent-rules   less verbose build output (undo: "make V=1")
+  --disable-silent-rules  verbose build output (undo: "make V=0")
+
+Some influential environment variables:
+  MPICC       MPI C compiler command
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  CPP         C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <hpl@icl.utk.edu>.
+_ACEOF
+ac_status=$?  # status of the cat above; becomes the exit code of the help path further down
+fi
+
+if test "$ac_init_help" = "recursive"; then  # --help=recursive: also show the help of every sub-package
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue  # leading ":" keeps the list non-empty; skip it
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue  # directory exists neither here nor under $srcdir: nothing to report
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;  # sub-dir is the top dir itself
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`  # strip a leading "./" (or ".\")
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in  # derive the source-dir variants matching the build-dir ones computed above
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }  # remember the failure but keep going with the next dir
+    # Check for guested configure.
+    if test -f "$ac_srcdir/configure.gnu"; then  # configure.gnu is tried before plain configure
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?  # record a non-zero status from whichever branch ran
+    cd "$ac_pwd" || { ac_status=$?; break; }  # cannot return to the start dir: stop iterating
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status  # help was printed above: stop here with the recorded status
+if $ac_init_version; then  # ac_init_version holds a command name (set outside this chunk) that is run as the condition
+  cat <<\_ACEOF
+hpl configure 2.3
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit  # version banner printed; nothing else to do
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{  # Runs $ac_compile on conftest.$ac_ext; $1 = caller line number used in log messages.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  rm -f conftest.$ac_objext  # a stale object must not make the final -s test pass
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;  # command has quote/backquote/backslash: defer expansion to the eval below
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5  # log the command about to be run (fd 5 is config.log)
+  (eval "$ac_compile") 2>conftest.err  # run in a subshell, capturing stderr
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines starting with optional blanks and "+" (presumably shell trace noise)
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :  # success = exit 0, no stderr when werror flag is set, non-empty object
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5  # copy the failing source into the log, each line prefixed with "| "
+
+	ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes the function return ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
+{  # Runs $ac_link on conftest.$ac_ext; $1 = caller line number used in log messages.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  rm -f conftest.$ac_objext conftest$ac_exeext  # remove stale outputs so the -s test below is meaningful
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;  # command has quote/backquote/backslash: defer expansion to the eval below
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5  # log the command about to be run (fd 5 is config.log)
+  (eval "$ac_link") 2>conftest.err  # run in a subshell, capturing stderr
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines starting with optional blanks and "+" (presumably shell trace noise)
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :  # executable bit is only required when not cross compiling
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5  # copy the failing source into the log, each line prefixed with "| "
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes the function return ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{  # $1 = caller line number, $2 = function name to probe, $3 = name of the cache variable receiving yes/no.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }  # fd 5 is config.log; fd 6 is opened outside this chunk (presumably the user-visible stream)
+if eval \${$3+:} false; then :  # true only when the variable named by $3 is already set
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :  # the function counts as present when the probe program links
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3  # read back the (fresh or cached) answer for reporting
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+
+} # ac_fn_c_check_func
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{  # Runs "$ac_cpp conftest.$ac_ext"; $1 = caller line number used in log messages.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;  # command has quote/backquote/backslash: defer expansion to the eval below
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5  # log the command about to be run (fd 5 is config.log)
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err  # stderr captured; stdout goes to conftest.i via the group redirect below
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines starting with optional blanks and "+" (presumably shell trace noise)
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :  # success = exit 0 and, if either warn/werror flag is set, empty stderr
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5  # copy the failing source into the log, each line prefixed with "| "
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes the function return ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{  # $1 = caller line number, $2 = header name, $3 = cache variable name, $4 = prerequisite include text.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  if eval \${$3+:} false; then :  # cache variable already set: only report it, run no tests
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # compile test uses the prerequisite includes from $4
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :  # preprocessor-only test, without the $4 prerequisites
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ------------------------------ ##
+## Report this to hpl@icl.utk.edu ##
+## ------------------------------ ##"
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"  # in every disagreement case the compiler verdict is the one stored
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
+{  # Links conftest.$ac_ext with $ac_link, then executes ./conftest$ac_exeext; $1 = caller line number.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;  # command has quote/backquote/backslash: defer expansion to the eval below
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5  # unlike the try_compile/try_link helpers, stderr goes straight to the log
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5  # second stage: actually run the freshly linked program
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5  # copy the failing source into the log, each line prefixed with "| "
+
+       ac_retval=$ac_status  # propagate the failing status itself rather than a fixed 1
+fi
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes the function return ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{  # $1 = caller line number, $2 = header name, $3 = cache variable name, $4 = prerequisite include text.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an outer as_lineno if set; save a restore command
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :  # true only when the variable named by $3 is already set
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # header is usable when this tiny translation unit compiles
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3  # read back the (fresh or cached) answer for reporting
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore saved state; unset as_lineno only when the stack is empty
+
+} # ac_fn_c_check_header_compile
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log  # from here on fd 5 appends to config.log (the file was just truncated by the cat above)
+{  # everything printed inside this group is sent to the log via the >&5 on the closing brace
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR  # split $PATH on the platform separator for the loop below
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS  # restore normal splitting inside the loop body
+  test -z "$as_dir" && as_dir=.  # an empty PATH element means the current directory
+    $as_echo "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=  # final, de-duplicated, single-quoted argument list
+ac_configure_args0=  # pass 1: every kept argument
+ac_configure_args1=  # pass 2: arguments seen so far in this pass
+ac_must_keep_next=false  # true when the previous option expects a separate value word
+for ac_pass in 1 2
+do
+  for ac_arg  # no "in" list: iterates over the positional parameters of the script
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;  # escape embedded apostrophes for later single-quoting
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;  # same argument occurs again later: drop this earlier copy
+	    esac
+	    ;;
+	  -* ) ac_must_keep_next=true ;;  # any other option: keep the following word as its value
+	esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}  # scratch lists are no longer needed
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	$as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0  # condition 0 = run on shell exit: dump state to the log, remove temp files, exit with the saved status
+for ac_signal in 1 2 13 15; do  # signal numbers whose handler records the number and exits
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal  # the number is spliced in now; as_fn_exit is defined outside this chunk
+done
+ac_signal=0  # 0 = no signal caught; the exit trap tests this value
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h  # start from a clean slate before any test files are written
+
+$as_echo "/* confdefs.h */" > confdefs.h  # create the file; the PACKAGE_* defines are appended below
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE  # NONE is the sentinel the loop below skips
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  # We do not want a PATH search for config.site.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;  # bare name: prefix ./ so sourcing it does not search PATH
+  esac
+elif test "x$prefix" != xNONE; then  # an explicit prefix was chosen: look under it
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5  # keep a copy of the site script in the log
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+done
+
+if test -r "$cache_file"; then  # readable cache: source it if it is a regular file; otherwise create it empty
+  # Some versions of bash will fail to source /dev/null (special files
+  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;  # absolute path (POSIX or drive-letter form)
+      *)                      . "./$cache_file";;  # relative: prefix ./ so the dot command does not search PATH
+    esac
+  fi
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file  # create/truncate the cache file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false  # holds a command name: "false" or ":"; executed by the final if
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set  # ac_cv_env_* = state recorded in the cache
+  eval ac_new_set=\$ac_env_${ac_var}_set  # ac_env_* = state of the current invocation
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	# differences in whitespace do not lead to failure.
+	ac_old_val_w=`echo x $ac_old_val`
+	ac_new_val_w=`echo x $ac_new_val`
+	if test "$ac_old_val_w" != "$ac_new_val_w"; then
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	  ac_cache_corrupted=:
+	else
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+	  eval $ac_var=\$ac_old_val  # keep the cached spelling so later comparisons stay stable
+	fi
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;  # escape apostrophes before single-quoting
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then  # runs ":" (true) if any mismatch was flagged above, else "false"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c  # test programs are written to conftest.c
+ac_cpp='$CPP $CPPFLAGS'  # single-quoted on purpose: expanded later by eval in the ac_fn_c_try_* helpers
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+ac_config_headers="$ac_config_headers include/hplconfig.h"  # append include/hplconfig.h to the config-header list
+
+
+ac_aux_dir=  # stays empty until one of the three candidate directories has an install helper
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+  if test -f "$ac_dir/install-sh"; then  # preference order: install-sh, install.sh, shtool
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
+done
+if test -z "$ac_aux_dir"; then  # no helper found in any candidate: report via as_fn_error
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
+
+
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+
+
+  _ax_prog_cc_mpi_mpi_wanted=yes  # hard-wired to yes, so the MPI compiler search below always runs
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    if test -z "$CC" && test -n "$MPICC"; then  # no CC given but MPICC is: take MPICC as the C compiler
+      CC="$MPICC"
+    else
+      if test -n "$ac_tool_prefix"; then  # first try the candidate names with the tool prefix prepended
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :  # true only when ac_cv_prog_CC is already set (cached)
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH element means the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break  # first candidate found wins
+  done
+fi
+if test -z "$CC"; then  # nothing found with the prefix (or no prefix set): retry with the bare names
+  ac_ct_CC=$CC
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :  # true only when ac_cv_prog_ac_ct_CC is already set (cached)
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH element means the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # leave both the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break  # first candidate found wins
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;  # remember the warning so it is printed only once
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+    fi
+  fi
+  ac_ext=c  # Extension of the conftest sources written by the checks below.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'  # Stored unexpanded; the checks run it through eval.
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then  # A tool prefix is set: look for ${ac_tool_prefix}gcc first.
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :  # ac_cv_prog_CC is already set: reuse it and skip the search.
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR  # Split $PATH on the path separator for the loop below.
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # An empty PATH element is searched as ".".
+    for ac_exec_ext in '' $ac_executable_extensions; do  # Bare name first, then each listed extension.
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # Leave both the extension loop and the PATH loop.
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then  # Nothing recorded for the prefixed name: search for plain "gcc".
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in  # Matches only when cross_compiling=yes and no warning was issued yet.
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then  # CC is still empty: try ${ac_tool_prefix}cc when a tool prefix is set.
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :  # ac_cv_prog_CC is already set: reuse it and skip the search.
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # An empty PATH element is searched as ".".
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # Leave both the extension loop and the PATH loop.
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then  # CC is still empty: search PATH for plain "cc", refusing /usr/ucb/cc.
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no  # Becomes yes once /usr/ucb/cc has been seen and skipped.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue  # Keep scanning; a later PATH entry may hold an acceptable cc.
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then  # Non-zero count: ac_cv_prog_CC holds at least one word.
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"  # Full path, followed by any remaining words.
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then  # CC is still empty: last candidate is cl.exe, prefixed first and then plain.
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break  # Stop at the first candidate that was found.
+  done
+fi
+if test -z "$CC"; then  # Prefixed search found nothing (or no prefix): try the unprefixed name.
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in  # Matches only when cross_compiling=yes and no warning was issued yet.
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }  # Reached only when CC is still empty.
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2  # First word of ac_compile, i.e. the literal text $CC; the eval below expands it.
+for ac_option in --version -v -V -qversion; do  # Run the compiler once per version flag; output goes to fd 5.
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then  # stderr was non-empty: copy at most its first 10 lines to fd 5.
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }  # The status is not acted upon here; every flag is tried regardless.
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files  # Restored once this check has finished.
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`  # ac_link with its "-o conftest..." argument removed.
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files  # Collect stale outputs to delete, skipping source/object/debug-style suffixes.
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''  # The trailing '' leaves ac_file empty when no candidate file exists.
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+	;;
+    [ab].out )
+	# We found the default executable, but exeext='' is most
+	# certainly right.
+	break;;
+    *.* )
+	if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+	then :; else
+	   ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	fi
+	# We set ac_cv_exeext here because the later test for it is not
+	# safe: cross compilers may not add the suffix if given an `-o'
+	# argument, so we may need to know it at that point already.
+	# Even if this section looks crufty: it has the advantage of
+	# actually working.
+	break;;
+    * )
+	break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+  ac_file=''
+fi
+if test -z "$ac_file"; then :  # Empty ac_file: the link failed or produced no recognised output.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"  # Link conftest with an explicit -o, then inspect which file appeared.
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	  break;;
+    * ) break;;
+  esac
+done
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext  # Publish the detected suffix under both names used later.
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then  # Skipped entirely when cross_compiling is already "yes".
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no  # The freshly linked program ran and exited with status 0.
+  else
+    if test "$cross_compiling" = maybe; then  # "maybe" is resolved to "yes"; any other value is a hard error.
+	cross_compiling=yes
+    else
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :  # ac_cv_objext is already set: reuse it and skip the compile.
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  for ac_file in conftest.o conftest.obj conftest.*; do  # First existing file not excluded below supplies the suffix.
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext  # Publish the detected suffix under both names used later.
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :  # Result already set: reuse it and skip the compile.
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # The test source contains invalid text unless __GNUC__ is defined.
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then  # GCC ends up as "yes" or empty.
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}  # "set" when CFLAGS was already defined (even if empty) before this check.
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"  # First attempt: compile with -g while ac_c_werror_flag=yes.
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+else
+  CFLAGS=""  # Second attempt: same program with empty CFLAGS.
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  ac_c_werror_flag=$ac_save_c_werror_flag  # Third attempt: -g again with the original werror setting.
+	 CFLAGS="-g"
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then  # CFLAGS was defined beforehand: restore it untouched.
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no  # Stays "no" unless one of the candidate options below compiles the test.
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"  # Candidate option is appended to the compiler command for this attempt.
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break  # Stop at the first option that compiled.
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"  # Make the required option a permanent part of CC.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c  # The C test-language settings are assigned twice in a row here with identical values.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
+$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
+if ${am_cv_prog_cc_c_o+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes  # Assume success; the loop flips this to "no" on the first failure.
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+$as_echo "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"  # Prefix CC with the "compile" script from $am_aux_dir.
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+# Check for compiler
+# Needs to be split off into an extra macro to ensure right expansion
+# order.
+
+
+if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then :  # MPI explicitly not wanted: record "no" and skip every probe.
+   _ax_prog_cc_mpi_mpi_found=no
+else
+
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+    # test whether MPI_Init is available
+    # We do not use AC_SEARCH_LIBS here, as it caches its outcome and
+    # thus disallows corresponding calls in the other AX_PROG_*_MPI
+    # macros.
+    for lib in NONE mpi mpich; do  # NONE links with LIBS unchanged; the other entries prepend -l$lib.
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5
+$as_echo_n "checking for function MPI_Init... " >&6; }
+      else
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5
+$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; }
+        LIBS="-l$lib $LIBS"
+      fi
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+int
+main ()
+{
+return MPI_Init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+   _ax_prog_cc_mpi_mpi_found=yes
+else
+   _ax_prog_cc_mpi_mpi_found=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5
+$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; }
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break;  # Leaves the loop before LIBS is restored, so a successful -l$lib stays in LIBS.
+      fi
+      LIBS=$save_LIBS
+    done
+
+    # Check for header
+    if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5
+$as_echo_n "checking for mpi.h... " >&6; }
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <mpi.h>
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+         _ax_prog_cc_mpi_mpi_found=no  # NOTE(review): LIBS is not restored here and may still carry -l$lib; confirm this is intended.
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :  # MPI found: record HAVE_MPI; the not-found branch is a no-op.
+
+
+$as_echo "#define HAVE_MPI 1" >>confdefs.h  # Appended to confdefs.h, which the conftest programs are built from.
+
+        :
+
+else
+
+
+        :
+
+fi
+
+
+
+if test -n "$ac_tool_prefix"; then  # A tool prefix is set: look for ${ac_tool_prefix}ranlib first.
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_RANLIB+:} false; then :  # ac_cv_prog_RANLIB is already set: reuse it and skip the search.
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # An empty PATH element is searched as ".".
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2  # Leave both the extension loop and the PATH loop.
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then  # Nothing recorded for the prefixed name: search for plain "ranlib".
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":"  # No ranlib found: fall back to the shell no-op ":".
+  else
+    case $cross_compiling:$ac_tool_warned in  # Matches only when cross_compiling=yes and no warning was issued yet.
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+
+# Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has completely different semantics
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if ${ac_cv_path_install+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+  ./ | .// | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+	  if test $ac_prog = install &&
+	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # AIX install.  It has an incompatible calling convention.
+	    :
+	  elif test $ac_prog = install &&
+	    grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # program-specific install script used by HP pwplus--don't use.
+	    :
+	  else
+	    rm -rf conftest.one conftest.two conftest.dir
+	    echo one > conftest.one
+	    echo two > conftest.two
+	    mkdir conftest.dir
+	    if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+	      test -s conftest.one && test -s conftest.two &&
+	      test -s conftest.dir/conftest.one &&
+	      test -s conftest.dir/conftest.two
+	    then
+	      ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+	      break 3
+	    fi
+	  fi
+	fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+# Remove the files and directory created by the install probes above.
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test "${ac_cv_path_install+set}" = set; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+
+am__api_version='1.16'
+# Sanity checks: reject unsafe directory names, then verify that a freshly created file is not older than $srcdir/configure.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+$as_echo_n "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \	]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (e.g. FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+	&& test "$*" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+
+rm -f conftest.file
+
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Same for the suffix, anchored with the sed '$'; that $ is doubled below so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double every \ and $ in the script.
+# Also strip a trailing ';s,x,x,' command: that is the default script and does nothing.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
+
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# $MISSING embeds a literal ${SHELL}, hence the eval.  A failing --is-lightweight probe means the script is too old or missing.
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
+$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+# If no prefixed strip was selected, the unprefixed lookup below takes over.
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
+$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if ${ac_cv_path_mkdir+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in mkdir gmkdir; do
+	 for ac_exec_ext in '' $ac_executable_extensions; do
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
+	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+	     'mkdir (GNU coreutils) '* | \
+	     'mkdir (coreutils) '* | \
+	     'mkdir (fileutils) '4.1*)
+	       ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+	       break 3;;
+	   esac
+	 done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+  # A mkdir that treats --version as an operand would have created ./--version; remove it.
+  test -d ./--version && rmdir ./--version
+  if test "${ac_cv_path_mkdir+set}" = set; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+$as_echo "$MKDIR_P" >&6; }
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  # Stop at the first candidate name for which AWK ended up non-empty.
+  test -n "$AWK" && break
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+	@echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", so match the marker anywhere; '?*' requires a non-empty $(MAKE) value.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  SET_MAKE=
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# Probe whether a directory name may begin with a dot; use '_' as the prefix when it may not.
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+am__leading_dot=_
+test -d .tst && am__leading_dot=.
+rmdir .tst 2>/dev/null
+
+# Name of the dependency directory: the chosen prefix followed by "deps".
+DEPDIR="${am__leading_dot}deps"
+
+# Append 'depfiles' to the list of config commands.
+ac_config_commands="$ac_config_commands depfiles"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5
+$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; }
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make spells the directive as .include with a double-quoted file name.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) use a bare 'include' and no quotes.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
+   (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); }
+  case $?:`cat confinc.out 2>/dev/null` in #(
+  '0:this is the am__doit target') :
+    case $s in #(
+  BSD) :
+    am__include='.include' am__quote='"' ;; #(
+  *) :
+    am__include='include' am__quote='' ;;
+esac ;; #(
+  *) :
+     ;;
+esac
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
+$as_echo "${_am_result}" >&6; }
+
+# Check whether --enable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then :
+  enableval=$enable_dependency_tracking;
+fi
+# Unless tracking was disabled with 'no', point at depcomp and make the AMDEP conditional true.
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+ if test "x$enable_dependency_tracking" != xno; then
+  AMDEP_TRUE=
+  AMDEP_FALSE='#'
+else
+  AMDEP_TRUE='#'
+  AMDEP_FALSE=
+fi
+
+
+# Check whether --enable-silent-rules was given.
+if test "${enable_silent_rules+set}" = set; then :
+  enableval=$enable_silent_rules;
+fi
+# Default verbosity is 0 only for an explicit 'yes'; 'no' and any other value give 1.
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;
+esac
+am_make=${MAKE-make}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5
+$as_echo_n "checking whether $am_make supports nested variables... " >&6; }
+if ${am_cv_make_support_nested_variables+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if $as_echo 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5
+$as_echo "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# Unless CYGPATH_W is preset: use 'cygpath -w' when cygpath runs, otherwise a plain echo.
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+
+
+# Define the identity of the package.
+ PACKAGE='hpl'
+ VERSION='2.3'
+
+# Record both values in confdefs.h as C string macros.
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE "$PACKAGE"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define VERSION "$VERSION"
+_ACEOF
+
+# Some tools Automake needs.  Each keeps a preset value, else defaults to the tool name prefixed by ${am_missing_run}.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+mkdir_p='$(MKDIR_P)'
+
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
+
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
+
+
+
+
+
+depcc="$CC"   am_compiler_list=
+# Probe which depcomp mode (if any) works with $depcc; the answer is cached in am_cv_CC_dependencies_compiler_type.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
+$as_echo_n "checking dependency style of $depcc... " >&6; }
+if ${am_cv_CC_dependencies_compiler_type+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnostic changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5
+$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+# am__fastdepCC is true only when dependency tracking is not disabled and the detected mode is gcc3.
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this by setting ACCEPT_INFERIOR_RM_PROGRAM to "yes".
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
+
+
+
+
+
+
+
+
+hpl_blas_ok=no
+# Probe a fixed list of BLAS candidates in order until one of them links the probe routine.
+# Candidate N is described in hplvars.txt by its name<N>=, rout<N>= and libs<N>= lines.
+current_LIBS="$LIBS"
+# The table is a scratch file in the current directory; it is removed once the loop is done.
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+for hpl_i in 1 2 3 4 5 6 7 8 9 10;
+do
+if test  x$hpl_blas_ok = xno; then
+  name=`grep "^name${hpl_i}=" hplvars.txt | sed "s/^name${hpl_i}=//"`
+  rout=`grep "^rout${hpl_i}=" hplvars.txt | sed "s/^rout${hpl_i}=//"`
+  libs=`grep "^libs${hpl_i}=" hplvars.txt | sed "s/^libs${hpl_i}=//"`
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5
+$as_echo_n "checking for $rout in $name... " >&6; }
+  # Patterns above are quoted because a bare ^ is a pipe in traditional Bourne shells.  LIBS holds only this candidate during the link test.
+  LIBS="$libs"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $rout ();
+int
+main ()
+{
+return $rout ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  hpl_blas_ok=yes;BLAS_LIBS="$libs"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  LIBS="$current_LIBS"
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+done
+rm hplvars.txt
+
+# Fallback, run only while hpl_blas_ok is still "no": a cached link test for dgemm_ in -lopenblas.
+# NOTE(review): two "checking" messages are printed back to back below, and two results follow.
+if test  x$hpl_blas_ok = xno; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5
+$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5
+$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; }
+if ${ac_cv_lib_openblas_dgemm_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lopenblas  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dgemm_ ();
+int
+main ()
+{
+return dgemm_ ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_openblas_dgemm_=yes
+else
+  ac_cv_lib_openblas_dgemm_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5
+$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; }
+if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then :
+  hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+
+
+# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$hpl_blas_ok" = xyes; then
+        LIBS="$BLAS_LIBS $LIBS"
+        :
+else
+        hpl_blas_ok=no
+        as_fn_error $? "BLAS not found" "$LINENO" 5
+fi
+
+
+
+
+# Append "#define HAVE_DGEMM_ 1" to confdefs.h when ac_fn_c_check_func leaves ac_cv_func_dgemm_ set to yes.
+for ac_func in dgemm_
+do :
+  ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_"
+if test "x$ac_cv_func_dgemm_" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DGEMM_ 1
+_ACEOF
+fi
+done
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if ${ac_cv_prog_CPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+# Run the same two tests on the selected $CPP (cached, probed, or preset) and abort configure if they fail.
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+# Re-select the C test settings (ac_ext, ac_cpp, ac_compile, ac_link, ac_compiler_gnu).
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+		   (('a' <= (c) && (c) <= 'i') \
+		     || ('j' <= (c) && (c) <= 'r') \
+		     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+		  inttypes.h stdint.h unistd.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in mpi.h
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default"
+if test "x$ac_cv_header_mpi_h" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_MPI_H 1
+_ACEOF
+
+fi
+
+done
+
+
+ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile"
+
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    if test "x$cache_file" != "x/dev/null"; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+	cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+	  mv -f confcache "$cache_file"$$ &&
+	  mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+	  mv -f confcache "$cache_file" ;;
+	esac
+      fi
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5
+$as_echo_n "checking that generated files are newer than configure... " >&6; }
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
+$as_echo "done" >&6; }
+if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
+  as_fn_error $? "conditional \"AMDEP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+ if test -n "$EXEEXT"; then
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+      --header=FILE[:TEMPLATE]
+                   instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration commands:
+$config_commands
+
+Report bugs to <hpl@icl.utk.edu>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+hpl config.status 2.3
+configured by $0, generated by GNU Autoconf 2.69,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=?*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    $as_echo "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --he | --h)
+    # Conflict between --help and --header
+    as_fn_error $? "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;;
+    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+    "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;;
+
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp= ac_tmp=
+  trap 'exit_status=$?
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=[	 ]*/{
+h
+s///
+s/^/:/
+s/[	 ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[	 ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[	 ]*#[	 ]*define[	 ][	 ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  for (key in D) D_is_set[key] = 1
+  FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  prefix = substr(line, 1, index(line, defundef) - 1)
+  if (D_is_set[macro]) {
+    # Preserve the white space surrounding the "#".
+    print prefix "define", macro P[macro] D[macro]
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", prefix defundef, macro, "*/"
+      next
+    }
+  }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$ac_tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+	  $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+	`' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&2;}
+
+  rm -f "$ac_tmp/stdin"
+  case $ac_file in
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+  :H)
+  #
+  # CONFIG_HEADER
+  #
+  if test x"$ac_file" != x-; then
+    {
+      $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+$as_echo "$as_me: $ac_file is unchanged" >&6;}
+    else
+      rm -f "$ac_file"
+      mv "$ac_tmp/config.h" "$ac_file" \
+	|| as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    fi
+  else
+    $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
+  fi
+# Compute "$ac_file"'s index in $config_headers.
+_am_arg="$ac_file"
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
+$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$_am_arg" : 'X\(//\)[^/]' \| \
+	 X"$_am_arg" : 'X\(//\)$' \| \
+	 X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$_am_arg" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`/stamp-h$_am_stamp_count
+ ;;
+
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  case $CONFIG_FILES in #(
+  *\'*) :
+    eval set x "$CONFIG_FILES" ;; #(
+  *) :
+    set x $CONFIG_FILES ;; #(
+  *) :
+     ;;
+esac
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know have understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`$as_dirname -- "$am_mf" ||
+$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$am_mf" : 'X\(//\)[^/]' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$am_mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    am_filepart=`$as_basename -- "$am_mf" ||
+$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$am_mf" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    { echo "$as_me:$LINENO: cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles" >&5
+   (cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+  { am_dirpart=; unset am_dirpart;}
+  { am_filepart=; unset am_filepart;}
+  { am_mf=; unset am_mf;}
+  { am_rc=; unset am_rc;}
+  rm -f conftest-deps.mk
+}
+ ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac
new file mode 100644
index 000000000..eb91dc590
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac
@@ -0,0 +1,34 @@
+AC_PREREQ([2.69])
+
+AC_INIT([hpl], [2.3], [hpl@icl.utk.edu])
+AC_CONFIG_SRCDIR([include/hpl.h])
+AC_CONFIG_HEADERS([include/hplconfig.h])
+
+AX_PROG_CC_MPI
+
+AC_PROG_RANLIB
+
+AC_PROG_INSTALL
+
+AM_INIT_AUTOMAKE([subdir-objects])
+
+AM_PROG_CC_C_O
+
+dnl BLAS detection.  AX_BLAS is left disabled: it requires a Fortran
+dnl compiler and detects the Fortran libraries in $FLIBS; its call would be
+dnl   AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS")
+dnl HPL_BLAS (defined outside this file) is used instead; its arguments are
+dnl presumably action-if-found and action-if-not-found, as for AX_BLAS.
+
+HPL_BLAS([LIBS="$BLAS_LIBS $LIBS"], [AC_MSG_ERROR([BLAS not found])])
+
+dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ...
+dnl FIXME: test for GSL CBLAS
+
+AC_CHECK_FUNCS([dgemm_])
+
+AC_CHECK_HEADERS([mpi.h])
+
+AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile])
+
+AC_OUTPUT
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp
new file mode 100755
index 000000000..65cbf7093
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp
@@ -0,0 +1,791 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+  '')
+    echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+    exit 1;
+    ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by 'PROGRAMS ARGS'.
+  object      Object file output by 'PROGRAMS ARGS'.
+  DEPDIR      directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+  libtool     Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit $?
+    ;;
+esac
+
+# set_dir_from PATH: compute the directory component of PATH and save
+# it in the global variable '$dir'.  The result is either empty (PATH
+# contains no '/') or ends with a '/' character.  This is deliberate.
+set_dir_from ()
+{
+  case $1 in
+    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
+      *) dir=;;
+  esac
+}
+
+# set_base_from PATH: strip the leading directories and the final
+# '.suffix' from PATH, and save the result in the global variable '$base'.
+set_base_from ()
+{
+  base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
+}
+
+# If no dependency file was actually created by the compiler invocation,
+# we still have to create a dummy depfile (the single line '#dummy' in
+# "$depfile") to avoid errors with the Makefile "include basename.Plo" scheme.
+make_dummy_depfile ()
+{
+  echo "#dummy" > "$depfile"
+}
+
+# Factor out some common post-processing of the generated depfile.
+# Uses the globals '$tmpdepfile', '$depfile', '$object', '$lower' and '$tab'.
+aix_post_process_depfile ()
+{
+  # If the compiler actually managed to produce a dependency file,
+  # post-process it into "$depfile" and delete it; else write a dummy.
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form 'foo.o: dependency.h'.
+    # Do two passes, one to just change these to
+    #   $object: dependency.h
+    # and one to simply output
+    #   dependency.h:
+    # which is needed to avoid the deleted-header problem.
+    { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
+      sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
+    } > "$depfile"
+    rm -f "$tmpdepfile"
+  else
+    make_dummy_depfile
+  fi
+}
+
+# A tabulation character.
+tab='	'
+# A newline character.
+nl='
+'
+# Character ranges might be problematic outside the C locale.
+# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
+lower=abcdefghijklmnopqrstuvwxyz
+digits=0123456789
+alpha=${upper}${lower}
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interference from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+  # This is just like dashmstdout with a different argument.
+  dashmflag=-xM
+  depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+  # This is just like msvisualcpp but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+  # This is just like msvc7 but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+  # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+  gccflag=-qmakedep=gcc,-MF
+  depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am.  Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+  for arg
+  do
+    case $arg in
+    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+    *)  set fnord "$@" "$arg" ;;
+    esac
+    shift # fnord
+    shift # $arg
+  done
+  "$@"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  mv "$tmpdepfile" "$depfile"
+  ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).  Also, it might not be
+##   supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The second -e expression handles DOS-style file names with drive
+  # letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+## Some versions of gcc put a space before the ':'.  On the theory
+## that the space means something, we add a space to the output as
+## well.  hp depmode also adds that space, but also prefixes the VPATH
+## to the object.  Take care to not repeat it in the output.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+sgi)
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
+    echo "$object : \\" > "$depfile"
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like '#:fec' to the end of the
+    # dependency line.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
+      | tr "$nl" ' ' >> "$depfile"
+    echo >> "$depfile"
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+      >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+xlc)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  In older versions, this file always lives in the
+  # current directory.  Also, the AIX compiler puts '$object:' at the
+  # start of each line; $object doesn't have directory information.
+  # Version 6 uses the directory in both cases.
+  set_dir_from "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$base.u
+    tmpdepfile3=$dir.libs/$base.u
+    "$@" -Wc,-M
+  else
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$dir$base.u
+    tmpdepfile3=$dir$base.u
+    "$@" -M
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  aix_post_process_depfile
+  ;;
+
+tcc)
+  # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
+  # FIXME: That version still under development at the moment of writing.
+  #        Make sure this statement remains true also for stable, released
+  #        versions.
+  # It will wrap lines (doesn't matter whether long or short) with a
+  # trailing '\', as in:
+  #
+  #   foo.o : \
+  #    foo.c \
+  #    foo.h \
+  #
+  # It will put a trailing '\' even on the last line, and will use leading
+  # spaces rather than leading tabs (at least since its commit 0394caf7
+  # "Emit spaces for -MD").
+  "$@" -MD -MF "$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
+  # We have to change lines of the first kind to '$object: \'.
+  sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
+  # And for each line of the second kind, we have to emit a 'dep.h:'
+  # dummy dependency, to avoid the deleted-header problem.
+  sed -n -e 's|^  *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+## The order of this option in the case statement is important, since the
+## shell code in configure will try each of these formats in the order
+## listed in this file.  A plain '-MD' option would be understood by many
+## compilers, so we must ensure this comes after the gcc and icc options.
+pgcc)
+  # Portland's C compiler understands '-MD'.
+  # Will always output deps to 'file.d' where file is the root name of the
+  # source file under compilation, even if file resides in a subdirectory.
+  # The object file name does not affect the name of the '.d' file.
+  # pgcc 10.2 will output
+  #    foo.o: sub/foo.c sub/foo.h
+  # and will wrap long lines using '\' :
+  #    foo.o: sub/foo.c ... \
+  #     sub/foo.h ... \
+  #     ...
+  set_dir_from "$object"
+  # Use the source, not the object, to determine the base name, since
+  # that's sadly what pgcc will do too.
+  set_base_from "$source"
+  tmpdepfile=$base.d
+
+  # For projects that build the same source file twice into different object
+  # files, the pgcc approach of using the *source* file root name can cause
+  # problems in parallel builds.  Use a locking strategy to avoid stomping on
+  # the same $tmpdepfile.
+  lockdir=$base.d-lock
+  trap "
+    echo '$0: caught signal, cleaning up...' >&2
+    rmdir '$lockdir'
+    exit 1
+  " 1 2 13 15
+  numtries=100
+  i=$numtries
+  while test $i -gt 0; do
+    # mkdir is a portable test-and-set.
+    if mkdir "$lockdir" 2>/dev/null; then
+      # This process acquired the lock.
+      "$@" -MD
+      stat=$?
+      # Release the lock.
+      rmdir "$lockdir"
+      break
+    else
+      # If the lock is being held by a different process, wait
+      # until the winning process is done or we timeout.
+      while test -d "$lockdir" && test $i -gt 0; do
+        sleep 1
+        i=`expr $i - 1`
+      done
+    fi
+    i=`expr $i - 1`
+  done
+  trap - 1 2 13 15
+  if test $i -le 0; then
+    echo "$0: failed to acquire lock after $numtries attempts" >&2
+    echo "$0: check lockdir '$lockdir'" >&2
+    exit 1
+  fi
+
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each line is of the form `foo.o: dependent.h',
+  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Do two passes, one to just change these to
+  # `$object: dependent.h' and one to simply `dependent.h:'.
+  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp2)
+  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+  # compilers, which have integrated preprocessors.  The correct option
+  # to use with these is +Maked; it writes dependencies to a file named
+  # 'foo.d', which lands next to the object file, wherever that
+  # happens to be.
+  # Much of this is similar to the tru64 case; see comments there.
+  set_dir_from  "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir.libs/$base.d
+    "$@" -Wc,+Maked
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    "$@" +Maked
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+     rm -f "$tmpdepfile1" "$tmpdepfile2"
+     exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  if test -f "$tmpdepfile"; then
+    sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
+    # Add 'dependent.h:' lines.
+    sed -ne '2,${
+               s/^ *//
+               s/ \\*$//
+               s/$/:/
+               p
+             }' "$tmpdepfile" >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile" "$tmpdepfile2"
+  ;;
+
+tru64)
+  # The Tru64 compiler uses -MD to generate dependencies as a side
+  # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
+  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+  # dependencies in 'foo.d' instead, so we check for that too.
+  # Subdirectories are respected.
+  set_dir_from  "$object"
+  set_base_from "$object"
+
+  if test "$libtool" = yes; then
+    # Libtool generates 2 separate objects for the 2 libraries.  These
+    # two compilations output dependencies in $dir.libs/$base.o.d and
+    # in $dir$base.o.d.  We have to check for both files, because
+    # one of the two compilations can be disabled.  We should prefer
+    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+    # automatically cleaned when .libs/ is deleted, while ignoring
+    # the former would cause a distcleancheck panic.
+    tmpdepfile1=$dir$base.o.d          # libtool 1.5
+    tmpdepfile2=$dir.libs/$base.o.d    # Likewise.
+    tmpdepfile3=$dir.libs/$base.d      # Compaq CCC V6.2-504
+    "$@" -Wc,-MD
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    tmpdepfile3=$dir$base.d
+    "$@" -MD
+  fi
+
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  # Same post-processing that is required for AIX mode.
+  aix_post_process_depfile
+  ;;
+
+msvc7)
+  if test "$libtool" = yes; then
+    showIncludes=-Wc,-showIncludes
+  else
+    showIncludes=-showIncludes
+  fi
+  "$@" $showIncludes > "$tmpdepfile"
+  stat=$?
+  grep -v '^Note: including file: ' "$tmpdepfile"
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The first sed program below extracts the file names and escapes
+  # backslashes for cygpath.  The second sed program outputs the file
+  # name when reading, but also accumulates all include files in the
+  # hold buffer in order to output them again at the end.  This only
+  # works with sed implementations that can handle large buffers.
+  sed < "$tmpdepfile" -n '
+/^Note: including file:  *\(.*\)/ {
+  s//\1/
+  s/\\/\\\\/g
+  p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+  s/.*/'"$tab"'/
+  G
+  p
+}' >> "$depfile"
+  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
+  rm -f "$tmpdepfile"
+  ;;
+
+msvc7msys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  test -z "$dashmflag" && dashmflag=-M
+  # Require at least two characters before searching for ':'
+  # in the target name.  This is to cope with DOS-style filenames:
+  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
+  "$@" $dashmflag |
+    sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this sed invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+
+makedepend)
+  "$@" || exit $?
+  # Remove any Libtool call
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # X makedepend
+  shift
+  cleared=no eat=no
+  for arg
+  do
+    case $cleared in
+    no)
+      set ""; shift
+      cleared=yes ;;
+    esac
+    if test $eat = yes; then
+      eat=no
+      continue
+    fi
+    case "$arg" in
+    -D*|-I*)
+      set fnord "$@" "$arg"; shift ;;
+    # Strip any option that makedepend may not understand.  Remove
+    # the object too, otherwise makedepend will parse it as a source file.
+    -arch)
+      eat=yes ;;
+    -*|$object)
+      ;;
+    *)
+      set fnord "$@" "$arg"; shift ;;
+    esac
+  done
+  obj_suffix=`echo "$object" | sed 's/^.*\././'`
+  touch "$tmpdepfile"
+  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  rm -f "$depfile"
+  # makedepend may prepend the VPATH from the source file name to the object.
+  # No need to regex-escape $object, excess matching of '.' is harmless.
+  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process the last invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed '1,2d' "$tmpdepfile" \
+    | tr ' ' "$nl" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  "$@" -E \
+    | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+             -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+    | sed '$ s: \\$::' > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  IFS=" "
+  for arg
+  do
+    case "$arg" in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+        set fnord "$@"
+        shift
+        shift
+        ;;
+    *)
+        set fnord "$@" "$arg"
+        shift
+        shift
+        ;;
+    esac
+  done
+  "$@" -E 2>/dev/null |
+  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+  echo "$tab" >> "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvcmsys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+none)
+  exec "$@"
+  ;;
+
+*)
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h
new file mode 100644
index 000000000..6d131963f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_H
+#define HPL_H
+/*
+ * ---------------------------------------------------------------------
+ * HPL default compile options that can be overridden in the Make.<arch>
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_NO_MPI_DATATYPE         /* Use MPI user-defined data type */
+#define HPL_USE_MPI_DATATYPE
+#endif
+ 
+#ifndef HPL_COPY_L  /* do not copy L, use MPI user-defined data types */
+#define HPL_NO_COPY_L
+#endif
+ 
+#ifndef HPL_DETAILED_TIMING         /* Do not enable detailed timings */
+#define HPL_NO_DETAILED_TIMING
+#endif
+ 
+#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
+#ifndef HPL_CALL_CBLAS                       /* there can be only one */
+#define HPL_CALL_FBLAS
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_timer.h"
+#include "hpl_matgen.h"
+#include "hpl_test.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+#include "hpl_ptest.h"
+
+#endif
+/*
+ * End of hpl.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h
new file mode 100644
index 000000000..861caf380
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_AUXIL_H
+#define HPL_AUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM;
+
+typedef enum
+{
+   HPL_MACH_EPS   = 900,                /* relative machine precision */
+   HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */
+   HPL_MACH_BASE  = 902,                /* base = base of the machine */
+   HPL_MACH_PREC  = 903,                          /* prec  = eps*base */
+   HPL_MACH_MLEN  = 904,   /* number of (base) digits in the mantissa */
+   HPL_MACH_RND   = 905,        /* 1.0 if rounding occurs in addition */
+   HPL_MACH_EMIN  = 906,   /* min exponent before (gradual) underflow */
+   HPL_MACH_RMIN  = 907,        /* underflow threshold base**(emin-1) */
+   HPL_MACH_EMAX  = 908,          /* largest exponent before overflow */
+   HPL_MACH_RMAX  = 909  /* overflow threshold - (base**emax)*(1-eps) */
+ 
+} HPL_T_MACH;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_fprintf
+STDC_ARGS( (
+   FILE *,
+   const char *,
+   ...
+) );
+void                             HPL_warn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_abort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_dlacpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlatcpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlaprnt
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_dlange
+STDC_ARGS( (
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+double                           HPL_dlamch
+STDC_ARGS( (
+   const HPL_T_MACH
+) );
+
+#endif
+/*
+ * End of hpl_auxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h
new file mode 100644
index 000000000..2a510471a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h
@@ -0,0 +1,630 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_BLAS_H
+#define HPL_BLAS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+
+
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+enum HPL_ORDER
+{  HplRowMajor = 101,  HplColumnMajor  = 102 };
+enum HPL_TRANS
+{  HplNoTrans  = 111,  HplTrans        = 112,  HplConjTrans    = 113 };
+enum HPL_UPLO
+{  HplUpper    = 121,  HplLower        = 122 };
+enum HPL_DIAG
+{  HplNonUnit  = 131,  HplUnit         = 132 };
+enum HPL_SIDE
+{  HplLeft     = 141,  HplRight        = 142 }; 
+
+
+#ifdef HPL_CALL_CBLAS
+
+
+/*
+ * ---------------------------------------------------------------------
+ * The C interface of the BLAS is available ...
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    CBLAS_INDEX         int
+ 
+#define    CBLAS_ORDER         HPL_ORDER
+#define    CblasRowMajor       HplRowMajor
+#define    CblasColMajor       HplColumnMajor  /* HPL_ORDER enumerator */
+ 
+#define    CBLAS_TRANSPOSE     HPL_TRANS
+#define    CblasNoTrans        HplNoTrans
+#define    CblasTrans          HplTrans
+#define    CblasConjTrans      HplConjTrans
+ 
+#define    CBLAS_UPLO          HPL_UPLO
+#define    CblasUpper          HplUpper
+#define    CblasLower          HplLower
+ 
+#define    CBLAS_DIAG          HPL_DIAG
+#define    CblasNonUnit        HplNonUnit
+#define    CblasUnit           HplUnit
+ 
+#define    CBLAS_SIDE          HPL_SIDE
+#define    CblasLeft           HplLeft
+#define    CblasRight          HplRight
+/*
+ * ---------------------------------------------------------------------
+ * CBLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+CBLAS_INDEX       cblas_idamax
+STDC_ARGS(
+(  const int,       const double *,  const int ) );
+void              cblas_dswap
+STDC_ARGS(
+(  const int,       double *,        const int,       double *,
+   const int ) );
+void              cblas_dcopy
+STDC_ARGS(
+(  const int,       const double *,  const int,       double *,
+   const int ) );
+void              cblas_daxpy
+STDC_ARGS(
+(  const int,       const double,    const double *,  const int,
+   double *,        const int ) );
+void              cblas_dscal
+STDC_ARGS(
+(  const int,       const double,    double *,        const int ) );
+
+void              cblas_dgemv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const int,       const int,       const double,    const double *,
+   const int,       const double *,  const int,       const double,
+   double *,        const int ) );
+
+void              cblas_dger
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const int,       const int,
+   const double,    const double *,  const int,       const double *,
+   const int,       double *,        const int ) );
+void              cblas_dtrsv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_UPLO,
+   const enum CBLAS_TRANSPOSE,       const enum CBLAS_DIAG,
+   const int,       const double *,  const int,       double *,
+   const int ) );
+
+void              cblas_dgemm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void              cblas_dtrsm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+void             dpcpp_dgemm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void             dpcpp_dtrsm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+/*
+ * ---------------------------------------------------------------------
+ * HPL C BLAS macro definition
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_dswap           cblas_dswap
+#define    HPL_dcopy           cblas_dcopy
+#define    HPL_daxpy           cblas_daxpy
+#define    HPL_dscal           cblas_dscal
+#define    HPL_idamax          cblas_idamax
+
+#define    HPL_dgemv           cblas_dgemv
+#define    HPL_dtrsv           cblas_dtrsv
+#define    HPL_dger            cblas_dger
+
+/* dgemm and dtrsm are mapped to the dpcpp_* routines declared above   */
+/* rather than to cblas_dgemm / cblas_dtrsm (that mapping is disabled). */
+#define    HPL_dgemm           dpcpp_dgemm
+#define    HPL_dtrsm           dpcpp_dtrsm  
+
+#endif
+
+//#define    HPL_hello           sss_gemm 
+
+#ifdef HPL_CALL_FBLAS
+/*
+ * ---------------------------------------------------------------------
+ * Use the Fortran 77 interface of the BLAS ...
+ * ---------------------------------------------------------------------
+ * Defaults: Add_, F77_INTEGER=int, StringSunStyle
+ * ---------------------------------------------------------------------
+ */
+#ifndef NoChange
+#ifndef UpCase
+#ifndef Add__
+#ifndef Add_
+
+#define Add_
+
+#endif
+#endif
+#endif
+#endif
+
+#ifndef F77_INTEGER
+#define    F77_INTEGER         int
+#else
+#define    HPL_USE_F77_INTEGER_DEF
+#endif
+
+#ifndef StringCrayStyle
+#ifndef StringStructVal
+#ifndef StringStructPtr
+#ifndef StringSunStyle
+
+#define StringSunStyle
+
+#endif
+#endif
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 <-> C interface
+ * ---------------------------------------------------------------------
+ *
+ * These macros identify how Fortran routines will be called.
+ *
+ * Add_     : the Fortran compiler expects the name of C functions to be
+ * in all lower case and to have an underscore postfixed to it (Suns,
+ * Intel compilers expect this).
+ *
+ * NoChange : the Fortran compiler expects the name of C functions to be
+ * in all lower case (IBM RS6K compilers do this).
+ *
+ * UpCase   : the Fortran compiler expects the name of C functions to be
+ * in all upper case (Cray compilers expect this).
+ *
+ * Add__    : the Fortran compiler in use is f2c, a Fortran to C conver-
+ * ter.
+ */
+#ifdef NoChange
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm(...)
+ */
+#define    F77dswap               dswap
+#define    F77dscal               dscal
+#define    F77dcopy               dcopy
+#define    F77daxpy               daxpy
+#define    F77idamax              idamax
+
+#define    F77dgemv               dgemv
+#define    F77dtrsv               dtrsv
+#define    F77dger                dger
+
+#define    F77dgemm               dgemm
+#define    F77dtrsm               dtrsm
+
+#endif
+
+#ifdef UpCase
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          DGEMM(...)
+ */
+#ifdef CRAY_BLAS
+                                                                                
+#define    F77dswap               SSWAP
+#define    F77dscal               SSCAL
+#define    F77dcopy               SCOPY
+#define    F77daxpy               SAXPY
+#define    F77idamax              ISAMAX
+                                                                                
+#define    F77dgemv               SGEMV
+#define    F77dtrsv               STRSV
+#define    F77dger                SGER
+                                                                                
+#define    F77dgemm               SGEMM
+#define    F77dtrsm               STRSM
+                                                                                
+#else
+
+#define    F77dswap               DSWAP
+#define    F77dscal               DSCAL
+#define    F77dcopy               DCOPY
+#define    F77daxpy               DAXPY
+#define    F77idamax              IDAMAX
+
+#define    F77dgemv               DGEMV
+#define    F77dtrsv               DTRSV
+#define    F77dger                DGER
+
+#define    F77dgemm               DGEMM
+#define    F77dtrsm               DTRSM
+
+#endif
+
+#endif
+
+#ifdef Add_
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+
+#endif
+
+#ifdef Add__
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+ 
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+ 
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+//#define    F77hello               sss_gemm
+ 
+#endif
+//#define    F77hello               sss_gemm
+/*
+ * ---------------------------------------------------------------------
+ * Typedef definitions and conversion utilities
+ * ---------------------------------------------------------------------
+ */
+#ifdef StringCrayStyle
+
+#include <fortran.h>
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            _fcd
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(_fcdtocp(c) ))
+#define    HPL_C2F_CHAR(c)     (_cptofcd(&(c), 1))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructVal
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c.cp))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructPtr
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c->cp))
+
+#define    F77_CHAR_DECL       F77_CHAR *        /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringSunStyle
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            char *
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c))
+#define    HPL_C2F_CHAR(c)     (&(c))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+#define    F77_1_CHAR          , F77_INTEGER
+#define    F77_2_CHAR          F77_1_CHAR F77_1_CHAR
+#define    F77_3_CHAR          F77_2_CHAR F77_1_CHAR
+#define    F77_4_CHAR          F77_3_CHAR F77_1_CHAR
+
+#endif
+/* ------------------------------------------------------------------ */
+
+#ifndef F77_1_CHAR
+#define    F77_1_CHAR
+#define    F77_2_CHAR
+#define    F77_3_CHAR
+#define    F77_4_CHAR
+#endif
+
+#define    F77_INT_DECL        const F77_INTEGER *   /* input integer */
+#define    F77_SIN_DECL        const double *         /* input scalar */
+#define    F77_VIN_DECL        const double *         /* input vector */
+#define    F77_VINOUT_DECL     double *        /* input/output vector */
+#define    F77_MIN_DECL        const double *         /* input matrix */
+#define    F77_MINOUT_DECL     double *        /* input/output matrix */
+ 
+#ifdef CRAY_PVP_ENV                      /* Type of FORTRAN functions */
+#define    F77_VOID_FUN        extern fortran void      /* subroutine */
+#define    F77_INT_FUN         extern fortran int /* integer function */
+#else
+#define    F77_VOID_FUN        extern void              /* subroutine */
+#define    F77_INT_FUN         extern int         /* integer function */
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 BLAS function prototypes
+ * ---------------------------------------------------------------------
+ */
+F77_VOID_FUN    F77dswap
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dscal
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_VOID_FUN    F77dcopy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77daxpy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_INT_FUN     F77idamax
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL ) );
+
+F77_VOID_FUN    F77dgemv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL     F77_1_CHAR ) );
+F77_VOID_FUN    F77dger
+STDC_ARGS(
+(  F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,
+   F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dtrsv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL
+   F77_3_CHAR ) );
+
+F77_VOID_FUN    F77dgemm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,
+   F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,    F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL     F77_2_CHAR ) );
+F77_VOID_FUN    F77dtrsm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,
+   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,
+   F77_INT_DECL,    F77_MINOUT_DECL, F77_INT_DECL     F77_4_CHAR ) );
+
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * HPL BLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_CALL_CBLAS
+
+int                              HPL_idamax
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int
+) );
+void                             HPL_daxpy
+STDC_ARGS( (
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dcopy
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dscal
+STDC_ARGS( (
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dswap
+STDC_ARGS( (
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dger
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dtrsv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_hello
+STDC_ARGS( (
+) );
+#endif
+void                             HPL_dtrsm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_SIDE,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+
+//#endif
+
+#endif
+/*
+ * hpl_blas.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h
new file mode 100644
index 000000000..e3ba51a57
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h
@@ -0,0 +1,161 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_COMM_H
+#define HPL_COMM_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_1RING         = 401,                        /* Increasing ring */
+   HPL_1RING_M       = 402,             /* Increasing ring (modified) */
+   HPL_2RING         = 403,                      /* Increasing 2-ring */
+   HPL_2RING_M       = 404,           /* Increasing 2-ring (modified) */
+   HPL_BLONG         = 405,                         /* long broadcast */
+   HPL_BLONG_M       = 406               /* long broadcast (modified) */
+} HPL_T_TOP;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_FAILURE            0
+#define    HPL_SUCCESS            1
+#define    HPL_KEEP_TESTING       2
+/*
+ * ---------------------------------------------------------------------
+ * comm function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_send
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_recv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_sdrv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_binit
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_bcast
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *
+) );
+int                              HPL_bwait
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_packL
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_copyL
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blong STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+
+#endif
+/*
+ * End of hpl_comm.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h
new file mode 100644
index 000000000..ce671cf2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h
@@ -0,0 +1,87 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GESV_H
+#define HPL_GESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
+   HPL_CROUT         = 302,                  /* Crout lu fact variant */
+   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
+} HPL_T_FACT;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void              HPL_dgesv
+STDC_ARGS(
+(  const int,       const int,       const int,       const HPL_T_FACT,
+   const HPL_T_FACT,                 const int,       double *,
+   const int,       int * ) );
+void              HPL_ipid
+STDC_ARGS(
+(  const int,       double *,        int *,           int *,
+   int *,           int *,           int *,           int *,
+   const int,       const int,       const int,       const int,
+   const int ) );
+
+#endif
+/*
+ * End of hpl_gesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h
new file mode 100644
index 000000000..1895a5ed4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h
@@ -0,0 +1,212 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GRID_H
+#define HPL_GRID_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum { HPL_INT       = 100, HPL_DOUBLE       = 101 } HPL_T_TYPE;
+ 
+typedef enum
+{
+   HPL_ROW_MAJOR     = 201,
+   HPL_COLUMN_MAJOR  = 202
+} HPL_T_ORDER;
+
+typedef struct HPL_S_grid
+{
+   MPI_Comm        all_comm;                     /* grid communicator */
+   MPI_Comm        row_comm;                      /* row communicator */
+   MPI_Comm        col_comm;                   /* column communicator */
+   HPL_T_ORDER     order;        /* ordering of the procs in the grid */
+   int             iam;                        /* my rank in the grid */
+   int             myrow;                /* my row number in the grid */
+   int             mycol;             /* my column number in the grid */
+   int             nprow;          /* the total # of rows in the grid */
+   int             npcol;       /* the total # of columns in the grid */
+   int             nprocs;        /* the total # of procs in the grid */
+   int             row_ip2;          /* largest power of two <= nprow */
+   int             row_hdim;     /* row_ip2 procs hypercube dimension */
+   int             row_ip2m1;      /* largest power of two <= nprow-1 */
+   int             row_mask;        /* row_ip2m1 procs hypercube mask */
+   int             col_ip2;          /* largest power of two <= npcol */
+   int             col_hdim;     /* col_ip2 procs hypercube dimension */
+   int             col_ip2m1;      /* largest power of two <= npcol-1 */
+   int             col_mask;        /* col_ip2m1 procs hypercube mask */
+} HPL_T_grid;
+
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_OP)
+(  const int,       const void *,    void *,          const HPL_T_TYPE );
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_2_MPI_TYPE( typ ) /* HPL_INT -> MPI_INT, else MPI_DOUBLE */ \
+                           ( ( (typ) == HPL_INT ) ? MPI_INT : MPI_DOUBLE )
+/*
+ * The following macros perform common modulo operations;  All functions
+ * except MPosMod assume arguments are < d (i.e., arguments are themsel-
+ * ves within modulo range).
+ */
+      /* increment I in place, wrapping d to 0; expands to a bare `if' */
+#define    MModInc(I, d)       if(++(I) == (d)) (I) = 0
+   /* decrement I in place, wrapping -1 to d-1; expands to a bare `if' */
+#define    MModDec(I, d)       if(--(I) == -1) (I) = (d)-1
+                                  /* positive modulo: I - (I/d)*d      */
+#define    MPosMod(I, d)       ( (I) - ((I)/(d))*(d) )
+          /* I1 + I2, minus d when the sum is not < d (args re-evaluated) */
+#define    MModAdd(I1, I2, d) \
+           ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) )
+                                        /* I + 1, with d-1 wrapping to 0 */
+#define    MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 )
+               /* I1 - I2, plus d when I1 < I2 (args re-evaluated)      */
+#define    MModSub(I1, I2, d) \
+           ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) )
+                                        /* I - 1, with 0 wrapping to d-1 */
+#define    MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 )
+/*
+ * ---------------------------------------------------------------------
+ * grid function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_grid_init
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_ORDER,
+   const int,
+   const int,
+   HPL_T_grid *
+) );
+int                              HPL_grid_exit
+STDC_ARGS( (
+   HPL_T_grid *
+) );
+
+int                              HPL_grid_info
+STDC_ARGS( (
+   const HPL_T_grid *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_pnum
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int
+) );
+
+int                              HPL_barrier
+STDC_ARGS( (
+   MPI_Comm
+) );
+int                              HPL_broadcast
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_all_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   MPI_Comm
+) );
+
+void                             HPL_max
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_min
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_sum
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+
+#endif
+/*
+ * End of hpl_grid.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h
new file mode 100644
index 000000000..de6503eea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MATGEN_H
+#define HPL_MATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MULT0         1284865837
+#define    HPL_MULT1         1481765933
+#define    HPL_IADD0         1
+#define    HPL_IADD1         0
+#define    HPL_DIVFAC        2147483648.0                     /* 2^31 */
+#define    HPL_POW16         65536.0                          /* 2^16 */
+#define    HPL_HALF          0.5
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dmatgen
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+void                             HPL_lmul
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_ladd
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_xjumpm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_setran
+STDC_ARGS( (
+   const int,
+   int *
+) );
+void                             HPL_jumpit
+STDC_ARGS( (
+   int *,
+   int *,
+   int *,
+   int *
+) );
+double                           HPL_rand STDC_ARGS( ( void ) );
+
+#endif
+/*
+ * End of hpl_matgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h
new file mode 100644
index 000000000..ea421a403
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h
@@ -0,0 +1,110 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MISC_H
+#define HPL_MISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#if defined( __STDC__ ) || defined( __cplusplus )
+#define STDC_HEADERS   /* ISO C, or any C++ (where __STDC__ is optional) */
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef STDC_HEADERS
+#include <stdarg.h>
+#define STDC_ARGS(p)           p           /* keep the prototype list */
+#else
+#include <varargs.h>
+#define STDC_ARGS(p)           ()          /* K&R: drop the prototype */
+#endif
+
+#ifdef HPL_CALL_VSIPL
+#include <vsip.h>
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_rone             1.0
+#define    HPL_rtwo             2.0
+#define    HPL_rzero            0.0
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    Mabs( a_ )          ( ( (a_) <   0  ) ? -(a_) : (a_) )
+#define    Mmin( a_, b_ )      ( ( (a_) < (b_) ) ?  (a_) : (b_) )
+#define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
+/* Integer floor/ceil of a/b, b > 0 assumed; Mceil needs a >= 0. */
+#define    Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b))))
+#define    Mceil(a,b)           ( ( (a)+(b)-1 ) / (b) )
+#define    Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b))))
+/* Case macros test the ASCII codes 97..122 (a-z) and 65..90 (A-Z). */
+#define    Mupcase(C)          (((C)>96 && (C)<123) ? (C) & 0xDF : (C))
+#define    Mlowcase(C)         (((C)>64 && (C)< 91) ? (C) | 32   : (C))
+/*
+ * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and
+ * also less silly errors ...
+ */
+#define    Mptr( a_, i_, j_, lda_ ) \
+   ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) )
+/*
+ * Round address ptr_ up to a multiple of al_; the result is an integer.
+ */
+#define    HPL_PTR( ptr_, al_ ) \
+                      ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) 
+#endif
+/*
+ * End of hpl_misc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h
new file mode 100644
index 000000000..d5ba2939c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PANEL_H
+#define HPL_PANEL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_panel
+{
+   struct HPL_S_grid   * grid;             /* ptr to the process grid */
+   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
+   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
+   double              * A;              /* ptr to trailing part of A */
+   double              * WORK;                          /* work space */
+   double              * L2;                              /* ptr to L */
+   double              * L1;       /* ptr to jb x jb upper block of A */
+   double              * DPIV;    /* ptr to replicated jb pivot array */
+   double              * DINFO;      /* ptr to replicated scalar info */
+   double              * U;                               /* ptr to U */
+   int                 * IWORK;     /* integer workspace for swapping */
+   void                * * * buffers[2];   /* buffers for panel bcast */
+   int                 counts [2];          /* counts for panel bcast */
+   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
+   MPI_Request         request[1];        /* requests for panel bcast */
+   MPI_Status          status [1];          /* status for panel bcast */
+   int                 nb;            /* distribution blocking factor */
+   int                 jb;                             /* panel width */
+   int                 m;   /* global # of rows of trailing part of A */
+   int                 n;   /* global # of cols of trailing part of A */
+   int                 ia;  /* global row index of trailing part of A */
+   int                 ja;  /* global col index of trailing part of A */
+   int                 mp;   /* local # of rows of trailing part of A */
+   int                 nq;   /* local # of cols of trailing part of A */
+   int                 ii;   /* local row index of trailing part of A */
+   int                 jj;   /* local col index of trailing part of A */
+   int                 lda;           /* local leading dim of array A */
+   int                 prow;  /* proc. row owning 1st row of trail. A */
+   int                 pcol;  /* proc. col owning 1st col of trail. A */
+   int                 msgid;           /* message id for panel bcast */
+   int                 ldl2;         /* local leading dim of array L2 */
+   int                 len;      /* length of the buffer to broadcast */
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * Ablock;                           /* A block */
+   vsip_block_d        * L1block;                         /* L1 block */
+   vsip_block_d        * L2block;                         /* L2 block */
+   vsip_block_d        * Ublock;                           /* U block */
+#endif
+} HPL_T_panel;
+
+/*
+ * ---------------------------------------------------------------------
+ * panel function prototypes
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pgesv.h"
+
+void                             HPL_pdpanel_new
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel * *
+) );
+void                             HPL_pdpanel_init
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel *
+) );
+int                              HPL_pdpanel_disp
+STDC_ARGS( (
+   HPL_T_panel * *
+) );
+int                              HPL_pdpanel_free
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+
+#endif
+/*
+ * End of hpl_panel.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h
new file mode 100644
index 000000000..1fd0ee457
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h
@@ -0,0 +1,505 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PAUXIL_H
+#define HPL_PAUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Mindxg2p sets proc_ to the process coordinate owning the entry
+ * globally indexed by ig_.
+ */
+#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
+           {                                                           \
+              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
+                  ( (nprocs_) > 1 ) )                                  \
+              {                                                        \
+                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 proc_ = (src_);                                       \
+              }                                                        \
+           }
+/* Mindxg2l: il_ <- local index for global index ig_; proc_ is unused. */
+#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*( j__ - i__ ) +                           \
+                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
+                         (ig_) - (inb_) : (ig_) );                     \
+              }                                                        \
+           }
+/* Mindxg2lp: sets il_ exactly as Mindxg2l does, and also sets proc_. */
+#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
+           {                                                           \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) )                                 \
+              { il_ = (ig_); proc_ = (src_); }                         \
+              else                                                     \
+              {                                                        \
+                 int i__, j__;                                         \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*(j__-i__) +                               \
+                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
+                         (ig_) - (inb_) : (ig_) );                     \
+                 proc_  = (src_) + 1 + i__;                            \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+           }
+/*
+ * Mindxl2g computes the global index ig_ corresponding to the local
+ * index il_ in process proc_.
+ */
+#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 if( (proc_) == (src_) )                               \
+                 {                                                     \
+                    if( (il_) < (inb_) ) ig_ = (il_);                  \
+                    else                 ig_ = (il_) +                 \
+                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
+                 }                                                     \
+                 else if( (proc_) < (src_) )                           \
+                 {                                                     \
+                    ig_ = (il_) + (inb_) +                             \
+                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
+                                   (proc_)-(src_)-1+(nprocs_) );       \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    ig_ =  (il_) + (inb_) +                            \
+                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
+                           (proc_)-(src_)-1 );                         \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 ig_ = (il_);                                          \
+              }                                                        \
+           }
+/*
+ * MnumrocI computes the # of local indexes  np_ residing in the process
+ * of coordinate  proc_  corresponding to the interval of global indexes
+ * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
+ * src_,  and that the indexes are distributed from src_ using the para-
+ * meters inb_, nb_ and nprocs_.
+ */
+#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
+           {                                                           \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
+                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
+                 {                                                     \
+                    nblk__ = (-inb__) / (nb_) + 1;                     \
+                    src__  = (src_) + nblk__;                          \
+                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
+                    inb__ += nblk__*(nb_);                             \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == src__ ) np_ = (n_);              \
+                       else                   np_ = 0;                 \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+                 else                                                  \
+                 {                                                     \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == (src_) ) np_ = (n_);             \
+                       else                    np_ = 0;                \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+              }                                                        \
+              else                                                     \
+              {                                                        \
+                 np_ = (n_);                                           \
+              }                                                        \
+           }
+
+#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
+           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) /* MnumrocI with its 3rd argument fixed to 0 */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_indxg2lp
+STDC_ARGS( (
+   int *,
+   int *,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2l
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2p
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxl2g
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_infog2l
+STDC_ARGS( (
+   int,
+   int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_numroc
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_numrocI
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+
+void                             HPL_dlaswp00N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp10N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp01N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp01T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp02N
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp03N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp03T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp04N
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp04T
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp06N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp06T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+
+void                             HPL_pabort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pwarn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pdlaprnt
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_pdlamch
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_MACH
+) );
+double                           HPL_pdlange
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pauxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h
new file mode 100644
index 000000000..09eee79ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PFACT_H
+#define HPL_PFACT_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_PFA_FUN)               /* panel fact function ptr */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_RFA_FUN)           /* recursive fact function ptr */
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_UPD_FUN)                   /* update function ptr */
+(  HPL_T_panel *,   int *,           HPL_T_panel *,   const int ); 
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dlocmax
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_dlocswpN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_dlocswpT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdmxswp
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdrpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdfact
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+#endif
+/*
+ * End of hpl_pfact.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h
new file mode 100644
index 000000000..3ca576c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h
@@ -0,0 +1,346 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PGESV_H
+#define HPL_PGESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+#include "hpl_comm.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum                           /* swapping algorithm selection */
+{
+   HPL_SWAP00        = 451,                      /* Use HPL_pdlaswp00 */
+   HPL_SWAP01        = 452,                      /* Use HPL_pdlaswp01 */
+   HPL_SW_MIX        = 453, /* Use HPL_pdlaswp00_ for small number of */
+                            /* columns, and HPL_pdlaswp01_ otherwise. */
+   HPL_NO_SWP        = 499
+} HPL_T_SWAP;
+
+typedef struct HPL_S_palg
+{
+   HPL_T_TOP           btopo;               /* row broadcast topology */
+   int                 depth;                     /* look-ahead depth */
+   int                 nbdiv;            /* recursive division factor */
+   int                 nbmin;         /* recursion stopping criterion */
+   HPL_T_FACT          pfact;                   /* panel fact variant */
+   HPL_T_FACT          rfact;               /* recursive fact variant */
+   HPL_T_PFA_FUN       pffun;              /* panel fact function ptr */
+   HPL_T_RFA_FUN       rffun;          /* recursive fact function ptr */
+   HPL_T_UPD_FUN       upfun;                      /* update function */
+   HPL_T_SWAP          fswap;                   /* Swapping algorithm */
+   int                 fsthr;                   /* Swapping threshold */
+   int                 equil;                        /* Equilibration */
+   int                 align;              /* data alignment constant */
+} HPL_T_palg;
+
+typedef struct HPL_S_pmat
+{
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * block;     /* built only with HPL_CALL_VSIPL */
+#endif
+   double              * A;            /* pointer to local piece of A */
+   double              * X;             /* pointer to solution vector */
+   int                 n;                      /* global problem size */
+   int                 nb;                         /* blocking factor */
+   int                 ld;                 /* local leading dimension */
+   int                 mp;                    /* local number of rows */
+   int                 nq;                 /* local number of columns */
+   int                 info;                    /* computational flag */
+} HPL_T_pmat;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    MSGID_BEGIN_PFACT   1001              /* message id ranges */
+#define    MSGID_END_PFACT     2000
+#define    MSGID_BEGIN_FACT    2001
+#define    MSGID_END_FACT      3000
+#define    MSGID_BEGIN_PTRSV   3001
+#define    MSGID_END_PTRSV     4000
+ 
+#define    MSGID_BEGIN_COLL    9001
+#define    MSGID_END_COLL     10000
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    MNxtMgid( id_, beg_, end_ ) \
+                             (( (id_)+1 > (end_) ?  (beg_) : (id_)+1 )) /* id_+1, or beg_ if that exceeds end_; id_ is expanded twice */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pipid
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   int *
+) );
+void                             HPL_plindx0
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdlaswp00N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp00T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_perm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_logsort
+STDC_ARGS( (
+   const int,
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx10
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx1
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_spreadN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_spreadT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_equil
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_TRANS,
+   const int,
+   double *,
+   const int,
+   int *,
+   const int *,
+   const int *,
+   int *
+) );
+void                             HPL_rollN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_rollT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_pdlaswp01N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp01T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdupdateNN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateNT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdgesv0
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK1
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK2
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+ 
+void                             HPL_pdtrsv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_pmat *
+) );
+
+#endif
+/*
+ * End of hpl_pgesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h
new file mode 100644
index 000000000..1091b0f60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h
@@ -0,0 +1,77 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMATGEN_H
+#define HPL_PMATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_matgen.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdmatgen
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pmatgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h
new file mode 100644
index 000000000..23550d47b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h
@@ -0,0 +1,59 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMISC_H
+#define HPL_PMISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "mpi.h"
+
+#endif
+/*
+ * End of hpl_pmisc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h
new file mode 100644
index 000000000..5777bd536
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTEST_H
+#define HPL_PTEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_test  /* tolerances, output stream, test counters */
+{
+   double              epsil;                      /* machine epsilon */
+   double              thrsh;                            /* threshold */
+   FILE *              outfp;       /* output stream (only in proc 0) */
+   int                 kfail;                    /* # of tests failed */
+   int                 kpass;                    /* # of tests passed */
+   int                 kskip;                   /* # of tests skipped */
+   int                 ktest;                /* total number of tests */
+} HPL_T_test;
+
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants for testing only
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_LINE_MAX         256 /* presumably max input line length - confirm */
+#define    HPL_MAX_PARAM         20 /* presumably max values per parameter - confirm */
+#define    HPL_ISEED            100 /* presumably random generator seed - confirm */
+/*
+ * ---------------------------------------------------------------------
+ * global timers for timing analysis only
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_DETAILED_TIMING /* ids below exist only when this is defined */
+#define    HPL_TIMING_BEG        11 /* first id, same value as HPL_TIMING_RPFACT; timer 0 reserved, used by main */
+#define    HPL_TIMING_N           6 /* number of timers defined below (ids 11 through 16) */
+#define    HPL_TIMING_RPFACT     11 /* starting from here, contiguous */
+#define    HPL_TIMING_PFACT      12
+#define    HPL_TIMING_MXSWP      13
+#define    HPL_TIMING_UPDATE     14
+#define    HPL_TIMING_LASWP      15
+#define    HPL_TIMING_PTRSV      16
+#endif /* HPL_DETAILED_TIMING */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdinfo  /* 27 unnamed, non-const pointer parameters */
+STDC_ARGS( (
+   HPL_T_test *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_ORDER *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   HPL_T_TOP *,
+   int *,
+   int *,
+   HPL_T_SWAP *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdtest  /* NOTE(review): the two const ints are presumably N and NB - confirm */
+STDC_ARGS( (
+   HPL_T_test *,
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_ptest.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h
new file mode 100644
index 000000000..43c8fe33a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h
@@ -0,0 +1,96 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTIMER_H
+#define HPL_PTIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NPTIMER             64 /* presumably number of timer slots - confirm */
+#define    HPL_PTIMER_STARTFLAG   5.0 /* floating-point sentinel; its use is not visible in this header */
+#define    HPL_PTIMER_ERROR      -1.0 /* presumably the value reported on error - confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum /* time kind (wall/CPU) taken by HPL_ptimer_inquire and HPL_ptimer_combine */
+{  HPL_WALL_PTIME = 101, HPL_CPU_PTIME  = 102 } HPL_T_PTIME;
+
+typedef enum /* operation selector (AMAX/AMIN/SUM) taken by HPL_ptimer_combine */
+{ HPL_AMAX_PTIME  = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME  = 203 }
+HPL_T_PTIME_OP;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_ptimer_cputime   STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_walltime  STDC_ARGS(     ( void      ) );
+
+void            HPL_ptimer           STDC_ARGS(     ( const int ) );
+void            HPL_ptimer_boot      STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_combine   /* MPI_Comm comes from mpi.h, included via hpl_pmisc.h */
+STDC_ARGS(
+(  MPI_Comm comm,   const HPL_T_PTIME_OP,             const HPL_T_PTIME,
+   const int,       const int,       double * ) );
+void            HPL_ptimer_disable   STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_enable    STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_inquire   /* NOTE(review): the int is presumably a timer index below HPL_NPTIMER - confirm */
+STDC_ARGS(
+(  const HPL_T_PTIME,                const int ) );
+
+#endif
+/*
+ * End of hpl_ptimer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h
new file mode 100644
index 000000000..1eedc97e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h
@@ -0,0 +1,80 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TEST_H
+#define HPL_TEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_matgen.h"
+#include "hpl_timer.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_dinfo   /* 14 unnamed, non-const pointer parameters */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   HPL_T_FACT *,    int *,           int *,           int *, 
+   int *,           int *,           HPL_T_FACT *,    int *,
+   double *,        double * ) );
+void            HPL_dtest   /* NOTE(review): the three trailing int * are presumably test counters - confirm */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   HPL_T_FACT,      HPL_T_FACT,      const int,       const double,
+   const double,    int *,           int *,           int * ) );
+
+#endif
+/*
+ * End of hpl_test.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h
new file mode 100644
index 000000000..4c91700ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TIMER_H
+#define HPL_TIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NTIMER              64 /* presumably number of timer slots - confirm */
+#define    HPL_TIMER_STARTFLAG    5.0 /* floating-point sentinel; its use is not visible in this header */
+#define    HPL_TIMER_ERROR       -1.0 /* presumably the value reported on error - confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum /* time kind (wall/CPU) taken by HPL_timer_inquire */
+{  HPL_WALL_TIME = 101, HPL_CPU_TIME  = 102 } HPL_T_TIME;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_timer_cputime    STDC_ARGS(     ( void      ) );
+double          HPL_timer_walltime   STDC_ARGS(     ( void      ) );
+
+void            HPL_timer            STDC_ARGS(     ( const int ) );
+void            HPL_timer_boot       STDC_ARGS(     ( void      ) );
+void            HPL_timer_enable     STDC_ARGS(     ( void      ) );
+void            HPL_timer_disable    STDC_ARGS(     ( void      ) );
+double          HPL_timer_inquire    /* NOTE(review): the int is presumably a timer index below HPL_NTIMER - confirm */
+STDC_ARGS(
+(  const HPL_T_TIME,                 const int ) );
+
+#endif
+/*
+ * End of hpl_timer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h
new file mode 100644
index 000000000..a96956497
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h
@@ -0,0 +1,135 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_UNITS_H
+#define HPL_UNITS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MAXROUT       50 /* presumably max number of routines to test - confirm */
+#define    HPL_MAXRNAME      15 /* row width of the char [][] parameter of HPL_unit_info */
+
+#define    HPL_TRUE         'T' /* character constants, not integer 1/0 */
+#define    HPL_FALSE        'F'
+
+#define    HPL_INDXG2P_ROUT   "HPL_indxg2p" /* routine names; each is 11 chars, so it fits in HPL_MAXRNAME with its NUL */
+#define    HPL_INDXG2L_ROUT   "HPL_indxg2l"
+#define    HPL_INDXL2G_ROUT   "HPL_indxl2g"
+#define    HPL_NUMROC_ROUT    "HPL_numroc"
+#define    HPL_NUMROCI_ROUT   "HPL_numrocI"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_unit_info     /* all parameters are non-const pointers or arrays */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   int *,           int *,           int *,           int *,
+   int *,           int *,           int *,           char [][HPL_MAXRNAME],
+   int [] ) );
+ 
+void            HPL_unit_indxg2l  /* HPL_unit_* are void; HPL_chek_* return int and take a const char * after the FILE * */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2l
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxl2g
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxl2g
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxg2p
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2p
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_numroc   /* no matching HPL_chek_numroc is declared in this header */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+void            HPL_unit_numrocI
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+int             HPL_chek_numrocI
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+
+#endif
+/*
+ * End of hpl_units.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in
new file mode 100644
index 000000000..b4b3b9a35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in
@@ -0,0 +1,67 @@
+/* include/hplconfig.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if you have a BLAS library. */
+#undef HAVE_BLAS
+
+/* Define to 1 if you have the `dgemm_' function. */
+#undef HAVE_DGEMM_
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have the MPI library. */
+#undef HAVE_MPI
+
+/* Define to 1 if you have the <mpi.h> header file. */
+#undef HAVE_MPI_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Version number of package */
+#undef VERSION
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh
new file mode 100755
index 000000000..8175c640f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh
@@ -0,0 +1,518 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2018-03-11.20; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+tab='	'
+nl='
+'
+IFS=" $tab$nl"
+
+# Set DOITPROG to "echo" to test this script.
+
+doit=${DOITPROG-}  # empty unless DOITPROG is set; prefixed to most commands below
+doit_exec=${doit:-exec}  # "exec" when $doit is empty, otherwise the same as $doit
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp}
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_mkdir=  # empty = mkdir not probed yet; the probe sets it to "false" or ":"
+
+# Desired mode of installed file.
+mode=0755  # replaced by the argument of -m
+
+chgrpcmd=  # filled in by -g
+chmodcmd=$chmodprog
+chowncmd=  # filled in by -o
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd=  # filled in by -s
+
+src=
+dst=
+dir_arg=  # "true" once -d is seen
+dst_arg=  # destination: the -t argument, or else the last positional argument
+
+copy_on_change=false  # "true" once -C is seen
+is_target_a_directory=possibly  # "always" after -t, "never" after -T
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+   or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+     --help     display this help and exit.
+     --version  display version info and exit.
+
+  -c            (ignored)
+  -C            install only if different (preserve the last data modification time)
+  -d            create directories instead of installing files.
+  -g GROUP      $chgrpprog installed files to GROUP.
+  -m MODE       $chmodprog installed files to MODE.
+  -o USER       $chownprog installed files to USER.
+  -s            $stripprog installed files.
+  -t DIRECTORY  install into DIRECTORY.
+  -T            report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+  RMPROG STRIPPROG
+"
+
+while test $# -ne 0; do
+  case $1 in
+    -c) ;;  # accepted and ignored
+
+    -C) copy_on_change=true;;
+
+    -d) dir_arg=true;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift;;  # drops the option; the shift after esac then drops its argument
+
+    --help) echo "$usage"; exit $?;;
+
+    -m) mode=$2
+        case $mode in  # reject whitespace and glob characters: $mode is expanded unquoted later
+          *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
+            echo "$0: invalid mode: $mode" >&2
+            exit 1;;
+        esac
+        shift;;
+
+    -o) chowncmd="$chownprog $2"
+        shift;;
+
+    -s) stripcmd=$stripprog;;
+
+    -t)
+        is_target_a_directory=always
+        dst_arg=$2
+        # Protect names problematic for 'test' and other utilities.
+        case $dst_arg in
+          -* | [=\(\)!]) dst_arg=./$dst_arg;;
+        esac
+        shift;;
+
+    -T) is_target_a_directory=never;;
+
+    --version) echo "$0 $scriptversion"; exit $?;;
+
+    --) shift  # explicit end of options: drop the "--" itself
+        break;;
+
+    -*) echo "$0: invalid option: $1" >&2
+        exit 1;;
+
+    *)  break;;  # first non-option argument: leave it in $@
+  esac
+  shift
+done
+
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then
+  if test -n "$dst_arg"; then
+    echo "$0: target directory not allowed when installing a directory." >&2
+    exit 1
+  fi
+fi
+
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+  # With -d every remaining argument is a directory to create, and with -t
+  # the destination is already known, so this branch runs for neither.
+  # Otherwise the last argument is the destination: rotate it out of $@.
+  for arg
+  do
+    if test -n "$dst_arg"; then
+      # $@ is not empty: it contains at least $arg.
+      set fnord "$@" "$dst_arg"  # put the previous argument back, at the end
+      shift # fnord
+    fi
+    shift # arg
+    dst_arg=$arg  # after the final pass: the last argument, no longer in $@
+    # Protect names problematic for 'test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
+  done
+fi
+
+if test $# -eq 0; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call 'install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
+
+if test -z "$dir_arg"; then
+  if test $# -gt 1 || test "$is_target_a_directory" = always; then
+    if test ! -d "$dst_arg"; then
+      echo "$0: $dst_arg: Is not a directory." >&2
+      exit 1
+    fi
+  fi
+fi
+
+if test -z "$dir_arg"; then
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1  # each trap exits with 128 + the signal number
+  trap "ret=130; $do_exit" 2
+  trap "ret=141; $do_exit" 13
+  trap "ret=143; $do_exit" 15
+
+  # Set umask so as not to create temps with too-generous modes.
+  # However, 'strip' requires both read and write access to temps.
+  case $mode in
+    # Optimize common cases.
+    *644) cp_umask=133;;
+    *755) cp_umask=22;;
+
+    *[0-7])  # any other mode ending in an octal digit is treated as numeric
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw='% 200'  # leading umask digit mod 2: never mask user read/write
+      fi
+      cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;  # 7 minus each of the last three mode digits
+    *)  # anything else is passed to umask unchanged (plus ,u+rw under -s)
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw=,u+rw
+      fi
+      cp_umask=$mode$u_plus_rw;;
+  esac
+fi
+
+for src
+do
+  # Protect names problematic for 'test' and other utilities.
+  case $src in
+    -* | [=\(\)!]) src=./$src;;
+  esac
+
+  if test -n "$dir_arg"; then  # -d: the argument itself is the directory to create
+    dst=$src
+    dstdir=$dst
+    test -d "$dstdir"
+    dstdir_status=$?  # 0 when the directory already exists
+  else
+
+    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dst_arg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+    dst=$dst_arg
+
+    # If destination is a directory, append the input filename.
+    if test -d "$dst"; then
+      if test "$is_target_a_directory" = never; then  # -T was given
+        echo "$0: $dst_arg: Is a directory" >&2
+        exit 1
+      fi
+      dstdir=$dst
+      dstbase=`basename "$src"`
+      case $dst in
+	*/) dst=$dst$dstbase;;
+	*)  dst=$dst/$dstbase;;
+      esac
+      dstdir_status=0  # already verified by the test -d above
+    else
+      dstdir=`dirname "$dst"`
+      test -d "$dstdir"
+      dstdir_status=$?  # non-zero means the parent directory must be created first
+    fi
+  fi
+
+  case $dstdir in
+    */) dstdirslash=$dstdir;;
+    *)  dstdirslash=$dstdir/;;
+  esac
+
+  obsolete_mkdir_used=false
+
+  if test $dstdir_status != 0; then
+    case $posix_mkdir in
+      '')
+        # Create intermediate dirs using mode 755 as modified by the umask.
+        # This is like FreeBSD 'install' as of 1997-10-28.
+        umask=`umask`  # a plain variable holding the current umask setting
+        case $stripcmd.$umask in
+          # Optimize common cases.
+          *[2367][2367]) mkdir_umask=$umask;;  # last two digits already mask write
+          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;  # the leading "." only matches when stripcmd is empty
+
+          *[0-7])  # numeric umask: force the write bit into the last two digits (umask | 022)
+            mkdir_umask=`expr $umask + 22 \
+              - $umask % 100 % 40 + $umask % 20 \
+              - $umask % 10 % 4 + $umask % 2
+            `;;
+          *) mkdir_umask=$umask,go-w;;  # otherwise append the symbolic clause go-w
+        esac
+
+        # With -d, create the new directory with the user-specified mode.
+        # Otherwise, rely on $mkdir_umask.
+        if test -n "$dir_arg"; then
+          mkdir_mode=-m$mode
+        else
+          mkdir_mode=
+        fi
+
+        posix_mkdir=false
+        case $umask in
+          *[123567][0-7][0-7])
+            # POSIX mkdir -p sets u+wx bits regardless of umask, which
+            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+            ;;
+          *)
+            # Note that $RANDOM variable is not portable (e.g. dash);  Use it
+            # here however when possible just to lower collision chance.
+            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+
+            trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+            # Because "mkdir -p" follows existing symlinks and we likely work
+            # directly in world-writeable /tmp, make sure that the '$tmpdir'
+            # directory is successfully created first before we actually test
+            # 'mkdir -p' feature.
+            if (umask $mkdir_umask &&
+                $mkdirprog $mkdir_mode "$tmpdir" &&
+                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
+            then
+              if test -z "$dir_arg" || {
+                   # Check for POSIX incompatibilities with -m.
+                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+                   # other-writable bit of parent directory when it shouldn't.
+                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+                   test_tmpdir="$tmpdir/a"
+                   ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
+                   case $ls_ld_tmpdir in
+                     d????-?r-*) different_mode=700;;
+                     d????-?--*) different_mode=755;;
+                     *) false;;
+                   esac &&
+                   $mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
+                     ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
+                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+                   }
+                 }
+              then posix_mkdir=:
+              fi
+              rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
+            else
+              # Remove any dirs left behind by ancient mkdir implementations.
+              rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
+            fi
+            trap '' 0;;
+        esac;;
+    esac
+
+    if
+      $posix_mkdir && (
+        umask $mkdir_umask &&
+        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+      )
+    then :
+    else
+
+      # The umask is ridiculous, or mkdir does not conform to POSIX,
+      # or it failed possibly due to a race condition.  Create the
+      # directory the slow way, step by step, checking for races as we go.
+
+      case $dstdir in
+        /*) prefix='/';;
+        [-=\(\)!]*) prefix='./';;
+        *)  prefix='';;
+      esac
+
+      oIFS=$IFS
+      IFS=/
+      set -f
+      set fnord $dstdir
+      shift
+      set +f
+      IFS=$oIFS
+
+      prefixes=
+
+      for d
+      do
+        test X"$d" = X && continue  # skip empty components of the split path
+        # Grow $prefix one component at a time and create what is missing.
+        prefix=$prefix$d
+        if test -d "$prefix"; then
+          prefixes=  # this level exists: forget any prefixes queued so far
+        else
+          if $posix_mkdir; then
+            (umask $mkdir_umask &&  # a command, not an assignment: mkdir must run under this umask
+             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+            # Don't fail if two instances are running concurrently.
+            test -d "$prefix" || exit 1
+          else
+            case $prefix in
+              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+              *) qprefix=$prefix;;
+            esac
+            prefixes="$prefixes '$qprefix'"  # queued, single-quoted, for the eval'd mkdir after the loop
+          fi
+        fi
+        prefix=$prefix/
+      done
+
+      if test -n "$prefixes"; then
+        # Don't fail if two instances are running concurrently.
+        (umask $mkdir_umask &&
+         eval "\$doit_exec \$mkdirprog $prefixes") ||
+          test -d "$dstdir" || exit 1
+        obsolete_mkdir_used=true
+      fi
+    fi
+  fi
+
+  if test -n "$dir_arg"; then  # -d: the directory exists by now; only fix up ownership and mode
+    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||  # chmod is skipped unless the slow mkdir path or chown/chgrp ran
+      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+  else
+    # File install: copy to a temp name next to the destination, adjust it, then rename.
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=${dstdirslash}_inst.$$_
+    rmtmp=${dstdirslash}_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+    # Copy the source file to the temp name, with umask set to $cp_umask.
+    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $cpprog $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+    # If -C, skip the rename when ls -l fields 1, 3, 4, 5 and the contents (cmp) match.
+    if $copy_on_change &&
+       old=`LC_ALL=C ls -dlL "$dst"     2>/dev/null` &&
+       new=`LC_ALL=C ls -dlL "$dsttmp"  2>/dev/null` &&
+       set -f &&
+       set X $old && old=:$2:$4:$5:$6 &&
+       set X $new && new=:$2:$4:$5:$6 &&
+       set +f &&
+       test "$old" = "$new" &&
+       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+    then
+      rm -f "$dsttmp"
+    else
+      # Rename the file to the real destination.
+      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+      # The rename failed, perhaps because mv can't rename something else
+      # to itself, or perhaps because mv is so ancient that it does not
+      # support -f.
+      {
+        # Now remove or move aside any old file at destination location.
+        # We try this two ways since rm can't unlink itself on some
+        # systems and the destination file might be busy for other
+        # reasons.  In this case, the final cleanup might fail but the new
+        # file should still install successfully.
+        {
+          test ! -f "$dst" ||
+          $doit $rmcmd -f "$dst" 2>/dev/null ||
+          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+          } ||
+          { echo "$0: cannot unlink or rename $dst" >&2
+            (exit 1); exit 1
+          }
+        } &&
+
+        # Now rename the file to the real destination.
+        $doit $mvcmd "$dsttmp" "$dst"
+      }
+    fi || exit 1
+
+    trap '' 0  # temp files are handled: drop the exit-time cleanup trap
+  fi
+done
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+# INCdep: headers listed as prerequisites of every object rule below.
+## Object files ########################################################
+# HPL_dlamch.o has its own list; its rule uses $(CCNOOPT), not $(CCFLAGS).
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+# NOTE(review): INCdir, HPLlib, ARCHIVER, CC, ... are presumably set by Make.inc - confirm.
+## Targets #############################################################
+# "all" and "lib" only forward to the lib.grd stamp target.
+all     : lib
+#
+lib     : lib.grd
+# Add the objects to $(HPLlib), run $(RANLIB) on it, then $(TOUCH) the stamp.
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; each source sits one directory up.
+HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlacpy.c
+HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlatcpy.c
+HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_fprintf.c
+HPL_warn.o             : ../HPL_warn.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_warn.c
+HPL_abort.o            : ../HPL_abort.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_abort.c
+HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaprnt.c
+HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlange.c
+HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT)  ../HPL_dlamch.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+# INCdep: headers listed as prerequisites of every object rule below.
+## Object files ########################################################
+# HPL_blaobj: the objects the lib.grd rule adds to $(HPLlib).
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+# NOTE(review): INCdir, HPLlib, ARCHIVER, CC, ... are presumably set by Make.inc - confirm.
+## Targets #############################################################
+# "all" and "lib" only forward to the lib.grd stamp target.
+all     : lib 
+#
+lib     : lib.grd
+# Add the objects to $(HPLlib), run $(RANLIB) on it, then $(TOUCH) the stamp.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; each source sits one directory up.
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_pmisc.h   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h
+# INCdep: headers listed as prerequisites of every object rule below.
+## Object files ########################################################
+# HPL_comobj: the objects the lib.grd rule adds to $(HPLlib).
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+# NOTE(review): INCdir, HPLlib, ARCHIVER, CC, ... are presumably set by Make.inc - confirm.
+## Targets #############################################################
+# "all" and "lib" only forward to the lib.grd stamp target.
+all     : lib 
+#
+lib     : lib.grd
+# Add the objects to $(HPLlib), run $(RANLIB) on it, then $(TOUCH) the stamp.
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; each source sits one directory up.
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1ring.c
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1rinM.c
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2ring.c
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2rinM.c
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blong.c
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blonM.c
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_packL.c
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_copyL.c
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_binit.c
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bcast.c
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bwait.c
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_send.c
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_recv.c
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sdrv.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv
new file mode 100644
index 000000000..2a8722559
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv
@@ -0,0 +1,83 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below (INCdir presumably comes from Make.inc - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_gesv.h
+#
+## Object files ########################################################
+# Objects that lib.grd depends on and that its recipe archives into $(HPLlib).
+HPL_gesobj       = \
+   HPL_dgesv.o            HPL_ipid.o
+#
+## Targets #############################################################
+# "all" and "lib" are command names, not files; both only bring lib.grd up to date.
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_gesobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Compile rules: each object is built from its ../*.c source ($<) and is redone when that source or any $(INCdep) header changes.
+HPL_dgesv.o: ../HPL_dgesv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ipid.o: ../HPL_ipid.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # "clean" names a command, not a file; run it even if a file called clean exists
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below (INCdir presumably comes from Make.inc - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h
+#
+## Object files ########################################################
+# Objects that lib.grd depends on and that its recipe archives into $(HPLlib).
+HPL_griobj       = \
+   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
+   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
+   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
+   HPL_max.o              HPL_sum.o
+#
+## Targets #############################################################
+# "all" and "lib" are command names, not files; both only bring lib.grd up to date.
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Compile rules: each object is built from its ../*.c source ($<) and is redone when that source or any $(INCdep) header changes.
+HPL_grid_init.o: ../HPL_grid_init.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pnum.o: ../HPL_pnum.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_grid_info.o: ../HPL_grid_info.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_grid_exit.o: ../HPL_grid_exit.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_broadcast.o: ../HPL_broadcast.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_reduce.o: ../HPL_reduce.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_all_reduce.o: ../HPL_all_reduce.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_barrier.o: ../HPL_barrier.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_min.o: ../HPL_min.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_max.o: ../HPL_max.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_sum.o: ../HPL_sum.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # "clean" names a command, not a file; run it even if a file called clean exists
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below (INCdir presumably comes from Make.inc - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_matgen.h
+#
+## Object files ########################################################
+# Objects that lib.grd depends on and that its recipe archives into $(HPLlib).
+HPL_matobj       = \
+   HPL_dmatgen.o          HPL_ladd.o             HPL_lmul.o             \
+   HPL_xjumpm.o           HPL_jumpit.o           HPL_rand.o             \
+   HPL_setran.o
+#
+## Targets #############################################################
+# "all" and "lib" are command names, not files; both only bring lib.grd up to date.
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Compile rules: each object is built from its ../*.c source ($<) and is redone when that source or any $(INCdep) header changes.
+HPL_dmatgen.o: ../HPL_dmatgen.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ladd.o: ../HPL_ladd.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_lmul.o: ../HPL_lmul.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_xjumpm.o: ../HPL_xjumpm.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_jumpit.o: ../HPL_jumpit.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rand.o: ../HPL_rand.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_setran.o: ../HPL_setran.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # "clean" names a command, not a file; run it even if a file called clean exists
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below (INCdir presumably comes from Make.inc - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h  $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects that lib.grd depends on and that its recipe archives into $(HPLlib).
+HPL_panobj       = \
+   HPL_pdpanel_new.o      HPL_pdpanel_init.o     HPL_pdpanel_disp.o     \
+   HPL_pdpanel_free.o
+#
+## Targets #############################################################
+# "all" and "lib" are command names, not files; both only bring lib.grd up to date.
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Compile rules: each object is built from its ../*.c source ($<) and is redone when that source or any $(INCdep) header changes.
+HPL_pdpanel_new.o: ../HPL_pdpanel_new.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_init.o: ../HPL_pdpanel_init.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_disp.o: ../HPL_pdpanel_disp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanel_free.o: ../HPL_pdpanel_free.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # "clean" names a command, not a file; run it even if a file called clean exists
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below (INCdir presumably comes from Make.inc - confirm).
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+# Objects that lib.grd depends on and that its recipe archives into $(HPLlib).
+HPL_pauobj       = \
+   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
+   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
+   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
+   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
+   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
+   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
+   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
+   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+# "all" and "lib" are command names, not files; both only bring lib.grd up to date.
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Compile rules: each object is built from its ../*.c source ($<) and is redone when that source or any $(INCdep) header changes.
+HPL_indxg2l.o: ../HPL_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2lp.o: ../HPL_indxg2lp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2p.o: ../HPL_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxl2g.o: ../HPL_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_infog2l.o: ../HPL_infog2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numroc.o: ../HPL_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numrocI.o: ../HPL_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp00N.o: ../HPL_dlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp10N.o: ../HPL_dlaswp10N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01N.o: ../HPL_dlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01T.o: ../HPL_dlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp02N.o: ../HPL_dlaswp02N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03N.o: ../HPL_dlaswp03N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03T.o: ../HPL_dlaswp03T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04N.o: ../HPL_dlaswp04N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04T.o: ../HPL_dlaswp04T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05N.o: ../HPL_dlaswp05N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05T.o: ../HPL_dlaswp05T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06N.o: ../HPL_dlaswp06N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06T.o: ../HPL_dlaswp06T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pwarn.o: ../HPL_pwarn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pabort.o: ../HPL_pabort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaprnt.o: ../HPL_pdlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlamch.o: ../HPL_pdlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlange.o: ../HPL_pdlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+.PHONY           : clean   # "clean" names a command, not a file; run it even if a file called clean exists
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+# Objects the lib.grd rule passes to $(ARCHIVER) together with $(HPLlib).
+HPL_pfaobj       = \
+   HPL_dlocmax.o          HPL_dlocswpN.o         HPL_dlocswpT.o         \
+   HPL_pdmxswp.o          HPL_pdpancrN.o         HPL_pdpancrT.o         \
+   HPL_pdpanllN.o         HPL_pdpanllT.o         HPL_pdpanrlN.o         \
+   HPL_pdpanrlT.o         HPL_pdrpanllN.o        HPL_pdrpanllT.o        \
+   HPL_pdrpancrN.o        HPL_pdrpancrT.o        HPL_pdrpanrlN.o        \
+   HPL_pdrpanrlT.o        HPL_pdfact.o
+#
+## Targets #############################################################
+# all, lib and clean are command names, not files: declare them phony.
+all              : lib 
+.PHONY           : all lib clean
+lib              : lib.grd
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the lib.grd stamp.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from ../<name>.c and also depends on the $(INCdep) headers.
+HPL_dlocmax.o          : ../HPL_dlocmax.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocmax.c
+HPL_dlocswpN.o         : ../HPL_dlocswpN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpN.c
+HPL_dlocswpT.o         : ../HPL_dlocswpT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlocswpT.c
+HPL_pdmxswp.o          : ../HPL_pdmxswp.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmxswp.c
+HPL_pdpancrN.o         : ../HPL_pdpancrN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrN.c
+HPL_pdpancrT.o         : ../HPL_pdpancrT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpancrT.c
+HPL_pdpanllN.o         : ../HPL_pdpanllN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllN.c
+HPL_pdpanllT.o         : ../HPL_pdpanllT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanllT.c
+HPL_pdpanrlN.o         : ../HPL_pdpanrlN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlN.c
+HPL_pdpanrlT.o         : ../HPL_pdpanrlT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanrlT.c
+HPL_pdrpanllN.o        : ../HPL_pdrpanllN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllN.c
+HPL_pdrpanllT.o        : ../HPL_pdrpanllT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanllT.c
+HPL_pdrpancrN.o        : ../HPL_pdrpancrN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrN.c
+HPL_pdrpancrT.o        : ../HPL_pdrpancrT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpancrT.c
+HPL_pdrpanrlN.o        : ../HPL_pdrpanrlN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlN.c
+HPL_pdrpanrlT.o        : ../HPL_pdrpanrlT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdrpanrlT.c
+HPL_pdfact.o           : ../HPL_pdfact.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdfact.c
+#
+# ######################################################################
+# Run $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects the lib.grd rule passes to $(ARCHIVER) together with $(HPLlib).
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+# all, lib and clean are command names, not files: declare them phony.
+all     : lib 
+.PHONY  : all lib clean
+lib     : lib.grd
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the lib.grd stamp.
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from ../<name>.c and also depends on the $(INCdep) headers.
+HPL_pipid.o            : ../HPL_pipid.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pipid.c
+HPL_plindx0.o          : ../HPL_plindx0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx0.c
+HPL_pdlaswp00N.o       : ../HPL_pdlaswp00N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00N.c
+HPL_pdlaswp00T.o       : ../HPL_pdlaswp00T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00T.c
+HPL_perm.o             : ../HPL_perm.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_perm.c
+HPL_logsort.o          : ../HPL_logsort.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_logsort.c
+HPL_plindx10.o         : ../HPL_plindx10.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx10.c
+HPL_plindx1.o          : ../HPL_plindx1.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx1.c
+HPL_spreadN.o          : ../HPL_spreadN.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadN.c
+HPL_spreadT.o          : ../HPL_spreadT.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadT.c
+HPL_rollN.o            : ../HPL_rollN.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollN.c
+HPL_rollT.o            : ../HPL_rollT.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollT.c
+HPL_equil.o            : ../HPL_equil.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_equil.c
+HPL_pdlaswp01N.o       : ../HPL_pdlaswp01N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01N.c
+HPL_pdlaswp01T.o       : ../HPL_pdlaswp01T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01T.c
+HPL_pdupdateNN.o       : ../HPL_pdupdateNN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNN.c
+HPL_pdupdateNT.o       : ../HPL_pdupdateNT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNT.c
+HPL_pdupdateTN.o       : ../HPL_pdupdateTN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTN.c
+HPL_pdupdateTT.o       : ../HPL_pdupdateTT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTT.c
+HPL_pdtrsv.o           : ../HPL_pdtrsv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtrsv.c
+HPL_pdgesv0.o          : ../HPL_pdgesv0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv0.c
+HPL_pdgesvK1.o         : ../HPL_pdgesvK1.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK1.c
+HPL_pdgesvK2.o         : ../HPL_pdgesvK2.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK2.c
+HPL_pdgesv.o           : ../HPL_pdgesv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv.c
+#
+# ######################################################################
+# Run $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of the object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+# Object the lib.grd rule passes to $(ARCHIVER) together with $(HPLlib).
+HPL_pmaobj       = \
+   HPL_pdmatgen.o
+#
+## Targets #############################################################
+# all, lib and clean are command names, not files: declare them phony.
+all     : lib 
+.PHONY  : all lib clean
+lib     : lib.grd
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the lib.grd stamp.
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# The object is compiled from ../HPL_pdmatgen.c and also depends on the $(INCdep) headers.
+HPL_pdmatgen.o         : ../HPL_pdmatgen.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmatgen.c
+#
+# ######################################################################
+# Run $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h   \
+   $(INCdir)/hpl_gesv.h   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h  \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+   $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+# Output path given to $(LINKER) via -o in the dexe.grd rule.
+xhpl             = $(BINdir)/xhpl
+#
+## Object files ########################################################
+# Objects the dexe.grd rule links into $(xhpl).
+HPL_pteobj       = \
+   HPL_pddriver.o         HPL_pdinfo.o           HPL_pdtest.o
+#
+## Targets #############################################################
+# all, dexe and clean are command names, not files: declare them phony.
+all     : dexe
+.PHONY  : all dexe clean
+dexe    : dexe.grd
+# Run $(CP) to place ../HPL.dat in $(BINdir) when that copy is missing or older.
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Link $(xhpl), re-invoke make for the HPL.dat copy, then $(TOUCH) the dexe.grd stamp.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Each object is compiled from ../<name>.c and also depends on the $(INCdep) headers.
+HPL_pddriver.o         : ../HPL_pddriver.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pddriver.c
+HPL_pdinfo.o           : ../HPL_pdinfo.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdinfo.c
+HPL_pdtest.o           : ../HPL_pdtest.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtest.c
+#
+# ######################################################################
+# Run $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+# Objects the lib.grd rule passes to $(ARCHIVER) together with $(HPLlib).
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+# all, lib and clean are command names, not files: declare them phony.
+all     : lib 
+.PHONY  : all lib clean
+lib     : lib.grd
+# Run $(ARCHIVER) and $(RANLIB) on $(HPLlib), then $(TOUCH) the lib.grd stamp.
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from ../<name>.c and also depends on the $(INCdep) headers.
+HPL_ptimer.o           : ../HPL_ptimer.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer.c
+HPL_ptimer_cputime.o   : ../HPL_ptimer_cputime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_cputime.c
+HPL_ptimer_walltime.o  : ../HPL_ptimer_walltime.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_walltime.c
+#
+# ######################################################################
+# Run $(RM) on every *.o and *.grd file in the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test
new file mode 100644
index 000000000..514d445b8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test
@@ -0,0 +1,93 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_gesv.h  $(INCdir)/hpl_matgen.h $(INCdir)/hpl_timer.h \
+   $(INCdir)/hpl_test.h
+#
+## Executable names ####################################################
+#
+xlinpack         = $(BINdir)/xlinpack
+#
+## Object files ########################################################
+#
+HPL_tesobj       = \
+   HPL_ddriver.o          HPL_dinfo.o            HPL_dtest.o
+#
+## Targets #############################################################
+# all and dexe name actions rather than files, so declare them phony.
+.PHONY  : all dexe
+all     : dexe
+dexe    : dexe.grd
+# Copy LINPACK.dat into $(BINdir), the directory that receives xlinpack.
+$(BINdir)/LINPACK.dat : ../LINPACK.dat
+	( $(CP) ../LINPACK.dat $(BINdir) )
+# Link with $(HPL_LIBS); Make.inc is expected to define it (TODO confirm).
+dexe.grd: $(HPL_tesobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/LINPACK.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+#
+# A single static pattern rule compiles every object: each name in
+# $(HPL_tesobj) is built from the same-named .c file in the parent
+# directory, and is rebuilt whenever that source or a header listed in
+# $(INCdep) changes.
+$(HPL_tesobj) : %.o : ../%.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+# clean: delete every .o and .grd file in the current directory.
+# Declared phony so that a file named "clean" cannot shadow the target.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule below adds to $(HPLlib).
+HPL_timobj       = \
+   HPL_timer.o            HPL_timer_cputime.o    HPL_timer_walltime.o
+#
+## Targets #############################################################
+# all and lib name actions rather than files, so declare them phony.
+.PHONY  : all lib
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp file, touched once the objects are in the archive.
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+# A single static pattern rule compiles every object: each name in
+# $(HPL_timobj) is built from the same-named .c file in the parent
+# directory, and is rebuilt whenever that source or a header listed in
+# $(INCdep) changes.
+$(HPL_timobj) : %.o : ../%.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+# clean: delete every .o and .grd file in the current directory.
+# Declared phony so that a file named "clean" cannot shadow the target.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units
new file mode 100644
index 000000000..1c447f204
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units
@@ -0,0 +1,112 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_pauxil.h \
+   $(INCdir)/hpl_units.h
+#
+## Executable names ####################################################
+#
+xunits           = $(BINdir)/xunits
+#
+## Object files ########################################################
+#
+HPL_uniobj       = \
+   HPL_unit_driver.o      HPL_unit_info.o        HPL_unit_indxg2l.o     \
+   HPL_chek_indxg2l.o     HPL_unit_indxg2p.o     HPL_chek_indxg2p.o     \
+   HPL_unit_indxl2g.o     HPL_chek_indxl2g.o     HPL_unit_numroc.o      \
+   HPL_unit_numrocI.o     HPL_chek_numrocI.o
+#
+## Targets #############################################################
+# all and dexe name actions rather than files, so declare them phony.
+.PHONY           : all dexe
+all              : dexe
+dexe             : dexe.grd
+# Copy UNITS.dat into $(BINdir), the directory that receives xunits.
+$(BINdir)/UNITS.dat : ../UNITS.dat
+	( $(CP) ../UNITS.dat $(BINdir) )
+# Link with $(HPL_LIBS); Make.inc is expected to define it (TODO confirm).
+dexe.grd         : $(HPL_uniobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/UNITS.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Compile rules; $< expands to the first prerequisite, i.e. the .c source.
+HPL_unit_driver.o : ../HPL_unit_driver.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_info.o : ../HPL_unit_info.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_indxg2l.o : ../HPL_unit_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_chek_indxg2l.o : ../HPL_chek_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_indxg2p.o : ../HPL_unit_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_chek_indxg2p.o : ../HPL_chek_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_indxl2g.o : ../HPL_unit_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_chek_indxl2g.o : ../HPL_chek_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_numroc.o : ../HPL_unit_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_unit_numrocI.o : ../HPL_unit_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_chek_numrocI.o : ../HPL_chek_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+# clean: delete every .o and .grd file in the current directory.
+# Declared phony so that a file named "clean" cannot shadow the target.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3
new file mode 100644
index 000000000..c6a2c7a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3
@@ -0,0 +1,52 @@
+.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_abort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_abort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_abort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_abort( __LINE__, __FILE__, "Halt.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3
new file mode 100644
index 000000000..70ec6c4ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3
@@ -0,0 +1,49 @@
+.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_all_reduce \- All reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_all_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_all_reduce\fR
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/global output) void *
+On entry,  BUFFER  points to  the  buffer to be combined.  On
+exit, this array contains the combined data and  is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3
new file mode 100644
index 000000000..ffee7f291
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3
@@ -0,0 +1,27 @@
+.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_barrier \- Barrier operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_barrier(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_barrier\fR
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3
new file mode 100644
index 000000000..54eb54b25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3
@@ -0,0 +1,31 @@
+.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bcast \- Perform the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bcast(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bcast\fR
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+IFLAG   (output)                int *
+On exit,  IFLAG  indicates  whether  or not the broadcast has
+occurred.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3
new file mode 100644
index 000000000..083776ab6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3
@@ -0,0 +1,23 @@
+.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_binit \- Initialize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_binit(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_binit\fR
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3
new file mode 100644
index 000000000..317d374cf
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3
@@ -0,0 +1,49 @@
+.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_broadcast(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_broadcast\fR
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be broadcast. On
+exit, this array contains the broadcast data and is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the source process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3
new file mode 100644
index 000000000..0dac6fe58
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3
@@ -0,0 +1,24 @@
+.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bwait \- Finalize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bwait(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bwait\fR
+waits  for  the  row  broadcast  of  the current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3
new file mode 100644
index 000000000..d60619a06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3
@@ -0,0 +1,28 @@
+.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_copyL \- Copy the current panel into a contiguous workspace.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_copyL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_copyL\fR
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3
new file mode 100644
index 000000000..50bd0b0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3
@@ -0,0 +1,76 @@
+.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_daxpy \- y := y + alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_daxpy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_daxpy\fR
+scales the vector x by alpha and adds it to y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the scaled entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3
new file mode 100644
index 000000000..f2759ced9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3
@@ -0,0 +1,69 @@
+.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dcopy \- y := x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dcopy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dcopy\fR
+copies the vector x into the vector y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dcopy( 3, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3
new file mode 100644
index 000000000..57c69f78c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3
@@ -0,0 +1,160 @@
+.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSA\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSB\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&C\fR,
+\fB\&const int\fR
+\fI\&LDC\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemm\fR
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANSA  (local input)           const enum HPL_TRANS
+On entry, TRANSA  specifies the form of  op(A)  to be used in
+the matrix-matrix operation as follows:                      
+   TRANSA==HplNoTrans    : op( A ) = A,                     
+   TRANSA==HplTrans      : op( A ) = A^T,                   
+   TRANSA==HplConjTrans  : op( A ) = A^T.                   
+.TP 8
+TRANSB  (local input)           const enum HPL_TRANS
+On entry, TRANSB  specifies the form of  op(B)  to be used in
+the matrix-matrix operation as follows:                      
+   TRANSB==HplNoTrans    : op( B ) = B,                     
+   TRANSB==HplTrans      : op( B ) = B^T,                   
+   TRANSB==HplConjTrans  : op( B ) = B^T.                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the  number  of rows  of the  matrix
+op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the number  of columns of the matrix
+op(B)  and  the number of columns of the matrix  C. N must be
+at least zero.
+.TP 8
+K       (local input)           const int
+On entry,  K  specifies  the  number of columns of the matrix
+op(A) and the number of rows of the matrix op(B).  K  must be
+at least  zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero  then the elements of the matrices A and B
+need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  is an array of dimension (LDA,ka),  where ka is
+k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+the array  A must contain the matrix A, otherwise the leading
+k  by  m  part of the array  A  must  contain the  matrix  A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA  specifies the first dimension of A as declared
+in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+LDA must be at least max(1,m), otherwise LDA must be at least
+max(1,k).
+.TP 8
+B       (local input)           const double *
+On entry, B is an array of dimension (LDB,kb),  where  kb  is
+n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+the array  B must contain the matrix B, otherwise the leading
+n  by  k  part of the array  B  must  contain  the matrix  B.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB  specifies the first dimension of B as declared
+in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+LDB must be at least max(1,k), otherwise LDB must be at least
+max(1,n).
+.TP 8
+BETA    (local input)           const double
+On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+supplied  as  zero  then  the  elements of the matrix C  need
+not be set on input.
+.TP 8
+C       (local input/output)    double *
+On entry,  C  is an array of dimension (LDC,n). Before entry,
+the  leading m by n part  of  the  array  C  must contain the
+matrix C,  except when beta is zero, in which case C need not
+be set on entry. On exit, the array  C  is overwritten by the
+m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+.TP 8
+LDC     (local input)           const int
+On entry, LDC  specifies the first dimension of C as declared
+in  the   calling  (sub)  program.   LDC  must  be  at  least
+max(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2], c[2*2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+.br
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+.br
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+.br
+   printf("  [%f,%f]\en", c[0], c[2]);
+.br
+   printf("c=[%f,%f]\en", c[1], c[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dtrsm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3
new file mode 100644
index 000000000..f85db57fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3
@@ -0,0 +1,128 @@
+.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemv \- y := beta * y + alpha * op(A) * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemv\fR
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies the  operation to be performed as
+follows:   
+   TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+   TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  A and X  need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+BETA    (local input)           const double
+On entry, BETA  specifies the scalar beta.    When  BETA   is
+supplied as zero then  Y  need not be set on input.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+Before entry with BETA non-zero, the incremented array Y must
+contain the vector  y.  On exit,  Y  is  overwritten  by  the
+updated vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+.br
+              a, 2, x, 1, -1.0, y, 1 );
+.br
+   printf("y=[%f,%f]\en", y[0], y[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3
new file mode 100644
index 000000000..da9ddf495
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3
@@ -0,0 +1,108 @@
+.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dger \- A := alpha * x * y^T + A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dger(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dger\fR
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  X and Y  need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input)           double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.TP 8
+A       (local input/output)    double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients. On exit, A is
+overwritten by the updated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+.br
+             a, 2 );
+.br
+   printf("y=[%f,%f]\en", y[0], y[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemv \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3
new file mode 100644
index 000000000..8da8b1316
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3
@@ -0,0 +1,72 @@
+.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlacpy \- B := A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlacpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlacpy\fR
+copies an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of rows of the arrays A and
+B. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies  the number of columns of the arrays A
+and B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlatcpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3
new file mode 100644
index 000000000..9bf41b68a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3
@@ -0,0 +1,76 @@
+.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlamch(\fR
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlamch\fR
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+CMACH   (local input)           const HPL_T_MACH
+Specifies the value to be returned by HPL_dlamch             
+   = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+   = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+   = HPL_MACH_BASE,  HPL_dlamch := base                      
+   = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+   = HPL_MACH_MLEN,  HPL_dlamch := t                         
+   = HPL_MACH_RND,   HPL_dlamch := rnd                       
+   = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+   = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+   = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+   = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double eps;
+.br
+   eps = HPL_dlamch( HPL_MACH_EPS );
+.br
+   printf("eps=%18.8e\en", eps);
+.br
+   exit(0); return(0);
+.br
+}
+.SH REFERENCES
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3
new file mode 100644
index 000000000..ffbab554f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3
@@ -0,0 +1,73 @@
+.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlange(\fR
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+NORM    (local input)           const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an  array of dimension  (LDA,N), that
+contains the matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], norm;
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+.br
+   printf("norm=%f\en", norm);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3
new file mode 100644
index 000000000..8fdd89b8c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3
@@ -0,0 +1,70 @@
+.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaprnt \- Print the matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaprnt(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaprnt\fR
+prints to standard error an M-by-N matrix A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of rows of A. M must be at
+least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the number of columns of A. N must be
+at least zero.
+.TP 8
+A       (local input)           double *
+On entry, A  points to an array of dimension (LDA,N).
+.TP 8
+IA      (local input)           const int
+On entry, IA specifies the starting row index to be printed.
+.TP 8
+JA      (local input)           const int
+On entry,  JA  specifies  the  starting  column index  to be
+printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+CMATNM  (local input)           const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3
new file mode 100644
index 000000000..efe3580b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3
@@ -0,0 +1,60 @@
+.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp00N \- performs a series of row interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp00N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp00N\fR
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M specifies the number of rows of the array A to be
+interchanged. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies  the number of columns of the array A.
+N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an array of dimension (LDA,N) to which
+the row interchanges will be  applied.  On exit, the permuted
+matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry,  IPIV  is  an  array of size  M  that  contains the
+pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+implies that local rows k and l are to be interchanged.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3
new file mode 100644
index 000000000..662913e54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3
@@ -0,0 +1,88 @@
+.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01N \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01N\fR
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the positions indicated by positive values of LINDXAU.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3
new file mode 100644
index 000000000..738507755
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3
@@ -0,0 +1,89 @@
+.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01T \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01T\fR
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the  positions indicated by positive values of LINDXAU.  The
+rows of A are stored as columns in U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3
new file mode 100644
index 000000000..600449c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3
@@ -0,0 +1,85 @@
+.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp02N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&W0\fR,
+\fB\&double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp02N\fR
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+copied into W. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+copied into W. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be copied into W.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+W0      (local input/output)    double *
+On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local output)          double *
+On entry, W  is an array of size (LDW,M). On exit, W contains
+the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+in W(:,i).
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied into W.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M  that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the rows of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3
new file mode 100644
index 000000000..1ba0b3208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03N \- copy rows of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03N\fR
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N).  Columns
+of W are copied as rows within this array U at  the positions
+specified in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3
new file mode 100644
index 000000000..d8bd11ec1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03T \- copy columns of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03T\fR
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M).  Columns
+of W are copied within the array U at the positions specified
+in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3
new file mode 100644
index 000000000..9f12d79ab
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3
@@ -0,0 +1,106 @@
+.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04N(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04N\fR
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of rows of U that should be
+copied into  A  and replaced by columns of  W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies the number of columns of W that should
+be copied into rows of U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points to  an array of dimension (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M1).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the row W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes of A into which rows of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3
new file mode 100644
index 000000000..448334148
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3
@@ -0,0 +1,107 @@
+.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04T(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04T\fR
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of columns of U that should
+be copied into A and replaced by columns of W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies  the number of columns of W  that will
+be copied into U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the length of the columns of  U  that
+will be copied into rows of A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes of A into which columns of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the  local column indexes of  U  that should be copied into A
+and replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3
new file mode 100644
index 000000000..371dd0b92
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05N \- copy rows of U into A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05N\fR
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of U that should be
+copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points to an array of dimension  (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local row indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3
new file mode 100644
index 000000000..5d70a7a16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05T \- copy columns of U into rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05T\fR
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the columns of U that will
+be copied into rows of A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local column indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3
new file mode 100644
index 000000000..7fa19d41a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06N \- swap rows of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06N\fR
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with rows of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with rows of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,N).  This
+array contains the rows of U that are to be swapped with rows
+of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3
new file mode 100644
index 000000000..41fa3d6ee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06T \- swap rows or columns of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06T\fR
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with columns of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with columns of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns of  U  that are to be swapped with
+rows of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3
new file mode 100644
index 000000000..23465895c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3
@@ -0,0 +1,59 @@
+.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp10N \- performs a series of column interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp10N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp10N\fR
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the array A. M
+must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of the array A. N
+must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an  array of  dimension (LDA,N).  This
+array contains the columns onto which the interchanges should
+be applied. On exit, A contains the permuted matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry, IPIV points to the pivoting information describing the column interchanges to apply to A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3
new file mode 100644
index 000000000..dc940e321
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3
@@ -0,0 +1,70 @@
+.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlatcpy \- B := A^T
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlatcpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlatcpy\fR
+copies the transpose of an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of  rows of the array B and
+the number of columns of A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of  rows of the array A and
+the number of columns of B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,M).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,N).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with the transpose of A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlacpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3
new file mode 100644
index 000000000..f68f887c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3
@@ -0,0 +1,69 @@
+.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocmax \- finds the maximum entry in matrix column.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocmax(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocmax\fR
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of rows of the column
+of A on which we operate.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is  a workarray of size at least 4.  On exit,
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.
+.SH SEE ALSO
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3
new file mode 100644
index 000000000..367e37e36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpN \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpN\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3
new file mode 100644
index 000000000..f864de535
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpT \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpT\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3
new file mode 100644
index 000000000..c287fb0fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3
@@ -0,0 +1,55 @@
+.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dmatgen \- random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dmatgen(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dmatgen\fR
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+M       (input)                 const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (input)                 const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (output)                double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+this  array  contains   the   coefficients  of  the  randomly
+generated matrix.
+.TP 8
+LDA     (input)                 const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+ISEED   (input)                 const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3
new file mode 100644
index 000000000..8f42a10f5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3
@@ -0,0 +1,62 @@
+.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dscal \- x = alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dscal(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dscal\fR
+scales the vector x by alpha.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are  scaled
+by the scalar alpha.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   HPL_dscal( 3, 2.0, x, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3
new file mode 100644
index 000000000..a398f795a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3
@@ -0,0 +1,73 @@
+.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dswap \- y <-> x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dswap(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dswap\fR
+swaps the vectors x and y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are updated
+with the entries of the incremented array Y.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dswap( 3, x, 1, y, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3
new file mode 100644
index 000000000..ad099eb83
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3
@@ -0,0 +1,152 @@
+.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsm \- B := A^{-1} * B  or  B := B * A^{-1}.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsm\fR
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+SIDE    (local input)           const enum HPL_SIDE
+On entry, SIDE  specifies  whether  op(A) appears on the left
+or right of X as follows:
+   SIDE==HplLeft    op( A ) * X = alpha * B,
+   SIDE==HplRight   X * op( A ) = alpha * B.
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry, TRANS  specifies the form of  op(A)  to be used in
+the matrix-matrix operation as follows:
+   TRANS==HplNoTrans    : op( A ) = A,
+   TRANS==HplTrans      : op( A ) = A^T,
+   TRANS==HplConjTrans  : op( A ) = A^T.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the  matrix B.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix B.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero then the elements of the matrix B need not
+be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+k by k upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower on  entry,
+the  leading k by k lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+are not referenced either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+.TP 8
+B       (local input/output)    double *
+On entry,  B  points  to an array of size equal to or greater
+than LDB * n.  Before entry, the leading  m by n  part of the
+array B must contain the matrix B, except when alpha is zero,
+in which case B need not be set on entry.  On exit, the array
+B is overwritten by the m by n solution matrix.
+.TP 8
+LDB     (local input)           const int
+On entry,  LDB  specifies  the  leading  dimension  of  B  as
+declared  in  the  calling  (sub) program.  LDB  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+.br
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+.br
+              a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3
new file mode 100644
index 000000000..5df37c78b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3
@@ -0,0 +1,121 @@
+.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsv \- x := A^{-1} x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsv\fR
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies  the equations  to  be  solved as
+follows:
+   TRANS==HplNoTrans     A   * x = b,
+   TRANS==HplTrans       A^T * x = b.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the order of the matrix A. N must be at
+least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+n by n upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower  on entry,
+the  leading n by n lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+are not referenced either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,n).
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+Before entry,  the  incremented array  X  must contain  the n
+element right-hand side vector b. On exit,  X  is overwritten
+with the solution vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   x[0] = 2.0; x[1] = 1.0;
+.br
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+.br
+              HplNonUnit, 2, a, 2, x, 1 );
+.br
+   printf("x=[%f,%f]\en", x[0], x[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dgemv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3
new file mode 100644
index 000000000..817780e44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3
@@ -0,0 +1,91 @@
+.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_equil \- Equilibrate U and forward the column panel L.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_equil(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_equil\fR
+equilibrates  the  local  pieces of U, so that on exit from
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be equilibrated) information.
+.TP 8
+TRANS   (global input)          const enum HPL_TRANS
+On entry, TRANS specifies whether  U  is stored in transposed
+or non-transposed form.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of rows or columns of  U. N
+must be at least 0.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+non-transposed form, and MAX(1,N) otherwise.
+.TP 8
+IPLEN   (global input)          int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry, IPMAPM1  is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: for i in [0..NPROW), IPMAPM1[IPMAP[i]] = i.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension NPROW+1.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3
new file mode 100644
index 000000000..8a81c0bfb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3
@@ -0,0 +1,44 @@
+.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_fprintf \- fprintf + fflush wrapper.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_fprintf(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_fprintf\fR
+is a wrapper around fprintf flushing the output stream.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_fprintf( stdout, "Hello World.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3
new file mode 100644
index 000000000..dab8067e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3
@@ -0,0 +1,25 @@
+.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_exit \- Exit process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_exit(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_exit\fR
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid to be released.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3
new file mode 100644
index 000000000..53c6a214b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3
@@ -0,0 +1,52 @@
+.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_info \- Retrieve grid information.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_info(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&int *\fR
+\fI\&NPROW\fR,
+\fB\&int *\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&MYROW\fR,
+\fB\&int *\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_info\fR
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NPROW   (global output)         int *
+On exit,   NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global output)         int *
+On exit,   NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+MYROW   (global output)         int *
+On exit,  MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (global output)         int *
+On exit,  MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3
new file mode 100644
index 000000000..7792a522d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3
@@ -0,0 +1,55 @@
+.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_init \- Create a process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_init(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_init\fR
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+On entry,  COMM  is  the  MPI  communicator  identifying  the
+initial  collection  of  processes out of which  the  grid is
+formed.
+.TP 8
+ORDER   (global input)          const HPL_T_ORDER
+On entry, ORDER specifies how the processes should be ordered
+in the grid as follows:
+   ORDER = HPL_ROW_MAJOR    row-major    ordering;
+   ORDER = HPL_COLUMN_MAJOR column-major ordering;
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid to be created. NPROW must be at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid to be created. NPCOL must be at least one.
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information to be initialized.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3
new file mode 100644
index 000000000..c00292a02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3
@@ -0,0 +1,59 @@
+.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|).
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_idamax(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_idamax\fR
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   int    imax;
+.br
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+.br
+   imax = HPL_idamax( 3, x, 1 );
+.br
+   printf("imax=%d\en", imax);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3
new file mode 100644
index 000000000..32c4d9e07
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3
@@ -0,0 +1,53 @@
+.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2l \- Map a global index into a local one.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2l(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2l\fR
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3
new file mode 100644
index 000000000..ca2004031
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3
@@ -0,0 +1,66 @@
+.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2lp \- Map a global index into a local one and a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_indxg2lp(\fR
+\fB\&int *\fR
+\fI\&IL\fR,
+\fB\&int *\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2lp\fR
+computes the local index of a matrix entry pointed to by
+the global  index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+.SH ARGUMENTS
+.TP 8
+IL      (output)                int *
+On exit, IL specifies the local index corresponding to IG. IL
+is at least zero.
+.TP 8
+PROC    (output)                int *
+On exit,  PROC  is the  coordinate of the process  owning the
+entry specified by the global index IG. PROC is at least zero
+and less than NPROCS.
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3
new file mode 100644
index 000000000..5e0273feb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3
@@ -0,0 +1,52 @@
+.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2p \- Map a global index into a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2p(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2p\fR
+computes the process coordinate  which possesses the entry
+of a matrix specified by a global index IG.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3
new file mode 100644
index 000000000..ba6da53a7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3
@@ -0,0 +1,59 @@
+.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxl2g \- Map an index-process pair into a global index.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxl2g(\fR
+\fB\&const int\fR
+\fI\&IL\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxl2g\fR
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+.SH ARGUMENTS
+.TP 8
+IL      (input)                 const int
+On entry, IL specifies the local  index of the matrix  entry.
+IL must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC  specifies the coordinate of the process whose
+local array row or column is to be determined. PROC  must  be
+at least zero and strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3
new file mode 100644
index 000000000..c07f276d5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3
@@ -0,0 +1,126 @@
+.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_infog2l \- global to local index translation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_infog2l(\fR
+\fB\&int\fR
+\fI\&I\fR,
+\fB\&int\fR
+\fI\&J\fR,
+\fB\&const int\fR
+\fI\&IMB\fR,
+\fB\&const int\fR
+\fI\&MB\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&RSRC\fR,
+\fB\&const int\fR
+\fI\&CSRC\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&II\fR,
+\fB\&int *\fR
+\fI\&JJ\fR,
+\fB\&int *\fR
+\fI\&PROW\fR,
+\fB\&int *\fR
+\fI\&PCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_infog2l\fR
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed to by I,  J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          int
+On entry,  I  specifies  the  global  row index of the matrix
+entry. I must be at least zero.
+.TP 8
+J       (global input)          int
+On entry,  J  specifies the global column index of the matrix
+entry. J must be at least zero.
+.TP 8
+IMB     (global input)          const int
+On entry,  IMB  specifies  the size of the first row block of
+the global matrix. IMB must be at least one.
+.TP 8
+MB      (global input)          const int
+On entry,  MB specifies the blocking factor used to partition
+and  distribute the rows of the matrix A.  MB  must be larger
+than one.
+.TP 8
+INB     (global input)          const int
+On entry, INB specifies the size of the first column block of
+the global matrix. INB must be at least one.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the columns of the matrix A. NB must be larger
+than one.
+.TP 8
+RSRC    (global input)          const int
+On entry,  RSRC  specifies  the row coordinate of the process
+that possesses the row  I.  RSRC  must  be at least zero  and
+strictly less than NPROW.
+.TP 8
+CSRC    (global input)          const int
+On entry, CSRC specifies the column coordinate of the process
+that possesses the column J. CSRC  must be at least zero  and
+strictly less than NPCOL.
+.TP 8
+MYROW   (local input)           const int
+On entry, MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry, MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+II      (local output)          int *
+On exit, II  specifies the  local  starting  row index of the
+submatrix. On exit, II is at least 0.
+.TP 8
+JJ      (local output)          int *
+On exit, JJ  specifies the local starting column index of the
+submatrix. On exit, JJ is at least 0.
+.TP 8
+PROW    (global output)         int *
+On exit, PROW is the row coordinate of the process owning the
+entry specified by the global index I.  PROW is at least zero
+and less than NPROW.
+.TP 8
+PCOL    (global output)         int *
+On exit, PCOL  is the column coordinate of the process owning
+the entry specified by the global index J.  PCOL  is at least
+zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3
new file mode 100644
index 000000000..66e77ac32
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3
@@ -0,0 +1,48 @@
+.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_jumpit \- jump into the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_jumpit(\fR
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_jumpit\fR
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+.SH ARGUMENTS
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant A.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant C.
+.TP 8
+IRANN   (local input)           int *
+On entry,  IRANN  is an array of dimension 2,  that contains 
+the 16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.  On exit, this
+array contains respectively the 16-lower and  15-higher bits
+of the encoding of X(m).
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3
new file mode 100644
index 000000000..9fd6805d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3
@@ -0,0 +1,41 @@
+.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ladd \- Adds two long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ladd(\fR
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ladd\fR
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+.SH ARGUMENTS
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3
new file mode 100644
index 000000000..8be7380e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3
@@ -0,0 +1,42 @@
+.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_lmul \- multiplies 2 long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_lmul(\fR
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_lmul\fR
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the intrinsic modulo function
+is inlined.
+.SH ARGUMENTS
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3
new file mode 100644
index 000000000..e7e80062a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3
@@ -0,0 +1,65 @@
+.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_logsort \- Sort the processes in logarithmic order.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_logsort(\fR
+\fB\&const int\fR
+\fI\&NPROCS\fR,
+\fB\&const int\fR
+\fI\&ICURROC\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_logsort\fR
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain the logarithmic sorted processes id with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words, the largest pieces
+of U will be sent a minimal number of times.
+.SH ARGUMENTS
+.TP 8
+NPROCS  (global input)          const int
+On entry, NPROCS  specifies the number of process rows in the
+process grid. NPROCS is at least one.
+.TP 8
+ICURROC (global input)          const int
+On entry, ICURROC is the source process row.
+.TP 8
+IPLEN   (global input/output)   int *
+On entry, IPLEN is an array of dimension NPROCS+1,  such that
+IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+rows of U  in the processes before process IPMAP[i] after the
+sort,  with  the convention that  IPLEN[NPROCS] is  the total
+number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+IPLEN[i] is  the  number of rows of A that should be moved to
+the process IPMAP[i].  IPLEN  is such that the number of rows
+of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+this array contains the logarithmic mapping of the processes.  In
+other words, IPMAP[myroc] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROCS).
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3
new file mode 100644
index 000000000..16d8aecc6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3
@@ -0,0 +1,43 @@
+.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_max \- Combine (max) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_max(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_max\fR
+combines (max) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3
new file mode 100644
index 000000000..a816d61b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3
@@ -0,0 +1,43 @@
+.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_min \- Combine (min) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_min(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_min\fR
+combines (min) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3
new file mode 100644
index 000000000..34c8acfa9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3
@@ -0,0 +1,60 @@
+.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numroc \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numroc(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numroc\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3
new file mode 100644
index 000000000..1891f1ac9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3
@@ -0,0 +1,66 @@
+.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numrocI \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numrocI(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&I\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numrocI\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+I       (input)                 const int
+On entry, I  specifies the global index of the matrix  entry.
+I must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3
new file mode 100644
index 000000000..044e87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3
@@ -0,0 +1,40 @@
+.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pabort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pabort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pabort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_pwarn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3
new file mode 100644
index 000000000..c79019c37
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3
@@ -0,0 +1,42 @@
+.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_packL \- Form the MPI structure for the row ring broadcasts.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_packL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&INDEX\fR,
+\fB\&const int\fR
+\fI\&LEN\fR,
+\fB\&const int\fR
+\fI\&IBUF\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_packL\fR
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+INDEX   (input)                 const int
+On entry,  INDEX  points  to  the  first entry of the  packed
+buffer being broadcast.
+.TP 8
+LEN     (input)                 const int
+On entry, LEN is the length of the packed buffer.
+.TP 8
+IBUF    (input)                 const int
+On entry, IBUF  specifies the panel buffer/count/type entries
+that should be initialized.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3
new file mode 100644
index 000000000..30e55b62e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3
@@ -0,0 +1,15 @@
+.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+main \- HPL main timing program.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&main();\fR
+.SH DESCRIPTION
+\fB\&main\fR
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+.SH SEE ALSO
+.BR HPL_pdinfo \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3
new file mode 100644
index 000000000..e3db5fb8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3
@@ -0,0 +1,64 @@
+.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdfact \- recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdfact(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdfact\fR
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows varying the
+recursive stopping criterion in terms of the number of columns in the
+panel, and NDIV allows specifying the number of subpanels each panel
+should be divided into.  Usually a value of 2 will be chosen. Finally
+PFACT is a function pointer specifying the non-recursive algorithm to
+be used on at most NBMIN columns.  One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3
new file mode 100644
index 000000000..ab4b62c4e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3
@@ -0,0 +1,40 @@
+.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv \- Solve A x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3
new file mode 100644
index 000000000..180f191f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv0 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv0(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv0\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3
new file mode 100644
index 000000000..64cee67ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3
@@ -0,0 +1,46 @@
+.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK1 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK1(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK1\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3
new file mode 100644
index 000000000..9f389b9dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK2 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK2(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK2\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3
new file mode 100644
index 000000000..eed541159
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3
@@ -0,0 +1,212 @@
+.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdinfo \- Read input parameter file.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdinfo(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&int *\fR
+\fI\&NS\fR,
+\fB\&int *\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&NBS\fR,
+\fB\&int *\fR
+\fI\&NB\fR,
+\fB\&HPL_T_ORDER *\fR
+\fI\&PMAPPIN\fR,
+\fB\&int *\fR
+\fI\&NPQS\fR,
+\fB\&int *\fR
+\fI\&P\fR,
+\fB\&int *\fR
+\fI\&Q\fR,
+\fB\&int *\fR
+\fI\&NPFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&PF\fR,
+\fB\&int *\fR
+\fI\&NBMS\fR,
+\fB\&int *\fR
+\fI\&NBM\fR,
+\fB\&int *\fR
+\fI\&NDVS\fR,
+\fB\&int *\fR
+\fI\&NDV\fR,
+\fB\&int *\fR
+\fI\&NRFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&RF\fR,
+\fB\&int *\fR
+\fI\&NTPS\fR,
+\fB\&HPL_T_TOP *\fR
+\fI\&TP\fR,
+\fB\&int *\fR
+\fI\&NDHS\fR,
+\fB\&int *\fR
+\fI\&DH\fR,
+\fB\&HPL_T_SWAP *\fR
+\fI\&FSWAP\fR,
+\fB\&int *\fR
+\fI\&TSWAP\fR,
+\fB\&int *\fR
+\fI\&L1NOTRAN\fR,
+\fB\&int *\fR
+\fI\&UNOTRAN\fR,
+\fB\&int *\fR
+\fI\&EQUIL\fR,
+\fB\&int *\fR
+\fI\&ALIGN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdinfo\fR
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+.SH ARGUMENTS
+.TP 8
+TEST    (global output)         HPL_T_test *
+On entry, TEST  points to a testing data structure.  On exit,
+the fields of this data structure are initialized as follows:
+TEST->outfp  specifies the output file where the results will
+be printed.  It is only defined and used by  the process 0 of
+the grid.  TEST->thrsh  specifies the threshold value for the
+test ratio.  TEST->epsil is the relative machine precision of
+the distributed computer.  Finally  the test counters, kfail,
+kpass, kskip, ktest are initialized to zero.
+.TP 8
+NS      (global output)         int *
+On exit,  NS  specifies the number of different problem sizes
+to be tested. NS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+N       (global output)         int *
+On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+the first NS entries of this array contain the  problem sizes
+to run the code with.
+.TP 8
+NBS     (global output)         int *
+On exit,  NBS  specifies the number of different distribution
+blocking factors to be tested. NBS must be less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NB      (global output)         int *
+On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+the first NBS entries of this array contain the values of the
+various distribution blocking factors, to run the code with.
+.TP 8
+PMAPPIN (global output)         HPL_T_ORDER *
+On exit,  PMAPPIN  specifies the process mapping onto the nodes
+of the  MPI machine configuration.  PMAPPIN  defaults  to
+row-major ordering.
+.TP 8
+NPQS    (global output)         int *
+On exit, NPQS  specifies the  number of different values that
+can be used for P and Q, i.e., the number of process grids to
+run  the  code with.  NPQS must be  less  than  or  equal  to
+HPL_MAX_PARAM.
+.TP 8
+P       (global output)         int *
+On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of P,
+the number of process rows of the  NPQS grids to run the code
+with.
+.TP 8
+Q       (global output)         int *
+On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of Q,
+the number of process columns of the  NPQS  grids to  run the
+code with.
+.TP 8
+NPFS    (global output)         int *
+On exit, NPFS  specifies the  number of different values that
+can be used for PF : the panel factorization algorithm to run
+the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+PF      (global output)         HPL_T_FACT *
+On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NPFS  entries  of this array  contain  the various
+panel factorization algorithms to run the code with.
+.TP 8
+NBMS    (global output)         int *
+On exit,  NBMS  specifies  the  number  of  various recursive
+stopping criteria  to be tested.  NBMS  must be  less than or
+equal to HPL_MAX_PARAM.
+.TP 8
+NBM     (global output)         int *
+On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NBMS entries of this array contain the values
+of the various recursive stopping criteria to be tested.
+.TP 8
+NDVS    (global output)         int *
+On exit,  NDVS  specifies  the number  of various numbers  of
+panels in recursion to be tested.  NDVS is less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NDV     (global output)         int *
+On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDVS entries of this array contain the values
+of the various numbers of panels in recursion to be tested.
+.TP 8
+NRFS    (global output)         int *
+On exit, NRFS  specifies the  number of different values that
+can be used for RF : the recursive factorization algorithm to
+be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+RF      (global output)         HPL_T_FACT *
+On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NRFS  entries  of  this array contain  the various
+recursive factorization algorithms to run the code with.
+.TP 8
+NTPS    (global output)         int *
+On exit, NTPS  specifies the  number of different values that
+can be used for the  broadcast topologies  to be tested. NTPS
+is less than or equal to HPL_MAX_PARAM.
+.TP 8
+TP      (global output)         HPL_T_TOP *
+On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+the  first NTPS  entries of this  array  contain  the various
+broadcast (along rows) topologies to run the code with.
+.TP 8
+NDHS    (global output)         int *
+On exit, NDHS  specifies the  number of different values that
+can be used for the  lookahead depths to be  tested.  NDHS is
+less than or equal to HPL_MAX_PARAM.
+.TP 8
+DH      (global output)         int *
+On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDHS entries of this array contain the values
+of lookahead depths to run the code with.  Such a value is
+either 0 (no-lookahead) or greater than zero.
+.TP 8
+FSWAP   (global output)         HPL_T_SWAP *
+On exit, FSWAP specifies the swapping algorithm to be used in
+all tests.
+.TP 8
+TSWAP   (global output)         int *
+On exit,  TSWAP  specifies the swapping threshold as a number
+of columns when the mixed swapping algorithm was chosen.
+.TP 8
+L1NOTRAN (global output)        int *
+On exit, L1NOTRAN specifies whether the upper triangle of the
+panels of columns  should  be stored  in  no-transposed  form
+(L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+.TP 8
+UNOTRAN (global output)         int *
+On exit, UNOTRAN  specifies whether the panels of rows should
+be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+form (UNOTRAN=0) during their broadcast.
+.TP 8
+EQUIL   (global output)         int *
+On exit,  EQUIL  specifies  whether  equilibration during the
+swap-broadcast  of  the  panel of rows  should  be  performed
+(EQUIL=1) or not (EQUIL=0).
+.TP 8
+ALIGN   (global output)         int *
+On exit,  ALIGN  specifies the alignment  of  the dynamically
+allocated buffers in double precision words. ALIGN is greater
+than zero.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3
new file mode 100644
index 000000000..7ce46c23e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3
@@ -0,0 +1,53 @@
+.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlamch(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlamch\fR
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps), the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin)- base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax)  - (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.TP 8
+CMACH   (global input)          const HPL_T_MACH
+Specifies the value to be returned by HPL_pdlamch            
+   = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+   = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+   = HPL_MACH_BASE,  HPL_pdlamch := base                     
+   = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+   = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+   = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+   = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+   = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+   = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+   = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3
new file mode 100644
index 000000000..30593401b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3
@@ -0,0 +1,68 @@
+.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlange(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NORM    (global input)          const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+that contains the local pieces of the distributed matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.SH SEE ALSO
+.BR HPL_pdlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3
new file mode 100644
index 000000000..feb010a67
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3
@@ -0,0 +1,72 @@
+.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaprnt \- Print a distributed matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaprnt(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&IAROW\fR,
+\fB\&const int\fR
+\fI\&IACOL\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaprnt\fR
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies the number of rows of the coefficient
+matrix A. M must be at least zero.
+.TP 8
+N       (global input)          const int
+On  entry,   N   specifies  the  number  of  columns  of  the
+coefficient matrix A. N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           double *
+On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+This array contains the coefficient matrix to be printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+IAROW   (global input)          const int
+On entry,  IAROW  specifies the row process coordinate owning
+the  first row of A.  IAROW  must be  larger than or equal to
+zero and less than NPROW.
+.TP 8
+IACOL   (global input)          const int
+On entry,  IACOL  specifies  the  column  process  coordinate
+owning the  first column  of A. IACOL  must be larger than or
+equal to zero and less than NPCOL.
+.TP 8
+CMATNM  (global input)          const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3
new file mode 100644
index 000000000..3875400e3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision  real  words.
+Mono-directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp05N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3
new file mode 100644
index 000000000..39901ba4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision  real  words.
+Mono-directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3
new file mode 100644
index 000000000..1ee14c0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadN \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollN \ (3),
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp06N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3
new file mode 100644
index 000000000..e5c5de024
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadT \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollT \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3
new file mode 100644
index 000000000..5b4675c6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3
@@ -0,0 +1,67 @@
+.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmatgen \- Parallel random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmatgen(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmatgen\fR
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+A       (local output)          double *
+On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+On exit, this array contains the coefficients of the randomly
+generated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+ISEED   (global input)          const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_drand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3
new file mode 100644
index 000000000..41c604373
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3
@@ -0,0 +1,78 @@
+.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmxswp \- swaps and broadcasts the pivot row.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmxswp(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmxswp\fR
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of the matrix
+column on which this function operates.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+It  is assumed that  HPL_dlocmax  was called  prior  to  this
+routine to  initialize  the first four entries of this array.
+On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+Note that this is also the  JJth  row  (or column) of L1. The
+remaining part is used as a temporary array.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3
new file mode 100644
index 000000000..2e94a36a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrN \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration of the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3
new file mode 100644
index 000000000..035e60d60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrT \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration of the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3
new file mode 100644
index 000000000..94a212ced
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_disp \- Deallocate a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_disp(\fR
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_disp\fR
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3
new file mode 100644
index 000000000..cfad40c3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_free(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_free\fR
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points  to  the  panel data  structure from
+which the resources should be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3
new file mode 100644
index 000000000..cbb0e7e3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_init \- Initialize the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_init(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_init\fR
+initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry, JB specifies the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_disp \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3
new file mode 100644
index 000000000..ed9fe1053
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_new(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_new\fR
+creates and initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry, JB specifies the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to create and initialize.
+.SH SEE ALSO
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3
new file mode 100644
index 000000000..eca1f4a34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllN \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration of the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3
new file mode 100644
index 000000000..a18d52c61
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration of the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3
new file mode 100644
index 000000000..cae2b5b5b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlN \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlN\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration of the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3
new file mode 100644
index 000000000..434444bf7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlT \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlT\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows bringing the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3
new file mode 100644
index 000000000..fc6dd25f8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrN \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrN\fR
+recursively  factorizes  a panel of columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3
new file mode 100644
index 000000000..ea0a57bc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrT \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrT\fR
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3
new file mode 100644
index 000000000..29b6db40a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllN \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllN\fR
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3
new file mode 100644
index 000000000..18db5c1fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllT \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3
new file mode 100644
index 000000000..441560c14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlN \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlN\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3
new file mode 100644
index 000000000..e5bd9d110
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlT \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3
new file mode 100644
index 000000000..eaaff2bff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3
@@ -0,0 +1,63 @@
+.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtest \- Perform one test.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtest(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtest\fR
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+.SH ARGUMENTS
+.TP 8
+TEST    (global input)          HPL_T_test *
+On entry,  TEST  points  to a testing data structure:  outfp
+specifies the output file where the results will be printed.
+It is only defined and used by the process  0  of the  grid.
+thrsh  specifies  the  threshold  value  for the test ratio.
+Concretely, a test is declared "PASSED"  if and only if  the
+following inequality is satisfied:
+||Ax-b||_oo / ( epsil *
+                ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                 N )  < thrsh.
+epsil  is the  relative machine precision of the distributed
+computer. Finally the test counters, kfail, kpass, kskip and
+ktest are updated as follows:  if the test passes,  kpass is
+incremented by one;  if the test fails, kfail is incremented
+by one; if the test is skipped, kskip is incremented by one.
+ktest is left unchanged.
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters to be used for this test.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the order of the coefficient matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdinfo \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3
new file mode 100644
index 000000000..5d2d14dcd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3
@@ -0,0 +1,49 @@
+.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtrsv \- Solve triu( A ) x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtrsv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&AMAT\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtrsv\fR
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns  implemented  in terms of  synchronous communication point to
+point primitives.  The  lookahead of depth 1 is used to minimize  the
+critical path. This entire operation is essentially ``latency'' bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+AMAT    (local input/output)    HPL_T_pmat *
+On entry,  AMAT  points  to the data structure containing the
+local array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3
new file mode 100644
index 000000000..e20929a27
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNN\fR
+broadcasts / forwards the panel PBCST  and  simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3
new file mode 100644
index 000000000..276c2ceda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNT\fR
+broadcasts / forwards the panel PBCST  and  simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3
new file mode 100644
index 000000000..091859d01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTN\fR
+broadcasts / forwards the panel PBCST  and  simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3
new file mode 100644
index 000000000..34502c6ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTT\fR
+broadcasts / forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry.  If PBCST is
+NULL, IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3
new file mode 100644
index 000000000..9476b5eff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3
@@ -0,0 +1,50 @@
+.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_perm \- Combine 2 index arrays - Generate the permutation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_perm(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_perm\fR
+combines  two  index  arrays  and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU.  Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutation  producing  the  same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+.SH ARGUMENTS
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the length of the arrays  LINDXA  and
+LINDXAU. N should be at least zero.
+.TP 8
+LINDXA  (global input/output)   int *
+On entry,  LINDXA  is an array of dimension N  containing the
+source indexes. On exit,  LINDXA  contains the combined index
+array.
+.TP 8
+LINDXAU (global input/output)   int *
+On entry,  LINDXAU is an array of dimension N  containing the
+target indexes.  On exit,  LINDXAU  contains  the sequence of
+permutation,  that  should be applied  in increasing order to
+permute the underlying array U in place.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension N.
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3
new file mode 100644
index 000000000..6a8f5f277
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3
@@ -0,0 +1,79 @@
+.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pipid \- Simplify the pivot vector.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pipid(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pipid\fR
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global output)         int *
+On exit, K specifies the number of entries in  IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global output)         int *
+On entry, IPID is an array of length 4*N.  On exit, the first
+K entries of that array contain the src and final destination
+resulting  from  the  application of the  N  interchanges  as
+specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+in [0..N).
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3
new file mode 100644
index 000000000..2b889947a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3
@@ -0,0 +1,168 @@
+.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx0 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx0(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&LLEN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx0\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA < N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after each other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would rather be cumbersome
+to  figure it on  the fly  during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+LINDXA  (local output)          int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (local output)          int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+LLEN    (global output)         int *
+On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+contains how many rows every process has.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3
new file mode 100644
index 000000000..7d4f8feba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3
@@ -0,0 +1,106 @@
+.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx1 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx1(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPA\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&PERMU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx1\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPA     (global output)         int *
+On exit,  IPA  specifies  the number of rows that the current
+process row has that either belong to U  or should be swapped
+with remote rows of A.
+.TP 8
+LINDXA  (global output)         int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (global output)         int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in  the  processes  before  process  IPMAP[i]  after the sort
+with the convention that IPLEN[nprow]  is the total number of
+rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+local number of rows of A that should be moved to the process
+IPMAP[i]. IPLEN is such that the number of rows of the source
+process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW)
+.TP 8
+PERMU   (global output)         int *
+On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+contains  a sequence of permutations,  that should be applied
+in increasing order to permute in place the row panel U.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension 2*JB.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3
new file mode 100644
index 000000000..d22d64f36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3
@@ -0,0 +1,68 @@
+.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx10 \- Compute the logarithmic maps for the spreading.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx10(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx10\fR
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in the processes  before process IPMAP[i] after the sort, with
+the convention that IPLEN[nprow] is the total number of rows.
+In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+rows of  A  that should be moved for each process.  IPLEN  is
+such that the number of rows of the source process row can be
+computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+this  array are sorted  so  that  the quantities IPLEN[i+1] -
+IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW)
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3
new file mode 100644
index 000000000..38956c5a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3
@@ -0,0 +1,38 @@
+.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pnum \- Rank determination.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pnum(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pnum\fR
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+MYROW   (local input)           const int
+On entry,  MYROW  specifies the row coordinate of the process
+whose rank is to be determined. MYROW must be greater than or
+equal to zero and less than NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry,  MYCOL  specifies  the  column  coordinate  of  the
+process whose rank is to be determined. MYCOL must be greater
+than or equal to zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3
new file mode 100644
index 000000000..550703aee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3
@@ -0,0 +1,35 @@
+.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ptimer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be disabled  by calling HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize the timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3
new file mode 100644
index 000000000..a93a1c208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_cputime\fR
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_ptimer_walltime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3
new file mode 100644
index 000000000..37e5e8c54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3
new file mode 100644
index 000000000..14e4a65d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3
@@ -0,0 +1,45 @@
+.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pwarn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pwarn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pwarn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_pabort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3
new file mode 100644
index 000000000..8b1918fea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3
@@ -0,0 +1,28 @@
+.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rand \- random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_rand();\fR
+.SH DESCRIPTION
+\fB\&HPL_rand\fR
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3
new file mode 100644
index 000000000..d9136c14b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3
@@ -0,0 +1,49 @@
+.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_recv \- Receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_recv(\fR
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&SRC\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_recv\fR
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+SRC     (local input)           int
+On entry, SRC  specifies the rank of the  sending  process in
+the communication space defined by COMM.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_sendrecv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3
new file mode 100644
index 000000000..c48f04ded
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3
@@ -0,0 +1,56 @@
+.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_reduce \- Reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_reduce\fR
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array, so in all
+processes but the accumulating process the original data is corrupted.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+exit,  and  in process of rank  ROOT  this array contains the
+reduced data.  This  buffer  is also used as workspace during
+the operation in the other processes of the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the accumulating process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3
new file mode 100644
index 000000000..eac4deb66
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3
@@ -0,0 +1,77 @@
+.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollN \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollN\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of  U.  N must be
+at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,IPLEN[NPROW]).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3
new file mode 100644
index 000000000..bab5bdffd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3
@@ -0,0 +1,77 @@
+.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollT \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollT\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the local number of rows of  U.  N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,N).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3
new file mode 100644
index 000000000..a11252d6a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3
@@ -0,0 +1,67 @@
+.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sdrv \- Send and receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_sdrv(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&int\fR
+\fI\&PARTNER\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sdrv\fR
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages  of  length  less than  or  equal to zero  are neither sent nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number  of double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG  specifies the message tag to be used for the
+sending communication operation.
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG  specifies the message tag to be used for the
+receiving communication operation.
+.TP 8
+PARTNER (local input)           int
+On entry,  PARTNER  specifies  the rank of the  collaborative
+process in the communication space defined by COMM.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_recv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3
new file mode 100644
index 000000000..48ffc5d62
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3
@@ -0,0 +1,49 @@
+.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_send \- Send a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_send(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&DEST\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_send\fR
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number of  double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+DEST    (local input)           int
+On entry, DEST specifies the rank of the receiving process in
+the communication space defined by COMM.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_recv \ (3),
+.BR HPL_sdrv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3
new file mode 100644
index 000000000..e9a9433ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3
@@ -0,0 +1,37 @@
+.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_setran \- Manage the random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_setran(\fR
+\fB\&const int\fR
+\fI\&OPTION\fR,
+\fB\&int *\fR
+\fI\&IRAN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_setran\fR
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+.SH ARGUMENTS
+.TP 8
+OPTION  (local input)           const int
+On entry, OPTION  is an integer that specifies the operations
+to be performed on the random generator as specified above.
+.TP 8
+IRAN    (local input/output)    int *
+On entry,  IRAN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of a random number.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3
new file mode 100644
index 000000000..452b8da34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadN \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadN\fR
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies  the  local number of columns of U. N
+must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3
new file mode 100644
index 000000000..54f7dda31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadT \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadT\fR
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the local number of rows of U. N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,N).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3
new file mode 100644
index 000000000..a3c4e2190
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3
@@ -0,0 +1,44 @@
+.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sum \- Combine (sum) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_sum(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sum\fR
+combines (sum) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3
new file mode 100644
index 000000000..61f3f7cb1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3
@@ -0,0 +1,35 @@
+.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_timer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_timer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3
new file mode 100644
index 000000000..1f8987ca2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_cputime\fR
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_timer_walltime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3
new file mode 100644
index 000000000..9a6e898e7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3
new file mode 100644
index 000000000..6b051acb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3
@@ -0,0 +1,59 @@
+.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_warn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_warn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_warn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_warn( stderr, __LINE__, __FILE__,
+.br
+             "Demo.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3
new file mode 100644
index 000000000..df3e0a954
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3
@@ -0,0 +1,77 @@
+.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_xjumpm \- Compute constants to jump in the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_xjumpm(\fR
+\fB\&const int\fR
+\fI\&JUMPM\fR,
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR,
+\fB\&int *\fR
+\fI\&IAM\fR,
+\fB\&int *\fR
+\fI\&ICM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_xjumpm\fR
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+.SH ARGUMENTS
+.TP 8
+JUMPM   (local input)           const int
+On entry,  JUMPM  specifies  the  number  of entries  in  the
+sequence to jump over. When JUMPM is less than or equal to zero,
+A and C are not computed, IRANM is set to IRANN corresponding
+to a jump of size zero.
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  a  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  c  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IRANN   (local input)           int *
+On entry, IRANN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.   On exit, this
+array  contains respectively  the 16-lower and 15-higher bits
+of the encoding of X(n+JUMPM).
+.TP 8
+IAM     (local output)          int *
+On entry, IAM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+sequence. IAM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits  of this constant  A. When  JUMPM  is less than or
+equal to zero, this array is not referenced.
+.TP 8
+ICM     (local output)          int *
+On entry, ICM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+sequence. ICM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits  of this constant  C. When  JUMPM  is less than or
+equal to zero, this array is not referenced.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing
new file mode 100755
index 000000000..625aeb118
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing
@@ -0,0 +1,215 @@
+#! /bin/sh
+# Common wrapper for a few potentially missing GNU programs.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try '$0 --help' for more information"
+  exit 1
+fi
+
+case $1 in
+
+  --is-lightweight)
+    # Used by our autoconf macros to check whether the available missing
+    # script is modern enough.
+    exit 0
+    ;;
+
+  --run)
+    # Back-compat with the calling convention used by older automake.
+    shift
+    ;;
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
+to PROGRAM being missing or too old.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+
+Supported PROGRAM values:
+  aclocal   autoconf  autoheader   autom4te  automake  makeinfo
+  bison     yacc      flex         lex       help2man
+
+Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
+'g' are ignored when checking the name.
+
+Send bug reports to <bug-automake@gnu.org>."
+    exit $?
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit $?
+    ;;
+
+  -*)
+    echo 1>&2 "$0: unknown '$1' option"
+    echo 1>&2 "Try '$0 --help' for more information"
+    exit 1
+    ;;
+
+esac
+
+# Run the given program, remember its exit status.
+"$@"; st=$?
+
+# If it succeeded, we are done.
+test $st -eq 0 && exit 0
+
+# Also exit now if it failed (or wasn't found), and '--version' or
+# '--help' was passed; such an option is most likely passed to detect
+# whether the program is present and works.
+case $2 in --version|--help) exit $st;; esac
+
+# Exit code 63 means version mismatch.  This often happens when the user
+# tries to use an ancient version of a tool on a file that requires a
+# minimum version.
+if test $st -eq 63; then
+  msg="probably too old"
+elif test $st -eq 127; then
+  # Program was missing.
+  msg="missing on your system"
+else
+  # Program was found and executed, but failed.  Give up.
+  exit $st
+fi
+
+perl_URL=https://www.perl.org/
+flex_URL=https://github.com/westes/flex
+gnu_software_URL=https://www.gnu.org/software
+
+program_details ()  # $1: tool name; prints its package and prerequisite URLs.
+{
+  case $1 in  # Names not listed below print nothing.
+    aclocal|automake)  # Tools that belong to GNU Automake.
+      echo "The '$1' program is part of the GNU Automake package:"
+      echo "<$gnu_software_URL/automake>"
+      echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/autoconf>"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+    autoconf|autom4te|autoheader)  # Tools that belong to GNU Autoconf.
+      echo "The '$1' program is part of the GNU Autoconf package:"
+      echo "<$gnu_software_URL/autoconf/>"
+      echo "It also requires GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+  esac
+}
+
+give_advice ()  # $1: program as invoked; prints advice to stdout, reads $msg.
+{
+  # Normalize the name to check: drop the first matching 'gnu-', 'gnu' or 'g'.
+  normalized_program=`echo "$1" | sed '
+    s/^gnu-//; t
+    s/^gnu//; t
+    s/^g//; t'`
+
+  printf '%s\n' "'$1' is $msg."
+
+  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
+  case $normalized_program in  # Trailing '*' also matches suffixed names.
+    autoconf*)
+      echo "You should only need it if you modified 'configure.ac',"
+      echo "or m4 files included by it."
+      program_details 'autoconf'
+      ;;
+    autoheader*)
+      echo "You should only need it if you modified 'acconfig.h' or"
+      echo "$configure_deps."
+      program_details 'autoheader'
+      ;;
+    automake*)
+      echo "You should only need it if you modified 'Makefile.am' or"
+      echo "$configure_deps."
+      program_details 'automake'
+      ;;
+    aclocal*)
+      echo "You should only need it if you modified 'acinclude.m4' or"
+      echo "$configure_deps."
+      program_details 'aclocal'
+      ;;
+   autom4te*)
+      echo "You might have modified some maintainer files that require"
+      echo "the 'autom4te' program to be rebuilt."
+      program_details 'autom4te'
+      ;;
+    bison*|yacc*)
+      echo "You should only need it if you modified a '.y' file."
+      echo "You may want to install the GNU Bison package:"
+      echo "<$gnu_software_URL/bison/>"
+      ;;
+    lex*|flex*)
+      echo "You should only need it if you modified a '.l' file."
+      echo "You may want to install the Fast Lexical Analyzer package:"
+      echo "<$flex_URL>"
+      ;;
+    help2man*)
+      echo "You should only need it if you modified a dependency" \
+           "of a man page."
+      echo "You may want to install the GNU Help2man package:"
+      echo "<$gnu_software_URL/help2man/>"
+    ;;
+    makeinfo*)
+      echo "You should only need it if you modified a '.texi' file, or"
+      echo "any other file indirectly affecting the aspect of the manual."
+      echo "You might want to install the Texinfo package:"
+      echo "<$gnu_software_URL/texinfo/>"
+      echo "The spurious makeinfo call might also be the consequence of"
+      echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
+      echo "want to install GNU make:"
+      echo "<$gnu_software_URL/make/>"
+      ;;
+    *)  # Any other program: generic advice naming the original "$1".
+      echo "You might have modified some files without having the proper"
+      echo "tools for further handling them.  Check the 'README' file, it"
+      echo "often tells you about the needed prerequisites for installing"
+      echo "this package.  You may also peek at any GNU archive site, in"
+      echo "case some other package contains this missing '$1' program."
+      ;;
+  esac
+}
+
+give_advice "$1" | sed -e '1s/^/WARNING: /' \
+                       -e '2,$s/^/         /' >&2
+
+# Propagate the correct exit status (expected to be 127 for a program
+# not found, 63 for a program that failed due to version mismatch).
+exit $st
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
new file mode 100644
index 000000000..056fd81ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = FreeBSD_PIV_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpich
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = /usr/bin/f77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = /usr/bin/ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS
new file mode 100644
index 000000000..af3f5da5f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS
@@ -0,0 +1,179 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Same as this file's suffix; selects $(TOPdir)/bin|lib/$(ARCH) below.
+ARCH         = HPUX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPIinc tells the C compiler where to find the MPI header files, MPIlib
+# is defined to be the name of the MPI library to be used. The variables
+# MPIdir and MPIplat are only used for defining MPIinc and MPIlib.
+#
+MPIdir       = $(HOME)/local/mpi
+MPIplat      = $(MPIdir)/hpux/ch_p4
+#
+MPIinc       = -I$(MPIdir)/include -I$(MPIplat)/include
+MPIlib       = $(MPIplat)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - BLAS library -------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+BLASlib      = /usr/lib/pa1.1/libblas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate 
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a 
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form: 
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses 
+#                       Cray  fcd  (fortran  character  descriptor)  for 
+#                       interoperation. 
+#
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle 
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_LIBS must include MPIlib: it is defined above but was never used.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc)
+HPL_LIBS     = $(HPLlib) $(BLASlib) $(MPIlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS F77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+# 
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = cc
+LINKFLAGS    = -Aa
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS
new file mode 100644
index 000000000..984236be2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = I860_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        = -lmpi
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lkmath
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS) -nx
+CCFLAGS      = $(HPL_DEFS) -O4 -nx
+#
+LINKER       = f77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS
new file mode 100644
index 000000000..d78bcf09f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS
@@ -0,0 +1,181 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Commands for basic file operations: cd, cp, ln -s, mkdir, rm -f, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also the per-arch subdirectory of include/bin/lib.
+ARCH         = IRIX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (keep free of trailing blanks: make preserves them).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Paths below point at an MPI tree under the home dir (IRIX64/ch_p4).
+MPdir        = ${HOME}/local/mpi
+MPinc        = -I${MPdir}/include -I${MPdir}/IRIX64/ch_p4/include
+MPlib        = ${MPdir}/IRIX64/ch_p4/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAdir and LAinc are left empty; the BLAS is linked by name via -lblas.
+LAdir        =
+LAinc        =
+LAlib        = -lblas
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: trailing-underscore names, Sun-style strings, INTEGER as C int.
+F2CDEFS      = -DAdd_ -DStringSunStyle -DF77_INTEGER=int
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search path and link line assembled from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# All preprocessor definitions and include paths given to the compiler.
+HPL_DEFS     = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = ${HPL_DEFS} -64
+CCFLAGS      = ${HPL_DEFS} -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \
+               -LNO:blocking=OFF -LOPT:alias=typed
+# Linking uses the C compiler driver and reuses the compile flags.
+LINKER       = cc
+LINKFLAGS    = ${CCFLAGS}
+# Archiver settings; RANLIB is set to plain echo rather than a ranlib.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
new file mode 100644
index 000000000..624306902
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Commands for basic file operations: cd, cp, ln -s, mkdir, rm -f, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also the per-arch subdirectory of include/bin/lib.
+ARCH         = Linux_ATHLON_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (keep free of trailing blanks: make preserves them).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Paths below point at an MPI installation rooted at /usr/local/mpi.
+MPdir        = /usr/local/mpi
+MPinc        = -I${MPdir}/include
+MPlib        = ${MPdir}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Two static archives from LAdir are linked: libcblas.a and libatlas.a.
+LAdir        = ${HOME}/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = ${LAdir}/libcblas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: this file sets -DHPL_CALL_CBLAS in HPL_OPTS (C interface).
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search path and link line assembled from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the Fortran 77 BLAS interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# All preprocessor definitions and include paths given to the compiler.
+HPL_DEFS     = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = ${HPL_DEFS}
+CCFLAGS      = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Linking goes through gcc as well and reuses the compile flags.
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = ${CCFLAGS}
+# Archiver settings; RANLIB is set to plain echo rather than a ranlib.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
new file mode 100644
index 000000000..07985f781
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Commands for basic file operations: cd, cp, ln -s, mkdir, rm -f, touch.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also the per-arch subdirectory of include/bin/lib.
+ARCH         = Linux_ATHLON_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (keep free of trailing blanks: make preserves them).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Paths below point at an MPI installation rooted at /usr/local/mpi.
+MPdir        = /usr/local/mpi
+MPinc        = -I${MPdir}/include
+MPlib        = ${MPdir}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be 
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Two static archives from LAdir are linked: libf77blas.a and libatlas.a.
+LAdir        = ${HOME}/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = ${LAdir}/libf77blas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: f2c-style names (-DAdd__), INTEGER as C int, Sun-style strings.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search path and link line assembled from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS     = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Compiler flag bundle (keep free of trailing blanks: make keeps them).
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = ${HPL_DEFS}
+CCFLAGS      = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Linking goes through g77 and reuses the compile flags.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = ${CCFLAGS}
+# Archiver settings; RANLIB is set to plain echo rather than a ranlib.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
new file mode 100644
index 000000000..ddf3fb4b6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic commands; -f / -p keep link and directory creation repeatable.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names the per-platform subdirectories of bin/, lib/ and include/.
+ARCH         = Linux_ATHLON_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin/ and lib/ are split per $(ARCH).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it as part of HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPICH archive (libmpich.a) under a fixed prefix; MPdir is site-specific.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Headers and libvsip_c.a are both taken from the tree named by LAdir.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty here; HPL_OPTS below selects -DHPL_CALL_VSIPL instead.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Headers come from $(INCdir) and $(INCdir)/$(ARCH) plus LAinc/MPinc.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the Fortran 77 BLAS interface,
+#    *) not display detailed timing information.
+# This configuration selects the vsip library (see the option list above).
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS only; CCFLAGS adds optimization and warning flags.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Linking uses the same gcc binary and reuses CCFLAGS unchanged.
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is `echo`, i.e. a stand-in that only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64
new file mode 100644
index 000000000..47661c25d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64
@@ -0,0 +1,193 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic commands; LN_S replaces existing links (-f), MKDIR makes parents (-p).
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names the per-platform subdirectories of bin/, lib/ and include/.
+ARCH         = Linux_Intel64
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin/ and lib/ are split per $(ARCH).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it as part of HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Left commented out: CC is the mpiicc wrapper, so MPinc/MPlib expand empty.
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# MKLROOT is taken to be the MKL directory itself (include/, lib/intel64/).
+LAdir        = $(MKLROOT)
+ifndef  LAinc
+LAinc        = $(LAdir)/include
+endif
+ifndef  LAlib
+LAlib        = -L$(LAdir)/lib/intel64 \
+               -Wl,--start-group \
+               $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
+               $(LAdir)/lib/intel64/libmkl_intel_thread.a \
+               $(LAdir)/lib/intel64/libmkl_core.a \
+               -Wl,--end-group -lpthread -ldl
+endif
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# NOTE(review): -DAdd__ is the f2c entry above; Intel is listed under -DAdd_.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# LAinc is a bare directory in this file, hence the explicit -I prefix.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Detailed timers are enabled; -DHPL_PROGRESS_REPORT is not listed above.
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# mpiicc drives both compiling and linking here (LINKER is $(CC)).
+CC       = mpiicc
+CCNOOPT  = $(HPL_DEFS)
+OMP_DEFS = -openmp
+CCFLAGS  = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall
+# NOTE(review): CCFLAGS has both -w and -Wall; OMP_DEFS is link-time only.
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# NOTE(review): -openmp / -i-static look like older spellings; confirm.
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
+# RANLIB is `echo`, i.e. a stand-in that only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS
new file mode 100644
index 000000000..535a0e214
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic commands; -f / -p keep link and directory creation repeatable.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names the per-platform subdirectories of bin/, lib/ and include/.
+ARCH         = Linux_PII_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything is rooted at TOPdir; bin/ and lib/ are split per $(ARCH).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it as part of HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPICH archive (libmpich.a) under a fixed prefix; MPdir is site-specific.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAinc is empty (no extra include path); libcblas.a and libatlas.a are linked.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty here; HPL_OPTS below selects -DHPL_CALL_CBLAS instead.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Headers come from $(INCdir) and $(INCdir)/$(ARCH) plus LAinc/MPinc.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# This configuration selects the cblas interface (see the option list above).
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS only; CCFLAGS adds the optimization flags.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here that linker is g77; it reuses CCFLAGS unchanged.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is `echo`, i.e. a stand-in that only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
new file mode 100644
index 000000000..31fc9ea74
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# File-handling commands; RM is forced (-f), LN_S makes symbolic links.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; names the bin/ and lib/ subdirs.
+ARCH         = Linux_PII_CBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every directory below is derived from TOPdir, set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPLlib: no blank after the path, make keeps trailing blanks in values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty here: CC/LINKER below are mpicc/mpif77 (presumably supplying MPI paths).
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No extra include path; links libcblas.a and libatlas.a from LAdir.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Empty in this configuration, which sets -DHPL_CALL_CBLAS in HPL_OPTS below.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES gathers the header search flags, HPL_LIBS the libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# This configuration selects the cblas interface.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77/C options, HPL_OPTS and the include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS adds optimization and warning flags.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here LINKER is mpif77 and LINKFLAGS simply reuses CCFLAGS.
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is echo: $(RANLIB) only prints its arguments, no ranlib is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS
new file mode 100644
index 000000000..5ed9aac12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# File-handling commands; RM is forced (-f), LN_S makes symbolic links.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; names the bin/ and lib/ subdirs.
+ARCH         = Linux_PII_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every directory below is derived from TOPdir, set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPLlib: no blank after the path, make keeps trailing blanks in values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: headers from $(MPdir)/include, and the libmpich.a archive by full path.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No extra include path; links libf77blas.a and libatlas.a from LAdir.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: f2c-style names (-DAdd__), int INTEGERs, Sun-style strings.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES gathers the header search flags, HPL_LIBS the libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No option is set here, so every default listed above applies.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77/C options, HPL_OPTS and the include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS adds optimization and warning flags.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here LINKER is g77 and LINKFLAGS simply reuses CCFLAGS.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is echo: $(RANLIB) only prints its arguments, no ranlib is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
new file mode 100644
index 000000000..a2416396c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# File-handling commands; RM is forced (-f), LN_S makes symbolic links.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; names the bin/ and lib/ subdirs.
+ARCH         = Linux_PII_FBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every directory below is derived from TOPdir, set to $(HOME)/hpl.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPLlib: no blank after the path, make keeps trailing blanks in values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty here: CC/LINKER below are mpicc/mpif77 (presumably supplying MPI paths).
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No extra include path; links libf77blas.a and libatlas.a from LAdir.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: the [default] option of each of the three categories above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES gathers the header search flags, HPL_LIBS the libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No option is set here, so every default listed above applies.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77/C options, HPL_OPTS and the include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS adds optimization and warning flags.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here LINKER is mpif77 and LINKFLAGS simply reuses CCFLAGS.
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is echo: $(RANLIB) only prints its arguments, no ranlib is run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL
new file mode 100644
index 000000000..0f690a1b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations; MKDIR has no -p, LN_S no -f.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; selects the per-arch subdirs.
+ARCH         = Linux_PII_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at $(HOME)/hpl; edit TOPdir if HPL is located elsewhere.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Value must end at ".a": make keeps trailing blanks in variable values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Hard-coded MPICH location; MPlib is the static libmpich.a archive.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Hard-coded VSIPL location; LAlib is the static libvsip_c.a archive.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: HPL_OPTS below selects VSIPL, not a Fortran 77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list assembled from the settings above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Here: call the vsip library instead of the Fortran 77 BLAS.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# All defines and include flags, bundled for CCNOOPT and CCFLAGS below.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is the defines/includes only; CCFLAGS adds optimisation.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the link step uses g77 and reuses the full compile flags.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB = echo: presumably makes the ranlib step a harmless print.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
new file mode 100644
index 000000000..fee265e46
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations; MKDIR has no -p, LN_S no -f.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; selects the per-arch subdirs.
+ARCH         = Linux_PII_VSIPL_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at $(HOME)/hpl; edit TOPdir if HPL is located elsewhere.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Value must end at ".a": make keeps trailing blanks in variable values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All empty; presumably the mpicc/mpif77 wrappers below supply MPI.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Hard-coded VSIPL location; LAlib is the static libvsip_c.a archive.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: HPL_OPTS below selects VSIPL, not a Fortran 77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list assembled from the settings above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Here: call the vsip library instead of the Fortran 77 BLAS.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# All defines and include flags, bundled for CCNOOPT and CCFLAGS below.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is the defines/includes only; CCFLAGS adds optimisation.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the link step uses mpif77 and reuses the full compile flags.
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB = echo: presumably makes the ranlib step a harmless print.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate
new file mode 100644
index 000000000..d1ce69b64
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations; LN_S uses -f, MKDIR uses -p.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Matches this file's Make.<ARCH> suffix; selects the per-arch subdirs.
+ARCH         = MacOSX_Accelerate
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at $(HOME)/hpl; edit TOPdir if HPL is located elsewhere.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Value must end at ".a": make keeps trailing blanks in variable values.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No directory needed: LAlib is a linker flag naming a framework.
+LAdir        =
+LAinc        =
+LAlib        = -framework Accelerate
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Spells out the three options marked [default] above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# No LAinc/MPinc in the include flags; MPlib is not defined in this file.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Here: detailed timers on; HPL_PROGRESS_REPORT is not listed above.
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+# All defines and include flags, bundled for CCNOOPT and CCFLAGS below.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is the defines/includes only; CCFLAGS adds -O3.
+CC       = mpicc-openmpi-mp
+CCNOOPT  = $(HPL_DEFS)
+CCFLAGS  = $(HPL_DEFS) -O3
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the C compiler command itself is used as the linker.
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+# Unlike the echo placeholder in other setups, RANLIB is a real ranlib.
+ARCHIVER     = ar
+ARFLAGS      = cr
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS
new file mode 100644
index 000000000..628f2c152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic file commands, each kept in its own variable (RM adds -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWR2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin/ and lib/ get a per-$(ARCH) subdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All left empty. NOTE(review): presumably mpcc_r/mpxlf_r find MPI - confirm.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only LAlib is set (-lesslp2); LAdir and LAinc are left empty.
+LAdir        =
+LAinc        =
+LAlib        = -lesslp2
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: -DNoChange (IBM RS6000 naming), int INTEGER, Sun-style strings.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path: HPL headers, their $(ARCH) subdir, then LA and MPI flags.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Nothing is enabled here, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS = F77-interface defines + compile options + include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS appends -O3 and the -q options.
+CC           = mpcc_r
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1
+# Linking uses a separate driver (mpxlf_r) and reuses CCFLAGS.
+LINKER       = mpxlf_r
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# RANLIB is set to echo, so presumably no ranlib pass is actually run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS
new file mode 100644
index 000000000..bba468803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic file commands, each kept in its own variable (RM adds -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWR3_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin/ and lib/ get a per-$(ARCH) subdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# All left empty. NOTE(review): CC/LINKER are xlc/xlf - confirm MPI is found.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only LAlib is set (-lessl); LAdir and LAinc are left empty.
+LAdir        =
+LAinc        =
+LAlib        = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: -DNoChange (IBM RS6000 naming), int INTEGER, Sun-style strings.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path: HPL headers, their $(ARCH) subdir, then LA and MPI flags.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Nothing is enabled here, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS = F77-interface defines + compile options + include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS appends -O3 and the -q options.
+CC           = /usr/vac/bin/xlc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt
+# Linking uses a separate driver (/usr/bin/xlf) and reuses CCFLAGS.
+LINKER       = /usr/bin/xlf
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# RANLIB is set to echo, so presumably no ranlib pass is actually run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS
new file mode 100644
index 000000000..2a0fb2ec6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+# Basic file commands, each kept in its own variable (RM adds -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWRPC_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin/ and lib/ get a per-$(ARCH) subdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI headers and the static libmpich.a are both taken from $(MPdir).
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only LAlib is set (-lessl); LAdir and LAinc are left empty.
+LAdir        =
+LAinc        =
+LAlib        = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: -DNoChange (IBM RS6000 naming), int INTEGER, Sun-style strings.
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include path: HPL headers, their $(ARCH) subdir, then LA and MPI flags.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Nothing is enabled here, so the defaults listed above apply.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS = F77-interface defines + compile options + include flags.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries only HPL_DEFS; CCFLAGS appends -O3 and the -q options.
+CC           = mpcc_r
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1
+# Linking uses a separate driver (mpxlf_r) and reuses CCFLAGS.
+LINKER       = mpxlf_r
+LINKFLAGS    = -bmaxdata:0x70000000 $(CCFLAGS)
+# RANLIB is set to echo, so presumably no ranlib pass is actually run.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
new file mode 100644
index 000000000..1ade2d8aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe commands.
+SHELL        = /bin/sh
+# Command names for basic file operations; RM carries the -f flag.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; reused below in the bin, lib and include paths.
+ARCH         = SUN4SOL2-g_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is derived from TOPdir, set relative to $(HOME).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path. NOTE(review): the value below ends in a trailing space.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: libmpich.a and headers from a solaris/ch_p4 tree below MPdir.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No directory or include path: LAlib is the -xlic_lib=sunperf option only.
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: the option marked [default] in each category above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include search path and library list, built from the variables above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Left empty here, so none of the options listed above is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 defines, the options and the include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT and CCFLAGS both carry HPL_DEFS; CCFLAGS adds -g.
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+# LINKER wraps f77 in "purify -best-effort"; LINKFLAGS is empty.
+LINKER       = purify -best-effort f77
+LINKFLAGS    =
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
new file mode 100644
index 000000000..1cbb371fd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe commands.
+SHELL        = /bin/sh
+# Command names for basic file operations; RM carries the -f flag.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; reused below in the bin, lib and include paths.
+ARCH         = SUN4SOL2-g_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is derived from TOPdir, set relative to $(HOME).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path. NOTE(review): the value below ends in a trailing space.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: libmpich.a and headers from a solaris/ch_p4 tree below MPdir.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Here: headers and libvsip_c.a are taken from a tree under $(HOME)/local.
+LAdir        = $(HOME)/local/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: this file sets -DHPL_CALL_VSIPL in HPL_OPTS further down.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include search path and library list, built from the variables above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Here: only -DHPL_CALL_VSIPL from the option list above is enabled.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 defines, the options and the include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT and CCFLAGS both carry HPL_DEFS; CCFLAGS adds -g.
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+# LINKER wraps cc in "purify -best-effort"; LINKFLAGS is empty.
+LINKER       = purify -best-effort cc
+LINKFLAGS    =
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
new file mode 100644
index 000000000..a1d5d6315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe commands.
+SHELL        = /bin/sh
+# Command names for basic file operations; RM carries the -f flag.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; reused below in the bin, lib and include paths.
+ARCH         = SUN4SOL2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is derived from TOPdir, set relative to $(HOME).
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path. NOTE(review): the value below ends in a trailing space.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here: libmpich.a and headers from a solaris/ch_p4 tree below MPdir.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No directory or include path: LAlib is the -xlic_lib=sunperf option only.
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen here: the option marked [default] in each category above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include search path and library list, built from the variables above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Left empty here, so none of the options listed above is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 defines, the options and the include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCFLAGS = HPL_DEFS plus tuning options (value ends in a trailing space).
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa 
+# LINKFLAGS repeats -dalign, -native, -xarch and -xO5 from CCFLAGS.
+LINKER       = f77
+LINKFLAGS    = -dalign -native -xarch=v8plusa -xO5
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS
new file mode 100644
index 000000000..fe12cae9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS
@@ -0,0 +1,187 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = T3E_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files;  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files;  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        =
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of a structure is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \
+               -DCRAY_BLAS -DHPL_USE_TIMES
+#
+# When UpCase is defined,  CRAY_BLAS remaps the BLAS routines called by
+# HPL to their S-prefixed names  (for example,  SGEMM replaces DGEMM).
+# In the Cray programming environment,  the default INTEGER  and  REAL
+# size is 64 bits.  This is reflected  in the Cray Scientific  Library
+# as well, so SGEMM is the 64-bit matrix multiply.
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -O3
+#
+LINKER       = f77
+LINKFLAGS    = -O3,unroll2,pipeline2
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS
new file mode 100644
index 000000000..3d8062061
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Tru64_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files;  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include
+MPlib        = $(MPdir)/alpha/ch_p4/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files;  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of a structure is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -arch host -tune host -std -O5
+#
+LINKER       = f77
+LINKFLAGS    = -nofor_main -O5 -arch host -tune host
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan
new file mode 100644
index 000000000..f9550412c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Tru64_FBLAS_elan
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files;  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        = -lmpi -lelan
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files;  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of a structure is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -arch host -tune host -std -O5
+#
+LINKER       = f77
+LINKFLAGS    = -nofor_main -O5 -arch host -tune host
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in
new file mode 100644
index 000000000..8cbbd8242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell used by make; @SHELL@ is a placeholder (make_generic puts /bin/sh).
+SHELL        = @SHELL@
+# Basic file utilities; each @NAME@ token is substituted by make_generic.
+CD           = @CD@
+CP           = @CP@
+LN_S         = @LN_S@
+MKDIR        = @MKDIR@
+RM           = @RM@
+TOUCH        = @TOUCH@
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Placeholder; make_generic substitutes UNKNOWN for @ARCH@.
+ARCH         = @ARCH@
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; keep the value free of trailing blanks.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Placeholders; make_generic replaces all three with empty strings.
+MPdir        = @MPDIR@
+MPinc        = @MPINC@
+MPlib        = @MPLIB@
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be 
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Placeholders; make_generic empties LAdir and LAinc and sets LAlib to -lblas.
+LAdir        = @LADIR@
+LAinc        = @LAINC@
+LAlib        = @LALIB@
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Placeholder; make_generic substitutes the three [default] options above.
+F2CDEFS      = @F2CDEFS@
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and link libraries assembled from the variables defined above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Combined definitions and include flags, reused by CCNOOPT and CCFLAGS below.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT and CCFLAGS both start from $(HPL_DEFS).
+CC           = @CC@
+CCNOOPT      = $(HPL_DEFS) @CCNOOPT@
+CCFLAGS      = $(HPL_DEFS) @CCFLAGS@
+# Link driver and its flags (make_generic: mpif77, no extra flags).
+LINKER       = @LINKER@
+LINKFLAGS    = @LINKFLAGS@
+# Archive tools (make_generic: ar with flag r; RANLIB becomes echo).
+ARCHIVER     = @ARCHIVER@
+ARFLAGS      = @ARFLAGS@
+RANLIB       = @RANLIB@
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic
new file mode 100644
index 000000000..68cf74a3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+#
+# Configure script that creates  Make.UNKNOWN  from  Make.UNKNOWN.in  for
+# the HPL distribution,  so that users without a real Unix system get a
+# generic  Make.UNKNOWN  to edit for their needs.  It substitutes mostly
+# pathless system program names and commonly used option values for the
+# @NAME@ placeholders.  Run it from the directory holding Make.UNKNOWN.in.
+# If sed fails, the partial Make.UNKNOWN is removed and the exit status is 1.
+########################################################################
+#
+sed -e 's%@SHELL@%/bin/sh%' \
+    -e 's%@CD@%cd%' \
+    -e 's%@CP@%cp%' \
+    -e 's%@LN_S@%ln -s%' \
+    -e 's%@MKDIR@%mkdir%' \
+    -e 's%@RM@%/bin/rm -f%' \
+    -e 's%@TOUCH@%touch%' \
+    -e 's%@ARCH@%UNKNOWN%' \
+    -e 's%@CC@%mpicc%' \
+    -e 's%@CCNOOPT@%%' \
+    -e 's%@CCFLAGS@%%' \
+    -e 's%@LINKER@%mpif77%' \
+    -e 's%@LINKFLAGS@%%' \
+    -e 's%@ARCHIVER@%ar%' \
+    -e 's%@ARFLAGS@%r%' \
+    -e 's%@RANLIB@%echo%' \
+    -e 's%@MPDIR@%%' \
+    -e 's%@MPINC@%%' \
+    -e 's%@MPLIB@%%' \
+    -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \
+    -e 's%@LADIR@%%' \
+    -e 's%@LAINC@%%' \
+    -e 's%@LALIB@%-lblas%' \
+    Make.UNKNOWN.in > Make.UNKNOWN || { rm -f Make.UNKNOWN; exit 1; }
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am
new file mode 100644
index 000000000..2e6d3d454
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am
@@ -0,0 +1,42 @@
+# Add $(top_srcdir)/../include to the preprocessor search path.
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+
+# Static library built from this directory and installed into $(libdir).
+lib_LIBRARIES = libhpl.a
+
+# Sources of libhpl.a, grouped by subdirectory.  Every file is listed exactly
+# once: repeated entries would only repeat the same object in the generated
+# object list, so keep the list duplicate-free when adding files.
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c \
+blas/HPL_idamax.c blas/HPL_dgemv.c blas/HPL_daxpy.c \
+blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c \
+comm/HPL_1ring.c comm/HPL_1rinM.c comm/HPL_2rinM.c \
+comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c \
+grid/HPL_max.c grid/HPL_min.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c \
+panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c pauxil/HPL_indxg2p.c \
+pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_pabort.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c pfact/HPL_pdpanrlT.c \
+pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c \
+pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c pfact/HPL_pdmxswp.c \
+pfact/HPL_pdfact.c pfact/HPL_dlocmax.c pfact/HPL_pdpancrT.c \
+pfact/HPL_pdpancrN.c pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in
new file mode 100644
index 000000000..139ecbad0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in
@@ -0,0 +1,1355 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LIBRARIES = $(lib_LIBRARIES)
+AR = ar
+ARFLAGS = cru
+AM_V_AR = $(am__v_AR_@AM_V@)
+am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
+am__v_AR_0 = @echo "  AR      " $@;
+am__v_AR_1 = 
+libhpl_a_AR = $(AR) $(ARFLAGS)
+libhpl_a_LIBADD =
+am__dirstamp = $(am__leading_dot)dirstamp
+am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \
+	auxil/HPL_fprintf.$(OBJEXT) auxil/HPL_dlacpy.$(OBJEXT) \
+	auxil/HPL_dlamch.$(OBJEXT) blas/HPL_dscal.$(OBJEXT) \
+	blas/HPL_dtrsm.$(OBJEXT) blas/HPL_dtrsv.$(OBJEXT) \
+	blas/HPL_idamax.$(OBJEXT) blas/HPL_dgemv.$(OBJEXT) \
+	blas/HPL_dscal.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \
+	blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \
+	blas/HPL_dgemv.$(OBJEXT) blas/HPL_dger.$(OBJEXT) \
+	comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \
+	comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \
+	comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \
+	comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \
+	comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \
+	comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \
+	comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \
+	grid/HPL_sum.$(OBJEXT) grid/HPL_grid_info.$(OBJEXT) \
+	grid/HPL_grid_init.$(OBJEXT) grid/HPL_all_reduce.$(OBJEXT) \
+	grid/HPL_broadcast.$(OBJEXT) grid/HPL_grid_exit.$(OBJEXT) \
+	grid/HPL_max.$(OBJEXT) grid/HPL_min.$(OBJEXT) \
+	grid/HPL_all_reduce.$(OBJEXT) grid/HPL_barrier.$(OBJEXT) \
+	panel/HPL_pdpanel_disp.$(OBJEXT) \
+	panel/HPL_pdpanel_free.$(OBJEXT) \
+	panel/HPL_pdpanel_init.$(OBJEXT) \
+	panel/HPL_pdpanel_new.$(OBJEXT) pauxil/HPL_pdlamch.$(OBJEXT) \
+	pauxil/HPL_pdlange.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \
+	pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \
+	pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \
+	pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \
+	pauxil/HPL_dlaswp02N.$(OBJEXT) pauxil/HPL_dlaswp03N.$(OBJEXT) \
+	pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \
+	pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \
+	pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \
+	pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \
+	pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT) \
+	pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \
+	pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \
+	pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \
+	pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \
+	pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \
+	pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \
+	pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \
+	pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocmax.$(OBJEXT) \
+	pfact/HPL_dlocswpN.$(OBJEXT) pfact/HPL_dlocswpT.$(OBJEXT) \
+	pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdpanllN.$(OBJEXT) \
+	pfact/HPL_pdpanllT.$(OBJEXT) pfact/HPL_pdpanrlN.$(OBJEXT) \
+	pfact/HPL_pdpanrlT.$(OBJEXT) pfact/HPL_pdrpancrN.$(OBJEXT) \
+	pfact/HPL_pdrpancrT.$(OBJEXT) pfact/HPL_pdrpanllN.$(OBJEXT) \
+	pfact/HPL_pdrpanllT.$(OBJEXT) pfact/HPL_pdrpanrlN.$(OBJEXT) \
+	pfact/HPL_pdrpanrlT.$(OBJEXT) pauxil/HPL_pabort.$(OBJEXT) \
+	pauxil/HPL_pdlamch.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdupdateNN.$(OBJEXT) \
+	pgesv/HPL_pdupdateNT.$(OBJEXT) pgesv/HPL_pdupdateTN.$(OBJEXT) \
+	pgesv/HPL_pdupdateTT.$(OBJEXT) pgesv/HPL_equil.$(OBJEXT) \
+	pgesv/HPL_pipid.$(OBJEXT) pgesv/HPL_plindx0.$(OBJEXT) \
+	pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_plindx1.$(OBJEXT) \
+	pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \
+	pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \
+	pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \
+	pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \
+	pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \
+	pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT)
+libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \
+	auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \
+	auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \
+	blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \
+	blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \
+	blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \
+	blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \
+	comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \
+	comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \
+	comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \
+	comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \
+	comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \
+	comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \
+	comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \
+	grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \
+	grid/$(DEPDIR)/HPL_grid_exit.Po \
+	grid/$(DEPDIR)/HPL_grid_info.Po \
+	grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \
+	grid/$(DEPDIR)/HPL_min.Po grid/$(DEPDIR)/HPL_reduce.Po \
+	grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_free.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_init.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_new.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \
+	pauxil/$(DEPDIR)/HPL_indxg2p.Po \
+	pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \
+	pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \
+	pauxil/$(DEPDIR)/HPL_pdlamch.Po \
+	pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \
+	pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \
+	pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \
+	pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \
+	pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \
+	pgesv/$(DEPDIR)/HPL_pdtrsv.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \
+	pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \
+	pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \
+	pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \
+	pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(libhpl_a_SOURCES)
+DIST_SOURCES = $(libhpl_a_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+# Tool names, flags, package metadata and installation directories.  Every
+# value in this list is an at-sign-delimited placeholder: this file is a
+# template, and the "Makefile" rule further down regenerates Makefile from it
+# by running config.status, which presumably fills the placeholders in
+# (confirm against the project's configure script).
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+# Extra include path: the "include" directory one level above the top
+# source directory.
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+# The static library built here and installed by install-libLIBRARIES.
+lib_LIBRARIES = libhpl.a
+# Sources of libhpl.a, taken from the auxil, blas, comm, grid, panel,
+# pauxil, pfact and pgesv subdirectories.
+# NOTE(review): several files are listed more than once (for example
+# blas/HPL_dscal.c, grid/HPL_all_reduce.c, pauxil/HPL_numrocI.c and
+# pgesv/HPL_pdgesv0.c), and part of the pfact group is repeated in full.
+# Any clean-up presumably belongs in Makefile.am, from which the
+# $(srcdir)/Makefile.in rule regenerates this file.
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
+blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \
+blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
+comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
+grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \
+pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \
+pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \
+pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \
+pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \
+pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \
+pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
+
+# Top-level build target; it only delegates to all-am.
+all: all-am
+
+# Clear the suffix list, then register only the suffixes used by the .c.o
+# and .c.obj rules of this file.
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+# Regenerate this template from Makefile.am.  For each newer prerequisite
+# that is one of the configure dependencies, run "am--refresh" in the top
+# build directory instead: exit 0 if the target exists afterwards, leave the
+# loop if it does not, and exit 1 if the refresh failed.  Otherwise run
+# "$(AUTOMAKE) --gnu src/Makefile" from the top source directory.
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/Makefile
+# Regenerate Makefile from the template.  When config.status is among the
+# newer prerequisites, run "am--refresh" in the top build directory;
+# otherwise rerun config.status for this Makefile only.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+# config.status, configure and aclocal.m4 are all rebuilt through the
+# "am--refresh" target of the top build directory.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+# Rule with neither prerequisites nor recipe.
+# NOTE(review): presumably present so that a missing m4 dependency does not
+# abort make -- confirm.
+$(am__aclocal_m4_deps):
+# Install the static libraries.  First recipe: collect the libraries that
+# exist as regular files, create $(DESTDIR)$(libdir) and copy them there
+# with $(INSTALL_DATA); nothing happens when libdir is empty or no library
+# file exists.  Second recipe: run $(RANLIB) on each library from inside the
+# destination directory.  The shell variable f is presumably set by
+# $(am__strip_dir), which is defined outside this part of the file.
+install-libLIBRARIES: $(lib_LIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; }
+	@$(POST_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  if test -f $$p; then \
+	    $(am__strip_dir) \
+	    echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \
+	    ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \
+	  else :; fi; \
+	done
+
+# Remove the installed libraries: the directory part of each library name is
+# stripped with sed, and the resulting file names are handed, together with
+# the destination directory, to $(am__uninstall_files_from_dir).
+uninstall-libLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir)
+
+# Delete the built libraries from the build directory; errors are ignored.
+clean-libLIBRARIES:
+	-test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
+# Stamp files for auxil/ and its dependency subdirectory: each recipe
+# creates the directory and then writes an empty stamp file.  Every auxil
+# object below lists both stamps as prerequisites.
+auxil/$(am__dirstamp):
+	@$(MKDIR_P) auxil
+	@: > auxil/$(am__dirstamp)
+auxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) auxil/$(DEPDIR)
+	@: > auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlatcpy.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_fprintf.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlacpy.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+auxil/HPL_dlamch.$(OBJEXT): auxil/$(am__dirstamp) \
+	auxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for blas/ and its dependency subdirectory: each recipe creates
+# the directory and then writes an empty stamp file.  Every blas object
+# below lists both stamps as prerequisites.
+blas/$(am__dirstamp):
+	@$(MKDIR_P) blas
+	@: > blas/$(am__dirstamp)
+blas/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) blas/$(DEPDIR)
+	@: > blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dscal.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dtrsm.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dtrsv.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_idamax.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dgemv.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_daxpy.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dcopy.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dgemm.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+blas/HPL_dger.$(OBJEXT): blas/$(am__dirstamp) \
+	blas/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for comm/ and its dependency subdirectory: each recipe creates
+# the directory and then writes an empty stamp file.  Every comm object
+# below lists both stamps as prerequisites.
+comm/$(am__dirstamp):
+	@$(MKDIR_P) comm
+	@: > comm/$(am__dirstamp)
+comm/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) comm/$(DEPDIR)
+	@: > comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_sdrv.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_send.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_recv.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_bcast.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_binit.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_2rinM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_2ring.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_blonM.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+comm/HPL_packL.$(OBJEXT): comm/$(am__dirstamp) \
+	comm/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for grid/ and its dependency subdirectory: each recipe creates
+# the directory and then writes an empty stamp file.  Every grid object
+# below lists both stamps as prerequisites.
+grid/$(am__dirstamp):
+	@$(MKDIR_P) grid
+	@: > grid/$(am__dirstamp)
+grid/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) grid/$(DEPDIR)
+	@: > grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_reduce.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_sum.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_info.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_init.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_all_reduce.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_broadcast.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_grid_exit.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_max.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_min.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+grid/HPL_barrier.$(OBJEXT): grid/$(am__dirstamp) \
+	grid/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for panel/ and its dependency subdirectory: each recipe
+# creates the directory and then writes an empty stamp file.  Every panel
+# object below lists both stamps as prerequisites.
+panel/$(am__dirstamp):
+	@$(MKDIR_P) panel
+	@: > panel/$(am__dirstamp)
+panel/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) panel/$(DEPDIR)
+	@: > panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_disp.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_free.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_init.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+panel/HPL_pdpanel_new.$(OBJEXT): panel/$(am__dirstamp) \
+	panel/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for pauxil/ and its dependency subdirectory: each recipe
+# creates the directory and then writes an empty stamp file.  Every pauxil
+# object below lists both stamps as prerequisites.
+pauxil/$(am__dirstamp):
+	@$(MKDIR_P) pauxil
+	@: > pauxil/$(am__dirstamp)
+pauxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pauxil/$(DEPDIR)
+	@: > pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pdlamch.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pdlange.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_indxg2p.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_numroc.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_numrocI.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp00N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp01N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp01T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp02N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp03N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp03T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp04N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp04T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp05N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp05T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp06N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp06T.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_infog2l.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_dlaswp10N.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+pauxil/HPL_pwarn.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for pfact/ and its dependency subdirectory: each recipe
+# creates the directory and then writes an empty stamp file.  Every pfact
+# object below lists both stamps as prerequisites.
+pfact/$(am__dirstamp):
+	@$(MKDIR_P) pfact
+	@: > pfact/$(am__dirstamp)
+pfact/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pfact/$(DEPDIR)
+	@: > pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanllN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanllT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpancrN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpancrT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanllN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanllT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdrpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdmxswp.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdfact.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocmax.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpancrT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_pdpancrN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocswpN.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+pfact/HPL_dlocswpT.$(OBJEXT): pfact/$(am__dirstamp) \
+	pfact/$(DEPDIR)/$(am__dirstamp)
+# This pauxil object appears after the pfact objects, matching its position
+# in the source list; it depends on the pauxil stamps, not the pfact ones.
+pauxil/HPL_pabort.$(OBJEXT): pauxil/$(am__dirstamp) \
+	pauxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files for pgesv/ and its dependency subdirectory: each recipe
+# creates the directory and then writes an empty stamp file.  Every pgesv
+# object below lists both stamps as prerequisites.
+pgesv/$(am__dirstamp):
+	@$(MKDIR_P) pgesv
+	@: > pgesv/$(am__dirstamp)
+pgesv/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pgesv/$(DEPDIR)
+	@: > pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesv0.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesv.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesvK1.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdgesvK2.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateNN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateNT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateTN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdupdateTT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_equil.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pipid.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx0.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx10.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_plindx1.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_rollN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_rollT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_spreadN.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_spreadT.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp00N.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp00T.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp01N.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdlaswp01T.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_pdtrsv.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_logsort.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+pgesv/HPL_perm.$(OBJEXT): pgesv/$(am__dirstamp) \
+	pgesv/$(DEPDIR)/$(am__dirstamp)
+
+# Build the static library: delete any existing archive (errors ignored),
+# create it from the object list plus $(libhpl_a_LIBADD) with the archiver
+# command $(libhpl_a_AR), then run $(RANLIB) on the result.
+libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) $(EXTRA_libhpl_a_DEPENDENCIES) 
+	$(AM_V_at)-rm -f libhpl.a
+	$(AM_V_AR)$(libhpl_a_AR) libhpl.a $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD)
+	$(AM_V_at)$(RANLIB) libhpl.a
+
+# Remove object files from the build directory and from each of the eight
+# source subdirectories; every rm is prefixed with "-" so errors are ignored.
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f auxil/*.$(OBJEXT)
+	-rm -f blas/*.$(OBJEXT)
+	-rm -f comm/*.$(OBJEXT)
+	-rm -f grid/*.$(OBJEXT)
+	-rm -f panel/*.$(OBJEXT)
+	-rm -f pauxil/*.$(OBJEXT)
+	-rm -f pfact/*.$(OBJEXT)
+	-rm -f pgesv/*.$(OBJEXT)
+
+# Remove *.tab.c files from the build directory.
+distclean-compile:
+	-rm -f *.tab.c
+
+# One line per object, naming its .Po dependency fragment inside the
+# matching $(DEPDIR) subdirectory.  The conditional prefix and the
+# include/quote placeholders on each line are presumably resolved when the
+# Makefile is generated, turning each line into an include directive
+# (confirm against the automake dependency-tracking documentation).
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker
+
+# Create a placeholder for each dependency file named in
+# $(am__depfiles_remade): make its directory, write the line "# dummy" to a
+# temporary "-t" file, then move that file onto the target.
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+# Aggregate target that builds all of those placeholder files.
+am--depfiles: $(am__depfiles_remade)
+
+# Suffix rule compiling a .c file into a .o object.  The recipe lines carry
+# conditional prefixes, so only some of them end up active.  The first group
+# compiles while writing dependency data to a .Tpo file under $(DEPDIR) and
+# renames it to .Po once the compile succeeds.  The remaining lines run
+# $(COMPILE), preceded by a $(depcomp) wrapper when the AMDEP prefix is
+# enabled (presumably chosen at configure time -- confirm).
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+# Same rule for .obj objects; the only differences are the .obj suffix in
+# the sed expression and that the source path is passed through
+# $(CYGPATH_W) before it reaches the compiler.
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+# Build an ID database by running mkid on the de-duplicated tagged files.
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+# "tags" and "TAGS" are aliases that lead to tags-am.
+tags: tags-am
+TAGS: tags
+
+# Run $(ETAGS) on the de-duplicated tagged files.  Nothing is run when
+# ETAGS_ARGS, the shell positional parameters and the file list are all
+# empty.  Positional parameters, when present, are passed ahead of the
+# file list.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+# "ctags" and "CTAGS" are aliases that lead to ctags-am.
+ctags: ctags-am
+
+CTAGS: ctags
+# Run $(CTAGS) on the de-duplicated tagged files, unless both CTAGS_ARGS and
+# the file list are empty.
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+# Run "gtags -i" from the top source directory, passing the absolute path of
+# the top build directory (captured with pwd) as the final argument.
+# $(am__cd) is defined outside this part of the file.
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+# Alias for cscopelist-am.
+cscopelist: cscopelist-am
+
+# Append one path per tagged file to $(top_builddir)/cscope.files.  A file
+# found relative to the current directory is written with a $(subdir)/
+# prefix; any other file is written relative to the source directory, which
+# is used as is when absolute and prefixed with $(subdir)/ otherwise.
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+# Delete the tag and ID database files from the build directory; errors are
+# ignored.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Entry point: once $(BUILT_SOURCES) are up to date, run distdir-am in a
+# sub-make.
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+# Copy every entry of $(DISTFILES) into $(distdir).  The recipe:
+#   1. strips a leading source-directory prefix from each name and rewrites
+#      a leading top-source-directory prefix to $(top_builddir)/;
+#   2. creates the needed subdirectories under $(distdir);
+#   3. takes each entry from the current directory when it exists there and
+#      from $(srcdir) otherwise;
+#   4. copies directories recursively with "cp -fpR", giving the owner
+#      read/write/search permission on copied directories that lack it;
+#   5. copies a plain file only when it is not already in $(distdir).
+# Any failed copy aborts the recipe with exit status 1.
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f auxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f auxil/$(am__dirstamp)
+	-rm -f blas/$(DEPDIR)/$(am__dirstamp)
+	-rm -f blas/$(am__dirstamp)
+	-rm -f comm/$(DEPDIR)/$(am__dirstamp)
+	-rm -f comm/$(am__dirstamp)
+	-rm -f grid/$(DEPDIR)/$(am__dirstamp)
+	-rm -f grid/$(am__dirstamp)
+	-rm -f panel/$(DEPDIR)/$(am__dirstamp)
+	-rm -f panel/$(am__dirstamp)
+	-rm -f pauxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pauxil/$(am__dirstamp)
+	-rm -f pfact/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pfact/$(am__dirstamp)
+	-rm -f pgesv/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pgesv/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am \
+	install-libLIBRARIES install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am uninstall-libLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c
new file mode 100644
index 000000000..bf0c5e727
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c
@@ -0,0 +1,129 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_abort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_abort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_abort formats an error message, displays it on stderr and halts
+ * execution with a failure exit status.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements (the message is bounded by sizeof( cline )) ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display the error message (truncated above if it did not fit in cline)
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR in function", SRNAME, cline );
+   else
+      HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+   exit( EXIT_FAILURE );        /* an abort must not report success */
+/*
+ * End of HPL_abort
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c
new file mode 100644
index 000000000..ec71180eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c
@@ -0,0 +1,343 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LACPY_M_DEPTH
+ * #define    HPL_LACPY_M_DEPTH       32
+ * #define    HPL_LACPY_LOG2_M_DEPTH   5
+ * #endif
+ * #ifndef HPL_LACPY_N_DEPTH
+ * #define    HPL_LACPY_N_DEPTH        4
+ * #define    HPL_LACPY_LOG2_N_DEPTH   2
+ * #endif
+ */
+#ifndef HPL_LACPY_M_DEPTH
+#define    HPL_LACPY_M_DEPTH        4
+#define    HPL_LACPY_LOG2_M_DEPTH   2
+#endif
+#ifndef HPL_LACPY_N_DEPTH
+#define    HPL_LACPY_N_DEPTH        2
+#define    HPL_LACPY_LOG2_N_DEPTH   1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlacpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlacpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlacpy copies an array A into an array B.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of rows of the arrays A and
+ *         B. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies  the number of columns of the arrays A
+ *         and B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables (A0/B0 walk the columns in both code paths) .. */
+#ifdef HPL_LACPY_USE_COPY
+   const double               * A0 = A;
+   double                     * B0 = B;
+   register int               j;
+#else
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+   const double               * A0 = A,              * A1 = A +     LDA;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+   const double               * A0 = A,              * A1 = A +     LDA,
+                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = ( (unsigned int)(LDA) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incA0 = (unsigned int)(LDA) - M,
+                              incB0 = (unsigned int)(LDB) - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LACPY_USE_COPY
+   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
+                                      HPL_LACPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
+                                      HPL_LACPY_LOG2_N_DEPTH );
+/* Full blocks of HPL_LACPY_N_DEPTH columns: unrolled rows, then leftovers */
+   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 0] = A0[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
+#endif
+
+#if ( HPL_LACPY_M_DEPTH >  1 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 1] = A0[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
+         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
+         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
+         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
+         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
+         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
+         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
+         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
+         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
+         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
+         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
+         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
+         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
+         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
+         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
+         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
+#endif
+
+#endif
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
+         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
+#endif
+      }
+/* Leftover M - mu rows of the current block of columns */
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         *B0 = *A0; B0++; A0++;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
+#endif
+      }
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
+#endif
+   }
+/* Leftover N - nu columns are copied one at a time through A0 / B0 */
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
+           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
+      {
+         B0[ 0] = A0[ 0];
+#if ( HPL_LACPY_M_DEPTH >  1 )
+         B0[ 1] = A0[ 1];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#endif
+      }
+      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlacpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c
new file mode 100644
index 000000000..c685f0d5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c
@@ -0,0 +1,876 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Forward declarations of the file-local (static) helper functions
+ * ---------------------------------------------------------------------
+ */
+static void     HPL_dlamc1 STDC_ARGS( (
+   int * BETA,      int * T,         int * RND,       int * IEEE1
+) );
+static void     HPL_dlamc2 STDC_ARGS( (
+   int * BETA,      int * T,         int * RND,       double * EPS,
+   int * EMIN,      double * RMIN,   int * EMAX,      double * RMAX
+) );
+static double   HPL_dlamc3 STDC_ARGS( (
+   const double A,  const double B
+) );
+static void     HPL_dlamc4 STDC_ARGS( (
+   int * EMIN,      const double START,               const int BASE
+) );
+static void     HPL_dlamc5 STDC_ARGS( (
+   const int BETA,  const int P,     const int EMIN,  const int IEEE,
+   int * EMAX,      double * RMAX
+) );
+static double   HPL_dipow STDC_ARGS( (
+   const double X,  const int N
+) );
+
+#ifdef STDC_HEADERS
+double HPL_dlamch
+(
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_dlamch
+( CMACH )
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamch  returns  one of  the  floating-point  characteristics  of
+ * the machine, as selected by CMACH:  the relative  machine  precision
+ * (eps),  the safe minimum (sfmin),  the base (base),  eps*base (prec),
+ * the number of  (base)  digits in the mantissa (t),  the rounding flag
+ * of addition (rnd, 1.0 or 0.0), the minimum exponent (emin), the under-
+ * flow threshold (rmin),  the largest exponent (emax) and the  overflow
+ * threshold (rmax).  Everything is computed  during the first call  and
+ * kept in static variables; an unrecognized CMACH yields eps.
+ *
+ * Notes
+ * =====
+ *
+ * This function  is a manual  C translation  of the  Fortran 77  LAPACK
+ * auxiliary function  dlamch.f  (version 2.0 -- 1992),  which  in  turn
+ * builds on the function ENVRON by Malcolm and on suggestions by Gentle-
+ * man and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * CMACH   (local input)                 const HPL_T_MACH
+ *         Selects the constant returned by HPL_dlamch:
+ *            HPL_MACH_EPS    returns eps  (also the fallback),
+ *            HPL_MACH_SFMIN  returns sfmin,
+ *            HPL_MACH_BASE   returns base,
+ *            HPL_MACH_PREC   returns prec,
+ *            HPL_MACH_MLEN   returns t,
+ *            HPL_MACH_RND    returns rnd,
+ *            HPL_MACH_EMIN   returns emin,
+ *            HPL_MACH_RMIN   returns rmin,
+ *            HPL_MACH_EMAX   returns emax,
+ *            HPL_MACH_RMAX   returns rmax,
+ *
+ *         with
+ *
+ *            eps   : relative machine precision,
+ *            sfmin : safe minimum (1 / sfmin does not overflow),
+ *            base  : base of the machine,
+ *            prec  : eps*base,
+ *            t     : number of (base) digits in the mantissa,
+ *            rnd   : 1.0 if rounding occurs in addition, else 0.0,
+ *            emin  : minimum exponent before (gradual) underflow,
+ *            rmin  : underflow threshold,
+ *            emax  : largest exponent before overflow,
+ *            rmax  : overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   static int                 first=1;
+   static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
+                              rmax, prec;
+   int                        beta=0, imax=0, imin=0, it=0, lrnd=0;
+   double                     small, value;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0;
+      HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax );
+      base  = (double)(beta); t    = (double)(it);
+      emin  = (double)(imin); emax = (double)(imax);
+      eps   = HPL_dipow( base, 1 - it );
+      if( lrnd != 0 ) { rnd = HPL_rone;  eps = eps / HPL_rtwo; }
+      else            { rnd = HPL_rzero;                       }
+      prec  = eps * base;
+/*
+ * Start from rmin;  when 1/rmax is at least as large, take 1/rmax plus
+ * a bit instead, so that rounding cannot make 1/sfmin overflow.
+ */
+      small = HPL_rone / rmax;
+      sfmin = ( small >= rmin ? small * ( HPL_rone + eps ) : rmin );
+   }
+
+   if(      CMACH == HPL_MACH_EPS   ) value = eps;
+   else if( CMACH == HPL_MACH_SFMIN ) value = sfmin;
+   else if( CMACH == HPL_MACH_BASE  ) value = base;
+   else if( CMACH == HPL_MACH_PREC  ) value = prec;
+   else if( CMACH == HPL_MACH_MLEN  ) value = t;
+   else if( CMACH == HPL_MACH_RND   ) value = rnd;
+   else if( CMACH == HPL_MACH_EMIN  ) value = emin;
+   else if( CMACH == HPL_MACH_RMIN  ) value = rmin;
+   else if( CMACH == HPL_MACH_EMAX  ) value = emax;
+   else if( CMACH == HPL_MACH_RMAX  ) value = rmax;
+   else                               value = eps;
+   return( value );
+/*
+ * End of HPL_dlamch
+ */
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc1
+(
+   int                        * BETA,
+   int                        * T,
+   int                        * RND,
+   int                        * IEEE1
+)
+#else
+static void HPL_dlamc1
+( BETA, T, RND, IEEE1 )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * IEEE1, * RND, * T;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
+ * and IEEE1 on its first call; later calls return the cached values.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition.  This may not be a  reliable guide to the
+ *         way in which the machine performs its arithmetic.
+ *
+ * IEEE1   (local output)              int *
+ *         Specifies  whether  rounding  appears  to be done in the IEEE
+ *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b, c, f, one, qtr, savec, t1, t2;
+   static int                 first=1, lbeta, lieee1, lrnd, lt;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; one = HPL_rone;
+/*
+ * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
+ * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
+ * that relevant values are stored and not held in registers, or are not
+ * affected by optimizers.
+ *
+ * Compute  a = 2.0**m  with the  smallest  positive integer m such that
+ * fl( fl( a + 1.0 ) - a ) != 1.0.
+ */
+      a = HPL_rone; c = HPL_rone;
+      do
+      { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
+      while( c == HPL_rone );
+/*
+ * Now compute  b = 2.0**m  with the smallest integer  m >= 0  such that
+ * fl( a + b ) != a.
+ */
+      b = HPL_rone; c = HPL_dlamc3( a, b );
+      while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
+/*
+ * Now compute the base.  a and c  are  neighbouring floating point num-
+ * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
+ * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
+ * BETA and not (BETA-1).
+ */
+      qtr = one / 4.0; savec = c;
+      c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
+/*
+ * Now  determine  whether  rounding or chopping occurs, by adding a bit
+ * less than BETA/2 and a bit more than BETA/2 to a.
+ */
+      b = (double)(lbeta);
+      f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( c == a ) { lrnd = 1; } else { lrnd = 0; }
+      f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
+/*
+ * Try  and decide whether rounding is done in the  IEEE  round to nea-
+ * rest style.  b/2 is half a unit in the last place of the two numbers
+ * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
+ * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
+ * to savec should change savec.
+ */
+      t1 = HPL_dlamc3( b / HPL_rtwo, a );
+      t2 = HPL_dlamc3( b / HPL_rtwo, savec );
+      if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
+      else                                                  lieee1 = 0;
+/*
+ * Now find the mantissa, T. It should be the integer part of log to the
+ * base BETA of a, however it is safer to determine T by powering. So we
+ * find T as the smallest positive integer for which  fl( beta**t + 1.0 )
+ * minus beta**t is no longer equal to 1.0.
+ */
+      lt = 0; a = HPL_rone; c = HPL_rone;
+
+      do
+      {
+         lt++; a *= (double)(lbeta);
+         c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
+      } while( c == HPL_rone );
+   }
+
+   *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc2
+(
+   int                        * BETA, 
+   int                        * T,
+   int                        * RND,
+   double                     * EPS,
+   int                        * EMIN,
+   double                     * RMIN,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * EMAX, * EMIN, * RND, * T;
+   double                     * EPS, * RMAX, * RMIN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc2 determines, on its first call, the machine parameters of its
+ * argument list; later calls normally return the values kept in statics.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
+ * based on a function PARANOIA  by  W. Kahan of the University of Cali-
+ * fornia at Berkeley for the computation of the  relative machine epsi-
+ * lon eps.
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition. This may not be a reliable  guide to  the
+ *         way in which the machine performs its arithmetic.
+ *
+ * EPS     (local output)              double *
+ *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
+ *         where fl denotes the computed value.
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before (gradual) underflow occurs.
+ *
+ * RMIN    (local output)              double *
+ *         The smallest  normalized  number  for  the  machine, given by
+ *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
+ *         of BETA.
+ *
+ * EMAX    (local output)              int *
+ *         The maximum exponent before overflow occurs.
+ *
+ * RMAX    (local output)              double *
+ *         The  largest  positive  number  for  the  machine,  given  by
+ *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
+ *         value of BETA.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables (the static ones keep the results between calls) ..
+ */
+   static double              leps, lrmax, lrmin;
+   double                     a, b, c, half, one, rbase, sixth, small,
+                              third, two, zero;
+   static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
+                              lrnd=0, lt=0;
+   int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
+                              ngnmin=0, ngpmin=0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
+/*
+ * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
+ * T, RND, EPS, EMIN and RMIN.
+ *
+ * Throughout this routine we use the function HPL_dlamc3 to ensure that
+ * relevant values are stored and not held in registers,  or are not af-
+ * fected by optimizers.
+ *
+ * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
+ */
+      HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
+/*
+ * Start to find eps.
+ */
+      b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
+/*
+ * Try some tricks to see whether or not this is the correct  EPS.
+ */
+      b     = two / 3.0; 
+      half  = one / HPL_rtwo;
+      sixth = HPL_dlamc3( b, -half );
+      third = HPL_dlamc3( sixth, sixth );
+      b     = HPL_dlamc3( third, -half );
+      b     = HPL_dlamc3( b, sixth );
+      b     = Mabs( b ); if( b < leps ) b = leps;
+
+      leps = HPL_rone;
+
+      while( ( leps > b ) && ( b > zero ) )
+      {
+         leps = b;
+         c = HPL_dlamc3( half * leps,
+                         HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
+         c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
+         c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
+      }
+      if( a < leps ) leps = a;
+/*
+ * Computation of EPS complete.
+ *
+ * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
+ * dividing a by BETA until (gradual) underflow occurs. This is detected
+ * when we cannot recover the previous a.
+ */
+      rbase = one / (double)(lbeta); small = one;
+      for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
+      a = HPL_dlamc3( one, small );
+      HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
+      HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
+
+      ieee = 0;
+
+      if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
+      {
+         if( ngpmin == gpmin )
+         {
+/*
+ * Non twos-complement machines, no gradual underflow; e.g.,  VAX
+ */
+            lemin = ngpmin;
+         }
+         else if( ( gpmin-ngpmin ) == 3 )
+         {
+/*
+ * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
+ * dard followers
+ */
+            lemin = ngpmin - 1 + lt; ieee = 1;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, gpmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
+      {
+         if( Mabs( ngpmin-ngnmin ) == 1 )
+         {
+/*
+ * Twos-complement machines, no gradual underflow; e.g., CYBER 205
+ */
+            lemin = Mmax( ngpmin, ngnmin );
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
+      {
+         if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
+         {
+/*
+ * Twos-complement machines with gradual underflow; no known machine
+ */
+            lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else
+      {
+/*
+ * A guess; no known machine
+ */
+         lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
+         lemin = Mmin( lemin, gnmin ); iwarn = 1;
+      }
+/*
+ * Comment out this if block if EMIN is ok (it warns and re-arms first)
+ */
+      if( iwarn != 0 )
+      {
+         first = 1;
+         HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
+"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
+"If, after inspection, the value EMIN looks acceptable, please comment ",
+"out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
+"otherwise supply EMIN explicitly." );
+      }
+/*
+ * Assume IEEE arithmetic if we found denormalised  numbers above, or if
+ * arithmetic seems to round in the  IEEE style,  determined  in routine
+ * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
+ * ever, faulty machines may have one or the other.
+ */
+      if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
+      else                                   ieee = 0;
+/*
+ * Compute  RMIN by successive division by  BETA. We could compute  RMIN
+ * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
+ * tation.
+ */
+      lrmin = HPL_rone;
+      for( i = 0; i < 1 - lemin; i++ )
+         lrmin = HPL_dlamc3( lrmin*rbase, zero );
+/*
+ * Finally, call HPL_dlamc5 to compute emax and rmax.
+ */
+      HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
+   }
+   *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
+   *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dlamc3( const double A, const double B )
+#else
+static double HPL_dlamc3( A, B )
+/*
+ * .. Scalar Arguments ..
+ */
+   const double               A, B;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc3  returns  A + B.  Both operands and the sum  pass  through
+ * volatile objects,  so that an optimizer must store them as doubles and
+ * perform the addition, even when this static function gets inlined.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc3.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * A, B    (local input)               double
+ *         The values a and b.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* .. Local Variables and Executable Statements .. */
+   volatile double            a = A, b = B, sum;
+   sum = a + b;
+   return( sum );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc4
+(
+   int                        * EMIN,
+   const double               START,
+   const int                  BASE
+)
+#else
+static void HPL_dlamc4( EMIN, START, BASE )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * EMIN;
+   const int                  BASE;
+   const double               START;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc4 is a service function for  HPL_dlamc2  that computes EMIN.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc4.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before  (gradual) underflow, computed by
+ *         setting A = START and dividing  by  BASE until the previous A
+ *         can not be recovered  (EMIN starts at 1, minus 1 per pass).
+ *
+ * START   (local input)               double
+ *         The starting point for determining EMIN.
+ *
+ * BASE    (local input)               int
+ *         The base of the machine.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   a     = START; one = HPL_rone; rbase = one / (double)(BASE);
+   zero  = HPL_rzero;
+   *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
+
+   do
+   {
+      (*EMIN)--; a = b1;
+      b1 = HPL_dlamc3( a /  BASE,  zero );
+      c1 = HPL_dlamc3( b1 *  BASE, zero );
+      d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;
+      b2 = HPL_dlamc3( a * rbase,  zero );
+      c2 = HPL_dlamc3( b2 / rbase, zero );
+      d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
+   } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc5
+(
+   const int                  BETA,
+   const int                  P, 
+   const int                  EMIN,
+   const int                  IEEE,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  BETA, EMIN, IEEE, P; 
+   int                        * EMAX;
+   double                     * RMAX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
+ * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+ * approximately to a power of 2.  It will fail  on machines where  this
+ * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
+ * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
+ * too large (i.e. too close to zero), probably with overflow.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc5.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local input)               int
+ *         The base of floating-point arithmetic.
+ *
+ * P       (local input)               int
+ *         The number of base BETA digits in the mantissa of a floating-
+ *         point value.
+ *
+ * EMIN    (local input)               int
+ *         The minimum exponent before (gradual) underflow.
+ *
+ * IEEE    (local input)               int
+ *         A logical flag specifying whether or not  the arithmetic sys-
+ *         tem is thought to comply with the IEEE standard.
+ *
+ * EMAX    (local output)              int *
+ *         The largest exponent before overflow.
+ *
+ * RMAX    (local output)              double *
+ *         The largest machine floating-point number.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     oldy=HPL_rzero, recbas, y, z;
+   int                        exbits=1, expsum, i, lexp=1, nbits, try,
+                              uexp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
+ * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
+ * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
+ * required number RMAX).
+ */
+l_10:
+   try = (int)( (unsigned int)(lexp) << 1 );
+   if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
+
+   if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
+/*
+ * Now -lexp is greater than or equal to EMIN, and -uexp is less than or
+ * equal to EMIN. exbits is the number of bits needed to store the expo-
+ * nent (the bounds assume EMIN <= -1).
+ */
+   if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
+   { expsum = (int)( (unsigned int)(lexp) << 1 ); }
+   else
+   { expsum = (int)( (unsigned int)(uexp) << 1 ); }
+/*
+ * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
+ */
+   *EMAX = expsum + EMIN - 1;
+/*
+ * nbits  is  the total number of bits needed to store a  floating-point
+ * number.
+ */
+   nbits = 1 + exbits + P;
+
+   if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
+   {
+/*
+ * Either there are an odd number of bits used to store a floating-point
+ * number, which is unlikely, or some bits are not used in the represen-
+ * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
+ * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
+ * which is perhaps the most likely. We have to assume the last alterna-
+ * tive.  If this is true,  then we need to reduce  EMAX  by one because
+ * there must be some way of representing zero  in an  implicit-bit sys-
+ * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
+ */
+      (*EMAX)--;
+   }
+
+   if( IEEE != 0 )
+   {
+/*
+ * Assume we are on an IEEE  machine which reserves one exponent for in-
+ * finity and NaN.
+ */
+      (*EMAX)--;
+   }
+/*
+ * Now create RMAX, the largest machine number, which should be equal to
+ * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
+ * careful that the result is less than 1.0.
+ */
+   recbas = HPL_rone / (double)(BETA);
+   z      = (double)(BETA) - HPL_rone;
+   y      = HPL_rzero;
+
+   for( i = 0; i < P; i++ )
+   { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
+
+   if( y >= HPL_rone ) y = oldy;
+/*
+ * Now multiply by BETA**EMAX to get RMAX.
+ */
+   for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
+
+   *RMAX = y;
+/*
+ * End of HPL_dlamc5
+ */
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dipow
+(
+   const double               X,
+   const int                  N
+)
+#else
+static double HPL_dipow( X, N )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  N;
+   const double               X;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dipow raises the real scalar x to the integer power n.
+ *
+ * Arguments
+ * =========
+ *
+ * X       (local input)               const double
+ *         The real scalar x; if x is zero, HPL_rzero is returned.
+ *
+ * N       (local input)               const int
+ *         The integer exponent; for n < 0 the power of 1/x is formed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     factor, result=HPL_rone;
+   int                        remaining;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( X == HPL_rzero ) return( HPL_rzero );
+   factor    = ( N < 0 ? HPL_rone / X : X );
+   remaining = ( N < 0 ? -N           : N );
+   while( remaining-- > 0 ) result *= factor;
+   return( result );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c
new file mode 100644
index 000000000..82f118b6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_dlange
+(
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_dlange
+( NORM, M, N, A, LDA )
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlange returns one of three measures of the M-by-N matrix A,
+ * as selected by NORM:
+ *
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,
+ *    norm1(A),        when NORM = HPL_NORM_1,
+ *    normI(A),        when NORM = HPL_NORM_I,
+ *
+ * norm1 is the largest column sum of absolute values and normI is the
+ * largest row sum of absolute values. HPL_rzero is returned when M or
+ * N is not positive, or when NORM is none of the values listed above.
+ *
+ * Arguments
+ * =========
+ *
+ * NORM    (local input)                 const HPL_T_NORM
+ *         On entry,  NORM  selects the quantity computed by this
+ *         function, as described above.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  is the number of rows of the matrix A. When
+ *         M is not positive, no entry of A is read.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  is the number of columns of the matrix A.
+ *         When N is not positive, no entry of A is read.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an  array of dimension  (LDA,N)
+ *         holding the matrix A column by column.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA is the distance, in elements, between the
+ *         starts of consecutive columns of A. LDA >= max(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const double               * col = A;
+   double                     acc, res = HPL_rzero, * sums = NULL;
+   int                        ic, ir;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M < 1 ) || ( N < 1 ) ) return( HPL_rzero );
+   if(      NORM == HPL_NORM_A )
+   {
+/*
+ * Largest absolute value over all entries
+ */
+      for( ic = 0; ic < N; ic++, col += LDA )
+      {
+         for( ir = 0; ir < M; ir++ )
+            res = Mmax( res, Mabs( col[ir] ) );
+      }
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: one absolute sum per column
+ */
+      sums = (double*)malloc( (size_t)(N) * sizeof( double ) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( ic = 0; ic < N; ic++, col += LDA )
+         {
+            acc = HPL_rzero;
+            for( ir = 0; ir < M; ir++ ) acc += Mabs( col[ir] );
+            sums[ic] = acc;
+         }
+/*
+ * The norm is the largest of the column sums
+ */
+         acc = sums[HPL_idamax( N, sums, 1 )]; res = Mabs( acc );
+         free( sums );
+      }
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: one absolute sum per row
+ */
+      sums = (double*)malloc( (size_t)(M) * sizeof( double ) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( ir = 0; ir < M; ir++ ) sums[ir] = HPL_rzero;
+
+         for( ic = 0; ic < N; ic++, col += LDA )
+         {
+            for( ir = 0; ir < M; ir++ )
+               sums[ir] += Mabs( col[ir] );
+         }
+/*
+ * The norm is the largest of the row sums
+ */
+         acc = sums[HPL_idamax( M, sums, 1 )]; res = Mabs( acc );
+         free( sums );
+      }
+   }
+
+   return( res );
+/*
+ * End of HPL_dlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c
new file mode 100644
index 000000000..f29df3cd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c
@@ -0,0 +1,130 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt writes the M-by-N matrix A to standard error, one entry
+ * per line, in the form  CMATNM(row,column)=value, column by column.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies the number of rows of A. Nothing is
+ *         printed when M is not positive.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the number of columns of A. Nothing
+ *         is printed when N is not positive.
+ *
+ * A       (local input)                 double *
+ *         On entry, A  points to an array of dimension (LDA,N).
+ *
+ * IA      (local input)                 const int
+ *         On entry, IA is added to every printed row index.
+ *
+ * JA      (local input)                 const int
+ *         On entry,  JA  is added to every printed column index. IA
+ *         and JA only affect the labels, not which entries are read.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,M).
+ *
+ * CMATNM  (local input)                 const char *
+ *         On entry, CMATNM is the name printed in front of each entry.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        col, row;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( col = 0; col < N; col++ )
+      for( row = 0; row < M; row++ )
+      {
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n",
+                      CMATNM, IA + row, JA + col,
+                      *(Mptr( A, row, col, LDA )) );
+      }
+
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factors; the setting inside this comment is disabled
+ * #ifndef HPL_LATCPY_M_DEPTH
+ * #define    HPL_LATCPY_M_DEPTH      32
+ * #define    HPL_LATCPY_LOG2_M_DEPTH  5
+ * #endif
+ * #ifndef HPL_LATCPY_N_DEPTH
+ * #define    HPL_LATCPY_N_DEPTH       4
+ * #define    HPL_LATCPY_LOG2_N_DEPTH  2
+ * #endif
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH       4
+#define    HPL_LATCPY_LOG2_M_DEPTH  2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH       2
+#define    HPL_LATCPY_LOG2_N_DEPTH  1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B,
+ * that is B(i,j) = A(j,i) for 0 <= i < M and 0 <= j < N.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of  rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the number of  rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* HPL_LATCPY_USE_COPY: column j of B is row j of A, read with stride LDA */
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B += LDB ) HPL_dcopy( M, A+j, LDA, B, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+/* mu, nu: M and N rounded down to a multiple of the unrolling depths */
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+/* Remaining M - mu elements of the current group of columns of B */
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+/* Remaining N - nu columns of B, handled one column at a time */
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_fprintf is a wrapper around vfprintf flushing the output stream.
+ * The message length is not bounded by any local buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+/*
+ * Format straight into STREAM: no fixed-size buffer that could overflow
+ */
+   (void) vfprintf( STREAM, FORM, argptr );
+   va_end( argptr );
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_warn displays an error message.
+ * The formatted message is cut to at most 127 characters (local buffer).
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128]; /* formatted, maybe truncated */
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Makefile
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/intel64/Makefile
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# NOTE(review): INCdir, CC, CCFLAGS, CCNOOPT, ARCHIVER, ARFLAGS, RANLIB, TOUCH, HPLlib are not set here; presumably Make.inc sets them
+# ######################################################################
+# Headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+#
+## Object files ########################################################
+# HPL_au0obj objects are compiled with $(CCFLAGS); HPL_au1obj (HPL_dlamch.o) with $(CCNOOPT).
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+#
+## Targets #############################################################
+#
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib
+# all/lib are phony aliases; lib.grd is the stamp touched after archiving into $(HPLlib)
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One explicit rule per object; $< is the first prerequisite, i.e. the C source in the parent directory.
+HPL_dlacpy.o : ../HPL_dlacpy.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_fprintf.o : ../HPL_fprintf.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_warn.o : ../HPL_warn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_abort.o : ../HPL_abort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlange.o : ../HPL_dlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlamch.o : ../HPL_dlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT) $<
+#
+# ######################################################################
+# Remove the objects and the lib.grd stamp; phony so a file named "clean" cannot mask it.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+/* Compiled only when HPL_daxpy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS   /* backend 1: forward to cblas_daxpy */
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL   /* backend 2: hand-written loop, unrolled by 4 */
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+   /* Y is left untouched when N <= 0 or alpha equals HPL_rzero */
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )  /* nu = N rounded down to a multiple of 4 */
+      {
+         StX = X + nu * INCX;
+         /* X reaches StX exactly when the nu unrolled elements are done */
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+ 
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+ 
+            X  += incX4;
+            Y  += incY4;
+ 
+         } while( X != StX );
+      }
+      /* Leftover N - nu elements (fewer than 4), one per iteration */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         y0  = (*Y);
+ 
+         *Y  = y0 + alpha * x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS   /* backend 3: forward to F77daxpy, arguments by address */
+   double                    alpha = ALPHA;   /* addressable copy: &alpha is passed below */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else /* no F77_INTEGER copies: the F77 names alias the arguments */
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_daxpy
+ */
+}
+ 
+#endif /* HPL_daxpy */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+/* Compiled only when HPL_dcopy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS   /* backend 1: forward to cblas_dcopy */
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL   /* backend 2: hand-written loop, unrolled by 8 */
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+   /* Y is left untouched when N <= 0 */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )  /* nu = N rounded down to a multiple of 8 */
+      {
+         StX = X + nu * INCX;
+         /* X reaches StX exactly when the nu unrolled elements are done */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+ 
+            X  += incX8;
+            Y  += incY8;
+ 
+         } while( X != StX );
+      }
+      /* Leftover N - nu elements (fewer than 8), one per iteration */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         *Y  = x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS   /* backend 3: forward to F77dcopy, arguments by address */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else /* no F77_INTEGER copies: the F77 names alias the arguments */
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dcopy
+ */
+}
+ 
+#endif /* HPL_dcopy */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            t0;
+   int                        i, iail, iblj, icij, j, jal, jbj, jcj, l;
+
+   for( j = 0, jbj = 0, jcj  = 0; j < N; j++, jbj += LDB, jcj += LDC )
+   {
+      HPL_dscal( M, BETA, C+jcj, 1 );
+      for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 )
+      {
+         t0 = ALPHA * B[iblj];
+         for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 )
+         { C[icij] += A[iail] * t0; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNT
+(
+   const int                  M,       /* rows of A and of C                    */
+   const int                  N,       /* rows of B as stored, columns of C     */
+   const int                  K,       /* columns of A and of B as stored       */
+   const double               ALPHA,   /* scales every product A(i,l) * B(j,l)  */
+   const double               * A,     /* element (i,l) is A[i + l*LDA]         */
+   const int                  LDA,     /* distance between columns of A         */
+   const double               * B,     /* element (j,l) is B[j + l*LDB]         */
+   const int                  LDB,     /* distance between columns of B         */
+   const double               BETA,    /* handed to HPL_dscal to pre-scale C    */
+   double                     * C,     /* element (i,j) is C[i + j*LDC]         */
+   const int                  LDC      /* distance between columns of C         */
+)
+#else
+static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            bscaled; /* ALPHA * B(j,l), reused for a whole column of A */
+   int                        col, inner, row;
+/* C(:,j) := BETA * C(:,j) + sum over l of ( ALPHA * B(j,l) ) * A(:,l);  B is read transposed */
+   for( col = 0; col < N; col++ )
+   {
+      HPL_dscal( M, BETA, C + col * LDC, 1 );
+      for( inner = 0; inner < K; inner++ )
+      {
+         bscaled = ALPHA * B[col + inner * LDB];
+         for( row = 0; row < M; row++ )
+         { C[row + col * LDC] += A[row + inner * LDA] * bscaled; }
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTN
+(
+   const int                  M,       /* columns of A as stored, rows of C     */
+   const int                  N,       /* columns of B and of C                 */
+   const int                  K,       /* rows of A as stored and of B          */
+   const double               ALPHA,   /* scales each accumulated dot product   */
+   const double               * A,     /* element (l,i) is A[l + i*LDA]         */
+   const int                  LDA,     /* distance between columns of A         */
+   const double               * B,     /* element (l,j) is B[l + j*LDB]         */
+   const int                  LDB,     /* distance between columns of B         */
+   const double               BETA,    /* scales C; zero overwrites C instead   */
+   double                     * C,     /* element (i,j) is C[i + j*LDC]         */
+   const int                  LDC      /* distance between columns of C         */
+)
+#else
+static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;     /* running sum over l of A(l,i) * B(l,j) */
+   int                        col, inner, row;
+/* C(i,j) := BETA * C(i,j) + ALPHA * sum over l of A(l,i) * B(l,j);  A is read transposed */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         dot = HPL_rzero;
+         for( inner = 0; inner < K; inner++ )
+         { dot += A[inner + row * LDA] * B[inner + col * LDB]; }
+         if( BETA != HPL_rzero ) C[row + col * LDC] *= BETA;
+         else                    C[row + col * LDC]  = HPL_rzero;
+         C[row + col * LDC] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTT
+(
+   const int                  M,       /* columns of A as stored, rows of C     */
+   const int                  N,       /* rows of B as stored, columns of C     */
+   const int                  K,       /* rows of A and columns of B as stored  */
+   const double               ALPHA,   /* scales each accumulated dot product   */
+   const double               * A,     /* element (l,i) is A[l + i*LDA]         */
+   const int                  LDA,     /* distance between columns of A         */
+   const double               * B,     /* element (j,l) is B[j + l*LDB]         */
+   const int                  LDB,     /* distance between columns of B         */
+   const double               BETA,    /* scales C; zero overwrites C instead   */
+   double                     * C,     /* element (i,j) is C[i + j*LDC]         */
+   const int                  LDC      /* distance between columns of C         */
+)
+#else
+static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   register double            dot;     /* running sum over l of A(l,i) * B(j,l) */
+   int                        col, inner, row;
+/* C(i,j) := BETA * C(i,j) + ALPHA * sum over l of A(l,i) * B(j,l);  A and B are read transposed */
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         dot = HPL_rzero;
+         for( inner = 0; inner < K; inner++ )
+         { dot += A[inner + row * LDA] * B[col + inner * LDB]; }
+         if( BETA != HPL_rzero ) C[row + col * LDC] *= BETA;
+         else                    C[row + col * LDC]  = HPL_rzero;
+         C[row + col * LDC] += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemm0
+(
+   const enum HPL_TRANS       TRANSA,  /* HplNoTrans: use A as stored; else A^T */
+   const enum HPL_TRANS       TRANSB,  /* HplNoTrans: use B as stored; else B^T */
+   const int                  M,       /* rows of C                             */
+   const int                  N,       /* columns of C                          */
+   const int                  K,       /* length of the summed dimension        */
+   const double               ALPHA,   /* scalar applied to op(A) * op(B)       */
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,    /* scalar applied to C                   */
+   double                     * C,     /* element (i,j) is C[i + j*LDC]         */
+   const int                  LDC
+)
+#else
+static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+                        BETA, C, LDC )
+   const enum HPL_TRANS       TRANSA, TRANSB;
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   int                        i, j;
+/* Quick return: C is empty, or the update reduces to C := 1 * C. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
+         ( BETA == HPL_rone ) ) ) return;
+/* ALPHA == 0: the product term vanishes and only C := BETA * C remains. */
+   if( ALPHA == HPL_rzero )
+   {  /* BETA == 0 stores exact zeros rather than multiplying, so C need not be set on entry */
+      for( j = 0; j < N; j++ ) for( i = 0; i < M; i++ )
+      {  C[i+j*LDC] = ( BETA == HPL_rzero ? HPL_rzero : BETA * C[i+j*LDC] ); }
+      return;
+   }
+/* General case: dispatch to the kernel matching the two transpose flags. */
+   if( TRANSB == HplNoTrans )
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+   else
+   {
+      if( TRANSA == HplNoTrans )
+      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+      else
+      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANSA,
+   const enum HPL_TRANS             TRANSB,
+   const int                        M,
+   const int                        N,
+   const int                        K,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   B,
+   const int                        LDB,
+   const double                     BETA,
+   double *                         C,
+   const int                        LDC
+)
+#else
+void HPL_dgemm
+( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANSA;
+   const enum HPL_TRANS             TRANSB;
+   const int                        M;
+   const int                        N;
+   const int                        K;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   B;
+   const int                        LDB;
+   const double                     BETA;
+   double *                         C;
+   const int                        LDC;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemm performs one of the matrix-matrix operations
+ *  
+ *     C := alpha * op( A ) * op( B ) + beta * C
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+ * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANSA  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSA  specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:                      
+ *            TRANSA==HplNoTrans    : op( A ) = A,                     
+ *            TRANSA==HplTrans      : op( A ) = A^T,                   
+ *            TRANSA==HplConjTrans  : op( A ) = A^T.                   
+ *
+ * TRANSB  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSB  specifies the form of  op(B)  to be used in
+ *         the matrix-matrix operation as follows:                      
+ *            TRANSB==HplNoTrans    : op( B ) = B,                     
+ *            TRANSB==HplTrans      : op( B ) = B^T,                   
+ *            TRANSB==HplConjTrans  : op( B ) = B^T.                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the  number  of rows  of the  matrix
+ *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the number  of columns of the matrix
+ *         op(B)  and  the number of columns of the matrix  C. N must be
+ *         at least zero.
+ *
+ * K       (local input)                 const int
+ *         On entry,  K  specifies  the  number of columns of the matrix
+ *         op(A) and the number of rows of the matrix op(B).  K  must be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero  then the elements of the matrices A and B
+ *         need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
+ *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+ *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+ *         the array  A must contain the matrix A, otherwise the leading
+ *         k  by  m  part of the array  A  must  contain the  matrix  A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA  specifies the first dimension of A as declared
+ *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+ *         LDA must be at least max(1,m), otherwise LDA must be at least
+ *         max(1,k).
+ *
+ * B       (local input)                 const double *
+ *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
+ *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+ *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+ *         the array  B must contain the matrix B, otherwise the leading
+ *         n  by  k  part of the array  B  must  contain  the matrix  B.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB  specifies the first dimension of B as declared
+ *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+ *         LDB must be at least max(1,k), otherwise LDB must be at least
+ *         max(1,n).
+ *
+ * BETA    (local input)                 const double
+ *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+ *         supplied  as  zero  then  the  elements of the matrix C  need
+ *         not be set on input.
+ *
+ * C       (local input/output)          double *
+ *         On entry,  C  is an array of dimension (LDC,n). Before entry,
+ *         the  leading m by n part  of  the  array  C  must contain the
+ *         matrix C,  except when beta is zero, in which case C need not
+ *         be set on entry. On exit, the array  C  is overwritten by the
+ *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+ *
+ * LDC     (local input)                 const int
+ *         On entry, LDC  specifies the first dimension of C as declared
+ *         in  the   calling  (sub)  program.   LDC  must  be  at  least
+ *         max(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   /* Forward the arguments to the C BLAS unchanged; no per-call trace output. */
+   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
+                  C, LDC );
+   }
+   else
+   {
+      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
+                  C, LDC );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
+                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77K                 K
+#define F77lda               LDA
+#define F77ldb               LDB
+#define F77ldc               LDC
+#endif
+   char                      ctransa, ctransb;
+
+   if(      TRANSA == HplNoTrans ) ctransa = 'N';
+   else if( TRANSA == HplTrans   ) ctransa = 'T';
+   else                            ctransa = 'C';
+ 
+   if(      TRANSB == HplNoTrans ) ctransb = 'N';
+   else if( TRANSB == HplTrans   ) ctransb = 'T';
+   else                            ctransb = 'C';
+
+   if( ORDER == HplColumnMajor )
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+   }
+   else
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dgemm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c
new file mode 100644
index 000000000..6366c5a48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c
@@ -0,0 +1,326 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,   /* HplNoTrans: use A as stored; else A^T */
+   const int                  M,       /* rows of A as stored                   */
+   const int                  N,       /* columns of A as stored                */
+   const double               ALPHA,   /* scalar applied to op(A) * x           */
+   const double               * A,     /* element (i,j) is A[i + j*LDA]         */
+   const int                  LDA,     /* distance between columns of A         */
+   const double               * X,     /* k-th entry of x is X[k*INCX]          */
+   const int                  INCX,
+   const double               BETA,    /* scalar applied to y                   */
+   double                     * Y,     /* k-th entry of y is Y[k*INCY]          */
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{
+/*
+ * y := ALPHA * op( A ) * x + BETA * y  for an A stored column by column.
+ */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/*
+ * Quick return: A is empty, or ALPHA == 0 with BETA == 1 leaves y as is.
+ */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
+/* ALPHA == 0: only y := BETA * y remains; y has M entries for A but N for A^T. */
+   if( ALPHA == HPL_rzero )
+   { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; }
+   if( TRANS == HplNoTrans )
+   {
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else
+   {
+      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
+         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   X;
+   const int                        INCX;
+   const double                     BETA;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemv performs one of the matrix-vector operations
+ *  
+ *     y := alpha * op( A ) * x + beta * y,
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * where alpha and beta are scalars, x and y are vectors and  A  is an m
+ * by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies the  operation to be performed as
+ *         follows:   
+ *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+ *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  A and X  need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * X       (local input)                 const double *
+ *         On entry, X is an incremented array holding the vector x,  of
+ *         dimension >= 1+(k-1)*abs(INCX),  k = n if op(A) = A,  else m.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * BETA    (local input)                 const double
+ *         On entry, BETA  specifies the scalar beta.    When  BETA   is
+ *         supplied as zero then  Y  need not be set on input.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( m - 1 ) * abs( INCY ) ) when op( A ) = A, and at least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  otherwise.  Before entry with
+ *         BETA non-zero,  Y  must contain the vector  y.  On exit,  Y is
+ *         overwritten by the updated vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+   else
+   {
+      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftran;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   char                      ctran;
+
+   if( ORDER == HplColumnMajor )
+   {
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+   else
+   {
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); /* row-major: flip the flag and swap M and N below */
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+
+#endif
+/*
+ * End of HPL_dgemv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c
new file mode 100644
index 000000000..5ea702778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dger
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+void HPL_dger
+(
+   const enum HPL_ORDER             ORDER,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY,
+   double *                         A,
+   const int                        LDA
+)
+#else
+void HPL_dger
+( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+   const enum HPL_ORDER             ORDER;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+   double *                         A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dger performs the rank 1 operation
+ *  
+ *     A := alpha * x * y^T + A,
+ *  
+ * where alpha is a scalar,  x is an m-element vector, y is an n-element
+ * vector and A is an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  X and Y  need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input)                 double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients. On exit, A is
+ *         overwritten by the updated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* C BLAS section: the arguments are forwarded unchanged */
+   cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA );
+#endif
+#ifdef HPL_CALL_VSIPL /* this section is a plain C loop nest; no library routine is called */
+   register double           t0;
+   int                       i, iaij, ix, iy, j, jaj, jx, jy;
+
+   if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; /* A is left as is */
+ 
+   if( ORDER == HplColumnMajor ) /* entry (i,j) of A is read and written as A[i + j*LDA] */
+   {
+      for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = ALPHA * Y[jy]; /* alpha * y(j), computed once and reused for the whole column */
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { A[iaij] += X[ix] * t0; }
+      }
+   }
+   else /* any other ORDER value: j walks the M rows (stride LDA), i the N entries of each row */
+   {
+      for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx]; /* alpha * x(j), computed once and reused for the whole row */
+         for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY )
+         { A[iaij] += Y[iy] * t0; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS section: every argument is passed by address */
+   double                    alpha = ALPHA; /* local copy whose address is handed to F77dger */
+#ifdef HPL_USE_F77_INTEGER_DEF /* integer arguments are copied into F77_INTEGER variables */
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else /* otherwise the F77* names are macros for the C arguments themselves */
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+
+   if( ORDER == HplColumnMajor )
+   {  F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); }
+   else /* other orderings: M/N and X/Y trade places in the call, A and LDA are unchanged */
+   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
+#endif
+/*
+ * End of HPL_dger
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c
new file mode 100644
index 000000000..7e041991f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dscal
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+void HPL_dscal
+(
+   const int                        N,
+   const double                     ALPHA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dscal
+( N, ALPHA, X, INCX )
+   const int                        N;
+   const double                     ALPHA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dscal scales the vector x by alpha.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are  scaled
+ *         by the scalar alpha.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* C BLAS section: the arguments are forwarded unchanged */
+   cblas_dscal( N, ALPHA, X, INCX );
+#endif
+#ifdef HPL_CALL_VSIPL /* this section is plain C, processed eight elements per pass; no library call */
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   register const double     alpha = ALPHA;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( ( N > 0 ) && ( alpha != HPL_rone ) ) /* otherwise X is not touched at all */
+   {
+      if( alpha == HPL_rzero ) /* this branch only stores HPL_rzero; X is never read */
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu = N rounded down to a multiple of 8 */
+         {
+            StX = (double *)X + nu * INCX; /* address X reaches after nu elements; ends the loop */
+ 
+            do
+            {
+               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
+               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
+               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
+               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8;
+
+            } while( X != StX );
+         }
+         /* leftover N - nu elements (fewer than 8), one at a time */
+         for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; }
+      }
+      else /* any other alpha: load eight values, multiply, store them back */
+      {
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu = N rounded down to a multiple of 8 */
+         {
+            StX = X + nu * INCX; /* address X reaches after nu elements; ends the loop */
+ 
+            do
+            {
+               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+               /* scale the eight loaded values */
+               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
+               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;
+               /* write them back to the same eight positions */
+               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
+               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;
+ 
+               X  += incX8;
+ 
+            } while( X != StX );
+         }
+         /* leftover N - nu elements (fewer than 8), one at a time */
+         for( i = N - nu; i != 0; i-- )
+         { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; }
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS section: scalar arguments are passed by address */
+   double                    alpha = ALPHA; /* local copy whose address is handed to F77dscal */
+#ifdef HPL_USE_F77_INTEGER_DEF /* integer arguments are copied into F77_INTEGER variables */
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else /* otherwise the F77* names are macros for the C arguments themselves */
+#define F77N                 N
+#define F77incx              INCX
+#endif
+
+   F77dscal( &F77N, &alpha, X, &F77incx );
+#endif
+/*
+ * End of HPL_dscal
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c
new file mode 100644
index 000000000..eb1b8e08d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c
@@ -0,0 +1,157 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dswap
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+void HPL_dswap
+(
+   const int                        N,
+   double *                         X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dswap
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   double *                         X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dswap swaps the vectors x and y.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are updated
+ *         with the entries of the incremented array Y.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* C BLAS section: the arguments are forwarded unchanged */
+   cblas_dswap( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL /* this section is plain C, processed four pairs per pass; no library call */
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   double                    * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+
+   if( N > 0 ) /* nothing is read or written when N <= 0 */
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 ) /* nu = N rounded down to a multiple of 4 */
+      {
+         StX = X + nu * INCX; /* address X reaches after nu elements; ends the loop */
+         /* each pass loads four x/y pairs into locals, then stores them crosswise */
+         do
+         {
+            x0 = (*X);      y0 = (*Y);      x1 = X[INCX ];  y1 = Y[INCY ];
+            x2 = X[incX2];  y2 = Y[incY2];  x3 = X[incX3];  y3 = Y[incY3];
+            *Y        = x0; *X        = y0; Y[INCY ]  = x1; X[INCX ]  = y1;
+            Y[incY2]  = x2; X[incX2]  = y2; Y[incY3]  = x3; X[incX3]  = y3;
+            X += incX4; Y += incY4;
+ 
+         } while( X != StX );
+      }
+      /* leftover N - nu pairs (fewer than 4), one at a time */
+      for( i = N - nu; i != 0; i-- )
+      { x0  = (*X); y0  = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS section: integer arguments are passed by address */
+#ifdef HPL_USE_F77_INTEGER_DEF /* integer arguments are copied into F77_INTEGER variables */
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else /* otherwise the F77* names are macros for the C arguments themselves */
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dswap( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dswap
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c
new file mode 100644
index 000000000..a336a7d29
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c
@@ -0,0 +1,977 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLLNN
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* every entry of B is multiplied by this first */
+   const double               * A, /* only read: diagonal and entries below it, as A[i + k*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* In place, one column of B at a time: B := ALPHA * inv( A ) * B with A lower triangular. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
+      { /* forward substitution: k goes from the first row to the last */
+         B[ibkj] /= A[k+jak]; /* divide by the diagonal entry A(k,k); x(k) is now final */
+         for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
+              i < M; i++, iaik +=1,        ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* remove x(k) * A(i,k) from every row i > k */
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLLNU
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* every entry of B is multiplied by this first */
+   const double               * A, /* only read: entries strictly below the diagonal, as A[i + k*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* Same sweep as HPL_dtrsmLLNN but with no division: the diagonal of A is never read. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
+      { /* forward substitution: k goes from the first row to the last */
+         for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
+              i < M; i++, iaik +=1,        ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* remove x(k) * A(i,k) from every row i > k */
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLLTN
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* multiplies each entry of B before the subtraction sweep */
+   const double               * A, /* only read: diagonal and entries below it, as A[k + i*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* In place, per column of B: B := ALPHA * inv( A^T ) * B with A stored lower triangular. */
+   register double            t0;
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
+           i >= 0; i--, jai -= LDA,       ibij -= 1 )
+      { /* back substitution: i goes from the last row up to the first */
+         t0 = ALPHA * B[ibij];
+         for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
+              k < M; k++, iaki += 1,       ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* A(k,i) with k > i, times the already solved x(k) */
+         t0 /= A[i+jai]; /* divide by the diagonal entry A(i,i) */
+         B[ibij] = t0;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLLTU
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* multiplies each entry of B before the subtraction sweep */
+   const double               * A, /* only read: entries strictly below the diagonal, as A[k + i*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* Same sweep as HPL_dtrsmLLTN but with no division: the diagonal of A is never read. */
+   register double            t0;
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
+           i >= 0; i--, jai -= LDA,       ibij -= 1 )
+      { /* back substitution: i goes from the last row up to the first */
+         t0 = ALPHA * B[ibij];
+         for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
+              k < M; k++, iaki += 1,       ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* A(k,i) with k > i, times the already solved x(k) */
+         B[ibij] = t0;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLUNN
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* every entry of B is multiplied by this first */
+   const double               * A, /* only read: diagonal and entries above it, as A[i + k*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* In place, one column of B at a time: B := ALPHA * inv( A ) * B with A upper triangular. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
+           k >= 0; k--, jak -= LDA,       ibkj -= 1 )
+      { /* back substitution: k goes from the last row up to the first */
+         B[ibkj] /= A[k+jak]; /* divide by the diagonal entry A(k,k); x(k) is now final */
+         for( i = 0,      iaik  = jak, ibij  = jbj;
+              i < k; i++, iaik += 1,   ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* remove x(k) * A(i,k) from every row i < k */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* ANSI prototype here; K&R-style definition in the #else branch */
+static void HPL_dtrsmLUNU
+(
+   const int                  M, /* rows of B; A is indexed as an M by M matrix */
+   const int                  N, /* columns of B */
+   const double               ALPHA, /* every entry of B is multiplied by this first */
+   const double               * A, /* only read: entries strictly above the diagonal, as A[i + k*LDA] */
+   const int                  LDA, /* distance in elements between columns of A */
+   double                     * B, /* right-hand sides on entry, overwritten with the solution */
+   const int                  LDB /* distance in elements between columns of B */
+)
+#else
+static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* Same sweep as HPL_dtrsmLUNN but with no division: the diagonal of A is never read. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) /* column j of B starts at B[jbj] */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
+           k >= 0; k--, jak -= LDA,       ibkj -= 1 )
+      { /* back substitution: k goes from the last row up to the first */
+         for( i = 0,      iaik  = jak, ibij  = jbj;
+              i < k; i++, iaik += 1,   ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* remove x(k) * A(i,k) from every row i < k */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmLUTN: overwrite B with the solution X of A^T * X = ALPHA * B, A upper triangular, diagonal read */
+static void HPL_dtrsmLUTN
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r <= c < M */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+   register double            t0; /* accumulates the entry of X currently being solved */
+
+   for( j = 0, jbj  = 0; j < N; j++, jbj += LDB ) /* columns of B are solved independently */
+   {
+      for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) /* forward substitution, first row first */
+      {
+         t0 = ALPHA * B[ibij]; /* ALPHA is folded into each right-hand-side entry as it is consumed */
+         for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* subtract A(k,i) * X(k,j) for the already solved rows k < i */
+         t0 /= A[i+jai]; /* divide by the diagonal A(i,i) */
+         B[ibij] = t0;
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmLUTU: overwrite B with the solution X of A^T * X = ALPHA * B, A upper triangular, diagonal never read */
+static void HPL_dtrsmLUTU
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r < c < M */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            t0; /* accumulates the entry of X currently being solved */
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( j = 0, jbj  = 0; j < N; j++, jbj += LDB ) /* columns of B are solved independently */
+   {
+      for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) /* forward substitution, first row first */
+      {
+         t0 = ALPHA * B[ibij]; /* ALPHA is folded into each right-hand-side entry as it is consumed */
+         for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* subtract A(k,i) * X(k,j) for the already solved rows k < i */
+         B[ibij] = t0; /* no division: the diagonal is taken to be one */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRLNN: overwrite B with the solution X of X * A = ALPHA * B, A lower triangular, diagonal read */
+static void HPL_dtrsmRLNN
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with c <= r < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;
+        j >= 0;  j--, jaj -= LDA,       jbj -= LDB ) /* columns of X are produced last to first */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* apply ALPHA to column j */
+      for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;
+           k < N; k++, iakj += 1,       jbk += LDB ) /* columns k > j already hold X(:,k) */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* subtract A(k,j) * X(:,k) from column j */
+      }
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } /* divide column j by the diagonal A(j,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRLNU: overwrite B with the solution X of X * A = ALPHA * B, A lower triangular, diagonal never read */
+static void HPL_dtrsmRLNU
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with c < r < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;
+        j >= 0;  j--, jaj -= LDA,       jbj -= LDB ) /* columns of X are produced last to first */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* apply ALPHA to column j */
+      for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;
+           k < N; k++, iakj += 1,       jbk += LDB ) /* columns k > j already hold X(:,k) */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* subtract A(k,j) * X(:,k) from column j; no division (unit diagonal) */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRLTN: overwrite B with the solution X of X * A^T = ALPHA * B, A lower triangular, diagonal read */
+static void HPL_dtrsmRLTN
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with c <= r < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            t0; /* holds A(j,k) while column j is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) /* columns of X are produced first to last */
+   {
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } /* divide column k by the diagonal A(k,k) */
+      for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;
+           j < N; j++, iajk += 1,         jbj += LDB ) /* push column k into the later columns j > k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* uses column k before ALPHA has been applied to it */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* ALPHA goes in last, once column k is no longer needed */
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRLTU: overwrite B with the solution X of X * A^T = ALPHA * B, A lower triangular, diagonal never read */
+static void HPL_dtrsmRLTU
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with c < r < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            t0; /* holds A(j,k) while column j is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) /* columns of X are produced first to last */
+   {
+      for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;
+           j < N; j++, iajk += 1,         jbj += LDB ) /* push column k into the later columns j > k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* uses column k before ALPHA has been applied to it */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* ALPHA goes in last, once column k is no longer needed */
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRUNN: overwrite B with the solution X of X * A = ALPHA * B, A upper triangular, diagonal read */
+static void HPL_dtrsmRUNN
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r <= c < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) /* columns of X are produced first to last */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* apply ALPHA to column j */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) /* columns k < j already hold X(:,k) */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* subtract A(k,j) * X(:,k) from column j */
+      }
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } /* divide column j by the diagonal A(j,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRUNU: overwrite B with the solution X of X * A = ALPHA * B, A upper triangular, diagonal never read */
+static void HPL_dtrsmRUNU
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r < c < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) /* columns of X are produced first to last */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } /* apply ALPHA to column j */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) /* columns k < j already hold X(:,k) */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; } /* subtract A(k,j) * X(:,k) from column j; no division (unit diagonal) */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRUTN: overwrite B with the solution X of X * A^T = ALPHA * B, A upper triangular, diagonal read */
+static void HPL_dtrsmRUTN
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r <= c < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            t0; /* holds A(j,k) while column j is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
+        k >= 0; k--, jak -= LDA,       jbk -= LDB ) /* columns of X are produced last to first */
+   {
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } /* divide column k by the diagonal A(k,k) */
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) /* push column k into the earlier columns j < k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* uses column k before ALPHA has been applied to it */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* ALPHA goes in last, once column k is no longer needed */
+   }
+}
+
+#ifdef STDC_HEADERS /* HPL_dtrsmRUTU: overwrite B with the solution X of X * A^T = ALPHA * B, A upper triangular, diagonal never read */
+static void HPL_dtrsmRUTU
+( /* B is M x N (column stride LDB); A(r,c) is read at A[r+c*LDA] with r < c < N */
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{
+   register double            t0; /* holds A(j,k) while column j is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k; /* iXrc: offset of X(r,c); jXc: offset of column c of X */
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;
+        k >= 0; k--, jak -= LDA,       jbk -= LDB ) /* columns of X are produced last to first */
+   {
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) /* push column k into the earlier columns j < k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; } /* uses column k before ALPHA has been applied to it */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } /* ALPHA goes in last, once column k is no longer needed */
+   }
+}
+
+#ifdef STDC_HEADERS /* HPL_dtrsm0: handle the trivial cases, then dispatch to one of the sixteen HPL_dtrsm<side><uplo><trans><diag> kernels */
+static void HPL_dtrsm0
+( /* operands are indexed as column-major: B(i,j) is B[i+j*LDB] */
+   const enum HPL_SIDE        SIDE,
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_SIDE        SIDE;
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ 
+   int                        i, j;
+
+   if( ( M == 0 ) || ( N == 0 ) ) return; /* empty B: nothing to solve */
+ 
+   if( ALPHA == HPL_rzero ) /* zero scalar: the result is all zeros and A is never read */
+   {
+      for( j = 0; j < N; j++ )
+      {  for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; }
+      return;
+   }
+
+   if( SIDE == HplLeft ) /* any other SIDE value falls through to the right-side kernels */
+   {
+      if( UPLO == HplUpper ) /* any other UPLO value selects the lower-triangular kernels */
+      {
+         if( TRANS == HplNoTrans ) /* any other TRANS value selects the transposed kernels */
+         {
+            if( DIAG == HplNonUnit ) /* any other DIAG value selects the unit-diagonal kernel */
+            {      HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+   else /* right side: same UPLO / TRANS / DIAG decision tree as above */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_SIDE              SIDE,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dtrsm
+( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_SIDE              SIDE;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsm solves one of the matrix equations
+ *  
+ *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ *  
+ * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+ * non-unit, upper or lower triangular matrix and op(A) is one of
+ *  
+ *    op( A ) = A   or   op( A ) = A^T.
+ *  
+ * The matrix X is overwritten on B.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * SIDE    (local input)                 const enum HPL_SIDE
+ *         On entry, SIDE  specifies  whether  op(A) appears on the left
+ *         or right of X as follows:
+ *            SIDE==HplLeft    op( A ) * X = alpha * B,
+ *            SIDE==HplRight   X * op( A ) = alpha * B.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry, TRANS  specifies  the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:                      
+ *            TRANS==HplNoTrans    : op( A ) = A,                      
+ *            TRANS==HplTrans      : op( A ) = A^T,                    
+ *            TRANS==HplConjTrans  : op( A ) = A^T.                    
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the  matrix B.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix B.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero then the elements of the matrix B need not
+ *         be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+ *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+ *         k by k upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower on  entry,
+ *         the  leading k by k lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+ *
+ * B       (local input/output)          double *
+ *         On entry,  B  points  to an array of size equal to or greater
+ *         than LDB * n.  Before entry, the leading  m by n  part of the
+ *         array B must contain the matrix B, except when alpha is zero,
+ *         in which case B need not be set on entry.  On exit, the array
+ *         B is overwritten by the m by n solution matrix.
+ *
+ * LDB     (local input)                 const int
+ *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
+ *         declared  in  the  calling  (sub) program.  LDB  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* backend 1: forward every argument unchanged to the C BLAS interface */
+   cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+#endif
+#ifdef HPL_CALL_VSIPL /* backend 2: the reference kernels behind HPL_dtrsm0 in this file */
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+   }
+   else /* any other ORDER: call the same kernel with SIDE and UPLO flipped and M, N exchanged */
+   {
+      HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
+                  ( UPLO == HplLower ? HplUpper : HplLower ),
+                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* backend 3: Fortran 77 BLAS; scalars are passed by address, flags as characters */
+   double                    alpha = ALPHA; /* addressable copy of ALPHA for the by-reference call */
+#ifdef StringSunStyle
+#if defined( HPL_USE_F77_INTEGER_DEF )
+   F77_INTEGER               IONE = 1; /* passed four times after the regular arguments, once per character flag */
+#else
+   int                       IONE = 1; /* passed four times after the regular arguments, once per character flag */
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,
+                             F77lda = LDA, F77ldb = LDB; /* copies of the int arguments in the F77_INTEGER type */
+#else
+#define  F77M                M
+#define  F77N                N
+#define  F77lda              LDA
+#define  F77ldb              LDB
+#endif
+   char                      cside, cuplo, ctran, cdiag; /* one-letter encodings of the enum flags */
+
+   if(      TRANS == HplNoTrans ) ctran = 'N';
+   else if( TRANS == HplTrans   ) ctran = 'T';
+   else                           ctran = 'C';
+   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
+
+   if( ORDER == HplColumnMajor )
+   {
+      cside = ( SIDE == HplRight ? 'R' : 'L' );
+      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+   else /* any other ORDER: side and uplo letters are flipped and N, M are passed in exchanged order */
+   {
+      cside = ( SIDE == HplRight ? 'L' : 'R' );
+      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dtrsm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c
new file mode 100644
index 000000000..99e84f073
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c
@@ -0,0 +1,520 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS /* HPL_dtrsvLNN: overwrite X with the solution of A * x = b, A lower triangular, diagonal read */
+static void HPL_dtrsvLNN
+( /* A(r,c) is read at A[r+c*LDA] with c <= r < N; element i of the vector is X[i*INCX] */
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0; /* the entry x(j) just solved */
+
+   for( j = 0, jaj = 0, jx  = 0; j < N; j++, jaj += ldap1, jx += INCX ) /* forward substitution; jaj is the offset of A(j,j) */
+   {
+      X[jx] /= A[jaj]; t0 = X[jx]; /* x(j) is final after dividing by the diagonal */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; } /* remove x(j) * A(i,j) from the rows below j */
+   }
+}
+
+#ifdef STDC_HEADERS /* HPL_dtrsvLNU: overwrite X with the solution of A * x = b, A lower triangular, diagonal never read */
+static void HPL_dtrsvLNU
+( /* A(r,c) is read at A[r+c*LDA] with c < r < N; element i of the vector is X[i*INCX] */
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0; /* the entry x(j), already final on entry to iteration j */
+
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) /* forward substitution; jaj is the offset of A(j,j) */
+   {
+      t0 = X[jx]; /* no division: the diagonal is taken to be one */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; } /* remove x(j) * A(i,j) from the rows below j */
+   }
+}
+
+#ifdef STDC_HEADERS /* HPL_dtrsvLTN: overwrite X with the solution of A^T * x = b, A lower triangular, diagonal read */
+static void HPL_dtrsvLTN
+( /* A(r,c) is read at A[r+c*LDA] with c <= r < N; element i of the vector is X[i*INCX] */
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0; /* accumulates the entry x(j) being solved */
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX ) /* back substitution, last entry first; jaj is the offset of A(j,j) */
+   {
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; } /* subtract A(i,j) * x(i) for the solved entries i > j */
+      t0 /= A[jaj]; X[jx] = t0; /* divide by the diagonal A(j,j) and store x(j) */
+   }
+}
+
+#ifdef STDC_HEADERS /* HPL_dtrsvLTU: overwrite X with the solution of A^T * x = b, A lower triangular, diagonal never read */
+static void HPL_dtrsvLTU
+( /* A(r,c) is read at A[r+c*LDA] with c < r < N; element i of the vector is X[i*INCX] */
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; /* ldap1: step from A(j,j) to A(j+1,j+1) */
+   register double            t0; /* accumulates the entry x(j) being solved */
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX ) /* back substitution, last entry first; jaj is the offset of A(j,j) */
+   {
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; } /* subtract A(i,j) * x(i) for the solved entries i > j */
+      X[jx] = t0; /* no division: the diagonal is taken to be one */
+   }
+}
+
+
+#ifdef STDC_HEADERS /* HPL_dtrsvUNN: overwrite X with the solution of A * x = b, A upper triangular, diagonal read */
+static void HPL_dtrsvUNN
+( /* A(r,c) is read at A[r+c*LDA] with r <= c < N; element i of the vector is X[i*INCX] */
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else /* K&R-style definition of the same function */
+static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx; /* jaj: offset of column j of A; jx, ix: offsets of x(j), x(i) */
+   register double            t0; /* the entry x(j) just solved */
+
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= LDA,       jx -= INCX ) /* back substitution, last entry first */
+   {
+      X[jx] /= A[j+jaj]; t0 = X[jx]; /* x(j) is final after dividing by the diagonal A(j,j) */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { X[ix] -= t0 * A[iaij]; } /* remove x(j) * A(i,j) from the rows above j */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNU
+(
+   const int                  N,        /* order of the matrix A */
+   const double               * A,      /* column-major triangular matrix */
+   const int                  LDA,      /* leading dimension of A */
+   double                     * X,      /* in: rhs b, out: solution x */
+   const int                  INCX      /* stride between elements of X */
+)
+#else
+static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+/* Solve A * x = b in place: A upper, unit diagonal, last column first. */
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX;
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {                       /* jaj is the offset of the top of column j */
+      t0 = X[jx];               /* unit diagonal: x(j) is already final */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { X[ix] -= t0 * A[iaij]; }  /* remove x(j)*A(i,j) from rows i < j */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTN
+(
+   const int                  N,        /* order of the matrix A */
+   const double               * A,      /* column-major triangular matrix */
+   const int                  LDA,      /* leading dimension of A */
+   double                     * X,      /* in: rhs b, out: solution x */
+   const int                  INCX      /* stride between elements of X */
+)
+#else
+static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+/* Solve A^T * x = b in place: A upper, non-unit diagonal, first row first. */
+   for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+   {                       /* jaj is the offset of the top of column j */
+      t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { t0 -= A[iaij] * X[ix]; }
+      t0 /= A[iaij]; X[jx] = t0; /* loop left iaij == jaj+j, i.e. A(j,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTU
+(
+   const int                  N,        /* order of the matrix A */
+   const double               * A,      /* column-major triangular matrix */
+   const int                  LDA,      /* leading dimension of A */
+   double                     * X,      /* in: rhs b, out: solution x */
+   const int                  INCX      /* stride between elements of X */
+)
+#else
+static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+   int                        i, iaij, ix, j, jaj, jx;
+   register double            t0;
+/* Solve A^T * x = b in place: A upper, unit diagonal, first row first. */
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+   {                       /* jaj is the offset of the top of column j */
+      t0 = X[jx];
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { t0 -= A[iaij] * X[ix]; }
+      X[jx] = t0;               /* unit diagonal: A(j,j) is never read */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsv0
+(
+   const enum HPL_UPLO        UPLO,     /* which triangle of A is used */
+   const enum HPL_TRANS       TRANS,    /* solve with A or with A^T */
+   const enum HPL_DIAG        DIAG,     /* unit or non-unit diagonal */
+   const int                  N,        /* order of the matrix A */
+   const double               * A,      /* column-major triangular matrix */
+   const int                  LDA,      /* leading dimension of A */
+   double                     * X,      /* in: rhs b, out: solution x */
+   const int                  INCX      /* stride between elements of X */
+)
+#else
+static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Column-major triangular solve: pick the kernel for UPLO/TRANS/DIAG. */
+   if( N == 0 ) return;                   /* empty system: nothing to do */
+/* NOTE(review): kernels index X as k*INCX from X[0]; INCX < 0 looks unhandled. */
+   if( UPLO == HplUpper )
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUNU( N,    A, LDA, X, INCX ); }
+      }
+      else                     /* any TRANS other than HplNoTrans: A^T */
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvUTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+   else                        /* any UPLO other than HplUpper: lower */
+   {
+      if( TRANS == HplNoTrans )
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLNU( N,    A, LDA, X, INCX ); }
+      }
+      else                     /* any TRANS other than HplNoTrans: A^T */
+      {
+         if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N,    A, LDA, X, INCX ); }
+         else                     { HPL_dtrsvLTU( N,    A, LDA, X, INCX ); }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dtrsv
+( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsv solves one of the systems of equations
+ *
+ *     A * x = b,   or   A^T * x = b,
+ *
+ * where b and x are n-element vectors and  A  is an n by n non-unit, or
+ * unit, upper or lower triangular matrix.
+ *
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:
+ *            ORDER = HplRowMajor,
+ *            ORDER = HplColumnMajor.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is
+ *         to be referenced.
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies  the equations  to  be  solved as
+ *         follows:
+ *            TRANS==HplNoTrans     A   * x = b,
+ *            TRANS==HplTrans       A^T * x = b.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the order of the matrix A. N must be at
+ *         least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+ *         n by n upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower  on entry,
+ *         the  leading n by n lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *
+ *         Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,n).
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         Before entry,  the  incremented array  X  must contain  the n
+ *         element right-hand side vector b. On exit,  X  is overwritten
+ *         with the solution vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+   }
+   else     /* row-major A read column-major is A^T: flip UPLO and TRANS */
+   {
+      HPL_dtrsv0( ( UPLO  == HplUpper   ? HplLower : HplUpper   ),
+                  ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  DIAG, N, A, LDA, X, INCX );
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif /* StringSunStyle */
+#ifdef StringStructVal
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+/* Without F77_INTEGER, the F77 names simply alias the C int arguments. */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77lda = LDA, F77incx = INCX;
+#else
+#define F77N              N
+#define F77lda            LDA
+#define F77incx           INCX
+#endif
+   char                      cuplo, ctran, cdiag;
+/* Translate the enums to option characters; row-major flips UPLO and TRANS. */
+   if( ORDER == HplColumnMajor )
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+   }
+   else
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+   }
+   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
+/* One F77dtrsv call form per supported character-argument convention. */
+#ifdef StringSunStyle
+   F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
+             IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+   ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+   fuplo = HPL_C2F_CHAR( cuplo );
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructVal
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructPtr
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
+#endif
+
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dtrsv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c
new file mode 100644
index 000000000..5ceabdf25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_idamax
+
+#ifdef STDC_HEADERS
+int HPL_idamax
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX
+)
+#else
+int HPL_idamax
+( N, X, INCX )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_idamax returns the zero-based index in an n-vector x of the first
+ * element having maximum absolute value.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   return( (int)(cblas_idamax( N, X, INCX )) );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL
+   register double           absxi, smax = HPL_rzero, x0, x1, x2, x3,
+                             x4, x5, x6, x7;
+   const double              * StX;
+   register int              imax = 0, i = 0, j;  /* imax stays 0 if N <= 0 */
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+/* Main loop is unrolled by 8; nu is N rounded down to a multiple of 8. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;       /* where X ends up after nu elements */
+
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+/* Strict '>' below keeps the first element among equal maxima. */
+            absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+
+            X    += incX8;
+
+         } while( X != StX );
+      }
+/* Remainder: the last N - nu elements are scanned one at a time. */
+      for( j = N - nu; j != 0; j-- )
+      {
+         x0    = (*X);
+         absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+         i    += 1;
+         X    += INCX;
+      }
+   }
+   return( imax );
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   int                       imax = 0;     /* returned as is when N <= 0 */
+/* The Fortran result is decremented by one to give a zero-based index. */
+   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
+   return( imax );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_idamax
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Build settings (CC, CCFLAGS, INCdir, HPLlib, ...) are expected from Make.inc.
+# ######################################################################
+# Headers that every object file below depends on.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+#
+## Object files ########################################################
+#
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+# all, lib and clean name actions, not files, hence the .PHONY below.
+## Targets #############################################################
+.PHONY  : all lib clean
+all     : lib
+#
+lib     : lib.grd
+# lib.grd is a stamp file: it records that the objects were archived.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; the sources live in the parent directory.
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+# Remove the objects and the lib.grd stamp; the archive itself is kept.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c
new file mode 100644
index 000000000..dd03b79b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+ 
+#ifdef STDC_HEADERS
+int HPL_binit_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* Prepares PANEL for the broadcast; returns HPL_SUCCESS or HPL_FAILURE.
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Copy the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Create the MPI user-defined data type
+ */
+   ierr = HPL_packL( PANEL, 0, PANEL->len, 0 );
+/* HPL_FAILURE is only possible here, when HPL_packL reports an MPI error. */
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+/*
+ * Force the copy of the panel into a contiguous buffer
+ */
+   HPL_copyL( PANEL );
+
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Buffer, count and datatype handed to the MPI send/receive calls below. */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* No MPI datatype: send PANEL->len doubles starting at PANEL->L2. */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, root, size;
+/* Returns, and stores in *IFLAG, HPL_SUCCESS, HPL_KEEP_TESTING or HPL_FAILURE.
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process,  then  send message to its two
+ * next neighbors. Otherwise, probe for message. If the message is here,
+ * then receive it,   and  if I am not the last process of the ring,  or
+ * just after the root process, then forward it to the next.  Otherwise,
+ * inform the caller that the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );
+
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next,
+                          size ), msgid, comm );
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( size > 2 ) &&
+          ( MModSub1( prev, size ) == root ) ) partner = root;
+      else                                     partner = prev;
+/* partner is the root for its second neighbor, else the previous process. */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( prev != root ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+#endif
+/* Finishes the broadcast for PANEL; returns HPL_SUCCESS or HPL_FAILURE.
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL )           { return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Release the MPI data type of the panel, if one is in use; without
+ * HPL_USE_MPI_DATATYPE there is nothing to release.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr = MPI_Type_free( &PANEL->dtypes[0] );
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c
new file mode 100644
index 000000000..dd5eb2d12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_1ring( HPL_T_panel * PANEL )
+#else
+int HPL_binit_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_binit_1ring copies the panel and/or builds its MPI data type.
+ *
+ * PANEL   panel data structure; NULL is accepted and nothing is done.
+ *
+ * Returns  HPL_FAILURE  only  when  HPL_packL  does  not  return
+ * MPI_SUCCESS (HPL_USE_MPI_DATATYPE builds), HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        pack_rc;
+#endif
+/*
+ * Nothing to set up without a panel or with a single process column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * Copy the panel into a contiguous buffer:  always when MPI data types
+ * are not used, and only if HPL_COPY_L is defined when they are.
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type
+ */
+   pack_rc = HPL_packL( PANEL, 0, PANEL->len, 0 );
+
+   if( pack_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+#endif
+
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message given by the first buffer / count / data-type of PANEL */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* Message is PANEL->len MPI_DOUBLE entries starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1ring( HPL_T_panel * PANEL, int * IFLAG )
+#else
+int HPL_bcast_1ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * One step of the ring broadcast of PANEL over the row communicator.
+ * The root (PANEL->pcol) sends to its successor;  any other process
+ * probes its predecessor and, once the message is there, receives it
+ * and forwards it unless its successor is the root.
+ *
+ * IFLAG and the return value are both set to HPL_SUCCESS, HPL_FAILURE
+ * (an MPI call failed) or HPL_KEEP_TESTING (nothing to receive yet).
+ */
+   MPI_Comm                   row_comm;
+   int                        arrived, me, nprocs, pred, rc, src, succ,
+                              tag;
+/*
+ * Nothing to broadcast without a panel or with one process column.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   nprocs = PANEL->grid->npcol;
+   if( nprocs <= 1 )   { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+
+   row_comm = PANEL->grid->row_comm; me  = PANEL->grid->mycol;
+   tag      = PANEL->msgid;          src = PANEL->pcol;
+   succ     = MModAdd1( me, nprocs );
+
+   if( me != src )
+   {
+      pred = MModSub1( me, nprocs );
+      rc   = MPI_Iprobe( pred, tag, row_comm, &arrived,
+                         &PANEL->status[0] );
+/*
+ * Probe succeeded but found nothing: the caller has to try again.
+ */
+      if( ( rc == MPI_SUCCESS ) && ( arrived == 0 ) )
+      {
+         *IFLAG = HPL_KEEP_TESTING; return( *IFLAG );
+      }
+/*
+ * Message pending: receive it, then pass it on around the ring.
+ */
+      if( rc == MPI_SUCCESS )
+      {
+         rc = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, pred, tag,
+                        row_comm, &PANEL->status[0] );
+         if( ( rc == MPI_SUCCESS ) && ( succ != src ) )
+         {
+            rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag,
+                           row_comm );
+         }
+      }
+   }
+   else
+   {
+      rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag,
+                     row_comm );
+   }
+/*
+ * Map the status of the last MPI call onto the HPL return codes.
+ */
+   if( rc == MPI_SUCCESS ) { *IFLAG = HPL_SUCCESS; }
+   else                    { *IFLAG = HPL_FAILURE; }
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1ring( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_bwait_1ring performs no communication.  Its only work is to free
+ * PANEL->dtypes[0],  and only when  HPL_USE_MPI_DATATYPE  is defined.
+ * Returns HPL_FAILURE if MPI_Type_free fails, HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        mpi_rc;
+#endif
+/*
+ * No panel, or a single process column: there is nothing to release.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifndef HPL_USE_MPI_DATATYPE
+   return( HPL_SUCCESS );
+#else
+   mpi_rc = MPI_Type_free( &PANEL->dtypes[0] );
+
+   if( mpi_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c
new file mode 100644
index 000000000..56581ea0d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c
@@ -0,0 +1,236 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2rinM( HPL_T_panel * PANEL )
+#else
+int HPL_binit_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_binit_2rinM copies the panel and/or builds its MPI data type.
+ *
+ * PANEL   panel data structure; NULL is accepted and nothing is done.
+ *
+ * Returns  HPL_FAILURE  only  when  HPL_packL  does  not  return
+ * MPI_SUCCESS (HPL_USE_MPI_DATATYPE builds), HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        pack_rc;
+#endif
+/*
+ * Nothing to set up without a panel or with a single process column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * Copy the panel into a contiguous buffer:  always when MPI data types
+ * are not used, and only if HPL_COPY_L is defined when they are.
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type
+ */
+   pack_rc = HPL_packL( PANEL, 0, PANEL->len, 0 );
+
+   if( pack_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+#endif
+
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message given by the first buffer / count / data-type of PANEL */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len MPI_DOUBLE entries starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2rinM( HPL_T_panel * PANEL, int * IFLAG )
+#else
+int HPL_bcast_2rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * One step of the 2rinM broadcast of PANEL over the row communicator.
+ * The root (PANEL->pcol) sends to its right neighbor and, with more
+ * than two process columns, to the neighbor after that (unless it is
+ * the mid-process) and to the mid-process.  Any other process probes
+ * the root if it is one of those targets, its predecessor otherwise;
+ * a pending message is received and then forwarded, except by the
+ * root's right neighbor or when the successor is the mid-process or
+ * the root.
+ * IFLAG and the return value are both set to HPL_SUCCESS, HPL_FAILURE
+ * (an MPI call failed) or HPL_KEEP_TESTING (nothing to receive yet).
+ */
+   MPI_Comm                   row_comm;
+   int                        arrived, from, half, me, mid, nprocs,
+                              pred, rc, src, succ, succ2, tag;
+/*
+ * Nothing to broadcast without a panel or with one process column.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   nprocs = PANEL->grid->npcol;
+   if( nprocs <= 1 )   { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   row_comm = PANEL->grid->row_comm; me  = PANEL->grid->mycol;
+   tag      = PANEL->msgid;          src = PANEL->pcol;
+   succ     = MModAdd1( me, nprocs );
+   half     = ( ( nprocs + 1 ) >> 1 );
+   mid      = MModAdd( src, half, nprocs );
+
+   if( me != src )
+   {
+      pred = MModSub1( me, nprocs );
+      from = pred;
+      if( ( pred == src ) || ( me == mid ) ||
+          ( MModSub1( pred, nprocs ) == src ) ) { from = src; }
+      rc = MPI_Iprobe( from, tag, row_comm, &arrived,
+                       &PANEL->status[0] );
+/*
+ * Probe succeeded but found nothing: the caller has to try again.
+ */
+      if( ( rc == MPI_SUCCESS ) && ( arrived == 0 ) )
+      {
+         *IFLAG = HPL_KEEP_TESTING; return( *IFLAG );
+      }
+/*
+ * Message pending: receive it, then forward it when required.
+ */
+      if( rc == MPI_SUCCESS )
+      {
+         rc = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, from, tag,
+                        row_comm, &PANEL->status[0] );
+         if( ( rc   == MPI_SUCCESS ) && ( pred != src ) &&
+             ( succ != mid         ) && ( succ != src ) )
+         {
+            rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag,
+                           row_comm );
+         }
+      }
+   }
+   else
+   {
+      rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag, row_comm );
+      if( ( rc == MPI_SUCCESS ) && ( nprocs > 2 ) )
+      {
+         succ2 = MModAdd1( succ, nprocs );
+         if( succ2 != mid )
+         {
+            rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ2, tag,
+                           row_comm );
+         }
+         if( rc == MPI_SUCCESS )
+         {
+            rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, mid, tag,
+                           row_comm );
+         }
+      }
+   }
+/* Map the status of the last MPI call onto the HPL return codes. */
+   if( rc == MPI_SUCCESS ) { *IFLAG = HPL_SUCCESS; }
+   else                    { *IFLAG = HPL_FAILURE; }
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2rinM( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_bwait_2rinM performs no communication.  Its only work is to free
+ * PANEL->dtypes[0],  and only when  HPL_USE_MPI_DATATYPE  is defined.
+ * PANEL may be NULL, in which case nothing is done.
+ * Returns HPL_FAILURE if MPI_Type_free fails, HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        mpi_rc;
+#endif
+/*
+ * No panel, or a single process column: there is nothing to release.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifndef HPL_USE_MPI_DATATYPE
+   return( HPL_SUCCESS );
+#else
+   mpi_rc = MPI_Type_free( &PANEL->dtypes[0] );
+
+   if( mpi_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c
new file mode 100644
index 000000000..f0e6e2647
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2ring( HPL_T_panel * PANEL )
+#else
+int HPL_binit_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_binit_2ring copies the panel and/or builds its MPI data type.
+ *
+ * PANEL   panel data structure; NULL is accepted and nothing is done.
+ *
+ * Returns  HPL_FAILURE  only  when  HPL_packL  does  not  return
+ * MPI_SUCCESS (HPL_USE_MPI_DATATYPE builds), HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        pack_rc;
+#endif
+/*
+ * Nothing to set up without a panel or with a single process column.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+/*
+ * Copy the panel into a contiguous buffer:  always when MPI data types
+ * are not used, and only if HPL_COPY_L is defined when they are.
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type
+ */
+   pack_rc = HPL_packL( PANEL, 0, PANEL->len, 0 );
+
+   if( pack_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+#endif
+
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message given by the first buffer / count / data-type of PANEL */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len MPI_DOUBLE entries starting at PANEL->L2 */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2ring( HPL_T_panel * PANEL, int * IFLAG )
+#else
+int HPL_bcast_2ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * One step of the 2ring broadcast of PANEL over the row communicator.
+ * The root (PANEL->pcol) sends to its right neighbor and, with more
+ * than two process columns, also to the mid-process.  Any other
+ * process probes the root if it is one of those targets, its
+ * predecessor otherwise;  a pending message is received and then
+ * forwarded unless the successor is the mid-process or the root.
+ *
+ * IFLAG and the return value are both set to HPL_SUCCESS, HPL_FAILURE
+ * (an MPI call failed) or HPL_KEEP_TESTING (nothing to receive yet).
+ */
+   MPI_Comm                   row_comm;
+   int                        arrived, from, half, me, mid, nprocs, rc,
+                              src, succ, tag;
+/*
+ * Nothing to broadcast without a panel or with one process column.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   nprocs = PANEL->grid->npcol;
+   if( nprocs <= 1 )   { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   row_comm = PANEL->grid->row_comm; me  = PANEL->grid->mycol;
+   tag      = PANEL->msgid;          src = PANEL->pcol;
+   succ     = MModAdd1( me, nprocs );
+   half     = ( ( nprocs + 1 ) >> 1 );
+   mid      = MModAdd( src, half, nprocs );
+
+   if( me != src )
+   {
+      from = MModSub1( me, nprocs );
+      if( ( from == src ) || ( me == mid ) ) { from = src; }
+      rc = MPI_Iprobe( from, tag, row_comm, &arrived,
+                       &PANEL->status[0] );
+/*
+ * Probe succeeded but found nothing: the caller has to try again.
+ */
+      if( ( rc == MPI_SUCCESS ) && ( arrived == 0 ) )
+      {
+         *IFLAG = HPL_KEEP_TESTING; return( *IFLAG );
+      }
+/*
+ * Message pending: receive it, then forward it when required.
+ */
+      if( rc == MPI_SUCCESS )
+      {
+         rc = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, from, tag,
+                        row_comm, &PANEL->status[0] );
+         if( ( rc == MPI_SUCCESS ) &&
+             ( succ != mid ) && ( succ != src ) )
+         {
+            rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag,
+                           row_comm );
+         }
+      }
+   }
+   else
+   {
+      rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, succ, tag, row_comm );
+      if( ( rc == MPI_SUCCESS ) && ( nprocs > 2 ) )
+      {
+         rc = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, mid, tag,
+                        row_comm );
+      }
+   }
+/* Map the status of the last MPI call onto the HPL return codes. */
+   if( rc == MPI_SUCCESS ) { *IFLAG = HPL_SUCCESS; }
+   else                    { *IFLAG = HPL_FAILURE; }
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2ring( HPL_T_panel * PANEL )
+#else
+int HPL_bwait_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * HPL_bwait_2ring performs no communication.  Its only work is to free
+ * PANEL->dtypes[0],  and only when  HPL_USE_MPI_DATATYPE  is defined.
+ * PANEL may be NULL, in which case nothing is done.
+ * Returns HPL_FAILURE if MPI_Type_free fails, HPL_SUCCESS otherwise.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   int                        mpi_rc;
+#endif
+/*
+ * No panel, or a single process column: there is nothing to release.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifndef HPL_USE_MPI_DATATYPE
+   return( HPL_SUCCESS );
+#else
+   mpi_rc = MPI_Type_free( &PANEL->dtypes[0] );
+
+   if( mpi_rc != MPI_SUCCESS ) { return( HPL_FAILURE ); }
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c
new file mode 100644
index 000000000..100161152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bcast
+(
+   HPL_T_panel *                    PANEL,
+   int *                            IFLAG
+)
+#else
+int HPL_bcast
+( PANEL, IFLAG )
+   HPL_T_panel *                    PANEL;
+   int *                            IFLAG;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bcast broadcasts  the  current  panel.  Successful  completion is
+ * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+ * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+ * not completed, in which case this function should be called again.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * IFLAG   (output)                      int *
+ *         On exit,  IFLAG  indicates  whether  or not the broadcast has
+ *         occurred.  It is set on every path, unknown topology included.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the selected virtual broadcast topology.  An unrecognized
+ * value sends nothing but still sets IFLAG, so that callers polling on
+ * IFLAG never read a value this function did not write.
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break;
+      case HPL_1RING   : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break;
+      case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break;
+      case HPL_2RING   : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break;
+      case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
+      case HPL_BLONG   : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
+      default          : ierr = *IFLAG = HPL_SUCCESS;
+   }
+   return( ierr );
+/*
+ * End of HPL_bcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c
new file mode 100644
index 000000000..3daf72b7d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_binit
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_binit
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_binit initializes  a  row  broadcast.  Successful  completion  is
+ * indicated by the returned error code HPL_SUCCESS, which is also what
+ * a NULL PANEL or a grid with at most one process column yields.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast, or is NULL when there is nothing to do.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL )           return( HPL_SUCCESS ); /* as HPL_bcast */
+   if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS );
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_binit_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_binit_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_binit_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_binit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c
new file mode 100644
index 000000000..5fa221937
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c
@@ -0,0 +1,445 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+/* Slot indices used with PANEL->buffers[], ->counts[] and ->dtypes[] */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blonM prepares PANEL for the blonM broadcast: unless the MPI
+ * datatype path is compiled in without HPL_COPY_L,  the panel is copied
+ * into a contiguous buffer by HPL_copyL. HPL_SUCCESS is always returned.
+ *
+ * .. Executable Statements ..
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+/*
+ * The copy is forced whenever MPI datatypes are not used
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* With MPI datatypes, every message goes through the send/recv slots */
+#define   _M_BUFF_S1        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S1       PANEL->counts[I_SEND]
+#define   _M_TYPE_S1        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_S2        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S2       PANEL->counts[I_SEND]
+#define   _M_TYPE_S2        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R1        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R1       PANEL->counts[I_RECV]
+#define   _M_TYPE_R1        PANEL->dtypes[I_RECV]
+
+#define   _M_BUFF_R2        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R2       PANEL->counts[I_RECV]
+#define   _M_TYPE_R2        PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+
+#else
+/* Otherwise, messages are runs of MPI_DOUBLE addressed inside PANEL->L2 */
+#define   _M_BUFF_S1        (void *)(PANEL->L2)
+#define   _M_COUNT_S1       PANEL->len
+#define   _M_TYPE_S1        MPI_DOUBLE
+/* ibuf / lbuf below are local variables of HPL_bcast_blonM */
+#define   _M_BUFF_S2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S2       lbuf
+#define   _M_TYPE_S2        MPI_DOUBLE
+ 
+#define   _M_BUFF_R1        (void *)(PANEL->L2)
+#define   _M_COUNT_R1       PANEL->len
+#define   _M_TYPE_R1        MPI_DOUBLE
+ 
+#define   _M_BUFF_R2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R2       lbuf
+#define   _M_TYPE_R2        MPI_DOUBLE
+/* ibufS/lbufS and ibufR/lbufR are set per step of the roll phase loop */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blonM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blonM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcast PANEL along its process row.  On every return,  *IFLAG holds
+ * the same status code as the function value.
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf,
+                              ibufR, ibufS, dummy=0, indx, ip2=1, k, l,
+                              lbuf, lbufR, lbufS, mask=1, msgid, mydist,
+                              mydist2, next, npm1, npm2, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  root process  sends to its right neighbor,  then spread
+ * the panel on the other npcol - 2 processes.  If  I  am  not the root 
+ * process, probe for message received.  If the message is there,  then
+ * receive it. If I am just after the root process, return.  Otherwise,
+ * keep spreading on those npcol - 2 processes.  Otherwise,  inform the
+ * caller that the panel has still not been received.
+ */
+   comm = PANEL->grid->row_comm; rank  = PANEL->grid->mycol;
+   root = PANEL->pcol;           msgid = PANEL->msgid;
+   prev = MModSub1( rank, size );
+   if( rank == root )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   HPL_packL( PANEL, 0, PANEL->len, I_SEND );
+#endif
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1,
+                             MModAdd1( rank, size ), msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+   else if( prev == root )
+   {
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ *
+ *    ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] );
+ */
+      if( ierr == MPI_SUCCESS )
+      {                                  /* if panel is here, proceed */
+         if( go != 0 )
+         {
+#ifdef HPL_USE_MPI_DATATYPE
+            ierr =      HPL_packL( PANEL, 0, PANEL->len, I_RECV );
+#endif
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1,
+                                  root, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+      }
+   }
+/*
+ * if I am just after the root, exit now. The message receive  completed
+ * successfully, this guy is done. If there are only 2 processes in each 
+ * row of processes, we are done as well.
+ */
+   if( ( prev == root ) || ( size == 2 ) )
+   {
+      *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+      return( *IFLAG );
+   }
+/*
+ * Otherwise, proceed with broadcast -  Spread  the panel across process
+ * columns
+ */
+   npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len;
+/* After the loop: ip2 = largest power of 2 <= npm2,  mask = 2*ip2 - 1 */
+   k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   if( rank == root ) mydist2 = ( mydist = 0 );
+   else   mydist2 = ( mydist  = MModSub( rank, root, size ) - 1 );
+/* count = length of one piece: COUNT split npm1 ways, but at least 1 */
+   indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 );
+ 
+   do
+   {
+      mask ^= ip2;
+
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+
+         partner = mydist ^ ip2;
+
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+  
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                   msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < npm1 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size );
+   if( MModSub1( prev, size ) == root ) prev = root;
+   next = MModAdd1( rank, size );
+   if( rank == root ) next = MModAdd1( next, size );
+
+   for( k = 0; k < npm2; k++ )
+   {
+      l = ( k >> 1 );
+/*
+ * Who is sending to whom, and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+ 
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+ 
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * No completion work is done here:  whether or not there is a panel to
+ * wait for, every path returns HPL_SUCCESS.
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   { return( HPL_SUCCESS ); }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c
new file mode 100644
index 000000000..e57f11bcc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c
@@ -0,0 +1,363 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L      /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+/* Slot indices used with PANEL->buffers[], ->counts[] and ->dtypes[] */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blong prepares PANEL for the blong broadcast: unless the MPI
+ * datatype path is compiled in without HPL_COPY_L,  the panel is copied
+ * into a contiguous buffer by HPL_copyL. HPL_SUCCESS is always returned.
+ *
+ * .. Executable Statements ..
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+/*
+ * The copy is forced whenever MPI datatypes are not used
+ */
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+   HPL_copyL( PANEL );
+#endif
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* With MPI datatypes, every message goes through the send/recv slots */
+#define   _M_BUFF_S         PANEL->buffers[I_SEND]
+#define   _M_COUNT_S        PANEL->counts[I_SEND]
+#define   _M_TYPE_S         PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R         PANEL->buffers[I_RECV]
+#define   _M_COUNT_R        PANEL->counts[I_RECV]
+#define   _M_TYPE_R         PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+ 
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+ 
+#else
+/* Otherwise, messages are runs of MPI_DOUBLE addressed inside PANEL->L2 */
+#define   _M_BUFF_S         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S        lbuf
+#define   _M_TYPE_S         MPI_DOUBLE
+/* ibuf / lbuf are local variables of HPL_bcast_blong */
+#define   _M_BUFF_R         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R        lbuf
+#define   _M_TYPE_R         MPI_DOUBLE
+/* ibufS/lbufS and ibufR/lbufR are set per step of the roll phase loop */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blong
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blong( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * Broadcast PANEL along its process row.  On every return,  *IFLAG holds
+ * the same status code as the function value.
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, dummy=0, ierr=MPI_SUCCESS,
+                              ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf,
+                              lbufR, lbufS, mask, msgid, mydist, mydist2,
+                              next, npm1, partner, prev, rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process,  test  for  message receive completion. If
+ * the message  is there,  then receive it,  and  keep  spreading  in  a
+ * blocking fashion this time.  Otherwise,  inform  the caller  that the
+ * panel has still not been received. 
+ */
+   comm    = PANEL->grid->row_comm;  rank  = PANEL->grid->mycol;
+   mask    = PANEL->grid->col_mask;  ip2   = PANEL->grid->col_ip2m1;
+   root    = PANEL->pcol;            msgid = PANEL->msgid;
+   COUNT   = PANEL->len;             npm1  = size - 1;
+   mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2;
+   count   = COUNT / size; count = Mmax( count, 1 );
+/*
+ * Spread the panel across process columns
+ */
+   do
+   {
+      mask ^= ip2;
+ 
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+ 
+         partner = mydist ^ ip2;
+ 
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on 
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                     msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( root, partner, size );
+ 
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+ 
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces
+ */
+   prev = MModSub1( rank, size ); next = MModAdd1( rank, size );
+   for( k = 0; k < npm1; k++ )
+   {
+      l = ( k >> 1 ); 
+/*
+ * Who is sending to whom, and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * No completion work is done here:  whether or not there is a panel to
+ * wait for, every path returns HPL_SUCCESS.
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   { return( HPL_SUCCESS ); }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c
new file mode 100644
index 000000000..a2e0f4df8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bwait
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_bwait
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bwait waits  for  the  row  broadcast  of  the current  panel  to
+ * terminate.  Successful completion is indicated by the returned  error
+ * code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.  It is dereferenced without a NULL check.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * With at most one process column, return HPL_SUCCESS immediately.
+ */
+   if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS );
+/*
+ * Dispatch on the broadcast topology; unlisted values yield HPL_SUCCESS
+ */
+   top = PANEL->algo->btopo;
+
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_bwait_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_bwait_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_bwait_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_bwait
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c
new file mode 100644
index 000000000..04f765a6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_copyL
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_copyL
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_copyL copies  the  panel of columns, the L1 replicated submatrix,
+ * the pivot array  and  the info scalar into a contiguous workspace for
+ * later broadcast.
+ * NOTE(review): only the HPL_dlacpy of A into L2 is visible in this body.
+ * The copy of this panel  into  a contiguous buffer  can be enforced by
+ * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        jb, lda;
+/* ..
+ * Only mycol == pcol copies into L2; myrow == prow skips the top jb rows
+ */
+   if( PANEL->grid->mycol == PANEL->pcol )
+   {
+      jb = PANEL->jb; lda = PANEL->lda;
+ 
+      if( PANEL->grid->myrow == PANEL->prow )
+      {
+         HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ),
+                     lda, PANEL->L2, PANEL->ldl2 );
+      }
+      else
+      {
+         HPL_dlacpy( PANEL->mp,    jb, Mptr( PANEL->A,  0, -jb, lda ),
+                     lda, PANEL->L2, PANEL->ldl2 );
+      }
+   }
+/*
+ * End of HPL_copyL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c
new file mode 100644
index 000000000..8a70ef83d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c
@@ -0,0 +1,245 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_packL
+(
+   HPL_T_panel *                    PANEL,
+   const int                        INDEX,
+   const int                        LEN,
+   const int                        IBUF
+)
+#else
+int HPL_packL
+( PANEL, INDEX, LEN, IBUF )
+   HPL_T_panel *                    PANEL;
+   const int                        INDEX;
+   const int                        LEN;
+   const int                        IBUF;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_packL forms  the MPI data type for the panel to be broadcast.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * MPI_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * INDEX   (input)                       const int
+ *         On entry,  INDEX  points  to  the  first entry of the  packed
+ *         buffer being broadcast.
+ *
+ * LEN     (input)                       const int
+ *         On entry, LEN is the length of the packed buffer.
+ *
+ * IBUF    (input)                       const int
+ *         On entry, IBUF  specifies the panel buffer/count/type entries
+ *         that should be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+#ifndef HPL_COPY_L
+   MPI_Datatype               * type = NULL;
+   void                       * * * bufs = NULL;
+   double                     * A;
+   int                        * blen = NULL;
+   MPI_Aint                   * disp = NULL;
+   int                        curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1,
+                              jb, jbm, jbp1, lda, len, m, m1, nbufs;
+#else
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_COPY_L
+/*
+ * Panel + L1 + DPIV  have been copied into a contiguous buffer - Create
+ * and commit a contiguous data type
+ */
+   PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX);
+   PANEL->counts [IBUF] = 1;
+
+   ierr =      MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+   return( ierr );
+#else
+/*
+ * Panel is not contiguous (because of LDA and also L1 + DPIV) -  Create
+ * and commit a struct data type
+ */
+   jbp1 = ( jb = PANEL->jb ) + 1;
+/*
+ * Temporaries to create the type struct (jb + 1 entries each).
+ */
+   bufs = (void     * * *)malloc( jbp1 * sizeof( void * *     ) );
+   blen = (int          *)malloc( jbp1 * sizeof( int          ) );
+   disp = (MPI_Aint     *)malloc( jbp1 * sizeof( MPI_Aint     ) );
+   type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) );
+ 
+   if( ( bufs != NULL ) && ( blen != NULL ) &&
+       ( disp != NULL ) && ( type != NULL ) )
+   {
+      m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow );
+      if( curr != 0 ) m -= jb;
+ 
+      len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m;
+ 
+      if( ( m > 0 ) && ( ibuf < jbm ) )
+      {
+/*
+ * Retrieve proper pointers depending on process row and column
+ */
+         if( PANEL->grid->mycol == PANEL->pcol )
+         {
+            lda = PANEL->lda;
+            if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); }
+            else            { A = Mptr( PANEL->A,  0, -jb, lda ); }
+         }
+         else { lda = PANEL->ldl2; A = PANEL->L2; }
+/*
+ * Pack the first (partial) column of L
+ */
+         m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m );
+         m1 = Mmin( len, m1 );
+ 
+         bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda ));
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = m1;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+         nbufs++; len -= m1; j1++; ibuf += m1;
+/*
+ * Pack the remaining columns of L
+ */
+         while( ( len > 0 ) && ( j1 < jb ) )
+         {
+            m1 = Mmin( len, m );
+ 
+            bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda ));
+            type[nbufs] = MPI_DOUBLE;
+            blen[nbufs] = m1;
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+            nbufs++; len -= m1; j1++; ibuf += m1;
+         }
+      }
+/*
+ * Pack L1, DPIV, DINFO
+ */
+      if( len > 0 )
+      {                                            /* L1, DPIV, DINFO */
+         bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm);
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = len;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+         nbufs++;
+      }
+/* Rebase on disp[0];  "disp[0] = 0" runs once, after the for loop.   */
+      for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0;
+ 
+      PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1;
+/*
+ * construct the struct type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_create_struct( nbufs, blen, disp, type,
+                                   &PANEL->dtypes[IBUF] );
+/*
+ * release temporaries
+ */
+      if( bufs ) free( bufs );
+      if( blen ) free( blen );
+      if( disp ) free( disp );
+      if( type ) free( type );
+/*
+ * commit the type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+      return( ierr );
+   }
+   else
+   {
+/*
+ * Memory allocation failed -> abort
+ */
+      HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" );
+      return( MPI_SUCCESS );    /* never executed (hopefully ...) */
+   }
+#endif
+#else
+          /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug
+             somewhere, so, just in case  and until I find it ... */
+   return( MPI_SUCCESS );   
+#endif
+/*
+ * End of HPL_packL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c
new file mode 100644
index 000000000..ff426891c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c
@@ -0,0 +1,142 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_recv
+(
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              SRC,
+   int                              RTAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_recv
+( RBUF, RCOUNT, SRC, RTAG, COMM )
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              SRC;
+   int                              RTAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_recv is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * Successful  completion  is  indicated  by  the  returned  error  code
+ * HPL_SUCCESS.  In the case of messages of length less than or equal to
+ * zero, this function returns immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * SRC     (local input)                 int
+ *         On entry, SRC  specifies the rank of the  sending  process in
+ *         the communication space defined by COMM.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+#endif
+   int                        ierr;
+/* ..
+ * HPL_USE_MPI_DATATYPE is #undef'd above: only the #else path compiles.
+ */
+   if( RCOUNT <= 0 ) return( HPL_SUCCESS );
+
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM,
+                         &status );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_free( &type );
+#else
+   ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG,
+                    COMM, &status );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_recv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c
new file mode 100644
index 000000000..0b2363563
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c
@@ -0,0 +1,239 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_sdrv
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              STAG,
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              RTAG,
+   int                              PARTNER,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_sdrv
+( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              STAG;
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              RTAG;
+   int                              PARTNER;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is
+ * to allow for some experimentation and tuning of this simple function.
+ * Messages  of  length  less than  or  equal to zero  are not sent  nor
+ * received.  Successful completion  is  indicated by the returned error
+ * code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number  of double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG  specifies the message tag to be used for the
+ *         sending communication operation.
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  specifies the message tag to be used for the
+ *         receiving communication operation.
+ *
+ * PARTNER (local input)                 int
+ *         On entry,  PARTNER  specifies  the rank of the  collaborative
+ *         process in the communication space defined by COMM.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type[2];
+#endif
+   MPI_Request                request;
+   MPI_Status                 status;
+   int                        ierr;
+/* ..
+ * HPL_USE_MPI_DATATYPE is #undef'd above: only the #else paths compile.
+ */
+   if( RCOUNT > 0 )
+   {
+      if( SCOUNT > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER,
+                                RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER,
+                               STAG, COMM );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[1] );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                                PARTNER, RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE,
+                               PARTNER, STAG, COMM );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#endif
+      }
+      else
+      {
+/*
+ * Blocking receive
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG,
+                               COMM, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+         ierr =      MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                               PARTNER, RTAG, COMM, &status );
+#endif
+      }
+   }
+   else if( SCOUNT > 0 )
+   {
+/*
+ * Blocking send (nothing to receive)
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+      ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG,
+                          COMM );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &type[1] );
+#else
+      ierr =      MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER,
+                            STAG, COMM );
+#endif
+   }
+   else { ierr = MPI_SUCCESS; }
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_sdrv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c
new file mode 100644
index 000000000..9e9868594
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_send
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              DEST,
+   int                              STAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_send
+( SBUF, SCOUNT, DEST, STAG, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              DEST;
+   int                              STAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_send forwards a buffer of doubles to MPI_Send.  It exists so that
+ * the point-to-point send can be tuned in a single place.  The function
+ * returns   HPL_SUCCESS   when  MPI_Send  reports  MPI_SUCCESS,   and
+ * HPL_FAILURE otherwise.  When SCOUNT is less than or equal to zero, no
+ * message is sent and HPL_SUCCESS is returned immediately.
+ * The HPL_USE_MPI_DATATYPE code path is kept for reference;  that macro
+ * is undefined just above this function, so the path is compiled out.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         Starting address of the buffer to be sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         Number of double precision entries of SBUF to send.  A value
+ *         less than or equal to zero turns the call into a no-op.
+ *
+ * DEST    (local input)                 int
+ *         Rank of the receiving process in the communication space
+ *         defined by COMM.
+ *
+ * STAG    (local input)                 int
+ *         Message tag used for this communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               dtype;
+#endif
+   int                        mpierr = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( SCOUNT > 0 )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      mpierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &dtype );
+      if( mpierr == MPI_SUCCESS ) mpierr = MPI_Type_commit( &dtype );
+      if( mpierr == MPI_SUCCESS )
+         mpierr = MPI_Send( (void *)(SBUF), 1, dtype, DEST, STAG, COMM );
+      if( mpierr == MPI_SUCCESS ) mpierr = MPI_Type_free( &dtype );
+#else
+      mpierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG,
+                         COMM );
+#endif
+   }
+   return( ( mpierr != MPI_SUCCESS ? HPL_FAILURE : HPL_SUCCESS ) );
+/*
+ * End of HPL_send
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_pmisc.h   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+# all, lib and clean are commands, not files: declare them phony.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp file, touched once the objects are archived in $(HPLlib).
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# Each object is compiled from the same-named C source one directory up.
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) $(CCFLAGS) -c $< -o $@
+#
+# ######################################################################
+# Run $(RM) on the object files and .grd stamp files in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile
new file mode 100644
index 000000000..6596a047b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile
@@ -0,0 +1,138 @@
+#  /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    Modifications Copyright (C) 2023 Intel Corporation
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  * 
+#  * SPDX-License-Identifier: BSD-4-Clause
+#  */  
+
+# /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    HPL - 2.3 - December 2, 2018                          
+#  *    Antoine P. Petitet                                                
+#  *    University of Tennessee, Knoxville                                
+#  *    Innovative Computing Laboratory                                 
+#  *    (C) Copyright 2000-2008 All Rights Reserved                       
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */ 
+
+
+# Default goal: build the versioned shared library.
+all: libdgemm.so.1.0.1
+
+# all and clean are commands, not files.
+.PHONY: all clean
+
+OBJS := dpcpp_dgemm.o
+.PRECIOUS: $(OBJS)
+dpcpp_dgemm.o : dpcpp_dgemm.cpp
+
+#DEFINES = -DMPI
+#DEFINES += -DUSE_FERMI_DGEMM
+#DEFINES += -DVERBOSE_PRINT
+#DEFINES += -DACML
+#DEFINES += -DGOTO
+# Compile one SYCL source; the backend switch picks the compiler and BLAS macro.
+%.o: %.cpp
+ifeq ($(USE_NVIDIA_BACKEND),ON)
+	clang++ -O2 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fPIC $(DEFINES) -DUSE_CUBLAS -c $< -o $@
+else ifeq ($(USE_AMD_BACKEND),ON)
+	clang++ -O2 -fsycl -fsycl-targets=amd_gpu_gfx90a -fPIC $(DEFINES) -DUSE_HIPBLAS -D__HIP_PLATFORM_AMD__ -c $< -o $@
+else
+	icpx -fsycl -O2 -fPIC $(DEFINES) -c $< -o $@
+endif
+
+# Link the shared library and refresh its symlink chain
+# (libdgemm.so -> libdgemm.so.1 -> libdgemm.so.1.0 -> libdgemm.so.1.0.1).
+# Libraries follow $(OBJS) on every link line so that a linker resolving
+# left to right (e.g. with --as-needed) does not discard them.
+# The ln steps are the same for every backend, so they run once after the
+# backend-specific link command.
+libdgemm.so.1.0.1: $(OBJS)
+ifeq ($(USE_NVIDIA_BACKEND),ON)
+	clang++ -O3 -std=c++17 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -lcudart -lsycl -lcuda -lcublas -fsycl -fsycl-targets=nvptx64-nvidia-cuda -L$(MKL_LIB) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++
+else ifeq ($(USE_AMD_BACKEND),ON)
+	clang++ -O3 -std=c++17 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -lsycl -lhipblas -fsycl -fsycl-targets=amd_gpu_gfx90a
+else
+	icpx -fsycl -fsycl-device-code-split=per_kernel -fPIC -O2 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -lsycl -lOpenCL -lpthread -lm -ldl
+endif
+	ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0
+	ln -sf libdgemm.so.1.0 libdgemm.so.1
+	ln -sf libdgemm.so.1 libdgemm.so
+# Delete what this Makefile builds; $(CUBINS) is not set in this file.
+clean:
+	$(RM) $(OBJS) $(CUBINS) libdgemm.so libdgemm.so.1 libdgemm.so.1.0 libdgemm.so.1.0.1
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp
new file mode 100644
index 000000000..a5b800ce4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp
@@ -0,0 +1,461 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+
+
+#define NUMBER_OF_STREAMS 4
+#define CHUNK_SIZE 512
+#define NN 64
+#define NM 128
+//#define DPCPP_DEBUG
+//#define DEVICE_DEBUG
+//#define MPI
+
+#ifdef MPI
+#include <mpi.h>
+#endif
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <array>
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/times.h>
+#include <sys/time.h>
+
+#include <iostream>
+#include <chrono> 
+#include <sycl/sycl.hpp>
+#include "oneapi/mkl/blas.hpp"
+#include "dpcpp_dgemm.h"
+
+
+#ifdef USE_CUBLAS
+#include <sycl/backend/cuda.hpp>
+#include <cublas_v2.h>
+#include <cuda.h>
+#include "mkl.h"
+#include <cuda_runtime.h>
+#elif USE_HIPBLAS
+#include "hipblas.h"
+#else
+#include "oneapi/mkl/blas.hpp"
+#endif
+
+// Entry points declared with C linkage.
+extern "C" {
+    // dpcpp_dgemm is defined further down in this file.
+    void dpcpp_dgemm(const int ORDER, const int TRANSA, const int TRANSB,
+                     const int M, const int N, const int K,
+                     const double ALPHA,
+                     const double *A, const int LDA,
+                     const double *B, const int LDB,
+                     const double BETA,
+                     double *C, const int LDC);
+
+    // dpcpp_dtrsm: the trailing parameters are left unnamed in this declaration.
+    void dpcpp_dtrsm(int HPL_ORDER,
+                     int HPL_SIDE,
+                     int HPL_UPLO,
+                     int HPL_TRANS,
+                     int HPL_DIAG,
+                     const int, const int,
+                     const double,
+                     const double *,
+                     const int,
+                     double *, const int);
+}
+
+void DeviceManager::display_device_properties(sycl::device const &dev)
+{   // Print name, driver version, platform, vendor and compute-unit count of dev to stdout.
+    std::cout << "\tSYCL device              : " << dev.get_info<sycl::info::device::name>() << std::endl
+              << "\tDriver version           : " << dev.get_info<sycl::info::device::driver_version>() << std::endl
+              << "\tPlatform                 : " << dev.get_platform().get_info<sycl::info::platform::name>()<< std::endl
+              << "\tVendor                   : " << dev.get_info<sycl::info::device::vendor>() << std::endl
+              << "\tMax compute units        : " << dev.get_info<sycl::info::device::max_compute_units>() << std::endl;
+}
+
+#ifdef USE_CUBLAS
+// CHECK_ERROR(call): pass the call's status and its source text to checkCudaErrorMsg.
+#define CHECK_ERROR(FUNC) checkCudaErrorMsg(FUNC, " " #FUNC)
+// cublasStatus_t overload: print the message and status to stdout, then exit, unless the status is success.
+inline void checkCudaErrorMsg(cublasStatus_t status, const char *msg) {
+  if (status == CUBLAS_STATUS_SUCCESS) return;
+  std::cout << "ERROR CUBLAS:" << msg << " - " << status << std::endl;
+  exit(EXIT_FAILURE);
+}
+
+// cudaError overload: same reporting, compared against cudaSuccess.
+inline void checkCudaErrorMsg(cudaError status, const char *msg) {
+  if (status == cudaSuccess) return;
+  std::cout << "ERROR CUDA: " << msg << " - " << status << std::endl;
+  exit(EXIT_FAILURE);
+}
+
+// CUresult overload: same reporting, compared against CUDA_SUCCESS.
+inline void checkCudaErrorMsg(CUresult status, const char *msg) {
+  if (status == CUDA_SUCCESS) return;
+  std::cout << "ERROR CUDA: " << msg << " - " << status << std::endl;
+  exit(EXIT_FAILURE);
+}
+
+#endif
+
+// Dimension helpers: choose between m and n by transpose flag; matrix_size is outer dimension times ldm.
+template <typename T>
+constexpr T inner_dimension(oneapi::mkl::transpose trans, T m, T n)
+    { return (trans != oneapi::mkl::transpose::nontrans) ? n : m; }
+template <typename T>
+constexpr T outer_dimension(oneapi::mkl::transpose trans, T m, T n)
+    { return (trans != oneapi::mkl::transpose::nontrans) ? m : n; }
+template <typename T> constexpr T matrix_size(oneapi::mkl::transpose trans, T m, T n, T ldm)
+    { return outer_dimension(trans, m, n) * ldm; }
+
+// Map an integer transpose code to oneapi::mkl::transpose.
+// TODO: hardcoded values for enums (111, 112, 113).
+inline oneapi::mkl::transpose to_mkl_trans(int hpltrans){
+    return (hpltrans==111) ? oneapi::mkl::transpose::nontrans
+         : (hpltrans==113) ? oneapi::mkl::transpose::conjtrans
+         :                   oneapi::mkl::transpose::trans;   // 112 and any other value
+}
+    
+// Map an integer uplo code to oneapi::mkl::uplo: 122 selects lower,
+// 121 and every other value select upper.
+inline oneapi::mkl::uplo   to_mkl_uplo(int hpluplo){
+    return (hpluplo==122) ? oneapi::mkl::uplo::lower : oneapi::mkl::uplo::upper;
+}
+    
+// Translate an integer diagonal code into the oneMKL diag enum:
+// 132 -> unit, 131 and every other value -> nonunit.
+inline oneapi::mkl::diag to_mkl_diag(int hpldiag){
+    switch (hpldiag) {
+        case 132: return oneapi::mkl::diag::unit;
+        default:  return oneapi::mkl::diag::nonunit;
+    }
+}
+
+// Translate an integer side code into the oneMKL side enum:
+// 142 -> right, 141 and every other value -> left.
+inline oneapi::mkl::side to_mkl_side(int hplside){
+    switch (hplside) {
+        case 142: return oneapi::mkl::side::right;
+        default:  return oneapi::mkl::side::left;
+    }
+}
+/*
+ * dpcpp_dgemm
+ *
+ * Computes C := ALPHA * op(A) * op(B) + BETA * C on column-major operands,
+ * where op(X) is chosen by TRANSA / TRANSB (integer codes translated by
+ * to_mkl_trans).
+ *
+ * Small problems (N < NN, M < NM or K < 128) are passed to the host
+ * cblas_dgemm.  Larger ones are copied into freshly allocated device memory,
+ * multiplied there (cuBLAS, hipBLAS or oneMKL, selected by USE_CUBLAS /
+ * USE_HIPBLAS) and the result is copied back into C before the device
+ * buffers are released.
+ *
+ * ORDER           only printed in debug builds; every path uses column-major.
+ * TRANSA, TRANSB  transpose codes for A and B.
+ * M, N, K         problem dimensions; the call is a no-op if any is zero.
+ * ALPHA, BETA     scalar factors.
+ * A, B            host input matrices with leading dimensions LDA, LDB.
+ * C               host input/output matrix with leading dimension LDC.
+ */
+void dpcpp_dgemm 
+(   const int ORDER,   const int TRANSA,    const int TRANSB,       
+    const int M,       const int N,         const int K,       
+    const double ALPHA,const double *A,     const int LDA,
+    const double *B,   const int LDB,       
+    const double BETA, double *C,         const int LDC)
+{
+    // Nothing to compute for an empty operand.
+    if ((M==0)||(K==0)||(N==0))
+        return;
+
+#ifdef DPCPP_DEBUG    
+    using namespace std;
+    cout <<"Calling DPC++ dgemm ========="<<endl;
+    cout << "order  "<< ORDER << endl;
+    cout << "M      "<< M << endl;
+    cout << "N      "<< N << endl;
+    cout << "K      "<< K << endl;
+    cout << "A      "<< A << endl;
+    cout << "B      "<< B << endl;
+    cout << "C      "<< C << endl;
+    cout << "ALPHA  "<< ALPHA << endl;
+    cout << "BETA   "<< BETA << endl;
+    cout << "LDA    "<< LDA << endl;
+    cout << "LDB    "<< LDB << endl;
+    cout << "LDC    "<< LDC << endl;
+    cout <<"=============================="<<endl;
+#endif    
+    const oneapi::mkl::transpose transA = to_mkl_trans(TRANSA);
+    const oneapi::mkl::transpose transB = to_mkl_trans(TRANSB);
+
+    if ( (N) < NN || (M) < NM || (K) < 128){
+
+      #ifdef DEVICE_DEBUG
+        std::cout << "gemm-CPU\n";
+      #endif
+      // Honour the requested transposition on the host path as well, so that
+      // it agrees with the device path instead of always assuming NoTrans.
+      const auto to_cblas_trans = [](oneapi::mkl::transpose t) {
+          return (t == oneapi::mkl::transpose::nontrans) ? CblasNoTrans
+               : (t == oneapi::mkl::transpose::trans)    ? CblasTrans
+                                                         : CblasConjTrans;
+      };
+      cblas_dgemm(CblasColMajor, to_cblas_trans(transA), to_cblas_trans(transB),
+                  M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+
+      return;
+    }
+
+    #ifdef DEVICE_DEBUG
+        std::cout << "gemm-GPU\n";
+    #endif
+
+    sycl::queue mQueue{};
+    auto ctxt = mQueue.get_context();
+    auto mdevice = mQueue.get_device();
+
+    // Byte counts are computed in std::size_t: the element counts
+    // (dimension * leading dimension) can exceed INT_MAX for large panels.
+    const std::size_t a_bytes = matrix_size<std::size_t>(transA, M, K, LDA) * sizeof(double);
+    const std::size_t b_bytes = matrix_size<std::size_t>(transB, K, N, LDB) * sizeof(double);
+    const std::size_t c_bytes =
+        matrix_size<std::size_t>(oneapi::mkl::transpose::nontrans, M, N, LDC) * sizeof(double);
+
+    // Stage all three operands on the device (C is copied too because BETA
+    // may be non-zero).
+    double *A_buffer = (double *)malloc_device(a_bytes, mdevice, ctxt);
+    mQueue.memcpy(A_buffer, A, a_bytes);
+
+    double *B_buffer = (double *)malloc_device(b_bytes, mdevice, ctxt);
+    mQueue.memcpy(B_buffer, B, b_bytes);
+
+    double *C_buffer = (double *)malloc_device(c_bytes, mdevice, ctxt);
+    mQueue.memcpy(C_buffer, C, c_bytes);
+
+    mQueue.wait();
+
+    #ifdef USE_CUBLAS
+    const auto to_cublas_op = [](oneapi::mkl::transpose t) {
+        return (t == oneapi::mkl::transpose::nontrans) ? CUBLAS_OP_N
+             : (t == oneapi::mkl::transpose::trans)    ? CUBLAS_OP_T
+                                                       : CUBLAS_OP_C;
+    };
+    const cublasOperation_t opA = to_cublas_op(transA);
+    const cublasOperation_t opB = to_cublas_op(transB);
+
+    cublasHandle_t handle;
+    CHECK_ERROR(cublasCreate(&handle));
+
+    mQueue.submit([&](sycl::handler &h){
+        h.host_task([=](sycl::interop_handle ih) {
+            cuCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_cuda>());
+            cublasSetStream(handle, ih.get_native_queue<sycl::backend::ext_oneapi_cuda>());
+
+            CHECK_ERROR(cublasDgemm(handle, opA, opB, M, N, K, &ALPHA, A_buffer, LDA, B_buffer, LDB, &BETA, C_buffer, LDC));
+            // The native work must be complete before the host task returns.
+            cudaDeviceSynchronize();
+        });
+    }).wait_and_throw();
+
+    // The handle is created per call, so release it per call.
+    CHECK_ERROR(cublasDestroy(handle));
+    #elif USE_HIPBLAS
+    const auto to_hipblas_op = [](oneapi::mkl::transpose t) {
+        return (t == oneapi::mkl::transpose::nontrans) ? HIPBLAS_OP_N
+             : (t == oneapi::mkl::transpose::trans)    ? HIPBLAS_OP_T
+                                                       : HIPBLAS_OP_C;
+    };
+    const hipblasOperation_t opA = to_hipblas_op(transA);
+    const hipblasOperation_t opB = to_hipblas_op(transB);
+
+    hipblasHandle_t handle;
+    hipblasCreate(&handle);
+
+    mQueue.submit([&](sycl::handler &h){
+        h.host_task([=](sycl::interop_handle ih) {
+            hipCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_hip>());
+            hipblasSetStream(handle, ih.get_native_queue<sycl::backend::ext_oneapi_hip>());
+
+            hipblasDgemm(handle, opA, opB, M, N, K, &ALPHA, A_buffer, LDA, B_buffer, LDB, &BETA, C_buffer, LDC);
+            // Same as the CUDA path: finish the native work before returning,
+            // otherwise the copy-back below may read an incomplete result.
+            hipDeviceSynchronize();
+        });
+    }).wait_and_throw();
+
+    // The handle is created per call, so release it per call.
+    hipblasDestroy(handle);
+    #else
+    oneapi::mkl::blas::gemm(mQueue, transA, transB, M, N, K, ALPHA, A_buffer, LDA, B_buffer, LDB, BETA, C_buffer, LDC);
+    mQueue.wait();
+    #endif
+
+    // Bring the result back to the caller's matrix and release device memory.
+    mQueue.memcpy(C, C_buffer, c_bytes).wait();
+    free(A_buffer, mQueue);
+    free(B_buffer, mQueue);
+    free(C_buffer, mQueue);
+}
+  
+/*
+ * dpcpp_dtrsm
+ *
+ * Triangular solve with multiple right-hand sides on column-major operands:
+ * B is overwritten with the solution of op(A) * X = ALPHA * B (SIDE = left)
+ * or X * op(A) = ALPHA * B (SIDE = right).  SIDE / UPLO / TRANS / DIAG are
+ * integer codes translated by the to_mkl_* helpers.
+ *
+ * Small problems (M < 512 or N < 2*M) are passed to the host cblas_dtrsm.
+ * Larger ones are copied into freshly allocated device memory, solved there
+ * (cuBLAS, hipBLAS or oneMKL, selected by USE_CUBLAS / USE_HIPBLAS) and B is
+ * copied back before the device buffers are released.
+ *
+ * ORDER    only printed in debug builds; every path uses column-major.
+ * M, N     rows / columns of B; the call is a no-op if either is zero.
+ * ALPHA    scalar factor applied to B.
+ * A        host triangular matrix with leading dimension LDA.
+ * B        host input/output matrix with leading dimension LDB.
+ */
+void dpcpp_dtrsm
+(  const int ORDER,           const int SIDE,
+   const int UPLO,            const int TRANS,
+   const int DIAG,            const int M,       const int N,
+   const double ALPHA,    const double* A,  const int LDA,       double* B,
+   const int LDB)
+{
+    // Nothing to solve for an empty right-hand side.
+    if ((M==0)||(N==0)){
+        return;
+    }
+
+#ifdef DPCPP_DEBUG    
+    using namespace std;
+    cout <<"Calling DPC++ dtrsm ========="<<endl;
+    cout << "ORDER      "<< ORDER << endl;
+    cout << "SIDE       "<< SIDE << endl;
+    cout << "UPLO       "<< UPLO << endl;
+    cout << "TRANS      "<< TRANS << endl;
+    cout << "DIAG       "<< DIAG << endl;
+    cout << "M          "<< M << endl;
+    cout << "N          "<< N << endl;
+    cout << "A          "<< A << endl;
+    cout << "LDA        "<< LDA << endl;
+    cout << "B          "<< B << endl;
+    cout << "LDB        "<< LDB << endl;
+    cout <<"============================="<<endl;
+#endif    
+
+    const oneapi::mkl::side      side     = to_mkl_side(SIDE);
+    const oneapi::mkl::uplo      uplo     = to_mkl_uplo(UPLO);
+    const oneapi::mkl::transpose trans    = to_mkl_trans(TRANS);
+    const oneapi::mkl::diag      diag     = to_mkl_diag(DIAG);
+
+    const bool is_left  = (side == oneapi::mkl::side::left);
+    const bool is_upper = (uplo == oneapi::mkl::uplo::upper);
+    const bool is_unit  = (diag == oneapi::mkl::diag::unit);
+
+    if ( (M) < 512 || (N) < 2*(M)){
+
+        #ifdef DEVICE_DEBUG
+         std::cout << "dtrsm-CPU\n";
+        #endif
+
+        // Honour the requested side/uplo/trans/diag on the host path too, so
+        // that it agrees with the device path instead of always solving a
+        // left / lower / no-trans / unit system.
+        cblas_dtrsm(CblasColMajor,
+                    is_left  ? CblasLeft  : CblasRight,
+                    is_upper ? CblasUpper : CblasLower,
+                    (trans == oneapi::mkl::transpose::nontrans) ? CblasNoTrans
+                  : (trans == oneapi::mkl::transpose::trans)    ? CblasTrans
+                                                                : CblasConjTrans,
+                    is_unit  ? CblasUnit  : CblasNonUnit,
+                    M, N, ALPHA, A, LDA, B, LDB);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+        std::cout << "dtrsm-GPU\n";
+    #endif
+
+    sycl::queue mQueue{};
+    auto ctxt = mQueue.get_context();
+    auto mdevice = mQueue.get_device();
+
+    // A is M x M for a left-side solve and N x N for a right-side solve.
+    // Byte counts are computed in std::size_t because order * leading
+    // dimension can exceed INT_MAX for large panels.
+    const std::size_t a_order = static_cast<std::size_t>(is_left ? M : N);
+    const std::size_t a_bytes = a_order * static_cast<std::size_t>(LDA) * sizeof(double);
+    const std::size_t b_bytes =
+        static_cast<std::size_t>(N) * static_cast<std::size_t>(LDB) * sizeof(double);
+
+    double *A_buffer = (double *)malloc_device(a_bytes, mdevice, ctxt);
+    mQueue.memcpy(A_buffer, A, a_bytes);
+
+    double *B_buffer = (double *)malloc_device(b_bytes, mdevice, ctxt);
+    mQueue.memcpy(B_buffer, B, b_bytes);
+
+    mQueue.wait();
+    #ifdef USE_CUBLAS
+    const cublasSideMode_t  cuSide  = is_left  ? CUBLAS_SIDE_LEFT       : CUBLAS_SIDE_RIGHT;
+    const cublasFillMode_t  cuUplo  = is_upper ? CUBLAS_FILL_MODE_UPPER : CUBLAS_FILL_MODE_LOWER;
+    const cublasOperation_t cuTrans =
+          (trans == oneapi::mkl::transpose::nontrans) ? CUBLAS_OP_N
+        : (trans == oneapi::mkl::transpose::trans)    ? CUBLAS_OP_T
+                                                      : CUBLAS_OP_C;
+    const cublasDiagType_t  cuDiag  = is_unit  ? CUBLAS_DIAG_UNIT       : CUBLAS_DIAG_NON_UNIT;
+
+    cublasHandle_t handle;
+    CHECK_ERROR(cublasCreate(&handle));
+
+    mQueue.submit([&](sycl::handler &h){
+        h.host_task([=](sycl::interop_handle ih) {
+            cuCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_cuda>());
+            cublasSetStream(handle, ih.get_native_queue<sycl::backend::ext_oneapi_cuda>());
+
+            CHECK_ERROR(cublasDtrsm(handle,cuSide,cuUplo,cuTrans,cuDiag,M,N,&ALPHA,A_buffer,LDA,B_buffer,LDB));
+            // The native work must be complete before the host task returns.
+            cudaDeviceSynchronize();
+        });
+    }).wait_and_throw();
+
+    // The handle is created per call, so release it per call.
+    CHECK_ERROR(cublasDestroy(handle));
+    #elif USE_HIPBLAS
+    const hipblasSideMode_t  hipSide  = is_left  ? HIPBLAS_SIDE_LEFT       : HIPBLAS_SIDE_RIGHT;
+    const hipblasFillMode_t  hipUplo  = is_upper ? HIPBLAS_FILL_MODE_UPPER : HIPBLAS_FILL_MODE_LOWER;
+    const hipblasOperation_t hipTrans =
+          (trans == oneapi::mkl::transpose::nontrans) ? HIPBLAS_OP_N
+        : (trans == oneapi::mkl::transpose::trans)    ? HIPBLAS_OP_T
+                                                      : HIPBLAS_OP_C;
+    const hipblasDiagType_t  hipDiag  = is_unit  ? HIPBLAS_DIAG_UNIT       : HIPBLAS_DIAG_NON_UNIT;
+
+    hipblasHandle_t handle;
+    hipblasCreate(&handle);
+
+    mQueue.submit([&](sycl::handler &h){
+        h.host_task([=](sycl::interop_handle ih) {
+            hipCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_hip>());
+            hipblasSetStream(handle, ih.get_native_queue<sycl::backend::ext_oneapi_hip>());
+
+            hipblasDtrsm(handle,hipSide,hipUplo,hipTrans,hipDiag,M,N,&ALPHA,A_buffer,LDA,B_buffer,LDB);
+            // Same as the CUDA path: finish the native work before returning,
+            // otherwise the copy-back below may read an incomplete result.
+            hipDeviceSynchronize();
+        });
+    }).wait_and_throw();
+
+    // The handle is created per call, so release it per call.
+    hipblasDestroy(handle);
+    #else
+
+    oneapi::mkl::blas::trsm(mQueue, side, uplo, trans, diag, M, N, ALPHA, A_buffer, LDA, B_buffer, LDB);
+    mQueue.wait();
+
+    #endif
+
+    // Bring the solution back to the caller's matrix and release device memory.
+    mQueue.memcpy(B, B_buffer, b_bytes).wait();
+
+    free(A_buffer, mQueue);
+    free(B_buffer, mQueue);
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h
new file mode 100644
index 000000000..e5de8ce8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h
@@ -0,0 +1,157 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+
+#define NUMBER_OF_STREAMS 2
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+#include <array>
+
+class DeviceManager;
+// One lazily created manager per slot, indexed by the id passed to
+// DeviceManager::getInstance.  Being `static` in a header, every translation
+// unit that includes this file gets its own copy of the table.
+static DeviceManager *instance[2];
+
+/*
+ * DeviceManager owns one SYCL device (picked by the default selector) and an
+ * array of NUMBER_OF_STREAMS queues.  Only queues[0] is bound to that device
+ * and to the asynchronous exception handler; the remaining entries stay
+ * default-constructed.  Instances are created on demand through getInstance()
+ * and released through destroyAllInstances().
+ */
+class DeviceManager{
+    cl::sycl::device *m_pDevice = nullptr;
+    cl::sycl::queue queues[NUMBER_OF_STREAMS]; 
+    
+    DeviceManager(){ 
+        try{
+              m_pDevice = new cl::sycl::device(cl::sycl::default_selector());
+        }catch(...){
+            std::cout << "ERROR: failed to create sycl device.\n";
+            // Without a device the queue below cannot be built; propagate the
+            // failure instead of dereferencing a null pointer.
+            throw;
+        }   
+
+        // Report asynchronous SYCL errors raised on queues[0].
+        auto exception_handler = [] (cl::sycl::exception_list exceptions) {
+            for (std::exception_ptr const& eptr : exceptions) {
+                try {
+                    std::rethrow_exception(eptr);
+                } catch(cl::sycl::exception const& ex) {
+                    std::cout << "Caught asynchronous SYCL exception during GEMM:\n"
+                        << ex.what() << std::endl;
+                }
+            }
+        };
+
+        queues[0] = cl::sycl::queue(*m_pDevice, exception_handler);   
+    }
+
+    // Private: instances are only destroyed through destroyAllInstances().
+    ~DeviceManager() {
+        std::cout << "Destructor Singleton" << std::endl;
+        delete m_pDevice;   // allocated in the constructor
+    }
+
+    DeviceManager(const DeviceManager&) = delete;
+    DeviceManager& operator=(const DeviceManager&) = delete; 	
+
+
+    public:
+
+    // Returns the manager for slot mpi_id, creating it on first use.
+    // Returns nullptr (after printing an error) when mpi_id does not name a
+    // slot of the instance table.
+    static DeviceManager* getInstance(int mpi_id){
+        const int slots = static_cast<int>(sizeof(instance) / sizeof(instance[0]));
+        if(mpi_id < 0 || mpi_id >= slots){
+            std::cout << "ERROR: no device slot for " << mpi_id << "\n";
+            return nullptr;
+        }
+        if(!instance[mpi_id]){
+           
+            std::cout << "Creating device for " << mpi_id << "\n"; 
+            instance[mpi_id] = new DeviceManager();
+            
+        }
+        return instance[mpi_id];
+    }
+
+    cl::sycl::device &getDevice(){ return *m_pDevice;}
+    // Pointer to the first of NUMBER_OF_STREAMS queues.
+    cl::sycl::queue *getQueues(){ return queues;}
+
+    static void display_device_properties(cl::sycl::device const &dev);
+
+    // Deletes every created instance and clears its slot, so that a later
+    // getInstance() creates a fresh manager instead of returning a dangling
+    // pointer.
+    static void destroyAllInstances() {
+        for (DeviceManager *&slot : instance) {
+            delete slot;
+            slot = nullptr;
+        }
+    }
+};
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce combines BUFFER across every process of COMM and leaves
+ * the combined result on all of them.  It is implemented as HPL_reduce
+ * (called with 0 as the argument following OP) followed by HPL_broadcast
+ * from root 0.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry, the local data to be combined.  On exit, the combined
+ *         data, identical on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         Number of entries in BUFFER; must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         Type of the operands held in BUFFER.
+ *
+ * OP      (global input)                const HPL_T_OP
+ *         The local combine function.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * Returns the status of HPL_reduce if it is not MPI_SUCCESS, otherwise the
+ * status of HPL_broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */
+   const int reduce_status = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM );
+
+/*
+ * Only distribute the result when the reduction itself succeeded.
+ */
+   if( reduce_status == MPI_SUCCESS )
+      return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) );
+
+   return( reduce_status );
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_barrier blocks the caller until all process members have called it.
+ * It is implemented by broadcasting a single integer from root 0 with
+ * HPL_broadcast and returns that call's status.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * The payload value is irrelevant; only the message exchange matters.
+ */
+   int                        token = 0;
+
+   return( HPL_broadcast( (void*)(&token), 1, HPL_INT, 0, COMM ) );
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_broadcast
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_broadcast
+( BUFFER, COUNT, DTYPE, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_broadcast broadcasts  a message from the process with rank ROOT to
+ * all processes in the group.  The message is forwarded with point-to-point
+ * MPI_Send / MPI_Recv calls in rounds indexed by a power of two (ip2) that
+ * is halved after every round.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be broadcast. On
+ *         exit, this array contains the broadcast data and is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the coordinate of the source process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * Returns MPI_SUCCESS, or the most recent non-successful MPI status seen
+ * while sending / receiving.  For COUNT <= 0 it returns MPI_SUCCESS at
+ * once; for a communicator of size <= 1 it returns the status of
+ * MPI_Comm_size.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, 
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+   MPI_Status                 status;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Nothing to transfer for an empty message or a single-process group.
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+
+/*
+ * For every halving of size-1 that still leaves a value > 1, ip2 is doubled
+ * and mask is doubled with its low bit set.  Starting from ip2 = mask = 1,
+ * this keeps mask == 2*ip2 - 1 (all bits up to and including the ip2 bit).
+ */
+   kk = size - 1;
+   while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+/*
+ * mydist is this process' position relative to ROOT (computed by MModSub;
+ * presumably (rank - ROOT) modulo size -- confirm against the macro).
+ */
+   mydist = MModSub( rank, ROOT, size );
+
+   do
+   {
+/*
+ * Clear the ip2 bit so that mask covers only the bits below ip2.  A process
+ * takes part in this round only if all of those lower bits of mydist are 0.
+ */
+      mask ^= ip2;
+      if( ( mydist & mask ) == 0 )
+      {
+         partner = mydist ^ ip2;
+
+/*
+ * A process whose ip2 bit is set receives from the relative position with
+ * that bit cleared; otherwise it sends to the relative position with that
+ * bit set, provided that position exists (partner < size).  MModAdd turns
+ * the relative position back into a rank before the MPI call.
+ */
+         if( mydist & ip2 )
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Recv(  BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                 partner, tag, COMM, &status );
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                partner, tag, COMM );
+         }
+/*
+ * Record a failure but keep going with the remaining rounds.  When neither
+ * branch above ran, mpierr still holds the status of the previous MPI call.
+ */
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+      }
+      ip2 >>= 1;
+   } while( ip2 );
+
+   return( hplerr );
+/*
+ * End of HPL_broadcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c
new file mode 100644
index 000000000..f0d00b065
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_exit
+(
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_exit
+( GRID )
+   HPL_T_grid *                     GRID;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_exit frees the communicators held by a process grid and then
+ * resets the other fields of the grid.  No communicator is freed  when
+ * GRID->all_comm is MPI_COMM_NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points to the process grid to be released.
+ *         On exit, order is HPL_COLUMN_MAJOR, the other fields are -1.
+ *
+ * Returns MPI_SUCCESS, or the code of the last failing MPI_Comm_free.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   * comms[3];
+   int                        idx, rc, status = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->all_comm != MPI_COMM_NULL )
+   {
+/*
+ * Release order: row, column, then the all-process communicator.
+ */
+      comms[0] = &(GRID->row_comm);
+      comms[1] = &(GRID->col_comm);
+      comms[2] = &(GRID->all_comm);
+      for( idx = 0; idx < 3; idx++ )
+      {
+         rc = MPI_Comm_free( comms[idx] );
+         if( rc != MPI_SUCCESS ) status = rc;
+      }
+   }
+   GRID->order   = HPL_COLUMN_MAJOR;
+   GRID->iam     = GRID->myrow    = GRID->mycol     = -1;
+   GRID->nprow   = GRID->npcol    = GRID->nprocs    = -1;
+   GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1;
+   GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1;
+   return( status );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c
new file mode 100644
index 000000000..95c5a7315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_info
+(
+   const HPL_T_grid *               GRID,
+   int *                            NPROW,
+   int *                            NPCOL,
+   int *                            MYROW,
+   int *                            MYCOL
+)
+#else
+int HPL_grid_info
+( GRID, NPROW, NPCOL, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   int *                            NPROW;
+   int *                            NPCOL;
+   int *                            MYROW;
+   int *                            MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_info reports the shape of a process grid and the coordinates
+ * of the calling process in it, exactly as they are stored in GRID. The
+ * function performs no MPI call and always returns MPI_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NPROW   (global output)               int *
+ *         On exit,  NPROW  receives the number of process rows that is
+ *         recorded in GRID.
+ *
+ * NPCOL   (global output)               int *
+ *         On exit,  NPCOL  receives the number of process columns that
+ *         is recorded in GRID.
+ *
+ * MYROW   (global output)               int *
+ *         On exit,  MYROW  receives the row coordinate recorded in GRID
+ *         for the calling process.
+ *
+ * MYCOL   (global output)               int *
+ *         On exit,  MYCOL  receives  the column coordinate  recorded in
+ *         GRID for the calling process.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   *NPROW = GRID->nprow;
+   *NPCOL = GRID->npcol;
+   *MYROW = GRID->myrow;
+   *MYCOL = GRID->mycol;
+
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_grid_info
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c
new file mode 100644
index 000000000..52111ac52
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_init
+(
+   MPI_Comm                         COMM,
+   const HPL_T_ORDER                ORDER,
+   const int                        NPROW,
+   const int                        NPCOL,
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_init
+( COMM, ORDER, NPROW, NPCOL, GRID )
+   MPI_Comm                         COMM;
+   const HPL_T_ORDER                ORDER;
+   const int                        NPROW;
+   const int                        NPCOL;
+   HPL_T_grid *                     GRID;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_init creates a NPROW x NPCOL  process  grid using column- or
+ * row-major ordering from an initial collection of processes identified
+ * by an MPI communicator.  The returned error code is MPI_SUCCESS  when
+ * every MPI_Comm_split succeeded,  and the code of the last failing one
+ * otherwise. The coordinates of processes that are not part of the grid
+ * are outside of  [0..NPROW) x [0..NPCOL),  and  their  row, column and
+ * all communicators in GRID are set to MPI_COMM_NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         On entry,  COMM  is  the  MPI  communicator  identifying  the
+ *         initial  collection  of  processes out of which  the  grid is
+ *         formed.
+ *
+ * ORDER   (global input)                const HPL_T_ORDER
+ *         On entry, ORDER specifies how the processes should be ordered
+ *         in the grid as follows:
+ *            ORDER = HPL_ROW_MAJOR    row-major    ordering;
+ *            any other value          column-major ordering;
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid to be created. NPROW must be at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid to be created. NPCOL must be at least one.
+ *         NPROW * NPCOL must not exceed the number of processes in COMM.
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information to be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        hdim, hplerr=MPI_SUCCESS, ierr, ip2, k,
+                              mask, mycol, myrow, nprocs, rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size );
+/*
+ * Abort if illegal process grid
+ */
+   nprocs = NPROW * NPCOL;
+   if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) )
+   { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); }
+/*
+ * Row- or column-major ordering of the processes
+ */
+   if( ORDER == HPL_ROW_MAJOR )
+   {
+      GRID->order = HPL_ROW_MAJOR;
+      myrow = rank / NPCOL; mycol = rank - myrow * NPCOL;
+   }
+   else
+   {
+      GRID->order = HPL_COLUMN_MAJOR;
+      mycol = rank / NPROW; myrow = rank - mycol * NPROW;
+   }
+   GRID->iam   = rank;  GRID->myrow = myrow; GRID->mycol  = mycol;
+   GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs;
+/*
+ * row_ip2   (col_ip2)   : largest power of two <= nprow   (npcol);
+ * row_hdim  (col_hdim)  : row_ip2 (col_ip2) procs hypercube dim;
+ * row_ip2m1 (col_ip2m1) : largest power of two <= nprow-1 (npcol-1);
+ * row_mask  (col_mask)  : row_ip2m1 (col_ip2m1) procs hypercube mask;
+ */
+   hdim = 0; ip2 = 1; k = NPROW;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->row_ip2 = ip2; GRID->row_hdim = hdim;
+   mask = ip2 = 1;    k = NPROW - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->row_ip2m1 = ip2; GRID->row_mask = mask;
+
+   hdim = 0; ip2 = 1; k = NPCOL;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->col_ip2 = ip2; GRID->col_hdim = hdim;
+   mask = ip2 = 1;    k = NPCOL - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->col_ip2m1 = ip2; GRID->col_mask = mask;
+/*
+ * All communicator. Processes outside of the grid pass MPI_UNDEFINED and
+ * get MPI_COMM_NULL back; their row and column handles are nulled before
+ * leaving, so that no communicator field of GRID is left indeterminate.
+ */
+   ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ),
+                          rank, &(GRID->all_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+   if( GRID->all_comm == MPI_COMM_NULL )
+   { GRID->row_comm = GRID->col_comm = MPI_COMM_NULL; return( hplerr ); }
+/*
+ * Creation of the row- and column communicators.
+ */
+   ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   return( hplerr );
+/*
+ * End of HPL_grid_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c
new file mode 100644
index 000000000..002aabe01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_max
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_max
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_max folds IN into INOUT element by element:  each of the first N
+ * entries of INOUT is replaced by Mmax( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N specifies the number of entries to combine.  No
+ *         entry is touched when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer. On exit,
+ *         its first N entries contain the combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects the element type: HPL_INT reads both
+ *         buffers as int, any other value reads them as double.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register int               k = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      while( k < N ) { dst[k] = Mmax( src[k], dst[k] ); k++; }
+      return;
+   }
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      while( k < N ) { dst[k] = Mmax( src[k], dst[k] ); k++; }
+   }
+/*
+ * End of HPL_max
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c
new file mode 100644
index 000000000..a99e5e58a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_min
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_min
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_min folds IN into INOUT element by element:  each of the first N
+ * entries of INOUT is replaced by Mmin( IN[i], INOUT[i] ).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N specifies the number of entries to combine.  No
+ *         entry is touched when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the input-only buffer to be combined.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer. On exit,
+ *         its first N entries contain the combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry, DTYPE selects the element type: HPL_INT reads both
+ *         buffers as int, any other value reads them as double.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register int               k = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      while( k < N ) { dst[k] = Mmin( src[k], dst[k] ); k++; }
+      return;
+   }
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      while( k < N ) { dst[k] = Mmin( src[k], dst[k] ); k++; }
+   }
+/*
+ * End of HPL_min
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c
new file mode 100644
index 000000000..c80885b9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pnum
+(
+   const HPL_T_grid *               GRID,
+   const int                        MYROW,
+   const int                        MYCOL
+)
+#else
+int HPL_pnum
+( GRID, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   const int                        MYROW;
+   const int                        MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pnum maps the (row, column) coordinates of a process in the grid
+ * onto a single integer rank, according to the ordering of the grid.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points to the process grid data structure;
+ *         its order, nprow and npcol fields are read.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry,  MYROW  is the row coordinate  of the process whose
+ *         rank is wanted.  It is expected to lie in [0, NPROW);  this is
+ *         not checked here.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry,  MYCOL  is the column coordinate of the process whose
+ *         rank is wanted.  It is expected to lie in [0, NPCOL);  this is
+ *         not checked here.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->order != HPL_ROW_MAJOR )
+      return( MYROW + MYCOL * GRID->nprow );
+
+   return( MYCOL + MYROW * GRID->npcol );
+/*
+ * End of HPL_pnum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c
new file mode 100644
index 000000000..417c21163
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_reduce
+( BUFFER, COUNT, DTYPE, OP, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_reduce performs a global reduce operation across all processes of
+ * a group.  Note that  BUFFER  is also used as work array:  in processes
+ * other than the accumulating one, its original data may be overwritten.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+ *         exit,  and  in process of rank  ROOT  this array contains the
+ *         reduced data.  In the other processes of the group it is used
+ *         as workspace and may hold partial results on exit.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         If COUNT is not positive, the routine returns immediately.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         On entry, OP is a pointer to the local combine function.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank in COMM of the accumulating process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+   void                       * buffer = NULL;
+   int                        hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0,
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( size  == 1 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   i = size - 1; while( i > 1 ) { i >>= 1; d++; }  /* d = # of rounds */
+/* buffer: scratch space for the data received from a partner */
+   if( DTYPE == HPL_INT )
+      buffer = (void *)( (int *)   malloc( (size_t)(COUNT) * 
+                                           sizeof( int    ) ) );
+   else
+      buffer = (void *)( (double *)malloc( (size_t)(COUNT) *
+                                           sizeof( double ) ) );
+
+   if( !( buffer ) )
+   { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); }
+/* Accumulating process: receive from ROOT+1, ROOT+2, ROOT+4, ... */
+   if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 )
+   {
+      do
+      {
+         mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                            MModAdd( ROOT, ip2, size ), tag, COMM,
+                            &status );
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         OP( COUNT, buffer, BUFFER, DTYPE );
+         ip2 <<= 1; d--;
+      } while( d );
+   }
+   else
+   {
+      do
+      {
+         if( ( mydist & mask ) == 0 )  /* has not sent yet */
+         {
+            partner = mydist ^ ip2;
+            /* bit ip2 set: send and retire;  clear: receive if any */
+            if( mydist & ip2 )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                  partner, tag, COMM );
+            }
+            else if( partner < size )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr  = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                   partner, tag, COMM, &status );
+               OP( COUNT, buffer, BUFFER, DTYPE );
+            }
+            if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         }
+         mask ^= ip2; ip2 <<= 1; d--;
+      } while( d );
+   }
+   if( buffer ) free( buffer );
+
+   return( hplerr );
+/*
+ * End of HPL_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c
new file mode 100644
index 000000000..34cf87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_sum
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_sum
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_sum adds the entries of one buffer,  element by element,  to the
+ * corresponding entries of a second buffer: INOUT[i] += IN[i].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N is the number of entries  in each buffer.  When N
+ *         is zero or negative, nothing is done.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer of addends; it is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points to the buffer accumulated into.  On
+ *         exit, each of its N entries has had that of IN added to it.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry,  DTYPE  selects the entry type:  int for HPL_INT,
+ *         double for any other value.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register int               k = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      while( k < N ) { dst[k] += src[k]; k++; }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      while( k < N ) { dst[k] += src[k]; k++; }
+   }
+/*
+ * End of HPL_sum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#  Make.inc is expected to define INCdir, HPLlib, CC, CCFLAGS, ARCHIVER,
+#  ARFLAGS, RANLIB, TOUCH and RM;  none of them is assigned in this file.
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h
+#
+## Object files ########################################################
+#
+HPL_griobj       = \
+   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
+   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
+   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
+   HPL_max.o              HPL_sum.o
+#
+## Targets #############################################################
+#  all, lib and clean are commands, not files: keep them always runnable.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+#  lib.grd is a stamp: touched once the objects are archived in $(HPLlib).
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $^
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#  Each object is built from the like-named source one directory up.
+HPL_grid_init.o        : ../HPL_grid_init.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pnum.o             : ../HPL_pnum.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_grid_info.o        : ../HPL_grid_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_grid_exit.o        : ../HPL_grid_exit.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_broadcast.o        : ../HPL_broadcast.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_reduce.o           : ../HPL_reduce.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_all_reduce.o       : ../HPL_all_reduce.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_barrier.o          : ../HPL_barrier.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_min.o              : ../HPL_min.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_max.o              : ../HPL_max.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_sum.o              : ../HPL_sum.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c
new file mode 100644
index 000000000..757dad242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_disp
+(
+   HPL_T_panel * *                  PANEL
+)
+#else
+int HPL_pdpanel_disp
+( PANEL )
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_disp  releases a panel:  HPL_pdpanel_free is called on it,
+ * the structure itself is freed, and the caller's pointer is set to NULL.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry,  PANEL  is the address of the pointer to the panel
+ *         data structure to be released.  On exit, *PANEL is NULL.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Free the panel resources first; that call's status is what is returned
+ */
+   ierr = HPL_pdpanel_free( *PANEL );
+   if( *PANEL != NULL ) { free( *PANEL ); }
+   *PANEL = NULL;
+
+   return( ierr );
+/*
+ * End of HPL_pdpanel_disp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c
new file mode 100644
index 000000000..38b5b0d97
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c
@@ -0,0 +1,104 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_free
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_pdpanel_free
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_free deallocates  the panel resources  and  stores the error
+ * code returned by the panel factorization.  The workspace pointers are set
+ * to NULL afterwards, so that a repeated call is a no-op (MPI_SUCCESS).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points  to  the  panel data  structure from
+ *         which the resources should be deallocated.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !PANEL->WORK && !PANEL->IWORK ) return( MPI_SUCCESS );  /* freed */
+   if( PANEL->DINFO && PANEL->pmat->info == 0 )   /* keep a prior error */
+      PANEL->pmat->info = *(PANEL->DINFO);
+#ifdef HPL_CALL_VSIPL
+                                                /* Release the blocks */
+   (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE );
+   (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE );
+   if( PANEL->grid->nprow > 1 )
+      (void) vsip_blockrelease_d( PANEL->Ublock,  VSIP_TRUE );
+                                                    /* Destroy blocks */
+   vsip_blockdestroy_d( PANEL->L1block );
+   vsip_blockdestroy_d( PANEL->L2block );
+   if( PANEL->grid->nprow > 1 )
+      vsip_blockdestroy_d( PANEL->Ublock );
+#endif
+   if( PANEL->WORK  ) free( PANEL->WORK  );
+   if( PANEL->IWORK ) free( PANEL->IWORK );
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;   /* same NULL state HPL_pdpanel_init starts from */
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_pdpanel_free
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c
new file mode 100644
index 000000000..9e35c7fb4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c
@@ -0,0 +1,348 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_init
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdpanel_init
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_init initializes a panel data structure  and allocates its
+ * workspaces PANEL->WORK and PANEL->IWORK (HPL_pabort is called on failure).
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry,  JB  specifies the number of columns of the  panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   size_t                     dalign;
+   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
+                              ml2, mp, mycol, myrow, nb, npcol, nprow,
+                              nq, nu;
+/* ..
+ * .. Executable Statements ..
+ */
+   PANEL->grid    = GRID;                  /* ptr to the process grid */
+   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
+   PANEL->pmat    = A;                 /* ptr to the local array info */
+
+   myrow = GRID->myrow; mycol = GRID->mycol;
+   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
+
+   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
+                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
+   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
+   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
+                                         /* ptr to trailing part of A */
+   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
+/*
+ * Workspace pointers are initialized to NULL.
+ */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+/*
+ * Local lengths, indexes process coordinates
+ */
+   PANEL->nb      = nb;               /* distribution blocking factor */
+   PANEL->jb      = JB;                                /* panel width */
+   PANEL->m       = M;      /* global # of rows of trailing part of A */
+   PANEL->n       = N;      /* global # of cols of trailing part of A */
+   PANEL->ia      = IA;     /* global row index of trailing part of A */
+   PANEL->ja      = JA;     /* global col index of trailing part of A */
+   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
+   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
+   PANEL->ii      = ii;      /* local row index of trailing part of A */
+   PANEL->jj      = jj;      /* local col index of trailing part of A */
+   PANEL->lda     = A->ld;            /* local leading dim of array A */
+   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
+   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
+   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
+/*
+ * Initialize  ldl2 and len to temporary dummy values and Update tag for
+ * next panel
+ */
+   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
+   PANEL->len     = 0;           /* length of the buffer to broadcast */
+/*
+ * Figure out the exact amount of workspace  needed by the factorization
+ * and the update - Allocate that space - Finish the panel data structu-
+ * re initialization.
+ *
+ * L1:    JB x JB in all processes
+ * DPIV:  JB      in all processes
+ * DINFO: 1       in all processes
+ *
+ * We make sure that those three arrays are contiguous in memory for the
+ * later panel broadcast.  We  also  choose  to put this amount of space 
+ * right  after  L2 (when it exist) so that one can receive a contiguous
+ * buffer.
+ */
+   dalign = ALGO->align * sizeof( double );
+
+   if( npcol == 1 )                             /* P x 1 process grid */
+   {                                     /* space for L1, DPIV, DINFO */
+      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
+      if( nprow > 1 )                                 /* space for U */
+      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * 
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure  -  Always re-use A in
+ * the only process column
+ */
+      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
+      PANEL->ldl2  = A->ld;
+      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->DPIV  = PANEL->L1    + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
+   }
+   else
+   {                                        /* space for L2, L1, DPIV */
+      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
+      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
+#ifdef HPL_COPY_L
+      lwork = ALGO->align + PANEL->len;
+#else
+      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
+#endif
+      if( nprow > 1 )                                 /* space for U */
+      { 
+         nu = ( mycol == icurcol ? nq - JB : nq );
+         lwork += JB * Mmax( 0, nu );
+      }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure - Re-use A in the cur-
+ * rent process column when HPL_COPY_L is not defined.
+ */
+#ifdef HPL_COPY_L
+      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->ldl2  = Mmax( 1, ml2 );
+      PANEL->L1    = PANEL->L2 + ml2 * JB;
+#else
+      if( mycol == icurcol )
+      {
+         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
+         PANEL->ldl2 = A->ld;
+         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
+      }
+      else
+      {
+         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
+         PANEL->ldl2 = Mmax( 1, ml2 );
+         PANEL->L1   = PANEL->L2 + ml2 * JB;
+      } 
+#endif
+      PANEL->DPIV  = PANEL->L1   + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
+   }
+#ifdef HPL_CALL_VSIPL
+   PANEL->Ablock  = A->block;
+/*
+ * Create blocks and bind them to the data pointers
+ */
+   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
+                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
+   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
+                                      (vsip_length)(PANEL->ldl2*JB),
+                                      VSIP_MEM_NONE );
+   if( nprow > 1 )
+   { 
+      nu = ( mycol == icurcol ? nq - JB : nq );
+      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
+                                        (vsip_length)(JB * Mmax( 0, nu )),
+                                        VSIP_MEM_NONE );
+   }
+   else { PANEL->Ublock = A->block; }
+#endif
+/*
+ * If nprow is 1, we just allocate an array of JB integers for the swap,
+ * but never less than one entry since IWORK[0] is always written below.
+ * When nprow > 1, we allocate the space for the index arrays immediate-
+ * ly. The exact size of this array depends on the swapping routine that
+ * will be used, so we allocate the maximum:
+ *
+ *    IWORK[0] is of size at most 1      +
+ *    IPL      is of size at most 1      +
+ *    IPID     is of size at most 4 * JB +
+ *
+ *    For HPL_pdlaswp00:
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       llen     is of size at most NPROW  +
+ *       llen_sv  is of size at most NPROW.
+ *
+ *    For HPL_pdlaswp01:
+ *       ipA      is of size at most 1      +
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       iplen    is of size at most NPROW  + 1 +
+ *       ipmap    is of size at most NPROW  +
+ *       ipmapm1  is of size at most NPROW  +
+ *       permU    is of size at most JB     +
+ *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
+ *
+ * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
+ *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ *
+ * We use the first entry of this work array to indicate whether the local
+ * index arrays have already been computed, and if yes, by which function:
+ *    IWORK[0] = -1: no index arrays have been computed so far;
+ *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
+ *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
+ * This allows to save some redundant and useless computations.
+ */
+   if( nprow == 1 ) { lwork = Mmax( 1, JB ); }  /* room for IWORK[0] */
+   else             
+   {
+      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
+      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
+   }
+
+   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+
+   if( PANEL->IWORK == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
+                       /* Initialize the first entry of the workarray */
+   *(PANEL->IWORK) = -1;
+/*
+ * End of HPL_pdpanel_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c
new file mode 100644
index 000000000..1dbd8a18f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c
@@ -0,0 +1,152 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_new
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel * *                  PANEL
+)
+#else
+void HPL_pdpanel_new
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_new  allocates  a  panel  data structure,  initializes it
+ * through HPL_pdpanel_init and stores its address in *PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         Data structure holding the process grid information. Passed
+ *         on to HPL_pdpanel_init as is.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         Data structure holding the algorithmic parameters.  Passed
+ *         on to HPL_pdpanel_init as is.
+ *
+ * M       (local input)                 const int
+ *         Global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Global number of columns of the panel and trailing submatrix.
+ *         N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         Number of columns of the panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         Data structure holding the local array information.  Passed
+ *         on to HPL_pdpanel_init as is.
+ *
+ * IA      (global input)                const int
+ *         Global row index identifying the panel and trailing submatrix.
+ *         IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         Global column index identifying the panel and trailing
+ *         submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         Message id of the row broadcast.
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         Address of the pointer that receives the newly created and
+ *         initialized panel data structure.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * fresh;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Get storage for the panel structure; abort the run when there is none
+ */
+   fresh = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) );
+   if( fresh == NULL )
+      HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" );
+
+             /* Fill the structure in, then hand it back through PANEL */
+   HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, fresh );
+   *PANEL = fresh;
+/*
+ * End of HPL_pdpanel_new
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Variables not set in this file (INCdir, HPLlib, CC, CCFLAGS, ARCHIVER,
+# ARFLAGS, RANLIB, TOUCH) are expected to come from Make.inc.
+# ######################################################################
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h  $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#
+HPL_panobj       = \
+   HPL_pdpanel_new.o      HPL_pdpanel_init.o     HPL_pdpanel_disp.o     \
+   HPL_pdpanel_free.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files: never treat them as up to date.
+.PHONY  : all lib clean
+all     : lib 
+#
+lib     : lib.grd
+# lib.grd is a stamp file touched once the objects are archived in $(HPLlib).
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is rebuilt when its source or any header in INCdep changes.
+HPL_pdpanel_new.o      : ../HPL_pdpanel_new.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_new.c
+HPL_pdpanel_init.o     : ../HPL_pdpanel_init.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_init.c
+HPL_pdpanel_disp.o     : ../HPL_pdpanel_disp.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_disp.c
+HPL_pdpanel_free.o     : ../HPL_pdpanel_free.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_free.c
+#
+# ######################################################################
+# Remove the objects and the lib.grd stamp built in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
new file mode 100644
index 000000000..7ad5a1a99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP00N_DEPTH
+#define    HPL_LASWP00N_DEPTH       32   /* columns per unrolled pass       */
+#define    HPL_LASWP00N_LOG2_DEPTH   5   /* must be log2 of the depth above */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp00N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp00N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp00N swaps, for i = 0 .. M-1 in that order,  row i of A with
+ * row IPIV[i] across all N columns; rows with IPIV[i] == i are skipped.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the number of rows of the array A to be
+ *         interchanged. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies  the number of columns of the array A.
+ *         N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an array of dimension (LDA,N) to which
+ *         the row interchanges will be  applied.  On exit, the permuted
+ *         matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry,  IPIV  is  an  array of size  M  that  contains the
+ *         pivoting information.  For k in [0..M),  IPIV[k] = l  implies
+ *         that local rows k and l of A are to be interchanged.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register double            r;               /* swap temporary          */
+   double                     * a0, * a1;      /* cursors in rows i, ip   */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP00N_LOG2_DEPTH );
+   int                        ip, nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+   /* nu = N rounded down to a multiple of the depth; nr = N - nu leftover. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
+                          << HPL_LASWP00N_LOG2_DEPTH ) );
+   /* Full blocks: A is advanced by incA (= LDA << LOG2_DEPTH) after each. */
+   for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+            /* Unrolled: 1 + 1 + 2 + 4 + 8 + 16 swaps, one per column. */
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#if ( HPL_LASWP00N_DEPTH >  1 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  2 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  4 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  8 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH > 16 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+         }
+      }
+   }
+   /* Leftover nr columns, starting where the blocked loop left A. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+            for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA )
+            { r = *a0; *a0 = *a1; *a1 = r; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
new file mode 100644
index 000000000..786d1eff4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
@@ -0,0 +1,209 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP01N_DEPTH
+#define    HPL_LASWP01N_DEPTH      32   /* columns per unrolled pass       */
+#define    HPL_LASWP01N_LOG2_DEPTH  5   /* must be log2 of the depth above */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01N copies  scattered rows  of  A  into itself  and into an
+ * array  U.  The row offsets in  A  of the source rows are specified by
+ * LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+ * non-negative value of LINDXAU indicates that the destination is U, and
+ * A otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions indicated by non-negative values of LINDXAU.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A value of
+ *         LINDXAU[i] >= 0  indicates  that  the  row  LINDXA[i]  of  A
+ *         should be copied into U at the position LINDXAU[i]; otherwise
+ *         the row  LINDXA[i]  of  A  is copied over the row -LINDXAU[i]
+ *         of A (the source row itself is left unchanged).
+ * NOTE(review): A - (size_t)(LINDXAU[i]) relies on address wraparound.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;      /* source / destination    */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP01N_LOG2_DEPTH );
+   int                        lda1, nu, nr;    /* lda1: destination stride */
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+   /* nu = N rounded down to a multiple of the depth; nr = N - nu leftover. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) <<
+                            HPL_LASWP01N_LOG2_DEPTH ) );
+   /* Full blocks: A and U are advanced by incA / incU after each one. */
+   for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; }
+         /* Unrolled: 1 + 1 + 2 + 4 + 8 + 16 element copies, one per column. */
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#if ( HPL_LASWP01N_DEPTH >  1 )
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  2 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  4 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  8 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH > 16 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+      }
+   }
+   /* Leftover nr columns, starting where the blocked loop left A and U. */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; }
+         for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp01N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
new file mode 100644
index 000000000..429cfb6f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
@@ -0,0 +1,252 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP01T_DEPTH
+#define    HPL_LASWP01T_DEPTH       32   /* columns per unrolled pass       */
+#define    HPL_LASWP01T_LOG2_DEPTH   5   /* must be log2 of the depth above */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01T copies  scattered rows  of  A  into itself  and into an
+ * array U.  The row offsets in  A  of the source rows  are specified by
+ * LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+ * non-negative value of LINDXAU indicates that the destination is U, and
+ * A otherwise. Rows of A are stored as columns in U.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions indicated by non-negative values of LINDXAU. The
+ *         rows of A are stored as columns in U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A value of
+ *         LINDXAU[i] >= 0  indicates  that  the  row  LINDXA[i]  of  A
+ *         should be copied into U as column LINDXAU[i];  otherwise  the
+ *         row  LINDXA[i]  of  A  is  copied  over  the row  -LINDXAU[i]
+ *         of A (the source row itself is left unchanged).
+ * NOTE(review): A - (size_t)(LINDXAU[i]) relies on address wraparound.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;      /* source / destination    */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH );
+   int                        nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+   /* nu = N rounded down to a multiple of the depth; nr = N - nu leftover. */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) <<
+                            HPL_LASWP01T_LOG2_DEPTH ) );
+   /* Full blocks: A advances by incA, U by incU elements after each one. */
+   for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            /* Transposed store: a1[k] receives column k of this block. */
+            a1[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            a1[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA;
+            a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA;
+            a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA;
+            a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA;
+            a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA;
+            a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA;
+            a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA;
+            a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA;
+            a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA;
+            a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA;
+            a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA;
+            a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA;
+#endif
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            /* Destination is a row of A: step both cursors by LDA. */
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+         }
+      }
+   }
+   /* Leftover nr columns, starting where the blocked loop left A and U. */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; }
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp01T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
new file mode 100644
index 000000000..45c2f5f1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
@@ -0,0 +1,205 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unroll depth; if overridden, also define LOG2_DEPTH to match
+ */
+#ifndef HPL_LASWP02N_DEPTH
+#define    HPL_LASWP02N_DEPTH       32
+#define    HPL_LASWP02N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp02N
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         W0,
+   double *                         W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp02N
+( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         W0;
+   double *                         W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp02N packs scattered rows of an array  A  into workspace  W.
+ * The row offsets in A are given by LINDXA; LINDXAU is recorded in W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         copied into W. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         copied into W. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be copied into W.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * W0      (local input/output)          double *
+ *         On exit,  W0  is  an array of size (M-1)*LDW+1;  W0[i*LDW] is
+ *         set to (double)LINDXAU[i],  the destination offset in U where
+ *         column i of W should be copied.
+ *
+ * W       (local output)                double *
+ *         On entry, W  is an array of size (LDW,M). On exit, W contains
+ *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+ *         in W(:,i).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied into W.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M  that  contains
+ *         the destination offsets in U of the packed rows; entry  i  is
+ *         stored as a double in W0[i*LDW].
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * A0 = A, * a0;
+   double                     * w0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP02N_LOG2_DEPTH ); /* LDA * 2^LOG2_DEPTH */
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* Record the destination index of every packed row in W0 (stride LDW) */
+   for( i = 0; i < M; i++ ) 
+      *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]);
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) <<
+                          HPL_LASWP02N_LOG2_DEPTH ) );
+/* Main part: per pass, DEPTH entries (stride LDA) of each row go to W */
+   for( j = 0; j < nu;
+        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+
+         w0[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP02N_DEPTH >  1 )
+         w0[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  2 )
+         w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  4 )
+         w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA;
+         w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  8 )
+         w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA;
+         w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA;
+         w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA;
+         w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH > 16 )
+         w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA;
+         w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA;
+         w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA;
+         w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA;
+         w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA;
+         w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA;
+         w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA;
+         w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA;
+#endif
+      }
+   }
+/* Remainder: last nr entries per row; A0 and W were advanced above */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp02N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
new file mode 100644
index 000000000..760732a8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
@@ -0,0 +1,194 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unroll depth; if overridden, also define LOG2_DEPTH to match
+ */
+#ifndef HPL_LASWP03N_DEPTH
+#define    HPL_LASWP03N_DEPTH       32
+#define    HPL_LASWP03N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03N
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03N
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03N copies columns of  W  into  rows  of an  array  U.  The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N).  Columns
+ *         of W are copied as rows within this array U at  the positions
+ *         specified in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * w = W, * w0;
+   double                     * u0;
+   const int                  incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP03N_LOG2_DEPTH ); /* LDU * 2^LOG2_DEPTH */
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) <<
+                          HPL_LASWP03N_LOG2_DEPTH ) );
+/* Main part: per pass, DEPTH entries of each W(:,i) go to U (stride LDU) */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP03N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+/* Remainder: last nr entries per column; U and w were advanced above */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
new file mode 100644
index 000000000..fece692ce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unroll depth; if overridden, also define LOG2_DEPTH to match
+ */
+#ifndef HPL_LASWP03T_DEPTH
+#define    HPL_LASWP03T_DEPTH       32
+#define    HPL_LASWP03T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03T
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03T
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03T copies  columns of W into an array U.  The  destination
+ * in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M).  Columns
+ *         of W are copied within the array U at the positions specified
+ *         in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M),  entries W(:,i)  should
+ *         be copied into the column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * w = W, * w0; 
+   double                     * u0;
+   const int                  incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH ); /* 2^LOG2_DEPTH */
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of 2^LOG2_DEPTH; nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
+                          HPL_LASWP03T_LOG2_DEPTH ) );
+/* Main part: per pass, DEPTH contiguous entries of each W(:,i) go to U */
+   for( j = 0; j < nu;
+        j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP03T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP03T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+/* Remainder: last nr entries per column; U and w were advanced above */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
new file mode 100644
index 000000000..4f9c490a5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
@@ -0,0 +1,285 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor (override both, with DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP04N_DEPTH
+#define    HPL_LASWP04N_DEPTH       32   /* entries copied per unrolled pass */
+#define    HPL_LASWP04N_LOG2_DEPTH   5   /* shift used for nu, incA and incU */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04N
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04N
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U
+ * with columns of W. In addition M1 - M0 columns of  W  are copied into
+ * rows of U.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of rows of U that should be
+ *         copied into  A  and replaced by columns of  W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies the number of columns of W that should
+ *         be copied into rows of U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points to  an array of dimension (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M1).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M1-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,MAX(M0,M1)) that contains
+ *         data to be copied into U.  For i in [0..M0),  the entries W(:,i)
+ *         replace the row LINDXAU(i) of U;  for i in [M0..M1), the entries
+ *         W(:,i) are copied into the row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which rows of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU is an array of dimension M0 holding the local
+ *         row indexes of U copied into A and replaced by columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) << 
+                                            HPL_LASWP04N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP04N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
+                          HPL_LASWP04N_LOG2_DEPTH ) );
+   /* nu columns go through the unrolled loop, nr = N - nu are left over */
+   for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04N_DEPTH )
+   {
+      for( i =  0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[22]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU;
+#endif
+      }
+      /* Columns M0..M1-1 of W: copy only, the row of U is read from W0 */
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+   /* Tail: the same two passes over the last nr columns */
+   if( nr )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { *a0 = *u0; *u0 = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
new file mode 100644
index 000000000..9cbb4c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor (override both, with DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP04T_DEPTH
+#define    HPL_LASWP04T_DEPTH       32   /* entries copied per unrolled pass */
+#define    HPL_LASWP04T_LOG2_DEPTH   5   /* shift used for nu, incA and incU */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04T
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04T
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
+ * columns of U with columns of W. In addition M1 - M0 columns of W  are
+ * copied into U.  Offsets index*LD are computed in size_t, not in int.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of columns of U that should
+ *         be copied into A and replaced by columns of W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies  the number of columns of W  that will
+ *         be copied into U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the length of the columns of  U  that
+ *         will be copied into rows of A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*)  whose
+ *         columns LINDXAU are to be copied into rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M1-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,MAX(M0,M1)) that contains
+ *         data to be copied into U.  For i in [0..M0),  the entries W(:,i)
+ *         replace the column LINDXAU(i) of U; for i in [M0..M1),  W(:,i)
+ *         is copied into the column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which columns of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU is an array of dimension M0 holding the local
+ *         column indexes of U copied into A and replaced by columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables .. */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP04T_LOG2_DEPTH ),
+                              incU = (   1 << HPL_LASWP04T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* .. Executable Statements .. */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
+                          HPL_LASWP04T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04T_DEPTH )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         /* size_t products: index times leading dim may exceed INT_MAX */
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
+         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
+         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
+         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
+         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
+         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
+         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
+         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
+         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
+         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
+         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
+         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
+         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
+         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
+         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
+         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
+         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
+         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
+         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
+         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
+         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
+         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
+         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
+         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
+         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
+         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
+#endif
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+
+   if( nr > 0 )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/* End of HPL_dlaswp04T */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
new file mode 100644
index 000000000..3edcf91a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factor (override both, with DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP05N_DEPTH
+#define    HPL_LASWP05N_DEPTH       32   /* entries copied per unrolled pass */
+#define    HPL_LASWP05N_LOG2_DEPTH   5   /* shift used for nu, incA and incU */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05N copies the rows of  U  listed in LINDXAU into the rows of
+ * A listed in LINDXA.  U itself is only read.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of U that should be
+ *         copied into A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points to an array of dimension  (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local row indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP05N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
+                            HPL_LASWP05N_LOG2_DEPTH ) );
+   /* nu columns go through the unrolled loop, nr = N - nu are left over */
+   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP05N_DEPTH >  1 )
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  2 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  4 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  8 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH > 16 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+   /* Tail: copy the last nr columns one entry at a time */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
new file mode 100644
index 000000000..0adaa102d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
@@ -0,0 +1,196 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP05T_DEPTH
+#define    HPL_LASWP05T_DEPTH       32
+#define    HPL_LASWP05T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05T copies the columns of  U  selected by  LINDXAU  into the
+ * rows of A selected by LINDXA:  A(LINDXA[i],j) = U(j,LINDXAU[i]).
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M is the number of columns of U to copy into A (>= 0).
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that will
+ *         be copied into rows of A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A. U is only read by this routine.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  (incA, incU: pointer advance per unrolled pass)
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..  (quick return when M or N is not positive)
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: the N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
+                            HPL_LASWP05T_LOG2_DEPTH ) );
+/* Unrolled passes over nu; needs HPL_LASWP05T_DEPTH == 2^LOG2_DEPTH */
+   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+
+         *a0 = u0[ 0]; a0 += LDA;
+#if ( HPL_LASWP05T_DEPTH >  1 )
+         *a0 = u0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  2 )
+         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  4 )
+         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  8 )
+         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
+         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
+         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
+         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH > 16 )
+         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
+         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
+         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
+         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
+         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
+         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
+         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
+         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
+#endif
+      }
+   }
+/* Clean-up: the nr entries of each column of U not covered above */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
new file mode 100644
index 000000000..a74bae75c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
@@ -0,0 +1,206 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP06N_DEPTH
+#define    HPL_LASWP06N_DEPTH       32
+#define    HPL_LASWP06N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06N
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06N swaps row i of U with row LINDXA[i] of A for i = 0, ...,
+ * M-1, that is  A(LINDXA[i],j) <-> U(i,j)  for j = 0, ..., N-1.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with rows of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with rows of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         the corresponding rows of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,N).  This
+ *         array contains the rows of U that are to be swapped with rows
+ *         of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  (incA, incU: pointer advance per unrolled pass)
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP06N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..  (quick return when M or N is not positive)
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: the N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) <<
+                            HPL_LASWP06N_LOG2_DEPTH ) );
+/* Unrolled passes over nu; needs HPL_LASWP06N_DEPTH == 2^LOG2_DEPTH */
+   for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP06N_DEPTH >  1 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  2 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  4 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  8 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH > 16 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Clean-up: swap the nr trailing entries of each row not covered above */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { r = *a0; *a0 = *u0; *u0 = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
new file mode 100644
index 000000000..fb53c2a31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
@@ -0,0 +1,207 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP06T_DEPTH
+#define    HPL_LASWP06T_DEPTH       32
+#define    HPL_LASWP06T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06T
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06T swaps column i of U with row LINDXA[i] of A for i = 0,
+ * ..., M-1, that is  A(LINDXA[i],j) <-> U(j,i)  for j = 0, ..., N-1.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with columns of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with columns of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns of  U  that are to be swapped with
+ *         rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..  (incA, incU: pointer advance per unrolled pass)
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..  (quick return when M or N is not positive)
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: the N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) <<
+                            HPL_LASWP06T_LOG2_DEPTH ) );
+/* Unrolled passes over nu; needs HPL_LASWP06T_DEPTH == 2^LOG2_DEPTH */
+   for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+
+         r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA;
+#if ( HPL_LASWP06T_DEPTH >  1 )
+         r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  2 )
+         r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  4 )
+         r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  8 )
+         r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA;
+         r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA;
+         r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA;
+         r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA;
+         r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA;
+         r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA;
+         r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH > 16 )
+         r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA;
+         r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA;
+         r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA;
+         r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA;
+         r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA;
+         r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA;
+         r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA;
+         r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA;
+         r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA;
+         r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA;
+         r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA;
+         r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA;
+         r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA;
+         r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA;
+         r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA;
+         r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA;
+#endif
+      }
+   }
+/* Clean-up: swap the nr trailing entries of each column not covered above */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA )
+         { r = *a0; *a0 = u0[j]; u0[j] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
new file mode 100644
index 000000000..7dbf934f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth; LOG2_DEPTH must be log2( DEPTH ), see incA
+ */
+#ifndef HPL_LASWP10N_DEPTH
+#define    HPL_LASWP10N_DEPTH       32
+#define    HPL_LASWP10N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp10N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp10N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp10N performs a sequence  of  local column interchanges on a
+ * matrix A.  One column interchange is initiated  for columns 0 through
+ * N-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the array A. M
+ *         must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of the array A. N
+ *         must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an  array of  dimension (LDA,N).  This
+ *         array contains the columns onto which the interchanges should
+ *         be applied. On exit, A contains the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry, IPIV[k] is the column swapped with column k, k < N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * a0, * a1;
+   const int                  incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH );
+   int                        jp, mr, mu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH )
+                            << HPL_LASWP10N_LOG2_DEPTH ) ); /* mu: M rounded down to a multiple of incA */
+
+   for( j = 0; j < N; j++ )
+   {
+      if( j != ( jp = IPIV[j] ) )   /* nothing to do for a self-swap */
+      {
+         a0 = A + j * LDA; a1 = A + jp * LDA;
+
+         for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA )
+         {
+            r = *a0;    *a0    = *a1;    *a1    = r;
+#if ( HPL_LASWP10N_DEPTH >  1 )
+            r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  2 )
+            r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r;
+            r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  4 )
+            r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r;
+            r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r;
+            r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r;
+            r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  8 )
+            r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r;
+            r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r;
+            r = a0[10]; a0[10] = a1[10]; a1[10] = r;
+            r = a0[11]; a0[11] = a1[11]; a1[11] = r;
+            r = a0[12]; a0[12] = a1[12]; a1[12] = r;
+            r = a0[13]; a0[13] = a1[13]; a1[13] = r;
+            r = a0[14]; a0[14] = a1[14]; a1[14] = r;
+            r = a0[15]; a0[15] = a1[15]; a1[15] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH > 16 )
+            r = a0[16]; a0[16] = a1[16]; a1[16] = r;
+            r = a0[17]; a0[17] = a1[17]; a1[17] = r;
+            r = a0[18]; a0[18] = a1[18]; a1[18] = r;
+            r = a0[19]; a0[19] = a1[19]; a1[19] = r;
+            r = a0[20]; a0[20] = a1[20]; a1[20] = r;
+            r = a0[21]; a0[21] = a1[21]; a1[21] = r;
+            r = a0[22]; a0[22] = a1[22]; a1[22] = r;
+            r = a0[23]; a0[23] = a1[23]; a1[23] = r;
+            r = a0[24]; a0[24] = a1[24]; a1[24] = r;
+            r = a0[25]; a0[25] = a1[25]; a1[25] = r;
+            r = a0[26]; a0[26] = a1[26]; a1[26] = r;
+            r = a0[27]; a0[27] = a1[27]; a1[27] = r;
+            r = a0[28]; a0[28] = a1[28]; a1[28] = r;
+            r = a0[29]; a0[29] = a1[29]; a1[29] = r;
+            r = a0[30]; a0[30] = a1[30]; a1[30] = r;
+            r = a0[31]; a0[31] = a1[31]; a1[31] = r;
+#endif
+         }
+
+         for( i = 0; i < mr; i++ )   /* leftover rows; a0, a1 already advanced by mu */
+         { r = a0[i]; a0[i] = a1[i]; a1[i] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp10N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c
new file mode 100644
index 000000000..e1b5bbfac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2l
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2l
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2l computes  the local index of a matrix entry pointed to by
+ * the  global index IG.  The  returned  local index is the same  in all
+ * processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated,  in  which  case  this  routine returns IG in all
+ *         processes. Otherwise, the value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( IG );   /* local index equals global index */
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on 
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1, 
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+   j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+   return( NB * (j - i) + 
+           ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) ); /* nonzero <=> NPROCS does not divide i+1 */
+/*
+ * End of HPL_indxg2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c
new file mode 100644
index 000000000..74662f9d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c
@@ -0,0 +1,176 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_indxg2lp
+(
+   int *                            IL,
+   int *                            PROC,
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+void HPL_indxg2lp
+( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
+   int *                            IL;
+   int *                            PROC;
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2lp computes the local index of a matrix entry pointed to by
+ * the global index IG as well as the process coordinate which possesses
+ * this entry. The local returned index is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (output)                      int *
+ *         On exit, IL specifies the local index corresponding to IG. IL
+ *         is at least zero.
+ *
+ * PROC    (output)                      int *
+ *         On exit,  PROC  is the  coordinate of the process  owning the
+ *         entry specified by the global index IG. PROC is at least zero
+ *         and less than NPROCS.
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated: IL = IG and PROC = SRCPROC on exit. Otherwise, it
+ *         is the coordinate of the process owning the first block.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      *IL   = IG;
+      *PROC = SRCPROC;
+   }
+   else
+   {
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1,
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+      j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * IG  is in block  1 + ( IG - INB ) / NB.  Add this to SRCPROC and take
+ * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */
+      *PROC = SRCPROC + 1 + i;
+      *PROC = MPosMod( *PROC, NPROCS );
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+      *IL = NB * (j - i) + 
+            ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); /* nonzero <=> NPROCS does not divide i+1 */
+   }
+/*
+ * End of HPL_indxg2lp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c
new file mode 100644
index 000000000..d0e75f516
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c
@@ -0,0 +1,128 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2p
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2p
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2p computes the process coordinate which possesses the entry
+ * of a matrix specified by a global index IG.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        proc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( SRCPROC );   /* SRCPROC == -1 is passed through unchanged */
+/*
+ * Otherwise,  IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC
+ * and take the NPROCS  modulo (definition of the block-cyclic data dis-
+ * tribution).
+ */
+   proc = SRCPROC + 1 + ( IG - INB ) / NB;
+   return( MPosMod( proc, NPROCS ) );
+/*
+ * End of HPL_indxg2p
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c
new file mode 100644
index 000000000..7f139425a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c
@@ -0,0 +1,164 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxl2g
+(
+   const int                        IL,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxl2g
+( IL, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        IL;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxl2g  translates  the local index  IL  held by the process of
+ * coordinate PROC into the corresponding global index of the block-cy-
+ * clically distributed matrix, and returns that global index.
+ *
+ * The first block  (of size INB)  belongs to  SRCPROC;  the  following
+ * blocks (of size NB) are dealt out one at a time to the processes of
+ * coordinates  SRCPROC+1,  SRCPROC+2, ...  with  wrap  around  modulo
+ * NPROCS.
+ *
+ * No argument checking is performed.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (input)                       const int
+ *         Local index of the matrix entry. IL must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         Size of the first block of the global matrix.  INB  must be
+ *         at least one.
+ *
+ * NB      (input)                       const int
+ *         Blocking factor used to partition and distribute the matrix.
+ *         NB is used as a divisor and must therefore be nonzero;  HPL
+ *         documents it as larger than one.
+ *
+ * PROC    (input)                       const int
+ *         Coordinate of the process  holding the local index IL.  PROC
+ *         must be at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         Coordinate of the process  owning the first row or column of
+ *         the matrix, or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         Number of process rows or columns  over which the  matrix is
+ *         distributed. NPROCS must be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dist, nblk;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Data not distributed,  or a single process  in this dimension of the
+ * grid: the local and global indexes coincide.
+ */
+   if( ( NPROCS == 1 ) || ( SRCPROC == -1 ) ) return( IL );
+
+   if( PROC == SRCPROC )
+   {
+/*
+ * The source process starts with a block of size INB:  inside it,  the
+ * local and global indexes are equal.
+ */
+      if( IL < INB ) return( IL );
+/*
+ * IL lies  nblk = ( IL - INB ) / NB + 1  local blocks further.  Each of
+ * those steps skips one block of size  NB on every one of the NPROCS-1
+ * other processes;  these entries are all globally before the one that
+ * IL designates.
+ */
+      nblk = ( IL - INB ) / NB + 1;
+      return( ( NPROCS - 1 ) * NB * nblk + IL );
+   }
+/*
+ * PROC  is not the source process  and  owns  blocks of size  NB only.
+ * dist = ( PROC - SRCPROC - 1 ) modulo NPROCS  is  the number  of pro-
+ * cesses  strictly between  SRCPROC and PROC  when going  from left to
+ * right on the process line with possible wrap around.
+ */
+   dist = PROC - SRCPROC - 1;
+   if( PROC < SRCPROC ) dist += NPROCS;
+/*
+ * Globally before the entry  IL designates,  and besides the  IL local
+ * entries of  PROC  itself,  there are the first block of size INB and
+ * ( NPROCS - 1 ) * ( IL / NB ) + dist  blocks of size  NB owned by the
+ * other processes.
+ */
+   return( NB * ( ( NPROCS - 1 ) * ( IL / NB ) + dist ) + IL + INB );
+/*
+ * End of HPL_indxl2g
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c
new file mode 100644
index 000000000..2580f2ad4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c
@@ -0,0 +1,382 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_infog2l
+(
+   int                              I,
+   int                              J,
+   const int                        IMB,
+   const int                        MB,
+   const int                        INB,
+   const int                        NB,
+   const int                        RSRC,
+   const int                        CSRC,
+   const int                        MYROW,
+   const int                        MYCOL,
+   const int                        NPROW,
+   const int                        NPCOL,
+   int *                            II,
+   int *                            JJ,
+   int *                            PROW,
+   int *                            PCOL
+)
+#else
+void HPL_infog2l
+( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
+   int                              I;
+   int                              J;
+   const int                        IMB;
+   const int                        MB;
+   const int                        INB;
+   const int                        NB;
+   const int                        RSRC;
+   const int                        CSRC;
+   const int                        MYROW;
+   const int                        MYCOL;
+   const int                        NPROW;
+   const int                        NPCOL;
+   int *                            II;
+   int *                            JJ;
+   int *                            PROW;
+   int *                            PCOL;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_infog2l computes, for the calling process (MYROW,MYCOL), the local
+ * indexes II, JJ  at which the submatrix  starting globally at the entry
+ * I, J begins.  It also returns in PROW and PCOL the grid coordinates of
+ * the process owning the matrix entry of global indexes I, J.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                int
+ *         Global row index of the matrix entry,  at least zero.  Passed
+ *         by value: the routine only alters its own copy.
+ *
+ * J       (global input)                int
+ *         Global column index of the matrix entry,  at least zero. Pas-
+ *         sed by value: the routine only alters its own copy.
+ *
+ * IMB     (global input)                const int
+ *         On entry,  IMB  specifies  the size of the first row block of
+ *         the global matrix. IMB must be at least one.
+ *
+ * MB      (global input)                const int
+ *         On entry,  MB specifies the blocking factor used to partition
+ *         and  distribute the rows of the matrix A.  MB  must be larger
+ *         than one.
+ *
+ * INB     (global input)                const int
+ *         On entry, INB specifies the size of the first column block of
+ *         the global matrix. INB must be at least one.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the columns of the matrix A. NB must be larger
+ *         than one.
+ *
+ * RSRC    (global input)                const int
+ *         On entry,  RSRC  specifies  the row coordinate of the process
+ *         that possesses the first block of rows,  or  -1 when the rows
+ *         are not distributed; otherwise 0 <= RSRC < NPROW.
+ *
+ * CSRC    (global input)                const int
+ *         On entry, CSRC specifies the column coordinate of the process
+ *         that possesses the first block of columns,  or  -1  when  the
+ *         columns are not distributed; otherwise 0 <= CSRC < NPCOL.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * II      (local output)                int *
+ *         On exit, II  specifies the  local  starting  row index of the
+ *         submatrix. On exit, II is at least 0.
+ *
+ * JJ      (local output)                int *
+ *         On exit, JJ  specifies the local starting column index of the
+ *         submatrix. On exit, JJ is at least 0.
+ *
+ * PROW    (global output)               int *
+ *         On exit, PROW is the row coordinate of the process owning the
+ *         entry specified by the global index I  (at least zero and less
+ *         than NPROW), or -1 when RSRC is -1.
+ *
+ * PCOL    (global output)               int *
+ *         On exit, PCOL  is the column coordinate of the process owning
+ *         the entry specified by the global index J (at least zero and
+ *         less than NPCOL), or -1 when CSRC is -1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int            ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
+/* ..
+ * .. Executable Statements ..
+ */
+   imb   = IMB;
+   *PROW = RSRC;
+
+   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
+   {
+/*
+ * The rows are not distributed (RSRC is -1),  or there is just one pro-
+ * cess row in the grid: local and global row indexes are equal.
+ */
+     *II = I;
+   }
+   else if( I < imb )
+   {
+/*
+ * I lies in the first block of rows (owned by RSRC): II = I on it, else 0
+ */
+     *II = ( MYROW == *PROW ? I : 0 );
+   }
+   else
+   {
+      mb   = MB;
+      rsrc = *PROW;
+/*
+ * The discussion goes as follows:  compute  my distance  (mydist)  from
+ * the source process,  so that the source process is the one such that
+ * mydist = 0, or equivalently MYROW == rsrc.
+ *
+ * Find  out  the global coordinate of the block I belongs to (nblocks),
+ * as well as the minimum local number of blocks that every process has.
+ *
+ * when mydist < nblocks-ilocblk*NPROW,   I own ilocblk + 1 full blocks,
+ * when mydist > nblocks-ilocblk*NPROW,   I own ilocblk     full blocks,
+ * when mydist = nblocks-ilocblk*NPROW,   I own ilocblk     full blocks
+ * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */
+      if( MYROW == rsrc )
+      {
+/*
+ * I is not in the first block:  its block comes  nblocks  blocks after
+ * the first one, hence its owner is ( rsrc + nblocks ) modulo NPROW.
+ */
+         nblocks = ( I - imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Since  mydist = 0  and nblocks - ilocblk * NPROW >= 0, there are only
+ * three possible cases:
+ *
+ *   1) When  mydist = nblocks - ilocblk * NPROW = 0  and  I do  not own
+ *      I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this
+ *      case  cannot  happen  when  ilocblk is zero, since nblocks is at
+ *      least one.
+ *
+ *   2) When  mydist = nblocks - ilocblk * NPROW = 0  and  I  own I,  in
+ *      which  case  I  and  II  can  respectively  be  written as IMB + 
+ *      (nblocks-1)*MB + IL  and  IMB + (ilocblk-1) * MB + IL.  That  is
+ *      II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
+ *      when ilocblk is zero, since nblocks is at least one.
+ *
+ *   3) mydist = 0 < nblocks - ilocblk * NPROW,  the source process owns
+ *      ilocblk+1 full blocks,  and  therefore  II = IMB + ilocblk * MB.
+ *      Note that when ilocblk is zero, II is just IMB.
+ */
+         if( nblocks < NPROW )
+         {
+            *II = imb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            if( ilocblk * NPROW >= nblocks )
+            {
+               *II = ( ( MYROW == *PROW ) ?
+                       I   + ( ilocblk - nblocks ) * mb :
+                       imb + ( ilocblk - 1       ) * mb );
+            }
+            else
+            {
+               *II =  imb + ilocblk * mb;
+            }
+         }
+      }
+      else
+      {
+/*
+ * I is not in the first block: find its owner as in the branch above.
+ * Note that I is decremented by IMB here and used shifted afterwards.
+ */
+         nblocks = ( I -= imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the  source process is the process such that
+ * mydist=0.
+ */
+         if( ( mydist  = MYROW - rsrc ) < 0 ) mydist += NPROW;
+/*
+ * When mydist <  nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
+ * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I,  I own ilocblk
+ * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
+ * blocks and I,  in which case I can be written as IMB + (nblocks-1)*MB
+ * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB.
+ */
+         if( nblocks < NPROW )
+         {
+            mydist -= nblocks;
+            *II     = ( ( mydist < 0 ) ? mb :
+                        ( ( MYROW == *PROW ) ?
+                          I + ( 1 - nblocks ) * mb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            mydist -= nblocks - ilocblk * NPROW;
+            *II     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
+                        ( ( MYROW == *PROW ) ?
+                          ( ilocblk - nblocks + 1 ) * mb + I :
+                          ilocblk * mb ) );
+         }
+      }
+   }
+/*
+ * Same steps for the columns: J, INB, NB, CSRC, MYCOL, NPCOL, JJ, PCOL
+ */
+   inb   = INB;
+   *PCOL = CSRC;
+
+   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
+   {
+      *JJ = J;
+   }
+   else if( J < inb )
+   {
+      *JJ = ( MYCOL == *PCOL ? J : 0 );
+   }
+   else
+   {
+      nb   = NB;
+      csrc = *PCOL;
+
+      if( MYCOL == csrc )
+      {
+         nblocks = ( J - inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            *JJ = inb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            if( ilocblk * NPCOL >= nblocks )
+            {
+               *JJ = ( ( MYCOL == *PCOL ) ?
+                       J   + ( ilocblk - nblocks ) * nb :
+                       inb + ( ilocblk - 1       ) * nb );
+            }
+            else
+            {
+               *JJ = inb + ilocblk * nb;
+            }
+         }
+      }
+      else
+      {
+         nblocks = ( J -= inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;
+
+         if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            mydist -= nblocks;
+            *JJ     = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
+                        J + ( 1 - nblocks )*nb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            mydist -= nblocks - ilocblk * NPCOL;
+            *JJ     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
+                        ( ( MYCOL == *PCOL ) ?
+                          ( ilocblk - nblocks + 1 ) * nb + J :
+                          ilocblk * nb ) );
+         }
+      }
+   }
+/*
+ * End of HPL_infog2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c
new file mode 100644
index 000000000..39cd736d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numroc
+(
+   const int                        N,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numroc
+( N, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numroc returns the number of rows or columns,  among the N dealt
+ * out starting from global index 0, that the process of coordinate PROC
+ * gets. The computation is delegated to HPL_numrocI.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         Number of rows/columns dealt out. N must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         Size of the first block of the global matrix, at least one.
+ *
+ * NB      (input)                       const int
+ *         Blocking factor used to partition and distribute the matrix.
+ *         NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         Coordinate of the process whose local portion is determined.
+ *         PROC must be at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         Coordinate of the process owning the first row or column of
+ *         the matrix, at least zero and strictly less than NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         Number of process rows or columns  over which the matrix is
+ *         distributed. NPROCS must be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        nlocal;
+/* ..
+ * .. Executable Statements ..
+ */
+   nlocal = HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS );
+   return( nlocal );
+/*
+ * End of HPL_numroc
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c
new file mode 100644
index 000000000..70f3497de
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c
@@ -0,0 +1,243 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numrocI
+(
+   const int                        N,
+   const int                        I,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numrocI
+( N, I, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        I;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numrocI returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index I.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * I       (input)                       const int
+ *         On entry, I  specifies the global index of the matrix entry.
+ *         I must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be at least zero and strictly less than NPROCS.  A value
+ *         of -1 makes the routine return N (data not distributed).
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ilocblk, inb, mydist, nblocks, srcproc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid.
+ */
+      return( N );
+/*
+ * Compute coordinate of process owning I and corresponding INB
+ */
+   srcproc = SRCPROC;
+
+   if( ( inb = INB - I ) <= 0 )
+   {
+/*
+ * I is not in the first block, find out which process has it and update
+ * the size of first block
+ */
+      srcproc += ( nblocks = (-inb) / NB + 1 ); 
+      srcproc -= ( srcproc / NPROCS ) * NPROCS;   /* srcproc mod NPROCS */
+      inb     += nblocks * NB;
+   }
+/*
+ * Now  everything  is  just like  N, I=0, INB, NB, srcproc, NPROCS. The
+ * discussion goes as follows:  compute my distance from the source pro-
+ * cess  so that within this process coordinate system,  the source pro-
+ * cess is the process such that mydist = 0, or PROC == srcproc.
+ *
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries. Then remark that
+ *
+ * when  mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks,
+ * when  mydist > nblocks - ilocblk*NPROCS, I own ilocblk   full blocks,
+ * when  mydist = nblocks - ilocblk*NPROCS, either the last block is not
+ * full and I own it,  or the last block is full and I am the first pro-
+ * cess owning only ilocblk full blocks.
+ */
+   if( PROC == srcproc )
+   {
+/*
+ * I am the source process, i.e. I own I (mydist=0).  When N <= INB, the
+ * answer is simply N.
+ */
+      if( N <= inb ) return( N );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries.
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Since  mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only
+ * two possible cases:
+ *
+ *   1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di-
+ *      vides the global number of full blocks,  then the source process
+ *      srcproc owns one more block than the other processes;  and N can
+ *      be rewritten as N = INB + (nblocks-1) * NB + LNB  with  LNB >= 0
+ *      size of the last block. Similarly, the local value Np correspon-
+ *      ding to N can be written as  Np = INB + (ilocblk-1) * NB + LNB =
+ *      N + ( ilocblk-1 - (nblocks-1) )*NB.  Note  that this case cannot
+ *      happen when ilocblk is zero, since nblocks is at least one.
+ *
+ *   2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only
+ *      owns full blocks,  and  therefore Np = INB + ilocblk * NB.  Note
+ *      that when ilocblk is zero, Np is just INB.
+ */
+      if( nblocks < NPROCS ) return( inb );   /* ilocblk would be zero */
+ 
+      ilocblk = nblocks / NPROCS;
+      return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB :
+              N + ( ilocblk - nblocks ) * NB );
+   }
+   else
+   {
+/*
+ * I am not the source process. When N <= INB, the answer is simply 0.
+ */
+      if( N <= inb ) return( 0 );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the source  process is the process such that
+ * mydist=0.
+ */
+      if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS;
+/*
+ * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk   full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist = nblocks - ilocblk*NPROCS,
+ * either the last block is not full and I own it, in which case
+ *    N = INB + (nblocks - 1)*NB + LNB with  LNB  the  size  of the last
+ *    block such that NB > LNB > 0;  the local value Np corresponding to
+ *    N is given by  Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB;
+ * or the  last  block  is  full  and I am the first process owning only
+ *    ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and
+ *    Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB.
+ */
+      if( nblocks < NPROCS )   /* same three cases with ilocblk = 0 */
+         return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 :
+                 N - inb + NB * ( 1 - nblocks ) ) );
+ 
+      ilocblk = nblocks / NPROCS;
+      mydist -= nblocks - ilocblk * NPROCS;   /* sign selects the case */
+      return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
+              ( ( mydist > 0 ) ? ilocblk * NB :
+                N - inb + NB * ( ilocblk - nblocks + 1 ) ) );
+   }
+/*
+ * End of HPL_numrocI
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c
new file mode 100644
index 000000000..268975fc1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c
@@ -0,0 +1,137 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pabort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pabort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pabort displays an error message on stderr and halts execution.
+ * Messages longer than the local 128-byte buffer are truncated.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message (bounded write above keeps cline in range)
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( stderr,
+                   "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+
+   MPI_Abort( MPI_COMM_WORLD, -1 );
+   exit( -1 );   /* only reached if MPI_Abort returns */
+/*
+ * End of HPL_pabort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c
new file mode 100644
index 000000000..73cf649da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c
@@ -0,0 +1,143 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlamch
+(
+   MPI_Comm                         COMM,
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_pdlamch
+( COMM, CMACH )
+   MPI_Comm                         COMM;
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlamch determines  machine-specific  arithmetic  constants  such  as
+ * the relative machine precision (eps),  the safe minimum(sfmin) such that
+ * 1/sfmin does not overflow, the base of the machine (base), the precision
+ * (prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+ * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+ * exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+ * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the
+ * overflow threshold (rmax)  - (base**emax)*(1-eps).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * CMACH   (global input)                const HPL_T_MACH
+ *         Specifies the value to be returned by HPL_pdlamch
+ *            = HPL_MACH_EPS,   HPL_pdlamch := eps (default)
+ *            = HPL_MACH_SFMIN, HPL_pdlamch := sfmin
+ *            = HPL_MACH_BASE,  HPL_pdlamch := base
+ *            = HPL_MACH_PREC,  HPL_pdlamch := eps*base
+ *            = HPL_MACH_MLEN,  HPL_pdlamch := t
+ *            = HPL_MACH_RND,   HPL_pdlamch := rnd
+ *            = HPL_MACH_EMIN,  HPL_pdlamch := emin
+ *            = HPL_MACH_RMIN,  HPL_pdlamch := rmin
+ *            = HPL_MACH_EMAX,  HPL_pdlamch := emax
+ *            = HPL_MACH_RMAX,  HPL_pdlamch := rmax
+ *
+ *         where
+ *
+ *            eps   = relative machine precision,
+ *            sfmin = safe minimum,
+ *            base  = base of the machine,
+ *            prec  = eps*base,
+ *            t     = number of digits in the mantissa,
+ *            rnd   = 1.0 if rounding occurs in addition,
+ *            emin  = minimum exponent before underflow,
+ *            rmin  = underflow threshold,
+ *            emax  = largest exponent before overflow,
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     param;
+/* ..
+ * .. Executable Statements ..
+ */
+   param = HPL_dlamch( CMACH );   /* value from HPL_dlamch, pre-reduction */
+
+   switch( CMACH )
+   {
+      case HPL_MACH_EPS   :
+      case HPL_MACH_SFMIN :
+      case HPL_MACH_EMIN  :
+      case HPL_MACH_RMIN  :
+         (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE,
+                                HPL_max, COMM );   /* HPL_max op over COMM */
+         break;
+      case HPL_MACH_EMAX  :
+      case HPL_MACH_RMAX  :
+         (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE,
+                                HPL_min, COMM );   /* HPL_min op over COMM */
+         break;
+      default             :   /* no reduction: param is returned as is */
+         break;
+   } 
+
+   return( param );
+/*
+ * End of HPL_pdlamch
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c
new file mode 100644
index 000000000..40bdcc36b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c
@@ -0,0 +1,242 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid *               GRID,
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid *               GRID;
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange returns  the value of the one norm,  or the infinity norm,
+ * or the element of largest absolute value of a distributed matrix A:  
+ *  
+ *  
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+ *    norm1(A),        when NORM = HPL_NORM_1,                          
+ *    normI(A),        when NORM = HPL_NORM_I,                          
+ *  
+ * where norm1 denotes the one norm of a matrix (maximum column sum) and
+ * normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+ * that max(abs(A(i,j))) is not a matrix norm.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         On entry,  NORM  specifies  the  value to be returned by this
+ *         function as described above.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+ *         that contains the local pieces of the distributed matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     s, v0=HPL_rzero, * work = NULL;
+   MPI_Comm                   Acomm, Ccomm, Rcomm;
+   int                        ii, jj, mp, mycol, myrow, npcol, nprow,
+                              nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Ccomm = GRID->col_comm;
+   Acomm = GRID->all_comm;
+
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );   /* mp: local row count    */
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );   /* nq: local column count */
+
+   if( Mmin( M, N ) == 0 ) { return( v0 ); }
+   else if( NORM == HPL_NORM_A )
+   {
+/*
+ * max( abs( A ) )
+ */
+      if( ( nq > 0 ) && ( mp > 0 ) )
+      {
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { v0 = Mmax( v0, Mabs( *A ) ); A++; }
+            A += LDA - mp;   /* advance to the start of the next column */
+         }
+      }
+      (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                         Acomm );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * Find norm_1( A ).
+ */
+      if( nq > 0 )
+      {
+         work = (double*)malloc( (size_t)(nq) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            s = HPL_rzero;
+            for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; }
+            work[jj] = s; A += LDA - mp;   /* local sum of column jj */
+         }
+/*
+ * Find sum of global matrix columns, store on row 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum,
+                            0, Ccomm );
+/*
+ * Find maximum sum of columns for 1-norm
+ */
+         if( myrow == 0 )
+         { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in row 0, store result in process (0,0)
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                            Rcomm );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Find norm_inf( A )
+ */
+      if( mp > 0 )
+      {
+         work = (double*)malloc( (size_t)(mp) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { work[ii] += Mabs( *A ); A++; }
+            A += LDA - mp;   /* advance to the start of the next column */
+         }
+/*       
+ * Find sum of global matrix rows, store on column 0 of process grid
+ */      
+         (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum,
+                            0, Rcomm );
+/*       
+ * Find maximum sum of rows for inf-norm
+ */      
+         if( mycol == 0 )
+         { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in column 0, store result in process (0,0)
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max,
+                            0, Ccomm );
+   }
+/*
+ * Broadcast answer to every process in the grid
+ */
+   (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm );
+
+   return( v0 );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
new file mode 100644
index 000000000..24fc47540
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
@@ -0,0 +1,238 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        IAROW,
+   const int                        IACOL,
+   const char *                     CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        IAROW;
+   const int                        IACOL;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints to standard error a distributed matrix A.  Local
+ * pieces of A are sent to process (0,0) of the grid and printed there.
+ * HPL_pabort is called if (0,0) cannot allocate its receive buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Acomm;
+   double                     * buf = NULL;
+   int                        h, i, ib, icurcol=IACOL, icurrow=IAROW,
+                              ii=0, j, jb, jj=0, mycol, myrow, npcol,
+                              nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm;
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+/* Process (0,0) receives into buf below, so a failed allocation is fatal */
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf == NULL ) )
+   { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+               {
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               }
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i,
+                                   Acomm );
+                  HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW;
+      }
+      if( mycol == icurcol ) jj += jb;
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.  The formatted text is built in
+ * a 128-byte buffer; anything longer is truncated rather than overrun.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+/* Bounded write: an over-long message is cut short, cline never overflows */
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object rule in this file lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+# Objects the lib.grd rule hands to $(ARCHIVER) for $(HPLlib).
+HPL_pauobj       = \
+   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
+   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
+   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
+   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
+   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
+   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
+   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
+   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+# all and lib are aliases, not files: phony so a stray file cannot mask them.
+.PHONY  : all lib
+all     : lib
+lib     : lib.grd
+# lib.grd is the stamp touched once the objects are archived into $(HPLlib).
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One rule per object: $< is its source in the parent directory.
+HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_infog2l.o : ../HPL_infog2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numroc.o : ../HPL_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_numrocI.o : ../HPL_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pwarn.o : ../HPL_pwarn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pabort.o : ../HPL_pabort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlange.o : ../HPL_pdlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+# ######################################################################
+# Phony: a stray file named "clean" must never mask the cleanup.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocmax.c
new file mode 100644
index 000000000..644641412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocmax.c
@@ -0,0 +1,149 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax looks up,  in the local part of the current column,  the
+ * entry selected by HPL_idamax and packs the result in WORK[0:3].   On
+ * exit,  WORK[0] holds that entry,  WORK[1] its local row index (as  a
+ * double),  WORK[2] the matching global row index,  and WORK[3] the row
+ * coordinate of this process.  When N is less than 1,  WORK[0:2] is set
+ * to zero and  WORK[3]  to the total number of process rows,  and  the
+ * routine returns at once.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL points to the panel data structure;  its  A,
+ *         lda, ii, nb and grid members are read here.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N is the local number of rows of the column of  A
+ *         to be searched.
+ *
+ * II      (local input)                 const int
+ *         On entry, II is the row offset, relative to the panel, where
+ *         the column to be searched starts.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ is the column offset,  relative  to the  panel,
+ *         of the column to be searched.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 4.  On exit,
+ *         WORK[0] is the entry selected by HPL_idamax,  WORK[1] is its
+ *         local row index,  WORK[2] is its global row index,  WORK[3]
+ *         is the  row coordinate  of the process owning it  (or  the
+ *         number of process rows when N is less than 1).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     * col;
+   int                        blk, grow, imax, lrow, nrows, prow;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N < 1 )
+   {
+/*
+ * No local row of A here:  store the number of process rows in WORK[3]
+ * so that this "ghost" entry is never used, even when the current column
+ * of A contains only zeros.
+ */
+      WORK[0] = WORK[1] = WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+      return;
+   }
+/*
+ * Point at the column, find its pivot candidate, then convert the local
+ * row index into a global one.
+ */
+   col   = Mptr( PANEL->A, II, JJ, PANEL->lda );
+   prow  = PANEL->grid->myrow;
+   nrows = PANEL->grid->nprow;
+   blk   = PANEL->nb;
+   imax  = HPL_idamax( N, col, 1 );
+   lrow  = PANEL->ii + II + imax;
+   Mindxl2g( grow, lrow, blk, blk, prow, 0, nrows );
+/* WORK[0:3] := { entry, local row, global row, owning process row } */
+   WORK[0] = col[imax];
+   WORK[1] = (double)(imax);
+   WORK[2] = (double)(grow);
+   WORK[3] = (double)(prow);
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unroll depth (power of 2, <= 32) and its base-2 logarithm
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1.  The current row is read from
+ *         WORK[4+N0:4+2*N0-1]; the rest of this array is workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, JJ, 0, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A,  II,     0, lda );
+               A2 = Mptr( A1,        ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A,  II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * Set INFO: record ia+JJ+1 in DINFO unless it is already non-zero.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unroll depth (power of 2, <= 32) and its base-2 logarithm
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32
+#define    HPL_LOCSWP_LOG2_DEPTH    5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpT
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpT performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * transpose form.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1.  The current row is read from
+ *         WORK[4+N0:4+2*N0-1]; the rest of this array is workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L  = Mptr( PANEL->L1, 0, JJ, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II,     0, lda );
+               A2 = Mptr( A1,       ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
+                    L   += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
+                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
+                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
+                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
+                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
+                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
+                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
+                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
+                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
+                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
+                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
+                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
+                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
+                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
+                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
+                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
+                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
+                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
+                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
+                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
+                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
+                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
+                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
+                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
+                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
+                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
+                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
+#endif
+               }
+
+               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
+               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
+                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
+                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
+                  L[10]=Wmx[10]; L[14]=Wmx[14];
+                  L[11]=Wmx[11]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=Wmx[16]; L[20]=Wmx[20];
+                  L[17]=Wmx[17]; L[21]=Wmx[21];
+                  L[18]=Wmx[18]; L[22]=Wmx[22];
+                  L[19]=Wmx[19]; L[23]=Wmx[23];
+                  L[24]=Wmx[24]; L[28]=Wmx[28];
+                  L[25]=Wmx[25]; L[29]=Wmx[29];
+                  L[26]=Wmx[26]; L[30]=Wmx[30];
+                  L[27]=Wmx[27]; L[31]=Wmx[31];
+#endif
+               }
+               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+            {
+               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda;
+               L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda;
+               L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda;
+               L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda;
+               L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda;
+               L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda;
+               L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda;
+               L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda;
+               L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda;
+               L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda;
+               L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda;
+               L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } 
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+         {
+            L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11];
+            L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19];
+            L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23];
+            L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27];
+            L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31];
+#endif
+         }
+         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+      {
+         L[ 0]=Wr0[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         L[ 1]=Wr0[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
+         L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21];
+         L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23];
+         L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29];
+         L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31];
+#endif
+      }
+      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
+/*
+ * Set INFO: record ia+JJ+1 in DINFO unless it is already non-zero.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c
new file mode 100644
index 000000000..1d99c6e14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c
@@ -0,0 +1,141 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdfact
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdfact
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdfact recursively factorizes a  1-dimensional  panel of columns.
+ * The  RPFACT  function pointer specifies the recursive algorithm to be
+ * used, either Crout, Left- or Right looking.  NBMIN allows to vary the
+ * recursive stopping criterion in terms of the number of columns in the
+ * panel, and  NDIV allows to specify the number of subpanels each panel
+ * should be divided into. Usually  a value of 2 will be chosen. Finally
+ * PFACT is a function pointer specifying the non-recursive algorithm to
+ * be  used on at most NBMIN columns. One can also choose here between
+ * Crout, Left- or Right looking.  Empirical tests seem to indicate that
+ * values of 4 or 8 for NBMIN give the best results.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   void                       * vptr = NULL;
+   int                        align, jb;
+/* ..
+ * .. Executable Statements ..
+ */
+   jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb;
+
+   if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+   align = PANEL->algo->align;
+   vptr  = (void *)malloc( ( (size_t)(align) + 
+              (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) *
+              sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
+/*
+ * Factor the panel - Update the panel pointers
+ */
+   PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr,
+                       ((size_t)(align) * sizeof(double) ) ) );
+   if( vptr ) free( vptr );
+
+   PANEL->A   = Mptr( PANEL->A, 0, jb, PANEL->lda );
+   PANEL->nq -= jb; PANEL->jj += jb;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * End of HPL_pdfact
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c
new file mode 100644
index 000000000..b14452197
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c
@@ -0,0 +1,311 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmxswp
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_pdmxswp
+( PANEL, M, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmxswp swaps  and  broadcasts  the  absolute value max row using
+ * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by
+ *  
+ *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ *  
+ * where  lat and bdwth are the latency and bandwidth of the network for
+ * double precision real elements.  Communication  only  occurs  in  one
+ * process  column. Mono-directional links  will cause the communication
+ * cost to double.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of the matrix
+ *         column on which this function operates.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
+ *         routine to  initialize  the first four entries of this array.
+ *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+ *         Note that this is also the  JJth  row  (or column) of L1. The
+ *         remaining part is used as a temporary array.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax, tmp1;
+   double                     * A0, * Wmx, * Wwork;
+   HPL_T_grid                 * grid;
+   MPI_Comm                   comm;
+   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
+   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
+                              mydis_, myrow, n0, nprow, partner, rcnt,
+                              root, scnt, size_;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
+/*
+ * ip2  : the largest  power of two less than or equal to nprow;
+ * hdim : dimension of the hypercube made of those ip2 processes;
+ * Np2  : logical flag indicating whether or not nprow is a power of 2;
+ */
+   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
+   hdim    = (unsigned int)(grid->row_hdim);     n0  = PANEL->jb;
+   icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 );
+   mydist  = MModSub( myrow, icurrow, nprow );
+/*
+ * Set up pointers in workspace:  WORK and Wwork  point to the beginning
+ * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
+ * owning the local (before combine) and global (after combine) absolute
+ * value max. A0 points to the copy of the current row of the matrix.
+ */
+   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
+   Wwork = WORK + cnt0;
+/*
+ * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
+ * with max in current column). If I am the current process row, pack in
+ * addition the current row of A in A0[0:N0-1].  If I do not own any row
+ * of A, then zero out Wmx[0:N0-1].
+ */
+   if( M > 0 )
+   {
+      lda = PANEL->lda;
+      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
+                 Wmx, 1 );
+      if( myrow == icurrow )
+      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
+   }
+   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
+/*
+ * Combine the results (bi-directional exchange):  the process coordina-
+ * tes are relative to icurrow,  this allows to reduce the communication
+ * volume when nprow is not a power of 2.
+ *
+ * When nprow is not a power of 2:  proc[i-ip2] receives local data from
+ * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
+ * sends to proc[ip2] the current row of A  for later broadcast in procs
+ * [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         if( mydist == (int)(ip2) )
+            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         if( mydist == 0 )
+            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+ 
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
+      }
+   }
+
+   if( mydist < (int)(ip2) )
+   {
+/*
+ * power of 2 part of the processes collection: processes  [0..ip2)  are
+ * combining (binary exchange); proc[0] has two rows to send, but one to
+ * receive.  At every step  k  in [0..hdim) of the algorithm,  a process 
+ * pair exchanging 2 rows is such that  mydist >> (k+1) is 0. Among those
+ * processes the ones  that are sending one more row than  what they are
+ * receiving are such that mydist >> k is equal to 0.
+ */
+      k = 0; ipow = 1;
+ 
+      while( k < hdim )
+      {
+         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
+         {
+            if( ( (unsigned int)(mydist) >> k ) == 0 )
+            { scnt = cnt0; rcnt = cnt_; }
+            else
+            { scnt = cnt_; rcnt = cnt0; }
+         }
+         else { scnt = rcnt = cnt_; }
+ 
+         partner = (int)( (unsigned int)(mydist) ^ ipow );
+         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
+                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
+                          nprow ), comm );
+ 
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         {
+            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
+                       WORK, 1 );
+         }
+         else if( rcnt == cnt0 )
+         { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); }
+ 
+         ipow <<= 1; k++;
+      }
+   }
+   else if( size_ > 1 )
+   {
+/*
+ * proc[ip2] broadcasts the current row of A to procs [ip2+1..nprow).
+ */
+      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
+      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+ 
+      root   = MModAdd( icurrow, (int)(ip2), nprow );
+      mydis_ = MModSub( myrow,   root,       nprow );
+ 
+      do
+      {
+         mask ^= ip2_;
+         if( ( mydis_ & mask ) == 0 )
+         {
+            partner = (int)(mydis_ ^ ip2_);
+            if( ( mydis_ & ip2_ ) != 0 )
+            {
+               (void) HPL_recv( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+            else if( partner < size_ )
+            {
+               (void) HPL_send( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+         }
+         ip2_ >>= 1;
+      } while( ip2_ > 0 );
+   }
+/*
+ * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
+ * sends the pivot row to proc[i]  along  with the first four entries of
+ * the WORK array.
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+   }
+/*
+ * Save the global pivot index in pivot array
+ */
+   (PANEL->DPIV)[JJ] = WORK[2];
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+/*
+ * End of HPL_pdmxswp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c
new file mode 100644
index 000000000..4ea170b73
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in no-transpose form (i.e. just like the input
+ * matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj, jj+1, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1,  kk, Nm1 );
+         Xv1 = vsip_msubview_d( Xv0, jj,    ICOFF, 1,  kk  );
+         Yv1 = vsip_msubview_d( Xv0, jj,    jj+1,  1,  Nm1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 ); 
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
+                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
+                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,          jj+1,            kk+1,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      vsip_mdestroy_d( Yv1 );
+      vsip_mdestroy_d( Xv1 );
+      vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
+                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ),
+                 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c
new file mode 100644
index 000000000..50ed300aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c
@@ -0,0 +1,267 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  indeed allows  almost
+ * achieving Level 3 BLAS performance  in the panel factorization.  On a
+ * large number of modern machines, this operation is, however, latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one iteration  of the main loop  is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column,  so  that  the  current
+ * column is brought through cache only once at each step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations; however, the design allows plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea was
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1 (skipped while kk == 0, i.e. first pass)
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj+1, jj, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, jj+1,  ICOFF, Nm1, kk );
+         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj,    kk,   1 );
+         Yv1 = vsip_msubview_d( Xv0, jj+1,  jj,    Nm1,  1 );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
+                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
+                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry - Update the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,           ICOFF,           1,   kk+1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF,
+                 n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry (this is the unrolled final iteration of the loop above)
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c
new file mode 100644
index 000000000..fa471198d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  indeed allows  almost
+ * achieving Level 3 BLAS performance  in the panel factorization.  On a
+ * large number of modern machines, this operation is, however, latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one iteration  of the main loop  is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column,  so  that  the  current
+ * column is brought through cache only once at each step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations; however, the design allows plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea was
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap/broadcast current row, then HPL_dtrsv on kk entries at L1ptr
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, 
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr,  1 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,        jj+1,              kk,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr,  1, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry (this is the unrolled final iteration of the loop above)
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c
new file mode 100644
index 000000000..a6e1b67bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm  indeed allows  almost
+ * achieving Level 3 BLAS performance  in the panel factorization.  On a
+ * large number of modern machines, this operation is, however, latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one iteration  of the main loop  is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column,  so  that  the  current
+ * column is brought through cache only once at each step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations; however, the design allows plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea was
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap/broadcast current row, then HPL_dtrsv on kk entries at L1ptr
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans,   HplUnit, kk,
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,         ICOFF,             1,   kk );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry (this is the unrolled final iteration of the loop above)
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c
new file mode 100644
index 000000000..0a3b9a542
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c
@@ -0,0 +1,250 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlN factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of the main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my process row is PANEL->prow */
+
+   Nm1  = N - 1; jj = ICOFF;                       /* Nm1: columns left after column jj */
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } /* rows from iip1 on exclude row ii */
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   } /* all m local rows are kept       */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )                    /* every column except the last one */
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )        /* skip the scaling when WORK[0] is zero */
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); /* next column first */
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+#ifdef HPL_CALL_VSIPL
+      if( Nm1 > 1 )
+      {
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+      }
+#else
+      if( Nm1 > 1 )                     /* columns remain beyond column jj+1 */
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda );
+#endif
+/*
+ * Same thing as above but with worse data access on y (A += x * y^T)
+ *
+ *    if( Nm1 > 1 )
+ *       HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+ *                 Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ),
+ *                 lda );
+ */  
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* step down one local row */
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c
new file mode 100644
index 000000000..68c1afc02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlT factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of the main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt, * L1;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M,
+                              n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;             /* n0 is the leading dimension used for L1 */
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my process row is PANEL->prow */
+
+   Nm1  = N - 1; jj = ICOFF;                       /* Nm1: columns left after column jj */
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } /* rows from iip1 on exclude row ii */
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   } /* all m local rows are kept       */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )                    /* every column except the last one */
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )        /* skip the scaling when WORK[0] is zero */
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+
+      if( Nm1 > 1 )                     /* columns remain beyond column jj+1 */
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); 
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ),
+                   lda );
+#endif
+      }
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* step down one local row */
+
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c
new file mode 100644
index 000000000..348d7ebe6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrN recursively  factorizes  a panel of columns  using  the
+ * recursive  Crout  variant of the usual one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )  /* recursion base case: hand over to pffun */
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;            /* n0 is the leading dimension used for L1 */
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my process row is PANEL->prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;       /* n: columns left after the current jb-wide block */
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); /* recurse on the jb-column block at offset ioff */
+
+      if( n > 0 )                       /* columns remain to the right of this block */
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff,  ICOFF,   jb, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff,  ioff+jb, jb,  n );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj,  n );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n,
+                    jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, 
+                    Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );     /* next block is at most nb columns wide */
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c
new file mode 100644
index 000000000..a1ecfac2c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrT recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+ * The lower triangular N0-by-N0  upper block of the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm makes it possible to come
+ * close to  Level 3 BLAS  performance in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements .. (N <= NBMIN: hand over to PANEL->algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >  NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff in prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the subviews: Av1 m-by-jj, Lv1 jb-by-jj (op = T), Av2 m-by-jb
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                   VSIP_MAT_TRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrT( PANEL, m, jb, ioff, WORK );  /* recurse on m-by-jb */
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 );
+/*
+ * Create the matrix subviews: Av1 n-by-jj, Lv1 jj-by-jb, Av2 n-by-jb
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff,  n, jb );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF,   ioff, jj, jb );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                      VSIP_MAT_NTRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb,
+                    jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                    Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c
new file mode 100644
index 000000000..4dbc13b44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllN recursively  factorizes  a panel  of columns using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm makes it possible to come
+ * close to  Level 3 BLAS  performance in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements .. (N <= NBMIN: hand over to PANEL->algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >  NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff in prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit,
+                 jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ),
+                 n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av1 is m-by-jj, Lv1 jj-by-jb, Av2 m-by-jb
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m,  jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllN( PANEL, m, jb, ioff, WORK );  /* recurse on m-by-jb */
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c
new file mode 100644
index 000000000..887caeb87
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllT recursively  factorizes  a panel of columns  using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution.  The recursive algorithm makes it possible to come
+ * close to  Level 3 BLAS  performance in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements .. (N <= NBMIN: hand over to PANEL->algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >  NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff in prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr,
+                 jj, 0, n0 ), n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the subviews: Av1 m-by-jj, Lv1 jb-by-jj (op = T), Av2 m-by-jb
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb,  jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av1 );
+      (void) vsip_mdestroy_d( Av2 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllT( PANEL, m, jb, ioff, WORK );  /* recurse on m-by-jb */
+/*
+ * Copy back upper part of A in current process row - Go to next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
new file mode 100644
index 000000000..22f105cf4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlN recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed allows one to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* .. Executable Statements ..
+ * Base case: N <= NBMIN is delegated to PANEL->algo->pffun.
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                 HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
new file mode 100644
index 000000000..a77301b9b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlT recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed allows one to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* .. Executable Statements ..
+ * Base case: N <= NBMIN is delegated to PANEL->algo->pffun.
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (N <= NBMIN returned above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+ 
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor current panel - Replicated solve - Local update
+ */
+      HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews (Av2 is m-by-n to match Av1 * Lv1^T)
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# INCdep lists the headers every object below is rebuilt against. INCdir
+# and the tool/flag variables are not set here (presumably by Make.inc).
+INCdep           = $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h \
+                   $(INCdir)/hpl_auxil.h $(INCdir)/hpl_pmisc.h \
+                   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+#
+HPL_pfaobj       = HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \
+                   HPL_pdmxswp.o \
+                   HPL_pdpancrN.o HPL_pdpancrT.o HPL_pdpanllN.o HPL_pdpanllT.o \
+                   HPL_pdpanrlN.o HPL_pdpanrlT.o \
+                   HPL_pdrpanllN.o HPL_pdrpanllT.o HPL_pdrpancrN.o HPL_pdrpancrT.o \
+                   HPL_pdrpanrlN.o HPL_pdrpanrlT.o \
+                   HPL_pdfact.o
+#
+## Targets #############################################################
+#
+all              : lib
+.PHONY           : all lib
+lib              : lib.grd
+# all/lib name no file (hence phony); lib.grd is touched after archiving.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the same-named source one directory up.
+HPL_dlocmax.o: ../HPL_dlocmax.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlocswpN.o: ../HPL_dlocswpN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlocswpT.o: ../HPL_dlocswpT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdmxswp.o: ../HPL_pdmxswp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpancrN.o: ../HPL_pdpancrN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpancrT.o: ../HPL_pdpancrT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanllN.o: ../HPL_pdpanllN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanllT.o: ../HPL_pdpanllT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanrlN.o: ../HPL_pdpanrlN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanrlT.o: ../HPL_pdpanrlT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanllN.o: ../HPL_pdrpanllN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanllT.o: ../HPL_pdrpanllT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpancrN.o: ../HPL_pdrpancrN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpancrT.o: ../HPL_pdrpancrT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanrlN.o: ../HPL_pdrpanrlN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanrlT.o: ../HPL_pdrpanrlT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdfact.o: ../HPL_pdfact.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+# ######################################################################
+# clean runs $(RM) on *.o and *.grd here; it names no file, so is phony.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c
new file mode 100644
index 000000000..b917a6525
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_equil
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_TRANS             TRANS,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   int *                            IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1,
+   int *                            IWORK
+)
+#else
+void HPL_equil
+( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_TRANS             TRANS;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   int *                            IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_equil equilibrates the local pieces of U, so that on exit from
+ * this function, pieces of U contained in every process row are of the
+ * same size. This phase makes the rolling phase optimal.  In addition,
+ * this  function probes  for  the  column panel L and forwards it when
+ * possible.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be equilibrated) information.
+ *
+ * TRANS   (global input)                const enum HPL_TRANS
+ *         On entry, TRANS specifies whether  U  is stored in transposed
+ *         or non-transposed form.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of rows or columns of  U. N
+ *         must be at least 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+ *         non-transposed form, and MAX(1,N) otherwise.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROW+1 such that
+ *         IPLEN[i+1]-IPLEN[i] is the number of rows of U in process
+ *         IPMAP[i]. Overwritten with the new offsets if U is rebalanced.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW), IPMAPM1[IPMAP[i]] = i.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension NPROW+1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, ip, ipU, ipcur, iprow, iptgt, lastrow,
+                              left, npm1, nprow, ll, llU, llcur, lltgt,
+                              right, slen, smax, smin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; /* nprow <= 2 */
+/*
+ * If the current distribution of the pieces of U is already optimal for
+ * the rolling phase, then return immediately. The optimal distribution
+ * is such that ip processes have smax items and the remaining processes
+ * only have smin items. Another way to check this is to verify that all
+ * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */
+   smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow;
+   ip   = slen - nprow * ( smin = slen / nprow );
+
+   iprow = 0;
+   do
+   {
+      ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++;
+   } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) );
+
+   if( iprow == nprow ) return;
+/*
+ * Now,  we are sure  the distribution of the pieces of U is not optimal
+ * with respect to the rolling phase,  thus  perform  equilibration.  Go
+ * through the list of processes:  Processes  that have rows that do not
+ * belong to them  with respect to the optimal mapping spread them  in a
+ * logarithmic fashion. To simplify a little bit the implementation, and
+ * mainly the packing, a source process row spreads its data to its left
+ * first, and then to its right.
+ */
+   IWORK[nprow] = slen;
+
+   for( iprow = 0; iprow < nprow; iprow++ )
+   {
+      llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] );
+      if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax;      }
+      else             { lltgt = smin; iptgt = iprow * smin + ip; }
+
+      left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU );
+/*
+ * If I have something to spread to either the left or the right
+ */
+      if( ( llU > 0 ) && ( left || right ) )
+      {        /* Figure out how much every other process should have */
+
+         ipcur = ipU; llcur = llU;
+
+         for( i = 0; i < nprow; i++ )
+         {
+            if( i < ip ) { lltgt = smax; iptgt = i * smax;      }
+            else         { lltgt = smin; iptgt = i * smin + ip; }
+            lastrow = iptgt + lltgt - 1;
+
+            if( ( lastrow >= ipcur ) && ( llcur > 0 ) )
+            { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; }
+            else { ll = 0; }
+
+            IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur;
+         }
+/*
+ * Equilibration phase
+ */
+         if( TRANS == HplNoTrans )
+         {
+            if( left  )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+         else
+         {
+            if( left  )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+      }
+   }
+/*
+ * Finally update  IPLEN  with the indexes corresponding to the new dis-
+ * tribution of U - IPLEN[nprow] remains unchanged.
+ */
+   for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip );
+/*
+ * End of HPL_equil
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c
new file mode 100644
index 000000000..0715159bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c
@@ -0,0 +1,185 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_logsort
+(
+   const int                        NPROCS,
+   const int                        ICURROC,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_logsort
+( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
+   const int                        NPROCS;
+   const int                        ICURROC;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_logsort computes an array  IPMAP  and  its inverse  IPMAPM1  that
+ * contain the logarithmic sorted processes id with respect to the local
+ * number of rows of  U  that they own. This is necessary to ensure that
+ * the logarithmic spreading of U is optimal in terms of number of steps
+ * and communication volume as well.  In other words, the largest pieces
+ * of U will be sent a minimal number of times.
+ *
+ * Arguments
+ * =========
+ *
+ * NPROCS  (global input)                const int
+ *         On entry, NPROCS  specifies the number of process rows in the
+ *         process grid. NPROCS is at least one.
+ *
+ * ICURROC (global input)                const int
+ *         On entry, ICURROC is the source process row.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROCS+1,  such that
+ *         IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+ *         that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+ *         rows of U  in the processes before process IPMAP[i] after the
+ *         sort,  with  the convention that  IPLEN[NPROCS] is  the total
+ *         number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+ *         IPLEN[i] is  the  number of rows of A that should be moved to
+ *         the process IPMAP[i].  IPLEN  is such that the number of rows
+ *         of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myroc] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROCS)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dist, i, ip, iplen_i, iplen_j, itmp, j, k;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the  logarithmic distance between process j and process 0, as
+ * well as the maximum logarithmic distance. IPMAPM1 is a work array here.
+ */
+   for( j = 0, dist = 0; j < NPROCS; j++ )
+   {
+      IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0;
+      do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); /* count set bits */
+      IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp;
+   }
+/*
+ * Rotate IPLEN[1..NPROCS] left by ICURROC places, so that IPLEN[1] is now
+ * what used to be IPLEN[ICURROC+1].  IPMAP was set in the loop above so
+ * that IPMAP[0] is ICURROC.
+ */
+   for( j = 0; j < ICURROC; j++ )
+   {
+      for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i];
+      IPLEN[NPROCS] = itmp;
+   }
+/*
+ * logarithmic sort
+ */
+   for( k = 1; k <= dist; k++ )
+   {
+      for( j = 1; j < NPROCS; j++ )
+      {
+         if( IPMAPM1[j] == k )
+         {
+            for( i = 2; i < NPROCS; i++ )
+            {
+               if( k < IPMAPM1[i] )
+               {
+                  iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1];
+
+                  if( iplen_j < iplen_i )
+                  {
+                     IPLEN[j+1] = iplen_i;  IPLEN[i+1] = iplen_j;
+                     itmp       = IPMAP[j]; IPMAP[j]   = IPMAP[i];
+                     IPMAP[i]   = itmp;
+                  }
+               }
+            }
+         }
+      }
+   }
+/*
+ * Compute IPLEN and IPMAPM1 (the inverse of IPMAP)
+ */
+   IPLEN[0] = 0;
+
+   for( i = 0; i < NPROCS; i++ )
+   {
+      IPMAPM1[ IPMAP[i] ] = i;
+      IPLEN[i+1]         += IPLEN[i];
+   }
+/*
+ * End of HPL_logsort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c
new file mode 100644
index 000000000..ced74269e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with  or  without look-ahead.  The  lower  triangular  factor is left
+ * unpivoted and the pivots are not returned. The right hand side is the
+ * N+1 column of the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( A->n <= 0 ) return;
+
+   A->info = 0;
+
+   if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) )
+   {
+      HPL_pdgesv0(  GRID, ALGO, A );   /* variant without look-ahead */
+   }
+   else
+   {
+      HPL_pdgesvK2( GRID, ALGO, A );
+   }
+/*
+ * Solve upper triangular system (skipped when A->info is nonzero)
+ */
+   if( A->info == 0 ) HPL_pdtrsv( GRID, A );
+/*
+ * End of HPL_pdgesv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c
new file mode 100644
index 000000000..d79b6fa55
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv0
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv0
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv0 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * without look-ahead. The lower triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, j, jb, n, nb, tag=MSGID_BEGIN_FACT,
+                              test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N = A->n ) <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+ 
+   HPL_pdupdate = ALGO->upfun; nb = A->nb;
+/*
+ * Allocate a panel list of length 1 - Allocate panel[0] resources
+ */
+   panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); }
+
+   HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag,
+                    &panel[0] );
+/*
+ * Loop over the columns of A, advancing by nb columns per iteration
+ */
+   for( j = 0; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel, print progress */
+      if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Release panel resources - re-initialize panel data structure
+ */
+      (void) HPL_pdpanel_free( panel[0] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] );
+/*
+ * Factor and broadcast current panel - update
+ */
+      HPL_pdfact(               panel[0] );
+      (void) HPL_binit(         panel[0] );
+      do
+      { (void) HPL_bcast(       panel[0], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(         panel[0] );
+      HPL_pdupdate( NULL, NULL, panel[0], -1 );
+/*
+ * Update message id for next factorization
+ */
+      tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Release panel resources and panel list
+ */
+   (void) HPL_pdpanel_disp( &panel[0] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesv0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
new file mode 100644
index 000000000..ff1958cfc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
@@ -0,0 +1,222 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+#ifdef STDC_HEADERS
+void HPL_pdgesvK1
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK1
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK1 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.  A new panel is allocated at each main step.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information (mycol, npcol, myrow are read here).
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters (depth and upfun are read here).
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information (n and nb are read here).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (jj is accumulated below but never read here)
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb; 
+
+   if( N <= 0 ) return;               /* empty problem: nothing to do */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels (slots 0..depth-1)
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );                     /* width of this panel */
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;   /* shrink trailing size, advance start */
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb; /* j: cols done */
+/*
+ * Factor and broadcast k-th panel - use long topology for those
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do                      /* poll until test becomes HPL_SUCCESS */
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-1-k panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Allocate current panel resources - Finish latest update - Factor and
+ * broadcast current panel (the new panel is stored in panel[depth])
+ */
+      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
+ 
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(    panel[depth] );       /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Release latest panel resources - circular shift of the panel pointers
+ * Go to the next process row and column -  update  the message ids  for
+ * broadcast
+ */
+      (void) HPL_pdpanel_disp( &panel[0] );
+      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; /* [depth] is refilled by the next HPL_pdpanel_new */
+ 
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )   /* panel[depth] aliases panel[depth-1] */
+   {
+      HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp( &panel[k] );
+   }
+ 
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
new file mode 100644
index 000000000..dec506ab9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
@@ -0,0 +1,231 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesvK2
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK2
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK2 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * with look-ahead.  The  lower  triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.  Panels are created once and re-initialized.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information (mycol, npcol, myrow are read here).
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters (depth and upfun are read here).
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information (n and nb are read here).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (jj is accumulated below but never read here)
+ */
+   HPL_T_panel                * p, * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate; 
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb;
+
+   if( N <= 0 ) return;               /* empty problem: nothing to do */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1)
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels (slots 0..depth-1)
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );                     /* width of this panel */
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;   /* shrink trailing size, advance start */
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Create the last, (depth+1)-th panel: panel[depth]
+ */
+   HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart,
+                    jstart, tag, &panel[depth] );
+   tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb; /* j: cols done */
+/*
+ * Factor and broadcast k-th panel
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do                      /* poll until test becomes HPL_SUCCESS */
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-k-1 panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Initialize current panel - Finish latest update, Factor and broadcast
+ * current panel (panel[depth] is recycled in place, not reallocated)
+ */
+      (void) HPL_pdpanel_free( panel[depth] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] );
+
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(       panel[depth] );    /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate( panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Circular shift of the panel pointers:
+ * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp;
+ *
+ * Go to next process row and column - update the message ids for broadcast
+ */
+      p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+      panel[depth] = p;      /* old panel[0] becomes the spare panel */
+
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Clean-up: Finish updates - release panels and panel list
+ */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp(  &panel[k] );
+   }
+   (void) HPL_pdpanel_disp( &panel[depth] );  /* release the spare panel */
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK2
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
new file mode 100644
index 000000000..b4433e1be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
@@ -0,0 +1,432 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcast a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;
+/*
+ * Allocate space for temporary W (ldW * jb)
+ */
+   vptr = (void*)malloc( 
+      ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counter parts lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of the algorithm, it is necessary to update the list 
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
new file mode 100644
index 000000000..7a9764c09
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
@@ -0,0 +1,433 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  n  /* leading dimension passed along with U */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;  /* +1: first entry of each W column is its offset in U */
+/*
+ * Allocate temporary W: ldW * jb doubles plus align doubles of slack
+ */
+   vptr = (void*)malloc( ( (size_t)(align) + 
+                           ((size_t)(jb) * (size_t)(ldW))) * 
+                           sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel.  All of these arrays are carved out of PANEL->IWORK.
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of  the algorithm, it is necessary to update the list
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen: fold in the counts received from the non power of 2 part
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen for every pair of this step - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection: proc[ip2] broadcasts U
+ * to procs[ip2..nprow) (relative to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relative to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If nprow is not a power of 2,  proc[i-ip2]  sends the global result to
+ * proc[i] for all i in [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
new file mode 100644
index 000000000..31f219840
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1; /* -1 until read from PANEL->algo */
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  jb  /* leading dimension passed along with U */
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether equilibration should be performed or not (read once,
+ * on the first call that gets this far, and kept in the static equil)
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  ipID spans 4*jb
+ * entries, lindxA and lindxAU 2*jb each, iplen nprow+1, ipmap and ipmapm1
+ * nprow each, permU jb; iwork is a workspace of size
+ * max( 2 * jb (plindx1), nprow+1 (equil) ).  Total IWORK length:
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1).
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {  /* HPL_pdlaswp01N was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
+                     0, LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration (only when enabled through PANEL->algo->equil)
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
+                 ipmap, ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp00N( jb, n, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
new file mode 100644
index 000000000..0c4de2669
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide whether to equilibrate; read once and cached in static equil
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Carve the index arrays out of PANEL->IWORK; ipID is computed below if
+ * not already done for this panel. Layout, in order: iflag(1), ipl(1),
+ * ipID(4*jb), ipA(1), lindxA(2*jb), lindxAU(2*jb), iplen(nprow+1),
+ * ipmap(nprow), ipmapm1(nprow), permU(jb), then iwork, a workspace of
+ * size max( 2*jb (plindx1), nprow+1 (equil) ), i.e. in total
+ * 4 + 9*jb + 3*nprow + max(2*jb, nprow+1) integers.
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01T was called before: only recompute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0,
+                     iplen[k], LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap,
+                 ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp10N( n, jb, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c
new file mode 100644
index 000000000..d2135130a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c
@@ -0,0 +1,296 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtrsv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_pmat *                     AMAT
+)
+#else
+void HPL_pdtrsv
+( GRID, AMAT )
+   HPL_T_grid *                     GRID;
+   HPL_T_pmat *                     AMAT;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtrsv solves an upper triangular system of linear equations.
+ *  
+ * The rhs is the last column of the N by N+1 matrix A. The solve starts
+ * in the process  column owning the  Nth  column of A, so the rhs b may
+ * need to be moved one process column to the left at the beginning. The
+ * routine therefore needs  a column  vector in every process column but
+ * the one owning  b. The result is  replicated in all process rows, and
+ * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ *  
+ * The algorithm uses decreasing one-ring broadcast in process rows  and
+ * columns  implemented  in terms of  synchronous communication point to
+ * point primitives.  The  lookahead of depth 1 is used to minimize  the
+ * critical path. This entire operation is essentially ``latency'' bound
+ * and an estimate of its running time is given by:
+ *  
+ *    (move rhs) lat + N / ( P bdwth ) +            
+ *    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+ *               gam2 N^2 / ( P Q ),                
+ *  
+ * where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+ * There are  N / NB  diagonal blocks. One must exchange  2  messages of
+ * length NB to compute the next  NB  entries of the vector solution, as
+ * well as performing a total of N^2 floating point operations.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * AMAT    (local input/output)          HPL_T_pmat *
+ *         On entry,  AMAT  points  to the data structure containing the
+ *         local array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Ccomm, Rcomm;
+   double                     * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL,
+                              * XR=NULL, * Xd=NULL, * Xdprev=NULL,
+                              * W=NULL;
+   int                        Alcol, Alrow, Anpprev, Anp, Anq, Bcol,
+                              Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid,
+                              Wfr=0, colprev, kb, kbprev, lda, mycol,
+                              myrow, n, n1, n1p, n1pprev=0, nb, npcol,
+                              nprow, rowprev, tmp1, tmp2;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( n = AMAT->n ) <= 0 ) return;   /* quick return: no timer started */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+   nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X;
+
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV;
+   Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1;
+   GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 );
+/*
+ * Move the rhs in the process column owning the last column of A.
+ */
+   Mnumroc( Anp, n, nb, nb, myrow, 0, nprow );
+   Mnumroc( Anq, n, nb, nb, mycol, 0, npcol );
+
+   tmp1  = ( n - 1 ) / nb;
+   Alrow = tmp1 - ( tmp1 / nprow ) * nprow;
+   Alcol = tmp1 - ( tmp1 / npcol ) * npcol;
+   kb    = n    - tmp1 * nb;
+
+   Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda );
+   Mindxg2p( n, nb, nb, Bcol, 0, npcol );
+
+   if( ( Anp > 0 ) && ( Alcol != Bcol ) )
+   {
+      if( mycol == Bcol  )
+      { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); }
+      else if( mycol == Alcol )
+      { (void) HPL_recv( XC, Anp, Bcol,  Rmsgid, Rcomm ); }
+   }
+   Rmsgid = ( Rmsgid + 2 >
+              MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 );
+   if( mycol != Alcol )
+   { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; }
+/*
+ * Set up lookahead
+ */
+   n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb );
+   if( Anp > 0 )
+   {
+      W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) );
+      if( W == NULL )
+      { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); }
+      Wfr = 1;
+   }
+
+   Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda );
+   tmp1    = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+   if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); }
+   if( mycol == Alcol )
+   {
+      Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq );
+      if( myrow == Alrow )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, Xd, 1 );
+      }
+   }
+
+   rowprev = Alrow; Alrow = MModSub1( Alrow, nprow );
+   colprev = Alcol; Alcol = MModSub1( Alcol, npcol );
+   kbprev  = kb; n -= kb;
+   tmp1    = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+/*
+ * Start the operations
+ */
+   while( n > 0 )
+   {
+      if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; }
+      if( myrow == Alrow ) { Anp -= kb; }
+/*
+ * Broadcast  (decreasing-ring)  of  previous solution block in previous
+ * process column,  compute  partial update of current block and send it
+ * to current process column.
+ */
+      if( mycol == colprev )
+      {
+/*
+ * Send previous solution block in process row above
+ */
+         if( myrow == rowprev )
+         {
+            if( GridIsNot1xQ )
+               (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                                Cmsgid, Ccomm );
+         }
+         else
+         {
+            (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+         } 
+/*
+ * Compute partial update of previous solution block and send it to cur-
+ * rent column
+ */
+         if( n1pprev > 0 )
+         {
+            tmp1 = Anpprev - n1pprev;
+            HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev,
+                       -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone,
+                       XC+tmp1, 1 );
+            if( GridIsNotPx1 )
+               (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm );
+         }
+/*
+ * Finish  the (decreasing-ring) broadcast of the solution block in pre-
+ * vious process column
+ */
+         if( ( myrow != rowprev ) &&
+             ( myrow != MModAdd1( rowprev, nprow ) ) )
+            (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+      }
+      else if( mycol == Alcol )
+      {
+/*
+ * Current  column  receives  and accumulates partial update of previous
+ * solution block
+ */
+         if( n1pprev > 0 )
+         {
+            (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm );
+            HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 );
+         }
+      }
+/*
+ * Solve current diagonal block 
+ */
+      if( ( mycol == Alcol ) && ( myrow == Alrow ) )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 );
+      }
+/*
+ * Finish previous update
+ */
+      if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
+         HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone,
+                    Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 );
+/*
+ * Save info of current step and update info for the next step
+ */
+      if( mycol == Alcol ) { Xdprev   = Xd; Aprev = Aptr; }
+      if( myrow == Alrow ) { Anpprev -= kb; }
+      rowprev = Alrow; colprev = Alcol;
+      n1pprev = n1p;   kbprev  = kb; n -= kb;
+      Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol );
+      tmp1  = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+      MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+      Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? 
+                 MSGID_BEGIN_PTRSV   : Rmsgid+2 );
+      Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ?
+                 MSGID_BEGIN_PTRSV+1 : Cmsgid+2 );
+   }
+/*
+ * Replicate last solution block
+ */
+   if( mycol == colprev )
+      (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev,
+                            Ccomm );
+
+   if( Wfr  ) free( W  );
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+/*
+ * End of HPL_pdtrsv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
new file mode 100644
index 000000000..7e31ddcd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
@@ -0,0 +1,442 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
new file mode 100644
index 000000000..faa3ef207
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNT  broadcasts (forwards)  the panel  PBCST  and,  at the
+ * same time,  applies the row interchanges  and  updates  part  of the
+ * trailing submatrix using the panel PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry.  When  PBCST
+ *         is NULL on entry, IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything - test availability of the panel
+ * to be forwarded.  If detected,  forward it and  finish the update  in
+ * one step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly the row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */ 
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * Return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
new file mode 100644
index 000000000..a16aa26a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
new file mode 100644
index 000000000..81e6cc4b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTT broadcasts - forwards the panel PBCST and, at the same
+ * time,  applies the row interchanges and updates part of the trailing
+ * submatrix (using the panel PANEL).
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  when PBCST is not NULL on entry,  IFLAG  indicates
+ *         whether or not the broadcast has been completed.  When PBCST
+ *         is NULL on entry, IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * Return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c
new file mode 100644
index 000000000..bf7cc4503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_perm combines two index arrays and generates  the  corresponding
+ * permutation. First, this function computes the inverse of LINDXA and
+ * combines it with LINDXAU.  Second,  in order to be able  to  perform
+ * the permutation in place,  LINDXAU  is overwritten by a sequence  of
+ * swaps  producing  the  same result.  What  we  ultimately  want   to
+ * achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+ * call to this function, this in-place permutation can be performed by
+ * for i in [0..N) swap U[i] with U[LINDXAU[i]].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the length of the arrays  LINDXA  and
+ *         LINDXAU. N should be at least zero.
+ *
+ * LINDXA  (global input/output)         int *
+ *         On entry,  LINDXA  is an array of dimension N  containing the
+ *         source indexes. On exit,  LINDXA  contains the combined index
+ *         array.
+ *
+ * LINDXAU (global input/output)         int *
+ *         On entry,  LINDXAU is an array of dimension N  containing the
+ *         target indexes.  On exit,  LINDXAU  contains  the sequence of
+ *         swaps that should be applied, in increasing order of i,  to
+ *         permute the underlying array U in place.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a work array of dimension N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j, k, fndd;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Invert LINDXA, combine it with LINDXAU, reset IWORK to the identity
+ */
+   for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; }
+   for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; }
+ 
+   for( i = 0; i < N; i++ )
+   {
+      /* find j with LINDXA[j] == i; unbounded scan, a match must exist */
+      j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--;
+      /* find k with IWORK[k]  == j; unbounded scan, a match must exist */
+      k = 0; do { fndd = ( IWORK[k]  == j ); k++; } while( !fndd ); k--;
+      /* swap IWORK[i] and IWORK[k] (j reused as scratch); LINDXAU[i] = k */
+      j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j;
+      LINDXAU[i] = k;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array  IPID  that contains the source and final
+ * destination  of  matrix rows  resulting  from  the  application  of N
+ * interchanges  as computed by the  LU  factorization  with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long,  since  there are N rows to swap and
+ * broadcast. The length of this array  must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing  IPID  goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *  
+ * For every row src IA + i with i in [0..N) to be swapped with row  dst
+ * such that dst is given by DPIV[i]:
+ *  
+ * Is row  src  the destination  of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *     Yes:  update  this destination  with dst.  For  example,  if  the
+ * pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+ * was thought so far ...
+ *     No :  add  the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *  
+ * Is row dst (when different from src) the destination of a previous row
+ * of the current block, i.e., is there k odd such that IPID(k) is equal
+ * to dst ?
+ *     Yes:  update  IPID(k) with src.  For example,  if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+ * fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *     No : add  the  pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *  
+ * Note that when src is equal to dst, the pair (dst,src)  should not be
+ * added to  IPID  in  order  to avoid duplicated entries in this array.
+ * During  the construction of the array  IPID,  we  make  sure that the
+ * first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+ * For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in  IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N.  On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting  from  the  application of the  N  interchanges  as
+ *         specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst  = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;              /* j scans the odd (destination) slots */
+
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K;     off = 2; IPID[lst] = src; }
+         else        { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {
+         fndd = 0;
+         do
+         {
+            if     ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off  = 2; }
+         else        {                 IPID[fnds] = dst; off  = 0; }
+         if( !fndd ) { lst = *K+off;   IPID[lst ] = dst; off += 2; }
+         else        { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Enforce IPID[2*i+1] == ia + i: swap pair lst into pair slot j = 2*i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j  ]; IPID[j  ] = IPID[lst  ]; IPID[lst  ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.
+ *  
+ * On entry, the array  IPID  of length K is such that the row of global
+ * index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+ * Let  IA  be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index  IPID(2*k+1).  The question then, is to determine
+ * which rows should ultimately be part of U.
+ *  
+ * First, some rows of the process ICURROW  may be swapped locally.  One
+ * of these rows belongs to U, the other one belongs to my local piece of
+ * A.  The other  rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent  along,
+ * and  grabbed by the other processes  as we  progress in the  exchange
+ * phase.
+ *  
+ * So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N,  this row is locally swapped and should be copied into  U  at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is at least N,  then the row IPID(2*i)  should be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *  
+ * If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to  A  remotely.  Since this  process will however send this array U,
+ * this row is  copied into  U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+ * IPID(2*k1+1)-IA.
+ *  
+ * It is thus  important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+ * doing so,  U  is formed, and the local copy  is performed in just one
+ * sweep.
+ *  
+ * Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU  contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *  
+ * Second, if I am not the current row process  ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed,  they  are swapped with one
+ * row  of  the  current  block  of rows,  and  the  main  factorization
+ * algorithm proceeds one row after each other.  The processes different
+ * from ICURROW,  should  exchange and accumulate  those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *  
+ * In processes different from  ICURROW,  the  initial packing algorithm
+ * proceeds as follows.  Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+ * and  this particular row of U should be first copied into my piece of
+ * A, at A(il,:),  where  il is the  local row  index  corresponding  to
+ * IPID(2*i). Now, initially, this row will be packed into workspace, say
+ * as the kth row of  that  work array.  The  following  algorithm  sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in  A  where  this
+ * row of U should be copied, i.e il.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is not in ICURROW,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *  
+ * Since we are at it, we also globally figure  out  how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to  figure it on  the fly  during the  bi-directional exchange phase.
+ * This information is kept in the array  LLEN  of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (local output)                int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+ *         contains how many rows every process has.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges.
+ */
+   myrow   = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow;        jb    = PANEL->jb;
+   nb      = PANEL->nb;          ia    = PANEL->ia;
+   iroff   = PANEL->ii;
+
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;                 /* look for the pair whose source is dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );  /* NOTE(review): presumably always found - confirm */
+               LINDXAU[ip] = IPID[j-1] - ia; /* j is 2 past the hit, so j-1 is its dst slot */
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.  In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit,  IPA  specifies  the number of rows that the current
+ *         process row has that either belong to U  or should be swapped
+ *         with remote rows of A.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in  the  processes  before  process  IPMAP[i]  after the sort
+ *         with the convention that IPLEN[nprow]  is the total number of
+ *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0..NPROW)
+ *
+ * PERMU   (global output)               int *
+ *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+ *         contains  a sequence of permutations,  that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
+   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
+   iroff = PANEL->ii;          icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;   /* upper half of IWORK; lower half is passed to HPL_perm as scratch */
+ 
+   if( myrow == icurrow )
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+ 
+         if( srcrow == icurrow )
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+ 
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;
+ 
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;                 /* look for the pair whose source is dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+ 
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
+               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[ip] is the local index of the row of A that belongs in U
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[ipU] is the local (current) position  index in U
+ * PERMU[ipU] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;                 /* look for the pair whose source is dst */
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutations
+ * that needs to be applied to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value: shift up by one, set IPLEN[0] = 0
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0; 
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx10
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_plindx10
+( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx10 computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+ * contain the logarithmic mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K holding  (src, dst)
+ *         pairs: for even i, IPID[i] is the source and IPID[i+1] the final
+ *         destination resulting from application of the interchanges.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in the processes before process IPMAP[i] after the sort, with
+ *         the convention that IPLEN[nprow] is the total number of rows.
+ *         In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+ *         rows of  A  that should be moved for each process.  IPLEN  is
+ *         such that the number of rows of the source process row can be
+ *         computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+ *         this  array are sorted  so  that  the quantities IPLEN[i+1] -
+ *         IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, i, ia, icurrow, jb, nb,
+                              nprow, src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+   nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb;
+   ia    = PANEL->ia;          icurrow = PANEL->prow;
+/*
+ * Compute  redundantly  the local number of rows  that each process has
+ * and that belong to U in IPLEN[1 .. nprow]  (counted at IPLEN[dstrow+1])
+ */
+   for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0;
+ 
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+      if( srcrow == icurrow )
+      {
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+         if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++;
+      }
+   }
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ * (the inverse of IPMAP)
+ */
+   HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * End of HPL_plindx10
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c
new file mode 100644
index 000000000..e68590a01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c
@@ -0,0 +1,225 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollN
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollN rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of  U.  N must be
+ *         at least zero.  The function returns immediately if N <= 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,IPLEN[NPROW]).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype               type[2];
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR,
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 steps, partner alternates between prev and next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il      = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE,
+                                      &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+      }
+ 
+      if( lengthS > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE,
+                                      &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_SEND] );
+      }
+
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_RECV] );
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); }
+/*
+ * End of HPL_rollN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c
new file mode 100644
index 000000000..0160c9412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c
@@ -0,0 +1,259 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollT
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollT rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the local number of rows of  U.  N must
+ *         be at least zero.  The function returns at once if N <= 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N); the enabled code assumes LDU==N.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype               type[2];
+#endif
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR, 
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 steps, partner alternates between prev and next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
+                                           &type[I_RECV] );
+            else
+               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
+                                       &type[I_RECV] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+#else
+/*
+ * In our case LDU == N: receive lengthR*LDU contiguous doubles directly.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
+                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
+#endif
+      }
+ 
+      if( lengthS > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
+                                             &type[I_SEND] );
+            else
+               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
+                                         &type[I_SEND] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_SEND] );
+#else
+/*
+ * In our case LDU == N: send lengthS*LDU contiguous doubles directly.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
+                               MPI_DOUBLE, partner, Cmsgid, comm );
+#endif
+      }
+
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#if 0
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_RECV] );
+#endif
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
+/*
+ * End of HPL_rollT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c
new file mode 100644
index 000000000..202611e7f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c
@@ -0,0 +1,303 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadN
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadN spreads the local array containing local pieces of U, so
+ * that on exit from this function, a piece of U is contained in every
+ * process row. The array IPLEN contains the number of rows of U,  that
+ * should be spread on any given process row. This function also probes
+ * for the column panel PBCST (HPL_bcast is called after every step while
+ * *IFLAG is HPL_KEEP_TESTING).  If PBCST is NULL on input, this probing
+ * mechanism will be disabled (presumably inside HPL_bcast - confirm).
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies  the  local number of columns of U. N
+ *         must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U, as a position in the IPMAP (sorted) ordering.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i] is the number of rows of U owned by the
+ *         processes  before process IPMAP[i],  with the convention that
+ *         IPLEN[nprow]  is the total number of rows.  In  other  words,
+ *         IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+ *         should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype              type;
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left: only sorted positions 0..SRCDIST take part
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1; /* SRCDIST+1 participants */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* nothing to do */
+/* ip2 = largest power of two <= npm1, mask = 2*ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[nprow]; /* assigned but not read in this (left) branch */
+
+      do
+      {
+         mask ^= ip2; /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 ) /* only these processes act at this step */
+         {
+/* ibuf = offset in U of the piece exchanged at this step, lbuf = its length */
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); 
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2; /* peer across bit ip2 */
+
+               if( mydist & ip2 ) /* bit ip2 set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow ) /* bit clear: send if the peer exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+/* Halve the step; il (IPLEN index) and mydist2 follow the half this process is in */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1; /* nprow: sorted positions SRCDIST..end */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* nothing to do */
+/* ip2 = largest power of two <= npm1, mask = 2*ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[SRCDIST+nprow]; /* entry at the grid's nprow; used when k >= nprow */
+/*
+ * Spread U to the right - offset the IPLEN, and IPMAP arrays
+ */
+      do
+      {
+         mask ^= ip2; /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 ) /* only these processes act at this step */
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+/* ibuf = offset in U of the piece exchanged at this step, lbuf = its length */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2; /* peer across bit ip2 */
+
+               if( mydist & ip2 ) /* bit ip2 set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow ) /* bit clear: send if the peer exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+/* Halve the step; il (IPLEN index) and mydist2 follow the half this process is in */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
+/*
+ * End of HPL_spreadN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c
new file mode 100644
index 000000000..1adf93507
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c
@@ -0,0 +1,372 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadT
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadT spreads  the local array containing local pieces of U, so
+ * that on exit from this function, a piece of U is contained in every
+ * process row.  The array  IPLEN  contains the number of columns of U,
+ * that should be spread on any given process row.  This function  also
+ * probes for the column panel PBCST  (HPL_bcast is called  after every
+ * step while *IFLAG is HPL_KEEP_TESTING). If PBCST is NULL on input, this
+ * probing mechanism is disabled (presumably inside HPL_bcast - confirm).
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the local number of rows of U. N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U, as a position in the IPMAP (sorted) ordering.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that  IPLEN[i]  is the number of columns of  U  owned
+ *         by the processes before process IPMAP[i], with the convention
+ *         that IPLEN[nprow] is the total number of columns.   In  other
+ *         words, IPLEN[i+1] - IPLEN[i] is the local number  of  columns
+ *         of U that should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype              type;
+#endif
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1; /* SRCDIST+1 participants */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* nothing to do */
+/* ip2 = largest power of two <= npm1, mask = 2*ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[nprow]; /* assigned but not read in this (left) branch */
+
+      do
+      {
+         mask ^= ip2; /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 ) /* only these processes act at this step */
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );
+/* ibuf = offset in U of the piece exchanged at this step, lbuf = its length */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2; /* peer across bit ip2 */
+
+               if( mydist & ip2 ) /* bit ip2 set: this process receives */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is N here: plain lbuf*N doubles, no MPI datatype (relies on LDU==N)
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* bit clear: send if the peer exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is N here: plain lbuf*N doubles, no MPI datatype (relies on LDU==N)
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+/* Halve the step; il (IPLEN index) and mydist2 follow the half this process is in */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1; /* nprow: sorted positions SRCDIST..end */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* nothing to do */
+/* ip2 = largest power of two <= npm1, mask = 2*ip2 - 1 */
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+      mydist2 = ( mydist -= SRCDIST ); il = ip2; /* mydist: distance to the source */
+/*
+ * Spread to the right - offset the IPLEN and IPMAP arrays
+ */
+      lgth = IPLEN[SRCDIST+nprow]; /* entry at the grid's nprow; used when k >= nprow */
+/*
+ * Spread U
+ */
+      do
+      {
+         mask ^= ip2; /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 ) /* only these processes act at this step */
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+/* ibuf = offset in U of the piece exchanged at this step, lbuf = its length */
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2; /* peer across bit ip2 */
+
+               if( mydist & ip2 ) /* bit ip2 set: this process receives */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is N here: plain lbuf*N doubles, no MPI datatype (relies on LDU==N)
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* bit clear: send if the peer exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is N here: plain lbuf*N doubles, no MPI datatype (relies on LDU==N)
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+/* Halve the step; il (IPLEN index) and mydist2 follow the half this process is in */
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
+/*
+ * End of HPL_spreadT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule below: touching any of them rebuilds all objects.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects archived into $(HPLlib) by the lib.grd rule; each one has an explicit compile rule below.
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+#
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after the objects have been archived into $(HPLlib).
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object: compile ../<name>.c with $(CC) $(CCFLAGS); every object also depends on $(INCdep).
+HPL_pipid.o            : ../HPL_pipid.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pipid.c
+HPL_plindx0.o          : ../HPL_plindx0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx0.c
+HPL_pdlaswp00N.o       : ../HPL_pdlaswp00N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00N.c
+HPL_pdlaswp00T.o       : ../HPL_pdlaswp00T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00T.c
+HPL_perm.o             : ../HPL_perm.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_perm.c
+HPL_logsort.o          : ../HPL_logsort.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_logsort.c
+HPL_plindx10.o         : ../HPL_plindx10.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx10.c
+HPL_plindx1.o          : ../HPL_plindx1.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx1.c
+HPL_spreadN.o          : ../HPL_spreadN.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadN.c
+HPL_spreadT.o          : ../HPL_spreadT.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadT.c
+HPL_rollN.o            : ../HPL_rollN.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollN.c
+HPL_rollT.o            : ../HPL_rollT.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollT.c
+HPL_equil.o            : ../HPL_equil.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_equil.c
+HPL_pdlaswp01N.o       : ../HPL_pdlaswp01N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01N.c
+HPL_pdlaswp01T.o       : ../HPL_pdlaswp01T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01T.c
+HPL_pdupdateNN.o       : ../HPL_pdupdateNN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNN.c
+HPL_pdupdateNT.o       : ../HPL_pdupdateNT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNT.c
+HPL_pdupdateTN.o       : ../HPL_pdupdateTN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTN.c
+HPL_pdupdateTT.o       : ../HPL_pdupdateTT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTT.c
+HPL_pdtrsv.o           : ../HPL_pdtrsv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtrsv.c
+HPL_pdgesv0.o          : ../HPL_pdgesv0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv0.c
+HPL_pdgesvK1.o         : ../HPL_pdgesvK1.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK1.c
+HPL_pdgesvK2.o         : ../HPL_pdgesvK2.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK2.c
+HPL_pdgesv.o           : ../HPL_pdgesv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv.c
+#
+# ######################################################################
+# Remove the object files and the .grd stamp files from this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am
new file mode 100644
index 000000000..452ea5f06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am
@@ -0,0 +1,13 @@
+## Automake input for the xhpl program: compiles the sources listed below and links them with ../src/libhpl.a.
+AM_CPPFLAGS = -I$(top_srcdir)/include
+
+xhpl_LDADD = ../src/libhpl.a
+
+bin_PROGRAMS = xhpl
+
+xhpl_SOURCES =  \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in
new file mode 100644
index 000000000..034564545
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in
@@ -0,0 +1,698 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+bin_PROGRAMS = xhpl$(EXEEXT)
+subdir = testing
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \
+	matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \
+	matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \
+	matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \
+	ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \
+	ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \
+	ptimer/HPL_ptimer_cputime.$(OBJEXT) \
+	ptimer/HPL_ptimer_walltime.$(OBJEXT)
+xhpl_OBJECTS = $(am_xhpl_OBJECTS)
+xhpl_DEPENDENCIES = ../src/libhpl.a
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \
+	matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \
+	matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \
+	matgen/$(DEPDIR)/HPL_xjumpm.Po \
+	pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \
+	ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \
+	ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(xhpl_SOURCES)
+DIST_SOURCES = $(xhpl_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(top_srcdir)/include
+xhpl_LDADD = ../src/libhpl.a
+xhpl_SOURCES = \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu testing/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	      echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	      $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+matgen/$(am__dirstamp):
+	@$(MKDIR_P) matgen
+	@: > matgen/$(am__dirstamp)
+matgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) matgen/$(DEPDIR)
+	@: > matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen
+	@: > pmatgen/$(am__dirstamp)
+pmatgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen/$(DEPDIR)
+	@: > pmatgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \
+	pmatgen/$(DEPDIR)/$(am__dirstamp)
+ptest/$(am__dirstamp):
+	@$(MKDIR_P) ptest
+	@: > ptest/$(am__dirstamp)
+ptest/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptest/$(DEPDIR)
+	@: > ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptimer/$(am__dirstamp):
+	@$(MKDIR_P) ptimer
+	@: > ptimer/$(am__dirstamp)
+ptimer/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptimer/$(DEPDIR)
+	@: > ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+
+xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) 
+	@rm -f xhpl$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f matgen/*.$(OBJEXT)
+	-rm -f pmatgen/*.$(OBJEXT)
+	-rm -f ptest/*.$(OBJEXT)
+	-rm -f ptimer/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f matgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f matgen/$(am__dirstamp)
+	-rm -f pmatgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pmatgen/$(am__dirstamp)
+	-rm -f ptest/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptest/$(am__dirstamp)
+	-rm -f ptimer/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptimer/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-am
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-binPROGRAMS install-data install-data-am \
+	install-dvi install-dvi-am install-exec install-exec-am \
+	install-html install-html-am install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c
new file mode 100644
index 000000000..c14ef0fd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dmatgen
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_dmatgen
+( M, N, A, LDA, ISEED )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dmatgen generates (or regenerates) a random matrix A.
+ * A is filled column by column from HPL_rand(); M <= 0 or N <= 0 is a no-op.
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input)                       const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (input)                       const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * A       (output)                      double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         this  array  contains   the   coefficients  of  the  randomly
+ *         generated matrix.
+ *
+ * LDA     (input)                       const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,M).
+ *
+ * ISEED   (input)                       const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd[2], ia1[2], ic1[2], iran1[2],
+                              jseed[2], mult[2];
+   int                        i, incA = LDA - M, j;   /* incA: elements to skip from the end of one column to the start of the next */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* empty matrix: A is left untouched */
+/*
+ * Initialize the random sequence (multiplier, increment and seed are each a pair of ints)
+ */
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+   /* NOTE(review): presumably fills iran1/ia1/ic1 with the start value and per-step constants for a jump of 1 -- confirm in HPL_xjumpm */
+   HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 );
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Generate an M by N matrix: M consecutive entries per column, columns LDA entries apart
+ */
+   for( j = 0; j < N; A += incA, j++ )
+      for( i = 0; i < M; A++, i++ ) *A = HPL_rand();
+/*
+ * End of HPL_dmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c
new file mode 100644
index 000000000..4d4dc4db5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_jumpit
+(
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM
+)
+#else
+void HPL_jumpit
+( MULT, IADD, IRANN, IRANM )
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_jumpit jumps in the random sequence from the number X(n) encoded
+ * in IRANN to the number X(m) encoded in IRANM using the constants A and
+ * C encoded in MULT and IADD: X(m) = A * X(n) + C. A and C depend on m
+ * and n, see the function HPL_xjumpm in order to initialize them. The
+ * result is also handed to HPL_setran( 0, ... ) before returning.
+ *
+ * Arguments
+ * =========
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2, that contains the
+ *         32-lower and 32-higher bits of the constant A.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2, that contains the
+ *         32-lower and 32-higher bits of the constant C.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry,  IRANN  is an array of dimension 2,  that contains 
+ *         the 32-lower and 32-higher bits of the encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.  On exit, this
+ *         array contains respectively the 32-lower and  32-higher bits
+ *         of the encoding of X(m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                          j[2];   /* intermediate product IRANN * MULT */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_lmul( IRANN, MULT, j );              /* j     = IRANN * MULT;  */
+   HPL_ladd( j, IADD, IRANM );              /* IRANM = j     + IADD;  */
+   HPL_setran( 0, IRANM );                  /* irand = IRANM          */
+/*
+ * End of HPL_jumpit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c
new file mode 100644
index 000000000..0d4e4c08c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c
@@ -0,0 +1,126 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_ladd
+(
+   int *                            J,
+   int *                            K,
+   int *                            I
+)
+#else
+void HPL_ladd
+( J, K, I )
+   int *                            J;
+   int *                            K;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ladd adds two long positive integers K and J, discarding the carry
+ * out of bit 63 (i.e. modulo 2^64), and puts the result into I. I, J and
+ * K are each encoded on 64 bits using an array of 2 integers: the 32-lower
+ * bits are stored in the first entry of each array, the 32-higher bits in
+ * the second entry.
+ *
+ * Arguments
+ * =========
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   unsigned int        itmp0, itmp1;
+   unsigned int        ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16;
+   unsigned int        ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16;
+   unsigned int        jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16;
+   unsigned int        jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16;
+   /* ktmp0..ktmp3 / jtmp0..jtmp3: the four 16-bit pieces of K / J, least significant first */
+/* ..
+ * .. Executable Statements ..
+ */
+/* Each 32-bit word is added as two 16-bit halves; carries ripple upward.
+ *    K[1] K[0] K  I[0]  = (K[0]+J[0]) % 2^32
+ *    XXXX XXXX    carry = (K[0]+J[0]) / 2^32
+ *
+ * +  J[1] J[0] J  I[1] = K[1] + J[1] + carry
+ *    XXXX XXXX    I[1] = I[1] % 2^32
+ *    -------------
+ *    I[1] I[0]
+ *    XXXX XXXX I
+ */
+   itmp0 = ktmp0 + jtmp0;                                        /* bits  0-15 */
+   itmp1 = itmp0 >> 16;         I[0] = itmp0 - (itmp1 << 16 );
+   itmp1 += ktmp1 + jtmp1;      I[0] |= (itmp1 & 65535) << 16;   /* bits 16-31, plus carry */
+   itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2;                        /* bits 32-47, plus carry */
+   I[1] = itmp0 - ((itmp0 >> 16 ) << 16);
+   itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3;                        /* bits 48-63, plus carry */
+   I[1] |= (itmp1 & 65535) << 16;                                /* carry out of bit 63 is dropped */
+/*
+ * End of HPL_ladd
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c
new file mode 100644
index 000000000..254b192f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_lmul
+(
+   int *                            K,
+   int *                            J,
+   int *                            I
+)
+#else
+void HPL_lmul
+( K, J, I )
+   int *                            K;
+   int *                            J;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_lmul multiplies two long positive integers K and J, keeping only
+ * the low 64 bits of the product, and puts the result into I. I, J and K
+ * are encoded on 64 bits using an array of 2 integers. The 32-lower bits
+ * are stored in the first entry of each array, the 32-higher bits in the
+ * second entry of each array. For efficiency purposes, the intrinsic
+ * modulo function is inlined (as masks with 65535 and shifts by 16).
+ *
+ * Arguments
+ * =========
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        r, c;
+   unsigned int               kk[4], jj[4], res[5];   /* 16-bit pieces, least significant first; res[4] only absorbs the last carry */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Multiplication is done 16 bits at a time: piece c of the result sums
+ * kk[r] * jj[c-r] for r = 0..c. Multiplying two 16-bit integers yields
+ * a 32-bit result. The lower 16-bits of the result are kept in I, and
+ * the higher 16-bits are carried over to the next piece.
+ */
+   for (c = 0; c < 2; ++c) {                          /* split K and J into 16-bit pieces */
+     kk[2*c] = K[c] & 65535;
+     kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535;
+     jj[2*c] = J[c] & 65535;
+     jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535;
+   }
+
+   res[0] = 0;
+   for (c = 0; c < 4; ++c) {
+     res[c+1] = (res[c] >> 16) & 65535;               /* carries gathered in res[c] may themselves exceed 16 bits */
+     res[c] &= 65535;
+     for (r = 0; r < c+1; ++r) {
+       res[c] = kk[r] * jj[c-r] + (res[c] & 65535);   /* running low 16 bits plus the next partial product */
+       res[c+1] += (res[c] >> 16) & 65535;            /* its high 16 bits go to the next piece */
+     }
+   }
+
+   for (c = 0; c < 2; ++c)                            /* repack pieces 0..3 into I; res[4] is not used */
+     I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535));
+/*
+ * End of HPL_lmul
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c
new file mode 100644
index 000000000..fe4e12f5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c
@@ -0,0 +1,94 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_rand( void )
+#else
+double HPL_rand()
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rand generates  the next number  in the  random  sequence.  This
+ * function  ensures  that this number lies in the interval (-0.5, 0.5].
+ *  
+ * The static array irand contains the information (2 integers) required
+ * to generate the next number in the sequence X(n). HPL_setran( 3, j )
+ * supplies that pair in j, and the value returned here is computed as
+ * HPL_HALF - (j[0] / HPL_DIVFAC * HPL_HALF + j[1]) / HPL_DIVFAC * HPL_HALF
+ * with j[0], j[1] rebuilt from their 16-bit halves. NOTE(review):
+ * presumably HPL_setran also advances irand as X(n+1) = a * X(n) + c,
+ * using the constants a and c preliminarily stored in the arrays ias and
+ * ics as 2 pairs of integers -- confirm in HPL_setran, which performs
+ * the initialization of ias, ics and irand.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2];   /* filled in by HPL_setran( 3, j ) below */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_setran( 3, j );
+/*
+ * return number between -0.5 and 0.5; j[0] is the low word, j[1] the high word (scaled by HPL_POW16 per half)
+ */
+   return( HPL_HALF -
+           (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF +
+           (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF );
+/*
+ * End of HPL_rand
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c
new file mode 100644
index 000000000..1a3ca73aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c
@@ -0,0 +1,115 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int       ias[2], ics[2], irand[2];
+
+#ifdef STDC_HEADERS
+void HPL_setran
+(
+   const int                        OPTION,
+   int *                            IRAN
+)
+#else
+void HPL_setran
+( OPTION, IRAN )
+   const int                        OPTION;
+   int *                            IRAN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_setran reads or writes the state of the random generator kept in
+ * the static arrays  irand, ias and ics.  irand  encodes  the  current
+ * element X(n) of the sequence,  ias and ics  the constants a and c of
+ * the recurrence  X(n+1) = a*X(n) + c.  OPTION = 0, 1 or 2 copies IRAN
+ * into  irand, ias or ics  respectively.  OPTION = 3 copies irand into
+ * IRAN  and  then  advances  irand by one step of the recurrence.  Any
+ * other value of OPTION leaves both the state and IRAN untouched.
+ *
+ * Arguments
+ * =========
+ *
+ * OPTION  (local input)                 const int
+ *         On entry,  OPTION  selects  the operation to perform  on the
+ *         random generator state, as described above.
+ *
+ * IRAN    (local input/output)          int *
+ *         Array of dimension 2.  It is read when  OPTION is 0, 1 or 2,
+ *         and overwritten with the current irand when OPTION is 3.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        next[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   switch( OPTION )
+   {
+      case 0: irand[0] = IRAN[0]; irand[1] = IRAN[1]; break;
+      case 1: ias  [0] = IRAN[0]; ias  [1] = IRAN[1]; break;
+      case 2: ics  [0] = IRAN[0]; ics  [1] = IRAN[1]; break;
+      case 3: IRAN[0] = irand[0]; IRAN[1] = irand[1]; /* hand out X(n)   */
+              HPL_lmul( irand, ias, next );      /* next  = irand * ias; */
+              HPL_ladd( next, ics, irand ); break; /* irand = next + ics */
+   }
+/*
+ * End of HPL_setran
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c
new file mode 100644
index 000000000..ae70bbc16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c
@@ -0,0 +1,158 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_xjumpm
+(
+   const int                        JUMPM,
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM,
+   int *                            IAM,
+   int *                            ICM
+)
+#else
+void HPL_xjumpm
+( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM )
+   const int                        JUMPM;
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+   int *                            IAM;
+   int *                            ICM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_xjumpm builds the constants  A and C  such that  JUMPM  steps of
+ * the random sequence collapse into one:  X(n+JUMPM) = A*X(n) + C, and
+ * applies them to  IRANN.  MULT and IADD  encode the constants a and c
+ * of a single step X(n+1) = a*X(n) + c.
+ *
+ * Arguments
+ * =========
+ *
+ * JUMPM   (local input)                 const int
+ *         On entry,  JUMPM is the number of entries of the sequence to
+ *         jump over.  When JUMPM is less than or equal to zero, A and C
+ *         are not computed  and  IRANM  is set to  IRANN,  i.e. a jump
+ *         of size zero.  The loop below runs JUMPM-1 times.
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2 holding the encoded
+ *         constant  a  of the single step  X(n+1) = a*X(n) + c.  It is
+ *         only read.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2 holding the encoded
+ *         constant  c  of the single step  X(n+1) = a*X(n) + c.  It is
+ *         only read.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry,  IRANN  is  an array of dimension 2  holding  the
+ *         encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.   On exit,  it
+ *         holds the encoding of  X(n+JUMPM),  or a copy of  IRANN  when
+ *         JUMPM is less than or equal to zero.
+ *
+ * IAM     (local output)                int *
+ *         On entry, IAM is an array of dimension 2. On exit, when JUMPM
+ *         is greater than zero,  it holds  the encoded  constant  A  to
+ *         jump from X(n) to X(n+JUMPM).  It is also used as the running
+ *         product  while the constants are accumulated.  When  JUMPM is
+ *         less than or equal to zero,  this array  is  neither read nor
+ *         written.
+ *
+ * ICM     (local output)                int *
+ *         On entry, ICM is an array of dimension 2. On exit, when JUMPM
+ *         is greater than zero,  it holds  the encoded  constant  C  to
+ *         jump from X(n) to X(n+JUMPM).  It is also used as the running
+ *         sum   while  the constants are accumulated.   When  JUMPM  is
+ *         less than or equal to zero,  this array  is  neither read nor
+ *         written.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        prod[2], left;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( JUMPM <= 0 )
+   {                                        /* IRANM = IRANN          */
+      IRANM[0] = IRANN[0];
+      IRANM[1] = IRANN[1];
+      return;
+   }
+/* start from the one-step constants, then compose JUMPM-1 more steps */
+   IAM[0] = MULT[0]; IAM[1] = MULT[1];      /* IAM   = MULT;          */
+   ICM[0] = IADD[0]; ICM[1] = IADD[1];      /* ICM   = IADD;          */
+   for( left = JUMPM - 1; left > 0; left-- )
+   {
+      HPL_lmul( IAM, MULT, prod );          /* prod  = IAM   * MULT;  */
+      IAM[0] = prod[0]; IAM[1] = prod[1];   /* IAM   = prod;          */
+      HPL_lmul( ICM, MULT, prod );          /* prod  = ICM   * MULT;  */
+      HPL_ladd( IADD, prod, ICM );          /* ICM   = IADD  + prod;  */
+   }
+   HPL_lmul( IRANN, IAM, prod );            /* prod  = IRANN * IAM;   */
+   HPL_ladd( prod, ICM, IRANM );            /* IRANM = prod  + ICM;   */
+/*
+ * End of HPL_xjumpm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers every object below depends on (INCdir is expected from Make.inc)
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_matgen.h
+#
+## Object files ########################################################
+#
+HPL_matobj       = \
+   HPL_dmatgen.o          HPL_ladd.o             HPL_lmul.o             \
+   HPL_xjumpm.o           HPL_jumpit.o           HPL_rand.o             \
+   HPL_setran.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files: a stray file must not mask them
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp file recording that the objects were archived into HPLlib
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One explicit rule per object: built from ../<name>.c, rebuilt when INCdep changes
+HPL_dmatgen.o          : ../HPL_dmatgen.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dmatgen.c
+HPL_ladd.o             : ../HPL_ladd.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ladd.c
+HPL_lmul.o             : ../HPL_lmul.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_lmul.c
+HPL_xjumpm.o           : ../HPL_xjumpm.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_xjumpm.c
+HPL_jumpit.o           : ../HPL_jumpit.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_jumpit.c
+HPL_rand.o             : ../HPL_rand.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rand.c
+HPL_setran.o           : ../HPL_setran.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_setran.c
+#
+# ######################################################################
+# Removes the objects and the lib.grd stamp only; the archive is left in place
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
new file mode 100644
index 000000000..2d129c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmatgen
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_pdmatgen
+( GRID, M, N, NB, A, LDA, ISEED )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmatgen generates (or regenerates) a parallel random matrix A.
+ * Each process fills its local mp x nq part of A,  column by column.
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * A       (local output)                double *
+ *         On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+ *         On exit, this array contains the coefficients of the randomly
+ *         generated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ISEED   (global input)                const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd [2], ia1  [2], ia2  [2], ia3  [2],
+                              ia4  [2], ia5  [2], ib1  [2], ib2  [2],
+                              ib3  [2], ic1  [2], ic2  [2], ic3  [2],
+                              ic4  [2], ic5  [2], iran1[2], iran2[2],
+                              iran3[2], iran4[2], itmp1[2], itmp2[2],
+                              itmp3[2], jseed[2], mult [2];
+   int                        ib, iblk, ik, jb, jblk, jk, jump1, jump2,
+                              jump3, jump4, jump5, jump6, jump7, lmb,
+                              lnb, mblks, mp, mycol, myrow, nblks,
+                              npcol, nprow, nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+/*
+ * Generate an M by N matrix starting in process (0,0)
+ */
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( ( mp <= 0 ) || ( nq <= 0 ) ) return;
+/*
+ * Local number of blocks and size of the last one
+ */
+   mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB;
+   nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB;
+/*
+ * Compute multiplier/adder for the jumps below (length in elements)
+ */
+   jump1 = 1;  jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB;
+   jump5 = NB; jump6 = mycol;      jump7 = myrow * NB;
+
+   HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1,   ic1   ); /* 1 element         */
+   HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2,   ic2   ); /* nprow*NB elements */
+   HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3,   ic3   ); /* M elements        */
+   HPL_xjumpm( jump4, ia3,  ic3,  iran1, itmp1, ia4,   ic4   ); /* npcol*NB * M      */
+   HPL_xjumpm( jump5, ia3,  ic3,  iran1, itmp1, ia5,   ic5   ); /* NB * M            */
+   HPL_xjumpm( jump6, ia5,  ic5,  iran1, itmp3, itmp1, itmp2 ); /* mycol * (NB * M)  */
+   HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); /* + myrow*NB        */
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * ib1/ib2/ib3: sequence value at start of row block/column/column block
+ */
+   ib1[0] = iran1[0]; ib1[1] = iran1[1];
+   ib2[0] = iran1[0]; ib2[1] = iran1[1];
+   ib3[0] = iran1[0]; ib3[1] = iran1[1];
+
+   for( jblk = 0; jblk < nblks; jblk++ )
+   {
+      jb = ( jblk == nblks - 1 ? lnb : NB );
+      for( jk = 0; jk < jb; jk++ )
+      {
+         for( iblk = 0; iblk < mblks; iblk++ )
+         {
+            ib = ( iblk == mblks - 1 ? lmb : NB );
+            for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand();
+            HPL_jumpit( ia2, ic2, ib1, iran2 );
+            ib1[0] = iran2[0]; ib1[1] = iran2[1];
+         }
+         A += LDA - mp;            /* mp entries written: step to next column */
+         HPL_jumpit( ia3, ic3, ib2, iran3 );
+         ib1[0] = iran3[0]; ib1[1] = iran3[1];
+         ib2[0] = iran3[0]; ib2[1] = iran3[1];
+      }
+      HPL_jumpit( ia4, ic4, ib3, iran4 );
+      ib1[0] = iran4[0]; ib1[1] = iran4[1];
+      ib2[0] = iran4[0]; ib2[1] = iran4[1];
+      ib3[0] = iran4[0]; ib3[1] = iran4[1];
+   }
+/*
+ * End of HPL_pdmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers whose modification forces the objects below to be rebuilt.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+#
+HPL_pmaobj       = \
+   HPL_pdmatgen.o
+#
+## Targets #############################################################
+# all, lib and clean are actions, not files; .PHONY keeps them runnable.
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib clean
+# lib.grd is a stamp file touched once the objects are in $(HPLlib).
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+HPL_pdmatgen.o         : ../HPL_pdmatgen.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdmatgen.c
+#
+# ######################################################################
+# Removes the object files and the lib.grd stamp from this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat
new file mode 100644
index 000000000..47aee883e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat
@@ -0,0 +1,31 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+4            # of problems sizes (N)
+29 30 34 35  Ns
+4            # of NBs
+1 2 3 4      NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+3            # of process grids (P x Q)
+2 1 4        Ps
+2 4 1        Qs
+16.0         threshold
+3            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+2            # of recursive stopping criterium
+2 4          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+3            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+0            DEPTHs (>=0)
+2            SWAP (0=bin-exch,1=long,2=mix)
+64           swapping threshold
+0            L1 in (0=transposed,1=no-transposed) form
+0            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c
new file mode 100644
index 000000000..5e4050f48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c
@@ -0,0 +1,293 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int main
+(
+   int                        ARGC,
+   char                       * * ARGV
+)
+#else
+int main( ARGC, ARGV )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        ARGC;
+/*
+ * .. Array Arguments ..
+ */
+   char                       * * ARGV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * main is the main driver program for testing the HPL routines.  It is
+ * driven by a short data file named "HPL.dat" and calls HPL_pdtest for
+ * every parameter combination on each process grid this process is in.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        nval  [HPL_MAX_PARAM],
+                              nbval [HPL_MAX_PARAM],
+                              pval  [HPL_MAX_PARAM],
+                              qval  [HPL_MAX_PARAM],
+                              nbmval[HPL_MAX_PARAM],
+                              ndvval[HPL_MAX_PARAM],
+                              ndhval[HPL_MAX_PARAM];
+
+   HPL_T_FACT                 pfaval[HPL_MAX_PARAM],
+                              rfaval[HPL_MAX_PARAM];
+
+   HPL_T_TOP                  topval[HPL_MAX_PARAM];
+
+   HPL_T_grid                 grid;
+   HPL_T_palg                 algo;
+   HPL_T_test                 test;
+   int                        L1notran, Unotran, align, equil, in, inb,
+                              inbm, indh, indv, ipfa, ipq, irfa, itop,
+                              mycol, myrow, ns, nbs, nbms, ndhs, ndvs,
+                              npcol, npfs, npqs, nprow, nrfs, ntps, 
+                              rank, size, tswap;
+   HPL_T_ORDER                pmapping;
+   HPL_T_FACT                 rpfa;
+   HPL_T_SWAP                 fswap;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Init( &ARGC, &ARGV );
+#ifdef HPL_CALL_VSIPL
+   vsip_init((void*)0);
+#endif
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Read and check validity of test parameters from input file
+ * Example of the expected file layout:
+ * HPL Version 1.0, Linpack benchmark input file
+ * Your message here
+ * HPL.out      output file name (if any)
+ * 6            device out (6=stdout,7=stderr,file)
+ * 4            # of problems sizes (N)
+ * 29 30 34 35  Ns
+ * 4            # of NBs
+ * 1 2 3 4      NBs
+ * 0            PMAP process mapping (0=Row-,1=Column-major)
+ * 3            # of process grids (P x Q)
+ * 2 1 4        Ps
+ * 2 4 1        Qs
+ * 16.0         threshold
+ * 3            # of panel fact
+ * 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+ * 2            # of recursive stopping criterium
+ * 2 4          NBMINs (>= 1)
+ * 1            # of panels in recursion
+ * 2            NDIVs
+ * 3            # of recursive panel fact.
+ * 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+ * 1            # of broadcast
+ * 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ * 1            # of lookahead depth
+ * 0            DEPTHs (>=0)
+ * 2            SWAP (0=bin-exch,1=long,2=mix)
+ * 4            swapping threshold
+ * 0            L1 in (0=transposed,1=no-transposed) form
+ * 0            U  in (0=transposed,1=no-transposed) form
+ * 1            Equilibration (0=no,1=yes)
+ * 8            memory alignment in double (> 0)
+ */
+   HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval,
+               &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval,
+               &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran,
+               &Unotran, &equil, &align );
+/*
+ * Loop over different process grids - Define process grid. Go to bottom
+ * of process grid loop if this case does not use my process.
+ */
+   for( ipq = 0; ipq < npqs; ipq++ )
+   {
+      (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq],
+                            &grid );
+      (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol );
+
+      if( ( myrow < 0 ) || ( myrow >= nprow ) ||
+          ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs;
+
+      for( in = 0; in < ns; in++ )
+      {                            /* Loop over various problem sizes */
+       for( inb = 0; inb < nbs; inb++ )
+       {                        /* Loop over various blocking factors */
+        for( indh = 0; indh < ndhs; indh++ )
+        {                       /* Loop over various lookahead depths */
+         for( itop = 0; itop < ntps; itop++ )
+         {                  /* Loop over various broadcast topologies */
+          for( irfa = 0; irfa < nrfs; irfa++ )
+          {             /* Loop over various recursive factorizations */
+           for( ipfa = 0; ipfa < npfs; ipfa++ )
+           {                /* Loop over various panel factorizations */
+            for( inbm = 0; inbm < nbms; inbm++ )
+            {        /* Loop over various recursive stopping criteria */
+             for( indv = 0; indv < ndvs; indv++ )
+             {          /* Loop over various # of panels in recursion */
+/*
+ * Set up the algorithm parameters for the current loop indices
+ */
+              algo.btopo = topval[itop]; algo.depth = ndhval[indh];
+              algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv];
+
+              algo.pfact = rpfa = pfaval[ipfa];
+
+              if( L1notran != 0 )     /* nonzero: pick the ...N kernels */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrN;
+                 else                           algo.pffun = HPL_pdpanrlN;
+
+                 algo.rfact = rpfa = rfaval[irfa];     /* rpfa reused */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrN;
+                 else                           algo.rffun = HPL_pdrpanrlN;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN;
+                 else               algo.upfun = HPL_pdupdateNT;
+              }
+              else                    /* zero: pick the ...T kernels */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrT;
+                 else                           algo.pffun = HPL_pdpanrlT;
+
+                 algo.rfact = rpfa = rfaval[irfa];     /* rpfa reused */
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrT;
+                 else                           algo.rffun = HPL_pdrpanrlT;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN;
+                 else               algo.upfun = HPL_pdupdateTT;
+              }
+
+              algo.fswap = fswap; algo.fsthr = tswap;
+              algo.equil = equil; algo.align = align;
+
+              HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] );
+
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+      (void) HPL_grid_exit( &grid );
+label_end_of_npqs: ;     /* goto target above: skips HPL_grid_exit */
+   }
+/*
+ * Print ending messages, close output file, exit.
+ */
+   if( rank == 0 )       /* only rank 0 of MPI_COMM_WORLD reports */
+   {
+      test.ktest = test.kpass + test.kfail + test.kskip;
+#ifndef HPL_DETAILED_TIMING
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+#else
+      if( test.thrsh > HPL_rzero )
+         HPL_fprintf( test.outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+#endif
+
+      HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest,
+                   "tests with the following results:" );
+      if( test.thrsh > HPL_rzero )  /* report residual-check results */
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed and passed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kfail,
+                      "tests completed and failed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+      else
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed without checking," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( test.outfp, "\nEnd of Tests.\n" );
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      if( ( test.outfp != stdout ) && ( test.outfp != stderr ) )
+         (void) fclose( test.outfp );
+   }
+#ifdef HPL_CALL_VSIPL
+   vsip_finalize((void*)0);
+#endif
+   MPI_Finalize();
+   exit( 0 );
+
+   return( 0 );          /* not reached: exit() above does not return */
+/*
+ * End of main
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c
new file mode 100644
index 000000000..4ede45be6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c
@@ -0,0 +1,1182 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdinfo
+(
+   HPL_T_test *                     TEST,
+   int *                            NS,
+   int *                            N,
+   int *                            NBS,
+   int *                            NB,
+   HPL_T_ORDER *                    PMAPPIN,
+   int *                            NPQS,
+   int *                            P,
+   int *                            Q,
+   int *                            NPFS,
+   HPL_T_FACT *                     PF,
+   int *                            NBMS,
+   int *                            NBM,
+   int *                            NDVS,
+   int *                            NDV,
+   int *                            NRFS,
+   HPL_T_FACT *                     RF,
+   int *                            NTPS,
+   HPL_T_TOP *                      TP,
+   int *                            NDHS,
+   int *                            DH,
+   HPL_T_SWAP *                     FSWAP,
+   int *                            TSWAP,
+   int *                            L1NOTRAN,
+   int *                            UNOTRAN,
+   int *                            EQUIL,
+   int *                            ALIGN
+)
+#else
+void HPL_pdinfo
+( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
+   HPL_T_test *                     TEST;
+   int *                            NS;
+   int *                            N;
+   int *                            NBS;
+   int *                            NB;
+   HPL_T_ORDER *                    PMAPPIN;
+   int *                            NPQS;
+   int *                            P;
+   int *                            Q;
+   int *                            NPFS;
+   HPL_T_FACT *                     PF;
+   int *                            NBMS;
+   int *                            NBM;
+   int *                            NDVS;
+   int *                            NDV;
+   int *                            NRFS;
+   HPL_T_FACT *                     RF;
+   int *                            NTPS;
+   HPL_T_TOP *                      TP;
+   int *                            NDHS;
+   int *                            DH;
+   HPL_T_SWAP *                     FSWAP;
+   int *                            TSWAP;
+   int *                            L1NOTRAN;
+   int *                            UNOTRAN;
+   int *                            EQUIL;
+   int *                            ALIGN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdinfo reads  the  startup  information for the various tests and
+ * transmits it to all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global output)               HPL_T_test *
+ *         On entry, TEST  points to a testing data structure.  On exit,
+ *         the fields of this data structure are initialized as follows:
+ *         TEST->outfp  specifies the output file where the results will
+ *         be printed.  It is only defined and used by  the process 0 of
+ *         the grid.  TEST->thrsh specifies the threshold  value for the
+ *         test ratio.  TEST->epsil is the relative machine precision of
+ *         the distributed computer.  Finally  the test counters, kfail,
+ *         kpass, kskip, ktest are initialized to zero.
+ *
+ * NS      (global output)               int *
+ *         On exit,  NS  specifies the number of different problem sizes
+ *         to be tested. NS is less than or equal to HPL_MAX_PARAM.
+ *
+ * N       (global output)               int *
+ *         On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+ *         the first NS entries of this array contain the  problem sizes
+ *         to run the code with.
+ *
+ * NBS     (global output)               int *
+ *         On exit,  NBS  specifies the number of different distribution
+ *         blocking factors to be tested. NBS must be less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NB      (global output)               int *
+ *         On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NBS entries of this array contain the values of the
+ *         various distribution blocking factors, to run the code with.
+ *
+ * PMAPPIN (global output)               HPL_T_ORDER *
+ *         On exit,  PMAPPIN  specifies the process mapping onto the no-
+ *         des of the  MPI machine configuration.  PMAPPIN  defaults  to
+ *         row-major ordering.
+ *
+ * NPQS    (global output)               int *
+ *         On exit, NPQS  specifies the  number of different values that
+ *         can be used for P and Q, i.e., the number of process grids to
+ *         run  the  code with.  NPQS must be  less  than  or  equal  to
+ *         HPL_MAX_PARAM.
+ *
+ * P       (global output)               int *
+ *         On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of P,
+ *         the number of process rows of the  NPQS grids to run the code
+ *         with.
+ *
+ * Q       (global output)               int *
+ *         On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of Q,
+ *         the number of process columns of the  NPQS  grids to  run the
+ *         code with.
+ *
+ * NPFS    (global output)               int *
+ *         On exit, NPFS  specifies the  number of different values that
+ *         can be used for PF : the panel factorization algorithm to run
+ *         the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * PF      (global output)               HPL_T_FACT *
+ *         On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NPFS  entries  of this array  contain  the various
+ *         panel factorization algorithms to run the code with.
+ *
+ * NBMS    (global output)               int *
+ *         On exit,  NBMS  specifies  the  number  of  various recursive
+ *         stopping criteria  to be tested.  NBMS  must be  less than or
+ *         equal to HPL_MAX_PARAM.
+ *
+ * NBM     (global output)               int *
+ *         On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NBMS entries of this array contain the values
+ *         of the various recursive stopping criteria to be tested.
+ *
+ * NDVS    (global output)               int *
+ *         On exit,  NDVS  specifies  the number  of various numbers  of
+ *         panels in recursion to be tested.  NDVS is less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NDV     (global output)               int *
+ *         On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDVS entries of this array contain the values
+ *         of the various numbers of panels in recursion to be tested.
+ *
+ * NRFS    (global output)               int *
+ *         On exit, NRFS  specifies the  number of different values that
+ *         can be used for RF : the recursive factorization algorithm to
+ *         be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * RF      (global output)               HPL_T_FACT *
+ *         On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NRFS  entries  of  this array contain  the various
+ *         recursive factorization algorithms to run the code with.
+ *
+ * NTPS    (global output)               int *
+ *         On exit, NTPS  specifies the  number of different values that
+ *         can be used for the  broadcast topologies  to be tested. NTPS
+ *         is less than or equal to HPL_MAX_PARAM.
+ *
+ * TP      (global output)               HPL_T_TOP *
+ *         On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the  first NTPS  entries of this  array  contain  the various
+ *         broadcast (along rows) topologies to run the code with.
+ *
+ * NDHS    (global output)               int *
+ *         On exit, NDHS  specifies the  number of different values that
+ *         can be used for the  lookahead depths to be  tested.  NDHS is
+ *         less than or equal to HPL_MAX_PARAM.
+ *
+ * DH      (global output)               int *
+ *         On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDHS entries of this array contain the values
+ *         of lookahead depths to run the code with.  Each value is
+ *         greater than or equal to zero (0 means no lookahead).
+ *
+ * FSWAP   (global output)               HPL_T_SWAP *
+ *         On exit, FSWAP specifies the swapping algorithm to be used in
+ *         all tests.
+ *
+ * TSWAP   (global output)               int *
+ *         On exit,  TSWAP  specifies the swapping threshold as a number
+ *         of columns when the mixed swapping algorithm was chosen.
+ *
+ * L1NOTRAN (global output)              int *
+ *         On exit, L1NOTRAN specifies whether the upper triangle of the
+ *         panels of columns  should  be stored  in  no-transposed  form
+ *         (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+ *
+ * UNOTRAN (global output)               int *
+ *         On exit, UNOTRAN  specifies whether the panels of rows should
+ *         be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+ *         form (UNOTRAN=0) during their broadcast.
+ *
+ * EQUIL   (global output)               int *
+ *         On exit,  EQUIL  specifies  whether  equilibration during the
+ *         swap-broadcast  of  the  panel of rows  should  be  performed
+ *         (EQUIL=1) or not (EQUIL=0).
+ *
+ * ALIGN   (global output)               int *
+ *         On exit,  ALIGN  specifies the alignment  of  the dynamically
+ *         allocated buffers in double precision words. ALIGN is greater
+ *         than zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   char                       file[HPL_LINE_MAX], line[HPL_LINE_MAX],
+                              auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
+   FILE                       * infp;
+   int                        * iwork = NULL;
+   char                       * lineptr;
+   int                        error=0, fid, i, j, lwork, maxp, nprocs,
+                              rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Initialize the TEST data structure with default values
+ */
+   TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
+   TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
+/*
+ * Process 0 reads the input data, broadcasts to other processes and
+ * writes needed information to TEST->outfp.
+ */
+   if( rank == 0 )
+   {
+/*
+ * Open file and skip data file header
+ */
+      if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
+      { 
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "cannot open file HPL.dat" );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) fgets( auth, HPL_LINE_MAX - 2, infp );
+/*
+ * Read name and unit number for summary output file
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", file );
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num  );
+      fid = atoi( num );
+      if     ( fid == 6 ) TEST->outfp = stdout;
+      else if( fid == 7 ) TEST->outfp = stderr;
+      else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
+                    file );
+         error = 1; goto label_error;
+      }
+/*
+ * Read and check the parameter values for the tests.
+ *
+ * Problem size (>=0) (N)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); 
+      (void) sscanf( line, "%s", num ); *NS = atoi( num );
+      if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
+                    "Number of values of N is less than 1 or greater than",
+                    HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( N[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of N less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Block size (>=1) (NB)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBS = atoi( num );
+      if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NB is less than 1 or",
+                    "greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NB[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", 
+                       "Value of NB less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Process grids, mapping, (>=1) (P, Q)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num );
+      *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
+      if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of grids is less",
+                    "than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( P[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of P less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( Q[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of Q less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Check for enough processes in machine configuration
+ */
+      maxp = 0;
+      for( i = 0; i < *NPQS; i++ )
+      { nprocs   = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
+      if( maxp > size )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Need at least %d processes for these tests", maxp );
+         error = 1; goto label_error;
+      }
+/*
+ * Checking threshold value (TEST->thrsh)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
+/*
+ * Panel factorization algorithm (PF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
+      if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "number of values of PFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) PF[ i ] = HPL_CROUT;
+         else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
+         else              PF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Recursive stopping criterion (>=1) (NBM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
+      if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NBMIN",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBMS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NBM[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NBMIN less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Number of panels in recursion (>=2) (NDV)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
+      if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NDIV",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDVS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NDV[ i ] = atoi( num ) ) < 2 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NDIV less than 2" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Recursive panel factorization (RF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
+      if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of RFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NRFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) RF[ i ] = HPL_CROUT;
+         else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
+         else              RF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Broadcast topology (TP) (0=1rg, 1=1rM, 2=2rg, 3=2rM, 4=Lng, 5=LnM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
+      if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of BCAST",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NTPS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) TP[ i ] = HPL_1RING;
+         else if( j == 1 ) TP[ i ] = HPL_1RING_M;
+         else if( j == 2 ) TP[ i ] = HPL_2RING;
+         else if( j == 3 ) TP[ i ] = HPL_2RING_M;
+         else if( j == 4 ) TP[ i ] = HPL_BLONG;
+         else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
+         else              TP[ i ] = HPL_1RING_M;
+      }
+/*
+ * Lookahead depth (>=0) (NDH)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
+      if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of DEPTH",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDHS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num );
+         lineptr += strlen( num ) + 1;
+         if( ( DH[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of DEPTH less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Swapping algorithm (0,1 or 2) (FSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); j = atoi( num );
+      if(      j == 0 ) *FSWAP = HPL_SWAP00;
+      else if( j == 1 ) *FSWAP = HPL_SWAP01;
+      else if( j == 2 ) *FSWAP = HPL_SW_MIX;
+      else              *FSWAP = HPL_SWAP01;
+/*
+ * Swapping threshold (>=0) (TSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
+      if( *TSWAP <= 0 ) *TSWAP = 0;
+/*
+ * L1 in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
+      if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; 
+/*
+ * U  in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
+      if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
+/*
+ * Equilibration (0=no, 1=yes)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
+      if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
+/*
+ * Memory alignment in double precision words (> 0) (ALIGN)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
+      if( *ALIGN <= 0 ) *ALIGN = 4;
+/*
+ * Close input file
+ */
+label_error:
+      if (infp != NULL)
+      	(void) fclose( infp );
+   }
+   else { TEST->outfp = NULL; }
+/*
+ * Check for error on reading input file
+ */
+   (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
+                          MPI_COMM_WORLD );
+   if( error )
+   {
+      if( rank == 0 )
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Illegal input in file HPL.dat. Exiting ..." );
+      MPI_Finalize();
+#ifdef HPL_CALL_VSIPL
+      (void) vsip_finalize( NULL );
+#endif
+      exit( 1 );
+   }
+/*
+ * Compute and broadcast machine epsilon
+ */
+   TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
+/*
+ * Broadcast the checking threshold (TEST->thrsh)
+ */
+   (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
+                         MPI_COMM_WORLD );
+/*
+ * Broadcast array sizes
+ */
+   iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      iwork[ 0] = *NS;      iwork[ 1] = *NBS;
+      iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
+      iwork[ 3] = *NPQS;    iwork[ 4] = *NPFS;     iwork[ 5] = *NBMS;
+      iwork[ 6] = *NDVS;    iwork[ 7] = *NRFS;     iwork[ 8] = *NTPS;
+      iwork[ 9] = *NDHS;    iwork[10] = *TSWAP;    iwork[11] = *L1NOTRAN;
+      iwork[12] = *UNOTRAN; iwork[13] = *EQUIL;    iwork[14] = *ALIGN;
+   }
+   (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
+   if( rank != 0 )
+   {
+      *NS       = iwork[ 0]; *NBS   = iwork[ 1];
+      *PMAPPIN  = ( iwork[ 2] == 0 ?  HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
+      *NPQS     = iwork[ 3]; *NPFS  = iwork[ 4]; *NBMS     = iwork[ 5];
+      *NDVS     = iwork[ 6]; *NRFS  = iwork[ 7]; *NTPS     = iwork[ 8];
+      *NDHS     = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
+      *UNOTRAN  = iwork[12]; *EQUIL = iwork[13]; *ALIGN    = iwork[14];
+   }
+   if( iwork ) free( iwork );
+/*
+ * Pack information arrays and broadcast
+ */
+   lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + 
+           (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
+   
+   if (lwork < 0)
+	exit(EXIT_FAILURE); 
+
+
+   iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { iwork[j] = N [i]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { iwork[j] = NB[i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( PF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( RF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      TP[i] == HPL_1RING   ) iwork[j] = 0;
+         else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
+         else if( TP[i] == HPL_2RING   ) iwork[j] = 2;
+         else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
+         else if( TP[i] == HPL_BLONG   ) iwork[j] = 4;
+         else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
+
+      if(      *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
+      else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
+      else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
+      j++;
+   }
+   (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
+                         MPI_COMM_WORLD );
+   if ((rank != 0) && (iwork != NULL))
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { N [i] = iwork[j]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { NB[i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
+
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      iwork[j] == 0 ) TP[i] = HPL_1RING;
+         else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
+         else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
+         else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
+         else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
+         else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
+
+      if(      iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
+      else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
+      else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
+      j++;
+   
+      if( iwork ) free( iwork );
+   }
+/*
+ * regurgitate input
+ */
+   if( rank == 0 )
+   {
+      
+      if (TEST->outfp != NULL){
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "HPLinpack 2.3  --  High-Performance Linpack benchmark  --  ",
+          " December 2, 2018" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Written by A. Petitet and R. Clint Whaley,  ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Piotr Luszczek, ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Julien Langou, ",
+          "University of Colorado Denver");
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      HPL_fprintf( TEST->outfp, "\n%s\n",
+          "An explanation of the input/output parameters follows:" );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "T/V    : Wall time / encoded variant." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "N      : The order of the coefficient matrix A." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "NB     : The partitioning blocking factor." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "P      : The number of process rows." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "Q      : The number of process columns." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "Time   : Time in seconds to solve the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n\n",
+         "Gflops : Rate of execution for solving the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "The following parameter values will be used:" );
+/*
+ * Problem size
+ */
+      HPL_fprintf( TEST->outfp,       "\nN      :" );
+      for( i = 0; i < Mmin( 8, *NS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", N[i]  );
+      if( *NS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", N[i]  );
+         if( *NS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", N[i]  );
+         }
+      }
+/*
+ * Distribution blocking factor
+ */
+      HPL_fprintf( TEST->outfp,       "\nNB     :" );
+      for( i = 0; i < Mmin( 8, *NBS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NB[i] );
+      if( *NBS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NB[i] );
+         if( *NBS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
+         }
+      }
+/*
+ * Process mapping
+ */
+      HPL_fprintf( TEST->outfp,       "\nPMAP   :" );
+      if(      *PMAPPIN == HPL_ROW_MAJOR    )
+         HPL_fprintf( TEST->outfp, " Row-major process mapping" );
+      else if( *PMAPPIN == HPL_COLUMN_MAJOR )
+         HPL_fprintf( TEST->outfp, " Column-major process mapping" );
+/*
+ * Process grid
+ */
+      HPL_fprintf( TEST->outfp,       "\nP      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", P[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", P[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", P[i]  );
+         }
+      }
+      HPL_fprintf( TEST->outfp,       "\nQ      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", Q[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", Q[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", Q[i]  );
+         }
+      }
+/*
+ * Panel Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nPFACT  :" );
+      for( i = 0; i < Mmin( 8, *NPFS ); i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( PF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( PF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NPFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPFS ); i++ )
+         {
+            if(      PF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( PF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( PF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NPFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPFS; i++ )
+            {
+               if(      PF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( PF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( PF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Recursive stopping criterion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNBMIN  :" );
+      for( i = 0; i < Mmin( 8, *NBMS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NBM[i]  );
+      if( *NBMS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBMS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NBM[i]  );
+         if( *NBMS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBMS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NBM[i]  );
+         }
+      }
+/*
+ * Number of panels in recursion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNDIV   :" );
+      for( i = 0; i < Mmin( 8, *NDVS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NDV[i]  );
+      if( *NDVS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDVS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NDV[i]  );
+         if( *NDVS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDVS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NDV[i]  );
+         }
+      }
+/*
+ * Recursive Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nRFACT  :" );
+      for( i = 0; i < Mmin( 8, *NRFS ); i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( RF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( RF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NRFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NRFS ); i++ )
+         {
+            if(      RF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( RF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( RF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NRFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NRFS; i++ )
+            {
+               if(      RF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( RF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( RF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Broadcast topology
+ */
+      HPL_fprintf( TEST->outfp,       "\nBCAST  :" );
+      for( i = 0; i < Mmin( 8, *NTPS ); i++ )
+      {
+         if(      TP[i] == HPL_1RING   )
+            HPL_fprintf( TEST->outfp,       "   1ring " );
+         else if( TP[i] == HPL_1RING_M )
+            HPL_fprintf( TEST->outfp,       "  1ringM " );
+         else if( TP[i] == HPL_2RING   )
+            HPL_fprintf( TEST->outfp,       "   2ring " );
+         else if( TP[i] == HPL_2RING_M )
+            HPL_fprintf( TEST->outfp,       "  2ringM " );
+         else if( TP[i] == HPL_BLONG   )
+            HPL_fprintf( TEST->outfp,       "   Blong " );
+         else if( TP[i] == HPL_BLONG_M )
+            HPL_fprintf( TEST->outfp,       "  BlongM " );
+      }
+      if( *NTPS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NTPS ); i++ )
+         {
+            if(      TP[i] == HPL_1RING   )
+               HPL_fprintf( TEST->outfp,       "   1ring " );
+            else if( TP[i] == HPL_1RING_M )
+               HPL_fprintf( TEST->outfp,       "  1ringM " );
+            else if( TP[i] == HPL_2RING   )
+               HPL_fprintf( TEST->outfp,       "   2ring " );
+            else if( TP[i] == HPL_2RING_M )
+               HPL_fprintf( TEST->outfp,       "  2ringM " );
+            else if( TP[i] == HPL_BLONG   )
+               HPL_fprintf( TEST->outfp,       "   Blong " );
+            else if( TP[i] == HPL_BLONG_M )
+               HPL_fprintf( TEST->outfp,       "  BlongM " );
+         }
+         if( *NTPS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NTPS; i++ )
+            {
+               if(      TP[i] == HPL_1RING   )
+                  HPL_fprintf( TEST->outfp,       "   1ring " );
+               else if( TP[i] == HPL_1RING_M )
+                  HPL_fprintf( TEST->outfp,       "  1ringM " );
+               else if( TP[i] == HPL_2RING   )
+                  HPL_fprintf( TEST->outfp,       "   2ring " );
+               else if( TP[i] == HPL_2RING_M )
+                  HPL_fprintf( TEST->outfp,       "  2ringM " );
+               else if( TP[i] == HPL_BLONG   )
+                  HPL_fprintf( TEST->outfp,       "   Blong " );
+               else if( TP[i] == HPL_BLONG_M )
+                  HPL_fprintf( TEST->outfp,       "  BlongM " );
+            }
+         }
+      }
+/*
+ * Lookahead depths
+ */
+      HPL_fprintf( TEST->outfp,       "\nDEPTH  :" );
+      for( i = 0; i < Mmin( 8, *NDHS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", DH[i]  );
+      if( *NDHS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDHS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", DH[i]  );
+         if( *NDHS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDHS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", DH[i]  );
+         }
+      }
+/*
+ * Swapping algorithm
+ */
+      HPL_fprintf( TEST->outfp,       "\nSWAP   :" );
+      if(      *FSWAP == HPL_SWAP00 )
+         HPL_fprintf( TEST->outfp, " Binary-exchange" );
+      else if( *FSWAP == HPL_SWAP01 )
+         HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
+      else if( *FSWAP == HPL_SW_MIX )
+         HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
+/*
+ * L1 storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nL1     :" );
+      if(      *L1NOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * U  storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nU      :" );
+      if(      *UNOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * Equilibration
+ */
+      HPL_fprintf( TEST->outfp,       "\nEQUIL  :" );
+      if(      *EQUIL != 0 )
+         HPL_fprintf( TEST->outfp, " yes" );
+      else
+         HPL_fprintf( TEST->outfp, " no" );
+/*
+ * Alignment
+ */
+      HPL_fprintf( TEST->outfp,       "\nALIGN  : %d double precision words",
+                   *ALIGN );
+
+      HPL_fprintf( TEST->outfp, "\n\n" );
+/*
+ * For testing only
+ */
+      if( TEST->thrsh > HPL_rzero )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The matrix A is randomly generated for each test." );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The following scaled residual check will be computed:" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "      ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
+         HPL_fprintf( TEST->outfp, "%s %21.6e\n",
+            "- The relative machine precision (eps) is taken to be     ",
+            TEST->epsil );
+         HPL_fprintf( TEST->outfp, "%s   %11.1f\n\n",
+            "- Computational tests pass if scaled residuals are less than      ",
+            TEST->thrsh );
+       }
+     }
+   }
+/*
+ * End of HPL_pdinfo
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c
new file mode 100644
index 000000000..73a62a7ff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c
@@ -0,0 +1,438 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtest
+(
+   HPL_T_test *                     TEST,
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        N,
+   const int                        NB
+)
+#else
+void HPL_pdtest
+( TEST, GRID, ALGO, N, NB )
+   HPL_T_test *                     TEST;
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        N;
+   const int                        NB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtest performs  one  test  given a set of parameters such as the
+ * process grid, the  problem size, the distribution blocking factor ...
+ * This function generates  the data, calls  and times the linear system
+ * solver,  checks  the  accuracy  of the  obtained vector solution  and
+ * writes this information to the file pointed to by TEST->outfp.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global input)                HPL_T_test *
+ *         On entry,  TEST  points  to a testing data structure:  outfp
+ *         specifies the output file where the results will be printed.
+ *         It is only defined and used by the process  0  of the  grid.
+ *         thrsh  specifies  the  threshold  value  for the test ratio.
+ *         Concretely, a test is declared "PASSED"  if and only if the
+ *         following inequality is satisfied:
+ *         ||Ax-b||_oo / ( epsil *
+ *                         ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+ *                          N )  < thrsh.
+ *         epsil  is the  relative machine precision of the distributed
+ *         computer. Finally the test counters, kfail, kpass, kskip and
+ *         ktest are updated as follows:  if the test passes,  kpass is
+ *         incremented by one;  if the test fails, kfail is incremented
+ *         by one; if the test is skipped, kskip is incremented by one.
+ *         ktest is left unchanged.  If thrsh <= 0, no check is run and kpass is incremented.
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters to be used for this test.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the order of the coefficient matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   double                     HPL_w[HPL_TIMING_N];
+#endif
+   HPL_T_pmat                 mat;      /* local [ A | b ], x and sizes handed to HPL_pdgesv */
+   double                     wtime[1]; /* combined wall time of the solve */
+   int                        info[3];  /* { allocation failed, myrow, mycol } */
+   double                     Anorm1, AnormI, Gflops, Xnorm1, XnormI,
+                              BnormI, resid0, resid1;
+   double                     * Bptr;
+   void                       * vptr = NULL; /* raw malloc'd buffer; mat.A is derived from it */
+   static int                 first=1;  /* table header is printed while set; cleared only if thrsh <= 0 */
+   int                        ii, ip2, mycol, myrow, npcol, nprow, nq;
+   char                       ctop, cpfact, crfact;
+   time_t                     current_time_start, current_time_end; /* stamps taken around HPL_pdgesv */
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mat.n  = N; mat.nb = NB; mat.info = 0;
+   mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow );
+   nq     = HPL_numroc( N, NB, NB, mycol, 0, npcol );
+   mat.nq = nq + 1;                     /* one extra local column, see below */
+/*
+ * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is
+ * N by N+1.  One column is added in every process column for the solve.
+ * The  result  however  is stored in a 1 x N vector replicated in every
+ * process row. In every process, A is lda * (nq+1), x is 1 * nq and the
+ * workspace is mp. 
+ *
+ * Ensure that lda is a multiple of ALIGN and not a power of 2
+ */
+   mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align;
+   do
+   {
+      ii = ( mat.ld += ALGO->align ); ip2 = 1;
+      while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } /* ip2 = largest power of 2 <= mat.ld */
+   }
+   while( mat.ld == ip2 );              /* step again while mat.ld is a power of 2 */
+/*
+ * Allocate dynamic memory: ALGO->align + (mat.ld+1) * mat.nq doubles
+ */
+   vptr = (void*)malloc( ( (size_t)(ALGO->align) + 
+                           (size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
+                         sizeof(double) );
+   info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
+   (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
+                          GRID->all_comm ); /* combine the status over all processes */
+   if( info[0] != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest",
+                    "[%d,%d] %s", info[1], info[2], /* NOTE(review): reduced too; may not name the failing process - confirm */
+                    "Memory allocation failed for A, x and b. Skip." );
+      (TEST->kskip)++;
+      /* some processes might have succeeded with allocation */
+      if (vptr) free(vptr);
+      return;
+   }
+/*
+ * generate matrix and right-hand-side, [ A | b ] which is N by N+1.
+ */
+   mat.A  = (double *)HPL_PTR( vptr,
+                               ((size_t)(ALGO->align) * sizeof(double) ) );
+   mat.X  = Mptr( mat.A, 0, mat.nq, mat.ld ); /* presumably just past the mat.nq local columns - confirm Mptr */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+#ifdef HPL_CALL_VSIPL
+   mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
+                                 (vsip_length)(mat.ld * mat.nq),
+                                 VSIP_MEM_NONE );
+#endif
+/*
+ * Solve linear system; HPL_ptimer( 0 ) and time() are called on both sides of it
+ */
+   HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm );
+   time( &current_time_start );
+   HPL_ptimer( 0 );
+   HPL_pdgesv( GRID, ALGO, &mat );
+   HPL_ptimer( 0 );
+   time( &current_time_end );
+#ifdef HPL_CALL_VSIPL
+   (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); 
+   vsip_blockdestroy_d( mat.block );
+#endif
+/*
+ * Gather max over all processes of the WALL clock timing and print timing results
+ */
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       1, 0, wtime );
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      if( first )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "T/V                N    NB     P     Q",
+                      "               Time                 Gflops" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         if( TEST->thrsh <= HPL_rzero ) first = 0;
+      }
+/*
+ * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve,
+ * i.e. 2/3 N^3 + 3/2 N^2 in total, divided by 1e9 * WALL time
+ */
+      Gflops = ( ( (double)(N) /   1.0e+9 ) * 
+                 ( (double)(N) / wtime[0] ) ) * 
+                 ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) );
+
+      cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ?
+                   (char)('C') : (char)('R') ) );
+      crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? 
+                   (char)('C') : (char)('R') ) );
+
+      if(      ALGO->btopo == HPL_1RING   ) ctop = '0'; /* topology -> digit printed in the T/V tag */
+      else if( ALGO->btopo == HPL_1RING_M ) ctop = '1';
+      else if( ALGO->btopo == HPL_2RING   ) ctop = '2';
+      else if( ALGO->btopo == HPL_2RING_M ) ctop = '3';
+      else if( ALGO->btopo == HPL_BLONG   ) ctop = '4';
+      else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5';
+
+      if( wtime[0] > HPL_rzero ) {      /* no result line unless the time is positive */
+         HPL_fprintf( TEST->outfp,
+             "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f    %19.4e\n",
+             ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ),
+             ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin,
+             N, NB, nprow, npcol, wtime[0], Gflops );
+         HPL_fprintf( TEST->outfp,     /* ctime() text already ends with a newline */
+             "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() end time   %s\n", ctime( &current_time_end ) );
+      }
+   }
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       HPL_TIMING_N, HPL_TIMING_BEG, HPL_w );
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V",
+                   "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" );
+/*
+ * Recursive panel factorization (each entry below is printed only if positive)
+ */
+      if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time rfact . . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization
+ */
+      if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time pfact . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization (swap)
+ */
+      if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time mxswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] );
+/*
+ * Update
+ */
+      if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time update  . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] );
+/*
+ * Update (swap)
+ */
+      if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time laswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] );
+/*
+ * Upper triangular system solve
+ */
+      if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time up tr sv  . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] );
+
+      if( TEST->thrsh <= HPL_rzero )
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+   }
+#endif
+/*
+ * Quick return, if I am not interested in checking the computations (counted as passed)
+ */
+   if( TEST->thrsh <= HPL_rzero )
+   { (TEST->kpass)++; if( vptr ) free( vptr ); return; }
+/*
+ * Check info returned by solve; a non-zero code counts the test as skipped
+ */
+   if( mat.info != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", 
+                    "Error code returned by solve is", mat.info, "skip" );
+      (TEST->kskip)++;
+      if( vptr ) free( vptr ); return;  /* the return is unconditional */
+   }
+/*
+ * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x,
+ * and norm inf of b - A x. Display residual checks.
+ */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+   Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
+   AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
+/*
+ * Because x is distributed in process rows (passed as a 1 x N matrix), switch the norms
+ */
+   XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 );
+   Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 );
+/*
+ * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to
+ * find the max (in the col). Then (3) broadcast along the rows so that every
+ * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5]
+ * for the entries of B, it is very likely that BnormI (<=,~) 0.5.
+ */
+   Bptr = Mptr( mat.A, 0, nq, mat.ld );
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){
+      if( mat.mp > 0 )
+      {
+         BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI );
+      }
+      else
+      {
+         BnormI = HPL_rzero;
+      }
+      (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max,
+                             GRID->col_comm );
+   }
+   (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE,
+                          HPL_indxg2p( N, NB, NB, 0, npcol ),
+                          GRID->row_comm );
+/*
+ * If I own b, compute ( b - A x ) and ( - A x ) otherwise
+ */
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );
+   }
+   else if( nq > 0 )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 );
+   }
+   else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } /* no local columns: contribute zeros */
+/*
+ * Reduce the distributed residual in process column 0
+ */
+   if( mat.mp > 0 )
+      (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0,
+                         GRID->row_comm );
+/*
+ * Compute || b - A x ||_oo
+ */
+   resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld );
+/*
+ * Computes and displays norms, residuals ... (the scaled residual is taken as zero when N <= 0)
+ */
+   if( N <= 0 )
+   {
+      resid1 = HPL_rzero;
+   }
+   else
+   {
+      resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) );
+   }
+
+   if( resid1 < TEST->thrsh ) (TEST->kpass)++;
+   else                       (TEST->kfail)++;
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n",
+         "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1,
+         " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) );
+
+      if(resid1 >= TEST->thrsh )        /* on failure, also print the individual norms */
+      {
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||Ax-b||_oo  . . . . . . . . . . . . . . . . . = ", resid0 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_1  . . . . . . . . . . . . . . . . . . . = ", Anorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_1  . . . . . . . . . . . . . . . . . . . = ", Xnorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI );
+      }
+   }
+   if( vptr ) free( vptr );
+/*
+ * End of HPL_pdtest
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# HPL headers listed as prerequisites of every object rule in this file.
+INCdep = $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \
+         $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \
+         $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+         $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+#
+# Output path handed to the linker with -o.
+xhpl = $(BINdir)/xhpl
+#
+## Object files ########################################################
+#
+HPL_pteobj = HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o
+#
+## Targets #############################################################
+#
+all     : dexe
+dexe    : dexe.grd
+# Command targets, not files: a stray file named all/dexe/clean must not mask them.
+.PHONY  : all dexe clean
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Link $(xhpl), refresh HPL.dat through a sub-make, then touch the dexe.grd stamp.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# One explicit compile rule per driver source kept in the parent directory.
+HPL_pddriver.o : ../HPL_pddriver.c $(INCdep)
+	$(CC) $(CCFLAGS) -c ../HPL_pddriver.c -o $@
+HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep)
+	$(CC) $(CCFLAGS) -c ../HPL_pdinfo.c -o $@
+HPL_pdtest.o : ../HPL_pdtest.c $(INCdep)
+	$(CC) $(CCFLAGS) -c ../HPL_pdtest.c -o $@
+#
+# ######################################################################
+# Remove the local objects and the dexe.grd stamp; nothing under $(BINdir) is touched.
+clean :
+	$(RM) *.grd *.o
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c
new file mode 100644
index 000000000..202416079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c
@@ -0,0 +1,358 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables (file scope, shared by all the functions below)
+ * ---------------------------------------------------------------------
+ */
+static int         HPL_ptimer_disabled;      /* != 0: HPL_ptimer is a no-op */
+static double      HPL_ptimer_cpusec   [HPL_NPTIMER], /* accumulated cpu   */
+                   HPL_ptimer_cpustart [HPL_NPTIMER], /* cpu  start mark   */
+                   HPL_ptimer_wallsec  [HPL_NPTIMER], /* accumulated wall  */
+                   HPL_ptimer_wallstart[HPL_NPTIMER]; /* wall start mark   */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * HPL_ptimer_boot  enables  HPL_ptimer  and  (re)initializes every one
+ * of the HPL_NPTIMER timers: the accumulated cpu and wall times are set
+ * to HPL_rzero  and the start marks to HPL_PTIMER_STARTFLAG,  i.e.  the
+ * value HPL_ptimer tests for to decide that a timer is not running.
+ */
+/* .. Local Variables .. */
+   int                        k;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( k = 0; k < HPL_NPTIMER; k++ )
+   {
+      HPL_ptimer_cpusec   [k] = HPL_rzero;
+      HPL_ptimer_wallsec  [k] = HPL_rzero;
+      HPL_ptimer_cpustart [k] = HPL_PTIMER_STARTFLAG;
+      HPL_ptimer_wallstart[k] = HPL_PTIMER_STARTFLAG;
+   }
+   HPL_ptimer_disabled = 0;
+/* End of HPL_ptimer_boot */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer is a  "stopwatch"  style  cpu/wall  timer  measuring  in
+ * seconds.  HPL_NPTIMER independent timers are available.  Calls on  a
+ * given timer alternate:  one call starts it,  the next call stops  it
+ * and adds the elapsed cpu and wall intervals to its running totals.
+ * After  HPL_ptimer_disable()  has been called,  calls to  HPL_ptimer
+ * return immediately  without touching any timer,  so that a  section
+ * of code can be excluded from the timings  even if it calls routines
+ * containing  HPL_ptimer  calls.   HPL_ptimer_enable()  switches  the
+ * timers back on. The total accumulated in a timer is obtained with
+ *  
+ * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I )
+ *  
+ * where  I  is the timer index in [0..HPL_NPTIMER).  HPL_ptimer_boot()
+ * must have been called  before  any of the functions mentioned above
+ * in order to initialize the timers.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !HPL_ptimer_disabled )
+   {
+/* A wall start mark other than  HPL_PTIMER_STARTFLAG  means the timer is
+ * running: stop it and accumulate the intervals. Otherwise, start it. */
+      if( HPL_ptimer_wallstart[I] != HPL_PTIMER_STARTFLAG )
+      {
+         HPL_ptimer_cpusec   [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I];
+         HPL_ptimer_wallsec  [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I];
+         HPL_ptimer_wallstart[I]  = HPL_PTIMER_STARTFLAG;
+      }
+      else
+      {
+         HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+         HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+      }
+   }
+/*
+ * End of HPL_ptimer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable clears the HPL_ptimer_disabled flag so that subse-
+ * quent calls to HPL_ptimer start and stop timers again.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable raises the HPL_ptimer_disabled flag so that subse-
+ * quent calls to HPL_ptimer return without starting or stopping timers.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+/*
+ * End of HPL_ptimer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME          TMTYPE,
+   const int                  I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_PTIME          TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns the wall- or cpu- time accumulated so far
+ * in timer I, or HPL_PTIMER_ERROR if that kind of time is unavailable.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE selects the time to be returned as follows
+ *
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * The clock selected by  TMTYPE  is read first:  a reading equal to the
+ * HPL_PTIMER_ERROR flag  is taken to mean  that this kind  of time  is
+ * not available on this machine, and the flag itself is returned.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+         return( HPL_PTIMER_ERROR );
+/*
+ * Wall clock is usable: hand back the wall time accumulated in timer I.
+ */
+      return( HPL_ptimer_wallsec[I] );
+   }
+/*
+ * Any other value of TMTYPE selects the cpu time.
+ */
+   if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+      return( HPL_PTIMER_ERROR );
+/*
+ * Cpu clock is usable: hand back the cpu time accumulated in timer I.
+ */
+   return( HPL_ptimer_cpusec[I] );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm                   COMM,
+   const HPL_T_PTIME_OP       OPE,
+   const HPL_T_PTIME          TMTYPE,
+   const int                  N,
+   const int                  IBEG,
+   double                     * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int                  IBEG, N;
+   const HPL_T_PTIME_OP       OPE;
+   const HPL_T_PTIME          TMTYPE;
+   MPI_Comm                   COMM;
+   double                     * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine  combines the timing information stored on a scope
+ * of processes into the user TIMES array.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)        MPI_Comm
+ *         The MPI communicator  identifying  the process  collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)              const HPL_T_PTIME_OP
+ *         On entry, OPE specifies what combine operation should be done
+ *         as follows:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME  get sum of times across processes.
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * N       (global input)              const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)              const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)             double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         this array contains the requested timing information,  or  is
+ *         filled with HPL_PTIMER_ERROR when that time is not available.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        avail, i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer has been disabled for combine operation.  If wall- or  cpu-time
+ * is not available on this machine, fill in TIMES with HPL_PTIMER_ERROR,
+ * restore HPL_ptimer_disabled (so timers are not left off), and return.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+      avail = ( HPL_ptimer_walltime() != HPL_PTIMER_ERROR );
+   else
+      avail = ( HPL_ptimer_cputime () != HPL_PTIMER_ERROR );
+
+   if( !avail )
+   {
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+      HPL_ptimer_disabled = tmpdis;
+      return;
+   }
+   if( TMTYPE == HPL_WALL_PTIME )
+   { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; }
+   else
+   { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec [IBEG+i]; }
+/*
+ * Combine all nodes information, restore HPL_ptimer_disabled, and return
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+
+   if(      OPE == HPL_AMAX_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   else if( OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME  )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/*
+ * Processor time, in seconds, relative to the clock() reading latched
+ * in t_origin (latched whenever t_origin is still 0 on entry).
+ */
+   static clock_t             t_origin = 0;
+   static double              ticks_per_sec = CLOCKS_PER_SEC;
+ 
+   if( t_origin == 0 ) { t_origin = clock(); }
+   return( (double)( clock() - t_origin ) / ticks_per_sec );
+}
+ 
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/* ts is filled in by times(); ClockTick caches sysconf(_SC_CLK_TCK). */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+ 
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct rusage              usage; /* filled in by getrusage() below */
+   double                     usec;  /* ru_utime microsecond part      */
+   (void) getrusage( RUSAGE_SELF, &usage );
+   usec = (double)( usage.ru_utime.tv_usec );
+   return( (double)( usage.ru_utime.tv_sec ) + ( usec / 1000000.0 ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/ 
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+ 
+#if defined( HPL_USE_GETTIMEOFDAY )
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   struct timeval             now;
+   static long                sec0 = 0, usec0;
+/* While sec0 is still 0 (first call): record the current time as the
+ * origin and report zero elapsed time. */
+   (void) gettimeofday( &now, NULL );
+   if( sec0 == 0 )
+   {
+      sec0  = now.tv_sec;
+      usec0 = now.tv_usec;
+      return( HPL_rzero );
+   }
+/* Later calls: seconds elapsed since the recorded origin. */
+   return( (double)( now.tv_sec - sec0 ) +
+           ( (double)( now.tv_usec-usec0 ) / 1000000.0 ) );
+}
+
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   const double               now = MPI_Wtime(); /* wall clock from MPI */
+   return( now );
+}
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+#
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+# all, lib and clean name actions rather than files (see .PHONY below).
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# Add the objects to $(HPLlib), run $(RANLIB) on it, then $(TOUCH) lib.grd.
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One explicit compile rule per timer object; each depends on $(INCdep).
+HPL_ptimer.o           : ../HPL_ptimer.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer.c
+HPL_ptimer_cputime.o   : ../HPL_ptimer_cputime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_cputime.c
+HPL_ptimer_walltime.o  : ../HPL_ptimer_walltime.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_walltime.c
+#
+# ######################################################################
+# Remove the object files and .grd stamp files found in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int                    HPL_timer_disabled;
+static double                 HPL_timer_cpusec   [HPL_NTIMER],
+                              HPL_timer_cpustart [HPL_NTIMER],
+                              HPL_timer_wallsec  [HPL_NTIMER],
+                              HPL_timer_wallstart[HPL_NTIMER];
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot  clears the  accumulated cpu and wall  times of  every
+ * timer, marks every timer as not started, and turns timing on.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        slot = HPL_NTIMER;
+/* ..
+ * .. Executable Statements ..
+ */
+   while( slot-- > 0 )
+   {
+      HPL_timer_cpusec   [slot] = HPL_rzero;
+      HPL_timer_wallsec  [slot] = HPL_rzero;
+      HPL_timer_cpustart [slot] = HPL_TIMER_STARTFLAG;
+      HPL_timer_wallstart[slot] = HPL_TIMER_STARTFLAG;
+   }
+   HPL_timer_disabled = 0;
+/* End of HPL_timer_boot */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer provides a  "stopwatch"  functionality  cpu/wall  timer  in
+ * seconds.  Up to  HPL_NTIMER  separate timers can be running  at once.
+ * The first call starts the timer,  and the second stops it.  Calls are
+ * ignored after  HPL_timer_disable(),  so that certain sections of code
+ * do not affect timings  even if they call routines that use HPL_timer;
+ * HPL_timer_enable()  re-enables the timer functionality.
+ *
+ * One can retrieve the current value of a timer by calling
+ *  
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *  
+ * where  I  is the timer index in  [0..HPL_NTIMER).  To  initialize the
+ * timer functionality, one must have called HPL_timer_boot()  prior  to
+ * any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.  A call with I
+ *         outside of  [0..HPL_NTIMER)  is ignored.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_timer_disabled ) return;
+/* The timer arrays hold HPL_NTIMER entries: never index outside them. */
+   if( ( I < 0 ) || ( I >= HPL_NTIMER ) ) return;
+/*
+ * If timer has not been started, start it.  Otherwise,  stop it and add
+ * interval to count
+ */
+   if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG )
+   {
+      HPL_timer_wallstart[I] = HPL_timer_walltime();
+      HPL_timer_cpustart [I] = HPL_timer_cputime ();
+   }
+   else
+   {
+      HPL_timer_cpusec   [I] += HPL_timer_cputime () - HPL_timer_cpustart [I];
+      HPL_timer_wallsec  [I] += HPL_timer_walltime() - HPL_timer_wallstart[I];
+      HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG;
+   }
+/* End of HPL_timer */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable clears the disabled flag,  so that subsequent calls
+ * to HPL_timer start and stop timers again.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable raises the disabled flag,  so that subsequent calls
+ * to HPL_timer return without touching any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire returns  wall- or cpu- time that has accumulated in
+ * timer I.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_TIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_TIME : wall clock time is returned,
+ *            = HPL_CPU_TIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.  I  must  lie  in
+ *         [0..HPL_NTIMER).
+ *
+ * Return value
+ * ============
+ *
+ * The accumulated time in seconds,  or  HPL_TIMER_ERROR  when I is out
+ * of range or the requested clock reports HPL_TIMER_ERROR.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * The timer arrays hold  HPL_NTIMER  entries;  reject any index outside
+ * of them instead of reading past the arrays.
+ */
+   if( ( I < 0 ) || ( I >= HPL_NTIMER ) ) return( HPL_TIMER_ERROR );
+/*
+ * If wall- or cpu-time are not available on this machine, return
+ * HPL_TIMER_ERROR
+ */
+   if( TMTYPE == HPL_WALL_TIME )
+   {
+      if( HPL_timer_walltime() == HPL_TIMER_ERROR ) return( HPL_TIMER_ERROR );
+      return( HPL_timer_wallsec[I] );
+   }
+   if( HPL_timer_cputime() == HPL_TIMER_ERROR ) return( HPL_TIMER_ERROR );
+   return( HPL_timer_cpusec[I] );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_cputime returns the cpu time.  If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+/* ANSI/ISO C clock(): cpu time elapsed since the first call. */
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;
+/* The first call records the origin t0 that every call subtracts. */
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;
+   d = (double)(t1) / cps;
+   return( d );
+}
+
+#elif defined( HPL_USE_TIMES )
+/* times(): user cpu time of the process, in ticks scaled to seconds. */
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+/* Query the clock tick rate  only on the first call;  ClockTick caches
+ * it for all later calls. */
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+
+/* #elif defined( HPL_USE_GETRUSAGE )  */
+#else
+/* Default: getrusage(), user cpu time of the calling process. */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct rusage              ruse;
+   (void) getrusage( RUSAGE_SELF, &ruse );
+   return( (double)( ruse.ru_utime.tv_sec  ) +
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) );
+}
+
+/* Disabled fallback, kept for reference (no cpu clock available):
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+/* Time of day recorded by the first call; later calls measure from it. */
+   static long                sec0 = 0, usec0;
+   struct timeval             now;
+
+   (void) gettimeofday( &now, NULL );
+   if( sec0 == 0 )
+   {
+/* First call: remember the origin and report zero elapsed time. */
+      sec0  = now.tv_sec;
+      usec0 = now.tv_usec;
+      return( HPL_rzero );
+   }
+   return( (double)( now.tv_sec - sec0 ) +
+           ( (double)( now.tv_usec - usec0 ) / 1000000.0 ) );
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc
new file mode 120000
index 000000000..3ee301793
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers that every object built in this directory depends on.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_timobj       = \
+   HPL_timer.o            HPL_timer_cputime.o    HPL_timer_walltime.o
+#
+## Targets #############################################################
+# all, lib and clean are commands, not files; .PHONY stops a same-named file from masking them.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched once the objects have been archived.
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit compile rule per object; the sources live one directory up.
+HPL_timer.o            : ../HPL_timer.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer.c
+HPL_timer_cputime.o    : ../HPL_timer_cputime.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer_cputime.c
+HPL_timer_walltime.o   : ../HPL_timer_walltime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer_walltime.c
+#
+# ######################################################################
+# Remove the objects and the lib.grd stamp so the next build starts over.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg
new file mode 100755
index 000000000..9af78f844
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg
new file mode 100755
index 000000000..73e4391cf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg
new file mode 100755
index 000000000..23795f8b9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg
new file mode 100755
index 000000000..c294e0d07
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg
new file mode 100755
index 000000000..f37187f13
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html
new file mode 100755
index 000000000..49a4bd318
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_abort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_abort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_abort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_abort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html
new file mode 100755
index 000000000..591cdd596
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_all_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_all_reduce</B> All reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_all_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_all_reduce</B>
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html
new file mode 100755
index 000000000..86ae426ad
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html
@@ -0,0 +1,41 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_barrier HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_barrier</B> Barrier operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_barrier(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_barrier</B>
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html
new file mode 100755
index 000000000..079325ed7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html
@@ -0,0 +1,46 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bcast</B> Perform the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bcast(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bcast</B>
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html
new file mode 100755
index 000000000..0f9a9e1ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html
@@ -0,0 +1,37 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_binit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_binit</B> Initialize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_binit(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_binit</B>
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html
new file mode 100755
index 000000000..6e24b2c2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_broadcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_broadcast</B> Broadcast operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_broadcast(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_broadcast</B>
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html
new file mode 100755
index 000000000..f1dd51e7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bwait HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bwait</B> Finalize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bwait(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bwait</B>
+waits  for  the  row  broadcast  of  the  current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html
new file mode 100755
index 000000000..4b98963ac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html
@@ -0,0 +1,42 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_copyL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_copyL</B> Copy the current panel into a contiguous workspace.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_copyL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_copyL</B>
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html
new file mode 100755
index 000000000..c34d0b2e8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_daxpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_daxpy</B> y := y + alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_daxpy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_daxpy</B>
+scales the vector x by alpha and adds it to y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html
new file mode 100755
index 000000000..2a4a485b5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dcopy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dcopy</B> y := x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dcopy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dcopy</B>
+copies the vector x into the vector y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html
new file mode 100755
index 000000000..667c0ff01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemm</B> C := alpha * op(A) * op(B) + beta * C.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSA</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSB</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>C</CODE>,
+<CODE>const int</CODE>
+<CODE>LDC</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemm</B>
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+</PRE>
+<PRE>
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+</PRE>
+<PRE>
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least  zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+</PRE>
+<PRE>
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+</PRE>
+<PRE>
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+</PRE>
+<PRE>
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dtrsm.html">HPL_dtrsm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html
new file mode 100755
index 000000000..d5921a9b2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html
@@ -0,0 +1,146 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemv</B> y := beta * y + alpha * op(A) * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemv</B>
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When   BETA  is
+        supplied as zero then  Y  need not be set on input.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        Before entry with BETA non-zero, the incremented array Y must
+        contain the vector  y.  On exit,  Y  is  overwritten  by  the
+        updated vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html
new file mode 100755
index 000000000..e4ea948ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html
@@ -0,0 +1,124 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dger HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dger</B> A := alpha * x * y^T + A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dger(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dger</B>
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html
new file mode 100755
index 000000000..b64d34e0c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlacpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlacpy</B> B := A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlacpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlacpy</B>
+copies an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlatcpy.html">HPL_dlatcpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html
new file mode 100755
index 000000000..cb87a90ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlamch(</CODE>
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlamch</B>
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>References</H1>
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html
new file mode 100755
index 000000000..ce276e257
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlange(</CODE>
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaprnt.html">HPL_dlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html
new file mode 100755
index 000000000..f589ee2bb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaprnt</B> Print the matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaprnt(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaprnt</B>
+prints to standard error an M-by-N matrix A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+</PRE>
+<PRE>
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html
new file mode 100755
index 000000000..8e36cf6c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp00N</B> performs a series of row interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp00N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp00N</B>
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html
new file mode 100755
index 000000000..aa8861d10
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01N</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01N</B>
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  to  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html
new file mode 100755
index 000000000..9697471c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html
@@ -0,0 +1,110 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01T</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01T</B>
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  to  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html
new file mode 100755
index 000000000..d4e1a0cf8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html
@@ -0,0 +1,107 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp02N</B> pack rows of A into columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp02N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>W0</CODE>,
+<CODE>double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp02N</B>
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html
new file mode 100755
index 000000000..f5c4127b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03N</B> copy columns of W into rows of U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03N</B>
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html
new file mode 100755
index 000000000..010175313
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03T</B> copy columns of W into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03T</B>
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html
new file mode 100755
index 000000000..bb6cab0a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html
@@ -0,0 +1,131 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04N</B> copy rows of U in A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04N(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04N</B>
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which rows of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html
new file mode 100755
index 000000000..0209a3689
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html
@@ -0,0 +1,132 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04T</B> copy columns of U in rows of A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04T(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04T</B>
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of  columns of W that will
+        be copied into U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which columns of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html
new file mode 100755
index 000000000..f428b7354
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05N</B> copy rows of U into A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05N</B>
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html
new file mode 100755
index 000000000..fffb9f320
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05T</B> copy columns of U into rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05T</B>
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html
new file mode 100755
index 000000000..f28ab48c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06N</B> swap rows of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06N</B>
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html
new file mode 100755
index 000000000..86032a9f4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06T</B> swap columns of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06T</B>
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html
new file mode 100755
index 000000000..84403ca79
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html
@@ -0,0 +1,77 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp10N</B> performs a series of column interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp10N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp10N</B>
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry, IPIV is an array containing the pivoting information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html
new file mode 100755
index 000000000..fa1cca5d9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlatcpy</B> B := A^T
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlatcpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlatcpy</B>
+copies the transpose of an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlacpy.html">HPL_dlacpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html
new file mode 100755
index 000000000..c3361f32d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocmax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocmax</B> finds the maximum entry in matrix column.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocmax(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocmax</B>
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html
new file mode 100755
index 000000000..b5c4b74a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpN</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpN</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html
new file mode 100755
index 000000000..d31361543
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpT</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpT</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html
new file mode 100755
index 000000000..7886da146
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dmatgen</B> random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dmatgen(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dmatgen</B>
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html
new file mode 100755
index 000000000..c13427f44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dscal HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dscal</B> x = alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dscal(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dscal</B>
+scales the vector x by alpha.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html
new file mode 100755
index 000000000..cae6980a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dswap HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dswap</B> y &lt;-&gt; x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dswap(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dswap</B>
+swaps the vectors x and y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html
new file mode 100755
index 000000000..3d60e597f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html
@@ -0,0 +1,168 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsm</B> B := A^{-1} * B  or  B := B * A^{-1}.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsm</B>
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies  the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:
+           TRANS==HplNoTrans     : op( A ) = A,
+           TRANS==HplTrans       : op( A ) = A^T,
+           TRANS==HplConjTrans   : op( A ) = A^T.
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+</PRE>
+<PRE>
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemm.html">HPL_dgemm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html
new file mode 100755
index 000000000..3e4703529
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html
@@ -0,0 +1,136 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsv</B> x := A^{-1} x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsv</B>
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html
new file mode 100755
index 000000000..d64ecab99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html
@@ -0,0 +1,115 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_equil HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_equil</B> Equilibrate U and forward the column panel L.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_equil(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_equil</B>
+equilibrates  the  local  pieces  of U, so that on exit to
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+</PRE>
+<PRE>
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+</PRE>
+<PRE>
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW), IPMAPM1[IPMAP[i]] = i.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html
new file mode 100755
index 000000000..d62b2c871
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_fprintf HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_fprintf</B> fprintf + fflush wrapper.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_fprintf(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_fprintf</B>
+is a wrapper around fprintf flushing the output stream.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html
new file mode 100755
index 000000000..b42f315c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html
@@ -0,0 +1,39 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_exit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_exit</B> Exit process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_exit(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_exit</B>
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html
new file mode 100755
index 000000000..47f63672d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_info HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_info</B> Retrieve grid information.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_info(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>int *</CODE>
+<CODE>NPROW</CODE>,
+<CODE>int *</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>MYROW</CODE>,
+<CODE>int *</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_info</B>
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html
new file mode 100755
index 000000000..0bec56e6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_init</B> Create a process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_init(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_init</B>
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+</PRE>
+<PRE>
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+</PRE>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html
new file mode 100755
index 000000000..f16b296f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html
@@ -0,0 +1,68 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_idamax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_idamax</B> 1st k s.t. |x_k| = max_i(|x_i|).
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_idamax(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_idamax</B>
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html
new file mode 100755
index 000000000..a3eb758da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html
@@ -0,0 +1,71 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2l</B> Map a global index into a local one.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2l(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2l</B>
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html
new file mode 100755
index 000000000..d9fa00436
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2lp</B> Map a global index into a local index and a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_indxg2lp(</CODE>
+<CODE>int *</CODE>
+<CODE>IL</CODE>,
+<CODE>int *</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2lp</B>
+computes the local index of a matrix entry pointed to by
+the global index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+</PRE>
+<PRE>
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+</PRE>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html
new file mode 100755
index 000000000..0068dede3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2p HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2p</B> Map a global index into a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2p(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2p</B>
+computes the process coordinate which possesses the entry
+of a matrix specified by a global index IG.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html
new file mode 100755
index 000000000..216e98057
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxl2g HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxl2g</B> Map an index-process pair into a global index.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxl2g(</CODE>
+<CODE>const int</CODE>
+<CODE>IL</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxl2g</B>
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html
new file mode 100755
index 000000000..34feff72c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html
@@ -0,0 +1,155 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_infog2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_infog2l</B> global to local index translation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_infog2l(</CODE>
+<CODE>int</CODE>
+<CODE>I</CODE>,
+<CODE>int</CODE>
+<CODE>J</CODE>,
+<CODE>const int</CODE>
+<CODE>IMB</CODE>,
+<CODE>const int</CODE>
+<CODE>MB</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>RSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>CSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>II</CODE>,
+<CODE>int *</CODE>
+<CODE>JJ</CODE>,
+<CODE>int *</CODE>
+<CODE>PROW</CODE>,
+<CODE>int *</CODE>
+<CODE>PCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_infog2l</B>
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed by  I,  J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+</PRE>
+<PRE>
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+</PRE>
+<PRE>
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+</PRE>
+<PRE>
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+</PRE>
+<PRE>
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+</PRE>
+<PRE>
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+</PRE>
+<PRE>
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+</PRE>
+<PRE>
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+</PRE>
+<PRE>
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+</PRE>
+<PRE>
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html
new file mode 100755
index 000000000..be87a1f53
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_jumpit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_jumpit</B> jump into the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_jumpit(</CODE>
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_jumpit</B>
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+
+<H1>Arguments</H1>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html
new file mode 100755
index 000000000..0c42d80d8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ladd HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ladd</B> Adds two long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ladd(</CODE>
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ladd</B>
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+
+<H1>Arguments</H1>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html
new file mode 100755
index 000000000..8ef70cba5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_lmul HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_lmul</B> multiplies 2 long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_lmul(</CODE>
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_lmul</B>
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the intrinsic modulo function
+is inlined.
+
+<H1>Arguments</H1>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html
new file mode 100755
index 000000000..da271fc19
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_logsort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_logsort</B> Sort the processes in logarithmic order.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_logsort(</CODE>
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>,
+<CODE>const int</CODE>
+<CODE>ICURROC</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_logsort</B>
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain the logarithmically sorted process ids with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words, the largest pieces
+of U will be sent a minimal number of times.
+
+<H1>Arguments</H1>
+<PRE>
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+</PRE>
+<PRE>
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+</PRE>
+<PRE>
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0..NPROCS).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html
new file mode 100755
index 000000000..7cf0b0670
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_max HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_max</B> Combine (max) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_max(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_max</B>
+combines (max) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html
new file mode 100755
index 000000000..9c109c338
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_min HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_min</B> Combine (min) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_min(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_min</B>
+combines (min) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html
new file mode 100755
index 000000000..fa617cac3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numroc HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numroc</B> Compute the local number of rows/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numroc(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numroc</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html
new file mode 100755
index 000000000..c1037a193
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numrocI HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numrocI</B> Compute the local number of rows/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numrocI(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>I</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numrocI</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix entry.
+        I must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html
new file mode 100755
index 000000000..89aacbd9f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pabort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pabort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pabort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pabort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_pwarn.html">HPL_pwarn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html
new file mode 100755
index 000000000..1e8f8106c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html
@@ -0,0 +1,59 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_packL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_packL</B> Form the MPI structure for the row ring broadcasts.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_packL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>INDEX</CODE>,
+<CODE>const int</CODE>
+<CODE>LEN</CODE>,
+<CODE>const int</CODE>
+<CODE>IBUF</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_packL</B>
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+</PRE>
+<PRE>
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+</PRE>
+<PRE>
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html
new file mode 100755
index 000000000..adcc02e00
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html
@@ -0,0 +1,27 @@
+<HTML>
+<HEAD>
+<TITLE>main HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>main</B> HPL main timing program.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>main();</CODE>
+
+<H1>Description</H1>
+<B>main</B>
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+
+<H1>See Also</H1>
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html
new file mode 100755
index 000000000..f51cee5d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdfact HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdfact</B> recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdfact(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdfact</B>
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows to vary the
+recursive stopping criterion in terms of the number of columns in the
+panel, and  NDIV  allows to specify the number of subpanels each panel
+should be divided into. Usually a value of 2 will be chosen.  Finally
+PFACT is a function pointer specifying the non-recursive algorithm
+to be used on at most NBMIN columns. One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html
new file mode 100755
index 000000000..ebb9c18e4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html
@@ -0,0 +1,56 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv</B> Solve A x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdtrsv.html">HPL_pdtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html
new file mode 100755
index 000000000..c137975d4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv0</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv0(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv0</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html
new file mode 100755
index 000000000..1a19edc05
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html
@@ -0,0 +1,62 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK1</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK1(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK1</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html
new file mode 100755
index 000000000..f2a9a25f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK2</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK2(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK2</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html
new file mode 100755
index 000000000..94a7f78c0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html
@@ -0,0 +1,252 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdinfo HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdinfo</B> Read input parameter file.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdinfo(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>int *</CODE>
+<CODE>NS</CODE>,
+<CODE>int *</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>NBS</CODE>,
+<CODE>int *</CODE>
+<CODE>NB</CODE>,
+<CODE>HPL_T_ORDER *</CODE>
+<CODE>PMAPPIN</CODE>,
+<CODE>int *</CODE>
+<CODE>NPQS</CODE>,
+<CODE>int *</CODE>
+<CODE>P</CODE>,
+<CODE>int *</CODE>
+<CODE>Q</CODE>,
+<CODE>int *</CODE>
+<CODE>NPFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>PF</CODE>,
+<CODE>int *</CODE>
+<CODE>NBMS</CODE>,
+<CODE>int *</CODE>
+<CODE>NBM</CODE>,
+<CODE>int *</CODE>
+<CODE>NDVS</CODE>,
+<CODE>int *</CODE>
+<CODE>NDV</CODE>,
+<CODE>int *</CODE>
+<CODE>NRFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>RF</CODE>,
+<CODE>int *</CODE>
+<CODE>NTPS</CODE>,
+<CODE>HPL_T_TOP *</CODE>
+<CODE>TP</CODE>,
+<CODE>int *</CODE>
+<CODE>NDHS</CODE>,
+<CODE>int *</CODE>
+<CODE>DH</CODE>,
+<CODE>HPL_T_SWAP *</CODE>
+<CODE>FSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>TSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>L1NOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>UNOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>EQUIL</CODE>,
+<CODE>int *</CODE>
+<CODE>ALIGN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdinfo</B>
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies the  threshold value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+</PRE>
+<PRE>
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+</PRE>
+<PRE>
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+</PRE>
+<PRE>
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+</PRE>
+<PRE>
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+</PRE>
+<PRE>
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+</PRE>
+<PRE>
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+</PRE>
+<PRE>
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+</PRE>
+<PRE>
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+</PRE>
+<PRE>
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+</PRE>
+<PRE>
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Each such value is
+        either 0 (no lookahead) or greater than zero.
+</PRE>
+<PRE>
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+</PRE>
+<PRE>
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+</PRE>
+<PRE>
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+</PRE>
+<PRE>
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+</PRE>
+<PRE>
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+</PRE>
+<PRE>
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html
new file mode 100755
index 000000000..c1b51370a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlamch(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlamch</B>
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps), the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin) = base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax) = (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+<PRE>
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html
new file mode 100755
index 000000000..0d1affc3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlange(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaprnt.html">HPL_pdlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html
new file mode 100755
index 000000000..0ce810db0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html
@@ -0,0 +1,94 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaprnt</B> Print a distributed matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaprnt(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>IAROW</CODE>,
+<CODE>const int</CODE>
+<CODE>IACOL</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaprnt</B>
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+</PRE>
+<PRE>
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+</PRE>
+<PRE>
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html
new file mode 100755
index 000000000..07279fdb0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision  real  words.
+Mono-directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html
new file mode 100755
index 000000000..08b8ea770
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision  real  words.
+Mono-directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html
new file mode 100755
index 000000000..2d4772fda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadN.html">HPL_spreadN</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollN.html">HPL_rollN</A>,
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html
new file mode 100755
index 000000000..f6a5d8c4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadT.html">HPL_spreadT</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollT.html">HPL_rollT</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html
new file mode 100755
index 000000000..28fb95509
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmatgen</B> Parallel random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmatgen(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmatgen</B>
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html
new file mode 100755
index 000000000..c11d2b2da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html
@@ -0,0 +1,96 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmxswp</B> swaps and broadcasts the pivot row.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmxswp(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmxswp</B>
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK is a work array of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html
new file mode 100755
index 000000000..663d2e266
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrN</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html
new file mode 100755
index 000000000..0e1490430
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrT</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html
new file mode 100755
index 000000000..cb78fa4be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_disp</B> Deallocate a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_disp(</CODE>
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_disp</B>
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html
new file mode 100755
index 000000000..d33e5e400
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_free</B> Deallocate the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_free(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_free</B>
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html
new file mode 100755
index 000000000..2d105354f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_init</B> Initialize the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_init(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_init</B>
+initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies  the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html
new file mode 100755
index 000000000..1b3029ecb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_new</B> Create a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_new(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_new</B>
+creates and initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies  the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html
new file mode 100755
index 000000000..386815fd2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllN</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html
new file mode 100755
index 000000000..04307e823
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllT</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html
new file mode 100755
index 000000000..8d705c63c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlN</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlN</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html
new file mode 100755
index 000000000..af458e7a1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlT</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlT</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note  that  one  iteration  of  the  main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html
new file mode 100755
index 000000000..9169c48cc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrN</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrN</B>
+recursively  factorizes  a panel of columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html
new file mode 100755
index 000000000..cc9047c3c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrT</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrT</B>
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html
new file mode 100755
index 000000000..bf16e6009
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllN</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllN</B>
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html
new file mode 100755
index 000000000..9904fb326
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllT</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html
new file mode 100755
index 000000000..9758c0722
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlN</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlN</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html
new file mode 100755
index 000000000..ed48a815d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlT</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html
new file mode 100755
index 000000000..1c11c34d7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtest HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtest</B> Perform one test.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtest(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtest</B>
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  &lt; thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+</PRE>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html
new file mode 100755
index 000000000..0bb182dc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html
@@ -0,0 +1,64 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtrsv</B> Solve triu( A ) x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtrsv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>AMAT</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtrsv</B>
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns  implemented  in terms of  synchronous communication point to
+point primitives.  The  lookahead of depth 1 is used to minimize  the
+critical path. This entire operation is essentially ``latency'' bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html
new file mode 100755
index 000000000..b77cddbce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNN</B>
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using the
+panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST  is
+        NULL on entry, IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html
new file mode 100755
index 000000000..4ecb1f687
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNT</B>
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using the
+panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html
new file mode 100755
index 000000000..ae735bf84
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTN</B>
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using the
+panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html
new file mode 100755
index 000000000..7c69f8828
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTT</B>
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using the
+panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html
new file mode 100755
index 000000000..9312eb4eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_perm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_perm</B> Combine 2 index arrays - Generate the permutation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_perm(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_perm</B>
+combines  two  index  arrays  and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU. Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutations  producing the  same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+
+<H1>Arguments</H1>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+</PRE>
+<PRE>
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+</PRE>
+<PRE>
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutations  that  should be applied  in increasing order to
+        permute the underlying array U in place.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html
new file mode 100755
index 000000000..e6deb3d93
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pipid HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pipid</B> Simplify the pivot vector.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pipid(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pipid</B>
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html
new file mode 100755
index 000000000..f3dbbcdea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html
@@ -0,0 +1,187 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx0</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx0(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>LLEN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx0</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA &lt; N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after each other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would rather be cumbersome
+to  figure it on  the fly  during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html
new file mode 100755
index 000000000..0a49ede0b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html
@@ -0,0 +1,130 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx1</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx1(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>PERMU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx1</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+</PRE>
+<PRE>
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+<PRE>
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html
new file mode 100755
index 000000000..fbfd6be2f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx10 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx10</B> Compute the logarithmic maps for the spreading.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx10(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx10</B>
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html
new file mode 100755
index 000000000..8bedc3016
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html
@@ -0,0 +1,54 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pnum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pnum</B> Rank determination.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pnum(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pnum</B>
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html
new file mode 100755
index 000000000..abef45946
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ptimer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be  disabled  by  calling HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html
new file mode 100755
index 000000000..cffd863b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_cputime</B>
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html
new file mode 100755
index 000000000..a509897f1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html
new file mode 100755
index 000000000..221d23982
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pwarn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pwarn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pwarn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pwarn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pabort.html">HPL_pabort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html
new file mode 100755
index 000000000..5aef6669c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html
@@ -0,0 +1,40 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rand HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rand</B> random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_rand();</CODE>
+
+<H1>Description</H1>
+<B>HPL_rand</B>
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html
new file mode 100755
index 000000000..afcb570c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_recv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_recv</B> Receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_recv(</CODE>
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>SRC</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_recv</B>
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html
new file mode 100755
index 000000000..026435ed6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html
@@ -0,0 +1,75 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_reduce</B> Reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_reduce</B>
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array:  in all
+processes but the accumulating process, the original data is corrupted.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html
new file mode 100755
index 000000000..1e1a49068
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollN</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollN</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html
new file mode 100755
index 000000000..a6ac29336
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollT</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollT</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html
new file mode 100755
index 000000000..6f5b5880c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sdrv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sdrv</B> Send and receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_sdrv(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>int</CODE>
+<CODE>PARTNER</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sdrv</B>
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages  of  length  less than  or  equal to zero  are not sent  nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+</PRE>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+</PRE>
+<PRE>
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_recv.html">HPL_recv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html
new file mode 100755
index 000000000..05dcb7e6d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_send HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_send</B> Send a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_send(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>DEST</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_send</B>
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_recv.html">HPL_recv</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html
new file mode 100755
index 000000000..44f37e35e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html
@@ -0,0 +1,52 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_setran HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_setran</B> Manage the random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_setran(</CODE>
+<CODE>const int</CODE>
+<CODE>OPTION</CODE>,
+<CODE>int *</CODE>
+<CODE>IRAN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_setran</B>
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to  the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+
+<H1>Arguments</H1>
+<PRE>
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+</PRE>
+<PRE>
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html
new file mode 100755
index 000000000..f0d8f8938
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadN</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadN</B>
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html
new file mode 100755
index 000000000..cec561646
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadT</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadT</B>
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html
new file mode 100755
index 000000000..be785b99e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html
@@ -0,0 +1,61 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sum</B> Combine (sum) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_sum(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sum</B>
+combines (sum) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html
new file mode 100755
index 000000000..8e6a79803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_timer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can  be  disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html
new file mode 100755
index 000000000..0fa9b6575
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_cputime</B>
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html
new file mode 100755
index 000000000..92588e49f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html
new file mode 100755
index 000000000..773df9ae0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_warn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_warn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_warn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_warn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html
new file mode 100755
index 000000000..794ae3a8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_xjumpm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_xjumpm</B> Compute constants to jump in the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_xjumpm(</CODE>
+<CODE>const int</CODE>
+<CODE>JUMPM</CODE>,
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>,
+<CODE>int *</CODE>
+<CODE>IAM</CODE>,
+<CODE>int *</CODE>
+<CODE>ICM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_xjumpm</B>
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+
+<H1>Arguments</H1>
+<PRE>
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over. When JUMPM is less than or equal to zero,
+        A and C are not computed, IRANM is set to IRANN corresponding
+        to a jump of size zero.
+</PRE>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+</PRE>
+<PRE>
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+<PRE>
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html
new file mode 100755
index 000000000..9b1d7222e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html
@@ -0,0 +1,299 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Algorithm</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Algorithm</H2>
+
+<STRONG>
+This  page provides  a high-level description of the algorithm used in
+this package. As indicated below,  HPL  contains in fact many possible
+variants for various operations.  Defaults could have been chosen,  or
+even  variants  could  be selected  during  the execution.  Due to the
+performance requirements,  it was  decided  to leave the user with the
+opportunity of choosing,  so that an "optimal" set of parameters could
+easily be experimentally determined for a given machine configuration.
+From a numerical accuracy point of view, <STRONG>all</STRONG> possible
+combinations are rigorously equivalent  to each other  even though the
+result may slightly differ (bit-wise).
+</STRONG><BR><BR>
+
+<UL>
+<LI><A HREF="algorithm.html#main">Main Algorithm</A>
+<LI><A HREF="algorithm.html#pfact">Panel Factorization</A>
+<LI><A HREF="algorithm.html#bcast">Panel Broadcast</A>
+<LI><A HREF="algorithm.html#look_ahead">Look-ahead</A>
+<LI><A HREF="algorithm.html#update">Update</A>
+<LI><A HREF="algorithm.html#trsv">Backward Substitution</A>
+<LI><A HREF="algorithm.html#check">Checking the Solution</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="main">Main Algorithm</A></H3>
+
+This  software  package  solves  a linear system  of order n:  A x = b by
+first  computing  the  LU  factorization with row partial pivoting of the
+n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular
+factor L is applied to b as the factorization progresses, the solution  x
+is obtained  by  solving  the upper triangular system U x = y.  The lower
+triangular  matrix  L  is left unpivoted  and  the array of pivots is not
+returned.<BR><BR>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>
+The  data  is distributed onto a two-dimensional P-by-Q grid of processes
+according  to  the  block-cyclic  scheme  to ensure  "good"  load balance
+as well as  the scalability  of the algorithm.  The  n-by-n+1 coefficient
+matrix is  first  logically partitioned into  nb-by-nb  blocks,  that are
+cyclically "dealt" onto the  P-by-Q  process grid.  This is done  in both
+dimensions of the matrix.</TD>
+<TD ALIGN=CENTER><IMG SRC = "mat2.jpg" BORDER=0 HEIGHT=165 WIDTH=340></TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER><IMG SRC ="main.jpg" BORDER=0 HEIGHT=165 WIDTH=165></TD>
+<TD ALIGN=LEFT>
+The  right-looking  variant  has been chosen for the main loop of the  LU
+factorization.  This  means that at each iteration of the loop a panel of
+nb columns is factorized,  and  the  trailing submatrix is updated.  Note
+that this computation is  thus  logically partitioned with the same block
+size nb that was used for the data distribution.</TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization</A></H3>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=10>
+<TR>
+<TD ALIGN=LEFT>
+At  a given iteration  of the main loop,  and  because of  the  cartesian 
+property of the distribution scheme,  each panel factorization  occurs in
+one column of processes.   This  particular part of the computation  lies
+on the critical path of  the overall algorithm.  The user is  offered the
+choice of three  (Crout, left- and right-looking)  matrix-multiply  based 
+recursive variants. The software also allows the user  to choose  in  how
+many  sub-panels  the current panel  should be divided  into  during  the
+recursion.  Furthermore,  one  can also  select at run-time the recursion
+stopping criterion in terms of the number  of  columns left to factorize.
+When this  threshold is reached,  the sub-panel will  then be  factorized
+using one of the three Crout, left- or right-looking matrix-vector  based 
+variants. Finally, for each panel column the pivot search, the associated
+swap  and broadcast  operation  of  the pivot row  are combined  into one 
+single communication step.  A   binary-exchange  (leave-on-all) reduction
+performs these three operations at once.</TD>
+<TD ALIGN=CENTER><IMG SRC = "pfact.jpg" BORDER=0 HEIGHT=300 WIDTH=160></TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="bcast">Panel Broadcast</A></H3>
+
+Once  the panel factorization has been computed,  this  panel  of columns
+is  broadcast  to the other process columns.   There  are  many  possible 
+broadcast  algorithms  and  the  software currently offers  6 variants to 
+choose from.  These variants are described below assuming  that process 0
+is the source of the broadcast for convenience. "->" means "sends to".
+<UL>
+<LI><STRONG>Increasing-ring</STRONG>:  0 -> 1;  1 -> 2; 2 -> 3 and so on.
+This algorithm is the classic one;  it has  the caveat that process 1 has
+to send a message.
+<CENTER>
+<IMG SRC="1ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-ring (modified)</STRONG>:  0 -> 1;  0 -> 2; 2 -> 3
+and so on. Process 0 sends two messages and process 1  only  receives one
+message. This algorithm is almost always better, if not the best.
+<CENTER>
+<IMG SRC="1rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring</STRONG>:  The Q processes are divided into
+two parts: 0 -> 1 and 0 -> Q/2;  Then processes 1  and Q/2 act as sources
+of two rings: 1 -> 2, Q/2 -> Q/2+1;  2 -> 3, Q/2+1 -> to Q/2+2 and so on.
+This  algorithm has the advantage  of reducing the time by which the last
+process  will  receive  the  panel  at  the  cost  of process 0 sending 2
+messages.
+<CENTER>
+<IMG SRC="2ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring (modified)</STRONG>:  As  one  may  expect,
+first 0 -> 1,  then  the  Q-1  processes  left are divided into two equal
+parts: 0 -> 2 and 0 -> Q/2;  Processes  2 and Q/2  act then as sources of
+two rings:  2 -> 3,  Q/2 -> Q/2+1; 3 -> 4,  Q/2+1 -> to Q/2+2  and so on.
+This algorithm is probably  the most serious competitor to the increasing
+ring modified variant.
+<CENTER>
+<IMG SRC="2rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Long  (bandwidth  reducing)</STRONG>:  as   opposed   to  the
+previous  variants,  this  algorithm  and  its follower  synchronize  all 
+processes involved in the operation. The message is chopped into  Q equal
+pieces that are scattered  across the Q processes. 
+<CENTER>
+<IMG SRC="spread.jpg">
+</CENTER>
+The pieces are then rolled in Q-1 steps.  The scatter phase uses a binary
+tree and the rolling phase exclusively uses mutual message exchanges.  In
+odd steps 0 <-> 1,  2 <-> 3, 4 <-> 5 and so on;  in even steps Q-1 <-> 0,
+1 <-> 2, 3 <-> 4, 5 <-> 6 and so on.
+<CENTER>
+<IMG SRC="roll.jpg">
+</CENTER>
+More messages are exchanged, however the total volume of communication is
+independent of Q, making this algorithm  particularly suitable for  large
+messages.  This algorithm  becomes  competitive  when the nodes are "very 
+fast" and the network (comparatively) "very slow".<BR><BR>
+
+<LI><STRONG>Long (bandwidth reducing modified)</STRONG>:  same  as above,
+except that 0 -> 1 first,  and then the Long variant is used on processes
+0,2,3,4 .. Q-1.<BR><BR>
+<CENTER>
+<IMG SRC="spreadM.jpg">
+<IMG SRC="rollM.jpg">
+</CENTER>
+
+</UL>
+
+The rings variants are distinguished by a probe mechanism  that activates
+them.  In other words,  a process involved in the broadcast and different
+from  the source asynchronously  probes for the message to receive.  When
+the  message  is  available  the broadcast proceeds,  and  otherwise  the
+function returns.  This allows to interleave the broadcast operation with
+the update phase. This contributes to reduce the idle time spent by those
+processes waiting for the factorized panel.  This  mechanism is necessary
+to accommodate various computation/communication performance ratios.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="look_ahead">Look-ahead</A></H3>
+
+Once the panel has been broadcast or say during this broadcast operation,
+the trailing submatrix is updated  using the last panel in the look-ahead
+pipe: as mentioned before,  the panel factorization  lies on the critical
+path,  which  means  that when the kth panel has been factorized and then 
+broadcast, the next most urgent task to complete is the factorization and
+broadcast of the k+1 th panel.  This technique  is  often referred  to as
+"look-ahead" or "send-ahead" in the literature.  This  package  allows to
+select various "depth" of look-ahead.  By  convention,  a  depth  of zero
+corresponds to no lookahead,  in which case  the  trailing  submatrix  is
+updated by the panel currently broadcast.  Look-ahead consumes some extra
+memory  to  essentially  keep  all the panels of columns currently in the
+look-ahead pipe.  A look-ahead  of depth 1 (maybe 2) is likely to achieve
+the best performance gain.<BR><BR> 
+<HR NOSHADE>
+
+<H3><A NAME="update">Update</A></H3>
+
+The update of the trailing submatrix by the last panel in the  look-ahead
+pipe is made of two phases. First, the pivots must be applied to form the
+current row panel U. U should then be solved by the upper triangle of the
+column panel. U finally needs to be broadcast to each process row so that
+the  local  rank-nb  update  can take place.  We choose  to  combine  the
+swapping and broadcast of  U  at the cost of  replicating the solve.  Two
+algorithms are available for this communication operation.
+<UL>
+<LI><STRONG>Binary-exchange</STRONG>:  this is a modified variant  of the
+binary-exchange (leave on all) reduction operation.  Every process column
+performs the same operation.  The algorithm essentially works as follows.
+It pretends reducing the row panel U, but at the beginning the only valid
+copy is owned by the current process row.  The  other process  rows  will
+contribute rows of A they own that should be copied in U and replace them
+with rows that were originally in the current process row.  The  complete
+operation is performed in  log(P) steps.  For the sake of simplicity, let
+us assume that P is a power of two.  At step k,  process row p exchanges a
+message with process row p+2^k.  There are  essentially two cases. First,
+one of those two process rows  has received  U  in  a previous step.  The
+exchange occurs.  One process  swaps  its  local rows of  A into U.  Both
+processes copy in  U remote rows of A. Second, none of those process rows
+has received U,  the exchange occurs, and both processes simply add those
+remote rows  to  the list  they have accumulated so far.  At each step, a 
+message  of  the size of  U  is exchanged by at least one pair of process
+rows.<BR><BR>
+
+<LI><STRONG>Long</STRONG>:   this  is   a   bandwidth   reducing  variant
+accomplishing the same task. The row panel is first spread (using a tree)
+among the process rows with respect to the pivot array. This is a scatter
+(V variant for MPI users).  Locally,  every process row  then swaps these
+rows  with  the rows of A it owns and that belong to U.  These  buffers
+are then rolled  (P-1 steps) to finish the broadcast of U.  Every process
+row permutes U and proceeds with the computational part of the update.  A
+couple  of  notes:   process  rows  are  logarithmically   sorted  before
+spreading,  so  that  processes  receiving the largest number of rows are
+first in the tree.  This makes  the communication volume optimal for this
+phase. Finally, before rolling and after the local swap, an equilibration
+phase occurs during  which the local pieces of  U  are  uniformly  spread
+across  the process rows.  A tree-based algorithm is used. This operation
+is necessary to keep the rolling phase optimal  even  when the pivot rows
+are  not  equally distributed  in  process rows.  This  algorithm  has  a 
+complexity  in  terms  of communication volume that solely depends on the 
+size of U.  In particular,  the number of process rows  only  impacts the
+number of messages exchanged.  It  will  thus  outperform  the   previous
+variant for large problems on large machine configurations.<BR><BR>
+
+</UL>
+
+The user can select any of the two variants above.  In addition, a mix is
+possible as well.  The  "binary-exchange"  algorithm will be used when  U
+contains at most a certain number of columns. Choosing at least the block
+size  nb as the threshold value is clearly recommended when look-ahead is
+on.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="trsv">Backward Substitution</A></H3>
+
+The factorization has just now ended, the back-substitution remains to be
+done.  For this,  we  choose  a look-ahead  of  depth  one  variant.  The
+right-hand-side  is  forwarded  in  process  rows  in  a  decreasing-ring 
+fashion,  so that  we solve Q * nb entries at a time.  At each step, this
+shrinking piece of the right-hand-side is updated. The process just above
+the one owning the current diagonal block of the matrix  A  updates first 
+its last nb piece of x,  forwards it to the previous process column, then
+broadcasts it in the process column in a decreasing-ring fashion as well.
+The solution is then updated and sent to the previous process column. The
+solution of the linear system is left replicated in every process row.<BR><BR>
+<HR NOSHADE>
+ 
+<H3><A NAME="check">Checking the Solution</A></H3>
+
+To verify the result obtained,  the input matrix  and right-hand side are
+regenerated.  The  normwise  backward  error  (see formula below) is then
+computed.  A solution  is  considered  as "numerically correct" when this
+quantity  is  less  than  a  threshold  value of the order of 1.0. In the
+expression   below,  eps  is  the  relative  (distributed-memory) machine
+precision.
+
+<UL>
+<LI>|| Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n )
+</UL>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif
new file mode 100755
index 000000000..6508c806f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html
new file mode 100755
index 000000000..934282c81
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html
@@ -0,0 +1,66 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Copyright and Licensing Terms</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Copyright Notice and Licensing Terms</H2>
+
+Redistribution  and  use in  source and binary forms, with or without
+modification, are  permitted provided  that the following  conditions
+are met:                                                             
+<OL>
+<LI>Redistributions  of  source code  must retain the above copyright
+notice, this list of conditions and the following disclaimer.        
+<LI>Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions,  and the following disclaimer in the
+documentation and/or other materials provided with the distribution. 
+<LI>All  advertising  materials mentioning  features  or  use of this
+software must display  the  following  acknowledgement:  This product
+includes   software   developed   at  the  University  of  Tennessee,
+Knoxville, Innovative Computing Laboratory.             
+<LI>The name of the  University, the name of the  Laboratory,  or the
+names  of  its  contributors  may  not  be used to endorse or promote
+products  derived   from   this  software  without  specific  written
+permission.                                                          
+</OL>
+                                                                      
+<H3>Disclaimer</H3>
+
+THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html
new file mode 100755
index 000000000..152188041
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html
@@ -0,0 +1,304 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Documentation</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Documentation</H2>
+
+The  HPL software distribution comes  with a set of text files explaining
+how to install,  run and tune the software. These files reside in the top
+level  directory  and their names are in upper case.  To  a large extent,
+this page reproduces them. In addition,  man- and HTML-pages are provided
+for every routine in the package. To access the man pages,  one  must add
+hpl/man  to its  MANPATH  environment variable.  The  HTML  pages  can be 
+accessed on this site,  or by pointing your browser to your local hpl/www
+directory. Finally,  the source code has been heavily documented. Despite
+all the other documentation efforts, the  source  code remains  the  most
+trustworthy  and truthful piece of information about what goes on in HPL.
+<BR><BR>
+
+<H3>HPL Functions HTML Pages</H3>
+
+<STRONG>Computational Kernels Wrappers</STRONG> When calling the Fortran
+77 BLAS interface, these C functions allow one to confine the C to Fortran
+77 interface  issues  to  a small  subset of routines.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_idamax.html">         HPL_idamax</A>
+<LI><A HREF = "HPL_dscal.html">          HPL_dscal</A>
+<LI><A HREF = "HPL_dswap.html">          HPL_dswap</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dcopy.html">          HPL_dcopy</A>
+<LI><A HREF = "HPL_daxpy.html">          HPL_daxpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemv.html">          HPL_dgemv</A>
+<LI><A HREF = "HPL_dger.html">           HPL_dger</A>
+<LI><A HREF = "HPL_dtrsv.html">          HPL_dtrsv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemm.html">          HPL_dgemm</A>
+<LI><A HREF = "HPL_dtrsm.html">          HPL_dtrsm</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Local Auxiliaries</STRONG> Basic functionality, local swap functions.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_fprintf.html">        HPL_fprintf</A>
+<LI><A HREF = "HPL_warn.html">           HPL_warn</A>
+<LI><A HREF = "HPL_abort.html">          HPL_abort</A>
+<LI><A HREF = "HPL_dlaprnt.html">        HPL_dlaprnt</A>
+<LI><A HREF = "HPL_dlamch.html">         HPL_dlamch</A>
+<LI><A HREF = "HPL_dlacpy.html">         HPL_dlacpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlange.html">         HPL_dlange</A>
+<LI><A HREF = "HPL_dlatcpy.html">        HPL_dlatcpy</A>
+<LI><A HREF = "HPL_dlaswp00N.html">      HPL_dlaswp00N</A>
+<LI><A HREF = "HPL_dlaswp01N.html">      HPL_dlaswp01N</A>
+<LI><A HREF = "HPL_dlaswp02N.html">      HPL_dlaswp02N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp03N.html">      HPL_dlaswp03N</A>
+<LI><A HREF = "HPL_dlaswp04N.html">      HPL_dlaswp04N</A>
+<LI><A HREF = "HPL_dlaswp05N.html">      HPL_dlaswp05N</A>
+<LI><A HREF = "HPL_dlaswp06N.html">      HPL_dlaswp06N</A>
+<LI><A HREF = "HPL_dlaswp10N.html">      HPL_dlaswp10N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp01T.html">      HPL_dlaswp01T</A>
+<LI><A HREF = "HPL_dlaswp03T.html">      HPL_dlaswp03T</A>
+<LI><A HREF = "HPL_dlaswp04T.html">      HPL_dlaswp04T</A>
+<LI><A HREF = "HPL_dlaswp05T.html">      HPL_dlaswp05T</A>
+<LI><A HREF = "HPL_dlaswp06T.html">      HPL_dlaswp06T</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Parallel Auxiliaries</STRONG> Index computations, parallel basic
+functionality.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_indxg2l.html">        HPL_indxg2l</A>
+<LI><A HREF = "HPL_indxg2lp.html">       HPL_indxg2lp</A>
+<LI><A HREF = "HPL_indxg2p.html">        HPL_indxg2p</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_indxl2g.html">        HPL_indxl2g</A>
+<LI><A HREF = "HPL_infog2l.html">        HPL_infog2l</A>
+<LI><A HREF = "HPL_numroc.html">         HPL_numroc</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_numrocI.html">        HPL_numrocI</A>
+<LI><A HREF = "HPL_pwarn.html">          HPL_pwarn</A>
+<LI><A HREF = "HPL_pabort.html">         HPL_pabort</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdlaprnt.html">       HPL_pdlaprnt</A>
+<LI><A HREF = "HPL_pdlamch.html">        HPL_pdlamch</A>
+<LI><A HREF = "HPL_pdlange.html">        HPL_pdlange</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Grid Management</STRONG>  Most of these routines have a direct
+MPI equivalent.  On new systems,  when the entire MPI functionality is
+not yet readily available, these functions are particularly convenient
+since they rely on a minimal  subset of the MPI standard.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_grid_exit.html">      HPL_grid_exit</A>
+<LI><A HREF = "HPL_grid_info.html">      HPL_grid_info</A>
+<LI><A HREF = "HPL_grid_init.html">      HPL_grid_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_all_reduce.html">     HPL_all_reduce</A>
+<LI><A HREF = "HPL_barrier.html">        HPL_barrier</A>
+<LI><A HREF = "HPL_broadcast.html">      HPL_broadcast</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_reduce.html">         HPL_reduce</A>
+<LI><A HREF = "HPL_max.html">            HPL_max</A>
+<LI><A HREF = "HPL_min.html">            HPL_min</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pnum.html">           HPL_pnum</A>
+<LI><A HREF = "HPL_sum.html">            HPL_sum</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Management</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdpanel_disp.html">   HPL_pdpanel_disp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_free.html">   HPL_pdpanel_free</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_init.html">   HPL_pdpanel_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_new.html">    HPL_pdpanel_new</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Factorization</STRONG> Recursive (matrix-multiply based) and
+(matrix-vector based) panel factorization.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_dlocmax.html">        HPL_dlocmax</A>
+<LI><A HREF = "HPL_dlocswpN.html">       HPL_dlocswpN</A>
+<LI><A HREF = "HPL_dlocswpT.html">       HPL_dlocswpT</A>
+<LI><A HREF = "HPL_pdmxswp.html">        HPL_pdmxswp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpancrN.html">       HPL_pdpancrN</A>
+<LI><A HREF = "HPL_pdpancrT.html">       HPL_pdpancrT</A>
+<LI><A HREF = "HPL_pdrpancrN.html">      HPL_pdrpancrN</A>
+<LI><A HREF = "HPL_pdrpancrT.html">      HPL_pdrpancrT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanllN.html">       HPL_pdpanllN</A>
+<LI><A HREF = "HPL_pdpanllT.html">       HPL_pdpanllT</A>
+<LI><A HREF = "HPL_pdrpanllN.html">      HPL_pdrpanllN</A>
+<LI><A HREF = "HPL_pdrpanllT.html">      HPL_pdrpanllT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanrlN.html">       HPL_pdpanrlN</A>
+<LI><A HREF = "HPL_pdpanrlT.html">       HPL_pdpanrlT</A>
+<LI><A HREF = "HPL_pdrpanrlN.html">      HPL_pdrpanrlN</A>
+<LI><A HREF = "HPL_pdrpanrlT.html">      HPL_pdrpanrlT</A>
+<LI><A HREF = "HPL_pdfact.html">         HPL_pdfact</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Broadcast</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_bcast.html">          HPL_bcast</A>
+<LI><A HREF = "HPL_binit.html">          HPL_binit</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_bwait.html">          HPL_bwait</A>
+<LI><A HREF = "HPL_copyL.html">          HPL_copyL</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_packL.html">          HPL_packL</A>
+<LI><A HREF = "HPL_recv.html">           HPL_recv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_sdrv.html">           HPL_sdrv</A>
+<LI><A HREF = "HPL_send.html">           HPL_send</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Update</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_perm.html">           HPL_perm</A>
+<LI><A HREF = "HPL_pipid.html">          HPL_pipid</A>
+<LI><A HREF = "HPL_plindx0.html">        HPL_plindx0</A>
+<LI><A HREF = "HPL_plindx1.html">        HPL_plindx1</A>
+<LI><A HREF = "HPL_plindx10.html">       HPL_plindx10</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_equil.html">          HPL_equil</A>
+<LI><A HREF = "HPL_pdlaswp00N.html">     HPL_pdlaswp00N</A>
+<LI><A HREF = "HPL_pdlaswp01N.html">     HPL_pdlaswp01N</A>
+<LI><A HREF = "HPL_pdlaswp00T.html">     HPL_pdlaswp00T</A>
+<LI><A HREF = "HPL_pdlaswp01T.html">     HPL_pdlaswp01T</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_logsort.html">        HPL_logsort</A>
+<LI><A HREF = "HPL_rollN.html">          HPL_rollN</A>
+<LI><A HREF = "HPL_rollT.html">          HPL_rollT</A>
+<LI><A HREF = "HPL_spreadN.html">        HPL_spreadN</A>
+<LI><A HREF = "HPL_spreadT.html">        HPL_spreadT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdupdateNN.html">     HPL_pdupdateNN</A>
+<LI><A HREF = "HPL_pdupdateNT.html">     HPL_pdupdateNT</A>
+<LI><A HREF = "HPL_pdupdateTN.html">     HPL_pdupdateTN</A>
+<LI><A HREF = "HPL_pdupdateTT.html">     HPL_pdupdateTT</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Factorization / Look-ahead</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdgesv.html">         HPL_pdgesv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesv0.html">        HPL_pdgesv0</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK1.html">       HPL_pdgesvK1</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK2.html">       HPL_pdgesvK2</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Backward Substitution</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdtrsv.html">         HPL_pdtrsv</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Matrix generation</STRONG> A C version of the ScaLAPACK random
+matrix generator with less functionality though.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_setran.html">         HPL_setran</A>
+<LI><A HREF = "HPL_rand.html">           HPL_rand</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_jumpit.html">         HPL_jumpit</A>
+<LI><A HREF = "HPL_xjumpm.html">         HPL_xjumpm</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ladd.html">           HPL_ladd</A>
+<LI><A HREF = "HPL_lmul.html">           HPL_lmul</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dmatgen.html">        HPL_dmatgen</A>
+<LI><A HREF = "HPL_pdmatgen.html">       HPL_pdmatgen</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Timers</STRONG> Sequential and parallel timing utilities.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_timer.html">          HPL_timer</A>
+<LI><A HREF = "HPL_ptimer.html">         HPL_ptimer</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_timer_cputime.html">  HPL_timer_cputime</A>
+<LI><A HREF = "HPL_timer_walltime.html"> HPL_timer_walltime</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ptimer_cputime.html"> HPL_ptimer_cputime</A>
+<LI><A HREF = "HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Testing / Timing Driver</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pddriver.html">       HPL_pddriver</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdinfo.html">         HPL_pdinfo</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdtest.html">         HPL_pdtest</A>
+</UL></TD></TR></TABLE>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html
new file mode 100755
index 000000000..24275d2dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html
@@ -0,0 +1,116 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Errata-Bugs</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Errata - Bugs</H2>
+
+<H3>Issues fixed in Version 2.1, October 26th, 2012</H3>
+
+The output now reports exact time stamps before and after the
+execution of the solver function pdgesv() was run. This could
+allow for accurate accounting of running time for data center
+management  purposes,   for  example  when  reporting  power
+consumption.  This  is  important  for  the Green500 project.<BR><BR>
+
+Fixed an out-of-bounds access to arrays  in the HPL_spreadN()
+and  HPL_spreadT()  functions.  This  may cause  segmentation
+fault signals. It was reported by Stephen Whalen from Cray.<BR><BR>
+
+<H3>Issues fixed in Version 2.0, September 10th, 2008</H3>
+
+Gregory Bauer  found  a  problem  size  corresponding  to the
+periodicity of the pseudo-random matrix generator used in the
+HPL  timing  program. This causes  the  LU  factorization  to
+detect the singularity of the input matrix as it should have.<BR><BR>
+
+A problem size of 2^17 = 131072 causes columns 14 modulo 2^14
+(i.e. 16384)  (starting from 0)  to be bitwise identical on a
+homogeneous platform.  Every problem size  being a power of 2
+and larger than  2^15  will  feature a similar problem if one
+searches far enough in the columns of the square input matrix.<BR><BR>
+
+The pseudo-random  generator  uses  the  linear  congruential
+algorithm:  X(n+1) = (a * X(n) + c) mod m as described in the
+Art of Computer  Programming, Knuth 1973,  Vol. 2. In the HPL
+case, m is set to 2^31.<BR><BR>
+
+It is very important  to realize that this issue is a problem
+of  the  testing  part  of the  HPL software.  The  numerical
+properties  of the  algorithms  used in the factorization and
+the solve should not be questioned because of this.  In fact,
+this is just the opposite: the factorization demonstrated the
+weakness of the testing part of the software by detecting the
+singularity of the input matrix.<BR><BR>
+
+This issue of  the testing program  is not easy to fix.  This
+pseudo-random  generator  has  very useful properties despite
+this.  It is  thus currently recommended to HPL users willing
+to test matrices of size larger than  2^15  to not use powers
+of two.<BR><BR>
+
+This  issue  has  been fixed by  changing  the  pseudo-random
+matrix  generator.   Now the  periodicity of the generator is
+2^64.<BR><BR>
+
+<H3>Issues fixed in Version 1.0b, December 15th, 2004</H3>
+
+When the matrix size is such that one needs  more  than 16 GB
+per  MPI  rank,  the  intermediate  calculation  (mat.ld+1) *
+mat.nq in  HPL_pdtest.c  ends up  overflowing  because  it is
+done using  32-bit arithmetic.   This issue has been fixed by
+typecasting to size_t; Thanks to John Baron.<BR><BR>
+
+<H3>Issues fixed in Version 1.0a, January 20th, 2004</H3>
+
+The  MPI  process  grid numbering scheme defaults now to row-
+major ordering. This option can now be selected at run time.<BR><BR>
+
+The  inlined  assembly  timer  routine  that  was causing the
+compilation to fail when using gcc version  3.3 and above has
+been removed from the package.<BR><BR>
+
+Various building problems on the T3E have been fixed;  Thanks
+to Edward Anderson.<BR><BR>
+
+<H3>Issues fixed in Version 1.0, September 27th, 2000</H3>
+
+Due to a couple of errors spotted in the  VSIPL  port  of  the
+software,  the  distribution  contained  in  the  tar file of
+September 9th, 2000 had been updated on September 27th,  2000
+with a corrected  distribution.  <STRONG>These  problems were
+not affecting in any way possible the  BLAS  version  of  the
+software.</STRONG>  If you are using  the  VSIPL port of HPL,
+and  want  to  make  sure  you are  indeed  using  the latest
+corrected version, please  check  the  date  contained in the
+file HPL.build.log contained in the main directory.<BR><BR>
+
+
+
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html
new file mode 100755
index 000000000..ad853e760
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html
@@ -0,0 +1,126 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Frequently Asked Questions</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Frequently Asked Questions</H2>
+
+<UL>
+<LI><A HREF="faqs.html#pbsize">What problem size N should I run ?</A>
+<LI><A HREF="faqs.html#blsize">What block size NB should I use ?</A>
+<LI><A HREF="faqs.html#grid">What process grid ratio P x Q should I use ?</A>
+<LI><A HREF="faqs.html#1node">What about the one processor case ?</A>
+<LI><A HREF="faqs.html#options">Why so many options in HPL.dat ?</A>
+<LI><A HREF="faqs.html#outperf">Can HPL be outperformed ?</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="pbsize">What problem size N should I run ?</A></H3>
+
+In order  to find out  the  best performance   of  your  system,  the
+largest   problem size  fitting in memory is what you should aim for.
+The  amount  of  memory  used  by  HPL is essentially the size of the 
+coefficient matrix.  So for example, if you have 4 nodes  with 256 Mb
+of memory on each, this corresponds to 1 Gb total, i.e., 125 M double
+precision  (8  bytes)  elements. The  square  root  of that number is
+11585.  One  definitely needs to leave some memory for the OS as well
+as for other things, so a problem size of 10000 is likely to fit.  As
+a rule of thumb, 80 % of the  total amount of memory is a good guess.
+If the problem size you pick is too large,  swapping will occur,  and
+the performance will drop.  If multiple processes are spawned on each
+node  (say  you have 2 processors  per  node),  what  counts  is  the
+available amount of memory to each process.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="blsize">What block size NB should I use ?</A></H3>
+
+HPL  uses  the block size NB for the data distribution as well as for
+the  computational  granularity.  From  a data distribution  point of
+view,  the smaller NB,   the better the load balance.  You definitely
+want  to stay away  from very large values of NB.  From a computation
+point of view,  a too small value of NB  may  limit the computational
+performance by a large factor because almost no data reuse will occur
+in the highest level of the memory hierarchy. The  number of messages
+will  also  increase.  Efficient  matrix-multiply  routines are often 
+internally  blocked.  Small  multiples  of  this  blocking factor are 
+likely to be good block sizes for HPL. The bottom line is that "good"
+block sizes are almost always in the [32 .. 256] interval.  The  best
+values depend on the computation / communication performance ratio of
+your system. To a much less extent, the problem size matters as well.
+Say for example,  you empirically found that 44 was a good block size
+with respect to performance.  88 or 132  are likely  to give slightly 
+better results  for large problem sizes because of a slightly  higher
+flop rate.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="grid">What process grid ratio P x Q should I use ?</A></H3>
+
+This  depends  on  the  physical  interconnection  network  you have.
+Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3].
+In  other  words,  P  and  Q  should  be approximately equal,  with Q 
+slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5,  3 x 4, 4 x 4,
+4 x 6, 5 x 6, 4 x 8 ...  If  you  are  running  on  a simple Ethernet 
+network,  there  is  only one wire through which all the messages are
+exchanged. On  such a network, the performance and scalability of HPL
+is strongly limited  and very flat process grids are likely to be the
+best choices: 1 x 4, 1 x 8, 2 x 4 ...<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="1node">What about the one processor case ?</A></H3>
+
+HPL  has  been  designed  to  perform well for large problem sizes on
+hundreds  of  nodes and more.  The software works on one node and for
+large problem sizes, one  can usually achieve pretty good performance
+on a single processor as well.  For small problem sizes  however, the
+overhead  due  to  message-passing,  local  indexing and so on can be 
+significant.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="options">Why so many options in HPL.dat ?</A></H3>
+
+There are quite a few reasons. First off, these options are useful to
+determine what matters and what does not on your system. Second,  HPL
+is often used in the context  of early evaluation of new systems.  In
+such a case, everything is usually not quite working right, and it is
+convenient  to be able  to vary these parameters without recompiling.
+Finally,  every system has its own peculiarities and one is likely to
+be  willing  to  empirically determine the best set of parameters. In
+any   case,  one  can  always  follow  the  advice  provided  in  the
+<A HREF = "tuning.html">tuning  section</A> of this  document and not
+worry about the complexity of the input file.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="outperf">Can HPL be outperformed ?</A></H3>
+
+Certainly.   There  is  always  room  for  performance  improvements.
+Specific knowledge about  a  particular system  is always a source of
+performance   gains.  Even  from  a generic  point  of  view,  better
+algorithms  or  more  efficient  formulation  of the classic ones are
+potential winners.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html
new file mode 100755
index 000000000..a3a53abfe
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>
+HPL - A Portable Implementation of the High-Performance
+Linpack Benchmark for Distributed-Memory Computers
+</TITLE>
+</HEAD>
+ 
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<HR NOSHADE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER>
+<H3>HPL - A Portable Implementation of the High-Performance Linpack
+Benchmark for Distributed-Memory Computers</H3>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<A HREF     = "http://icl.cs.utk.edu">
+<IMG SRC    = 2-273x48.jpg
+ALT         = "ICL - UTK Computer Science Department"
+BORDER      = 0
+HEIGHT      = 48
+WIDTH       = 273></A>
+</TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>Version 2.3</TD>
+<TD ALIGN=CENTER>
+<A HREF     = "http://www.cs.utk.edu/~petitet">A. Petitet</A>,
+<A HREF     = "http://www.cs.utk.edu/~rwhaley">R. C. Whaley</A>,
+<A HREF     = "http://www.netlib.org/utk/people/JackDongarra">J. Dongarra</A>,
+<A HREF     = "mailto:cleary1@llnl.gov">A. Cleary</A>
+</TD>
+<TD ALIGN=CENTER>December 2, 2018</TD>
+<TD ALIGN=RIGHT>
+<A HREF="http://www.netlib.org/master_counts2.html#benchmark/hpl"># Accesses</A>
+</TD>
+</TR>
+</TABLE>
+<HR NOSHADE><BR> 
+
+<STRONG>HPL</STRONG> is  a software  package  that solves  a (random)
+dense  linear  system  in  double  precision   (64  bits)  arithmetic
+on  distributed-memory  computers.    It  can  thus  be  regarded  as
+a portable as well as  freely available  implementation  of the  High
+Performance Computing Linpack Benchmark.<BR><BR>
+
+The <STRONG>algorithm</STRONG> used  by HPL  can be summarized by the
+following keywords:  Two-dimensional  block-cyclic data  distribution
+- Right-looking variant  of  the  LU  factorization  with row partial
+pivoting  featuring  multiple  look-ahead depths  -  Recursive  panel
+factorization  with  pivot  search  and  column  broadcast combined -
+Various  virtual  panel  broadcast topologies  -  bandwidth  reducing
+swap-broadcast  algorithm -  backward  substitution  with  look-ahead
+of depth 1.<BR><BR>
+
+The  HPL package  provides  a testing and timing program  to quantify
+the  <STRONG>accuracy</STRONG> of  the obtained solution  as  well as
+the time it took to compute it. The best <STRONG>performance</STRONG>
+achievable by this software on your system depends on a large variety
+of factors.  Nonetheless,  with some restrictive assumptions  on  the
+interconnection  network,   the  algorithm  described  here  and  its
+attached implementation  are <STRONG>scalable</STRONG>  in  the sense
+that their parallel efficiency is maintained  constant  with  respect
+to the per processor memory usage.<BR><BR>
+
+The HPL software package <STRONG>requires</STRONG>  the  availability
+on your system of an implementation of the  Message Passing Interface
+<STRONG>MPI</STRONG> (1.1 compliant).
+An implementation of <STRONG>either</STRONG> the Basic Linear Algebra
+Subprograms   <STRONG>BLAS  or</STRONG>   the   Vector  Signal  Image
+Processing Library <STRONG>VSIPL</STRONG> is also needed.
+Machine-specific as well as generic implementations of
+<A HREF = "links.html#mpi_libs">MPI</A>, the
+<A HREF = "links.html#blas_libs">BLAS</A> and
+<A HREF = "links.html#vsip_libs">VSIPL</A> are available  for a large
+variety of systems.<BR><BR>
+
+<STRONG>Acknowledgements</STRONG>: This work was  supported  in  part
+by  a  grant  from  the  Department  of  Energy's   Lawrence
+Livermore National Laboratory  and  Los  Alamos  National  Laboratory
+as   part  of  the   ASCI  Projects   contract  numbers  B503962  and
+12187-001-00 4R.
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+
+<ADDRESS>
+Innovative Computing Laboratory<BR>
+last revised December 2, 2018<BR>
+</ADDRESS>
+
+<PRE>
+#########################################################################
+
+file    <a href="hpl-2.3.tar.gz">hpl-2.3.tar.gz</a>
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    <a href="hpl-2.2.tar.gz">hpl-2.2.tar.gz</a>
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    <a href="hpl-2.1.tar.gz">hpl-2.1.tar.gz</a>
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    <a href="hpl-2.0.tar.gz">hpl-2.0.tar.gz</a>
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    <a href="hpl.tgz">hpl.tgz</a>
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004
+
+#########################################################################
+
+file    <a href="hpl_qs22-2008-11-30.patch">hpl_qs22-2008-11-30.patch</a>
+for     Implementation of the High-Performance Linpack benchmark for IBM
+,       QS22 systems with PowerXCell 8i processors. The file is a patch
+,       for HPL 1.0a.
+by      IBM
+
+file    <a href="IBM_LICENSE.TXT">IBM_LICENSE.TXT</a>
+for     IBM Copyright notice for QS22 HPL
+by      IBM
+
+file    <a href="IBM_README.txt">IBM_README.txt</a>
+for     README for IBM QS22 HPL
+by      IBM
+Updated: November 30, 2008
+
+
+#########################################################################
+</PRE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html
new file mode 100755
index 000000000..da2639e99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Related Links</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Related Links</H2>
+
+<STRONG>The  list  of links below contains some relevant material to this
+work.  This  list  is provided  for illustrative purposes, and  should be
+regarded  as  an initial starting point  for the interested reader.  This
+list is by no means meant to be exhaustive.</STRONG><BR><BR>
+
+<H3><A NAME="mpi_libs">Message Passing Interface (MPI)</A></H3>
+
+MPI  is  a  library  specification  for  message-passing,  proposed  as a
+standard  by  a  broadly  based committee  of  vendors, implementors, and
+users.  Machine-specific (optimized)  as  well  as  freely available  MPI
+libraries  are  available  for  a large  variety of systems.  Browse  the
+<A HREF = "http://www.mcs.anl.gov/mpi">Message  Passing  Interface  (MPI)
+standard web page</A> for more information.<BR><BR>
+
+<H3><A NAME="blas_libs">Basic Linear Algebra Subroutines (BLAS)</A></H3>
+
+The  <A HREF = "http://www.netlib.org/blas">BLAS</A>  are   high  quality
+"building  block"   routines  for  performing  basic  vector  and  matrix 
+operations.  A  lot  of  "BLAS-related"  information can be found at this 
+site.  In  particular,  a  reference implementation  is  available.  This
+reference   implementation  is  <STRONG>not  optimized</STRONG>  for  any
+system, and  it is therefore <STRONG>not  recommended</STRONG>  to use it
+for  benchmarking  purposes.
+However, <A HREF = "http://www.netlib.org/blas/faq.html">machine-specific
+optimized  BLAS  libraries</A> are  available  for  a variety of computer
+systems.   For  further  details,    please  contact  your  local  vendor
+representative.  Alternatively,  one  may  also consider  using automatic
+code  generators such as <A HREF="http://www.netlib.org/atlas">ATLAS</A>.
+This  tool  automatically   generates   a  complete   and  optimized BLAS
+library for a large variety of modern systems.<BR><BR>
+
+<H3><A NAME="vsip_libs">Vector Signal Image Processing Library (VSIPL)</A></H3>
+
+<A HREF = "http://www.vsipl.org">VSIPL</A>  is  an API defined by an open
+standard  comprised of  embedded signal and image processing hardware and
+software  vendors,  academia,  users,  and  government  labs.  A  lot  of
+"VSIPL-related"  information can be found at this site.  In particular, a
+reference implementation is available.  Machine-specific  optimized VSIPL
+libraries are available  for a variety of computer systems.  For  further
+details, please contact your local vendor representative.<BR><BR>
+
+<H3>TOP 500 List</H3>
+
+The  <A HREF  = "http://www.netlib.org/benchmark/top500.html">TOP 500</A>
+is  an  ordered list of the 500 most powerful computer systems worldwide.
+Computers   are   ranked  in  this  list  by  their  performance  on  the 
+<A HREF = "http://www.netlib.org/benchmark/top500/lists/linpack.html">
+LINPACK Benchmark</A>.<BR><BR>
+
+<H3>Parallel Dense Linear Algebra Software Libraries</H3>
+
+Browse the <A HREF="http://www.netlib.org">Netlib software repository</A>
+or  the <A HREF="http://www.nhse.org">National HPCC Software Exchange</A>
+to find a large collection of freely available linear algebra libraries.
+<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg
new file mode 100755
index 000000000..df62edd33
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg
new file mode 100755
index 000000000..25afdc44c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg
new file mode 100755
index 000000000..33a7e55cb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html
new file mode 100755
index 000000000..95c6db176
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html
@@ -0,0 +1,276 @@
+<HTML>
+<HEAD>
+<TITLE>HPL References</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL References</H2>
+
+<STRONG>
+The list of references below contains some relevant published material
+to this work.  This list  is  provided  for illustrative purposes, and
+should be regarded  as an initial  starting point  for the  interested
+reader. This list is by no means meant to be exhaustive.
+</STRONG><BR><BR>
+
+The references have been sorted in five categories and chronologically
+listed within each category. The five categories are
+<UL>
+<LI><A HREF="references.html#Linpack_Benchmark">Linpack Benchmark</A>
+<LI><A HREF="references.html#parallel_LUfact">Parallel  LU Factorization</A>
+<LI><A HREF="references.html#recursiv_LUfact">Recursive LU Factorization</A>
+<LI><A HREF="references.html#parallel_matmul">Parallel Matrix Multiply</A>
+<LI><A HREF="references.html#parallel_trsolv">Parallel Triangular Solve</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="Linpack_Benchmark">Linpack Benchmark</A></H3>
+
+<UL>
+
+<! - 1979 ----------------------------------------------------------- !>
+<LI><I>LINPACK Users Guide</I>, J. Dongarra, J. Bunch, C. Moler and
+G. W. Stewart, SIAM, Philadelphia, PA, 1979.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Performance of Various Computers Using Standard Linear Equations
+Software</I>, J. Dongarra, Technical Report CS-89-85, University of 
+Tennessee, 1989. (An updated version of this report can be found at
+<A HREF="http://www.netlib.org/benchmark/performance.ps">
+http://www.netlib.org/benchmark/performance.ps</A>).
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Towards Peak Parallel LINPACK Performance on 400 Transputers</I>,
+R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991.
+
+<LI><I>Massively Parallel LINPACK Benchmark on the Intel Touchstone 
+DELTA and iPSC/860 Systems</I>, R. van de Geijn, 1991 Annual Users
+Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX,
+1991.
+
+<LI><I>The LINPACK Benchmark on the AP 1000</I>, R. Brent, Frontiers,
+1992, pp. 128-135, McLean, VA, 1992.
+
+<! - 1993 ----------------------------------------------------------- !>
+<LI><I>Implementation of BLAS Level 3 and LINPACK Benchmark on the
+AP1000</I>, R. Brent and P. Strazdins, Fujitsu Scientific and Technical
+Journal, Vol. 5, No. 1, pp. 61-70, 1993.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>LU Factorization and the LINPACK Benchmark on the Intel
+Paragon</I>, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia
+Technical Report, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Massively Parallel Distributed Computing: World's First 281
+Gigaflop Supercomputer</I>, J. Bolen, A. Davis, B. Dazey, S. Gupta,
+G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi,
+S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen,
+L. Shuler, from Sandia National Laboratories, Proceedings of the Intel
+Supercomputer Users Group 1995.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>High Performance Software on Intel Pentium Pro Processors or
+Micro-Ops to TeraFLOPS</I>, B. Greer and G. Henry, Proceedings of the
+SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society
+Press - ISBN: 0-89791-985-8, San Jose, CA, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_LUfact">Parallel LU Factorization</A></H3>
+
+<UL>
+
+<! - 1986 ----------------------------------------------------------- !>
+<LI><I>Communication Complexity of the Gaussian Elimination Algorithm
+on Multiprocessors</I>, Y. Saad, Linear Algebra and Its Applications,
+Vol. 77, pp. 315-340, 1986.
+
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>LU Factorization Algorithms on Distributed-Memory Multiprocessor
+Architectures</I>, G. Geist and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 639-649, 1988.
+ 
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Parallel LU Decomposition on a Transputer Network</I>, 
+R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences,
+Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384,
+pp. 61-77, 1989.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>The Distributed Solution of Linear Systems Using the Torus-Wrap
+Data Mapping</I>, C. Ashcraft, ECA-TR-147, Boeing Computer Services,
+Seattle, WA, 1990.
+
+<LI><I>Experiments with Multicomputer LU-Decomposition</I>, E. van de
+Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>A Taxonomy of Distributed Dense LU Factorization Methods</I>,
+C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>The Torus-Wrap Mapping for Dense Matrix Calculations on Massively
+Parallel Computers</I>, B. Hendrickson and D. Womble, SIAM Journal on
+Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994.
+
+<LI><I>Scalability Issues in the Design of a Library for Dense Linear
+Algebra</I>, J. Dongarra, R. van de Geijn and D. Walker, Journal of
+Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Matrix Factorization using Distributed Panels on the Fujitsu
+AP1000</I>, P. Strazdins, Proceedings of the IEEE First International
+Conference on Algorithms And Architectures for Parallel Processing
+ICA3PP-95, Brisbane, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>The Design and Implementation of the ScaLAPACK LU, QR, and
+Cholesky Factorization Routines</I>, J. Choi, J. Dongarra, S. Ostrouchov,
+A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5,
+pp. 173-184, 1996.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="recursiv_LUfact">Recursive LU Factorization</A></H3>
+
+<UL>
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Locality of Reference in LU Decomposition with partial
+pivoting</I>, S. Toledo, SIAM Journal on Matrix Anal. Appl., Vol. 18,
+No. 4, 1997.
+
+<LI><I>Recursion Leads to Automatic Variable Blocking for Dense 
+Linear-Algebra Algorithms</I>, F. Gustavson, IBM Journal of Research
+and Development, Vol. 41, No. 6, pp. 737-755, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_matmul">Parallel Matrix Multiply</A></H3>
+
+<UL>
+
+<! - 1987 ----------------------------------------------------------- !>
+<LI><I>Matrix Algorithms on a Hypercube I: Matrix Multiplication</I>,
+G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>Basic Matrix Subprograms for Distributed-Memory Systems</I>,
+A. Elster, Proceedings of the Fifth Distributed-Memory Computing
+Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990.
+ 
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>The Parallelization of Level 2 and 3 BLAS Operations on
+Distributed-Memory Machines</I>, M. Aboelaze, N. Chrisochoides
+and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette,
+IN, 1991.
+
+<! - 1992 ----------------------------------------------------------- !>
+<LI><I>The Multicomputer Toolbox Approach to Concurrent BLAS and LACS</I>,
+R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the
+Scalable High Performance Computing Conference SHPCC-92, IEEE Computer
+Society Press, 1992.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>A High Performance Matrix Multiplication Algorithm on a
+Distributed-Memory Parallel Computer, Using Overlapped Communication</I>,
+R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and
+Development, Vol. 38, No. 6, pp. 673-681, 1994.
+
+<LI><I>PUMMA: Parallel Universal Matrix Multiplication Algorithms on
+Distributed-Memory Concurrent Computers</I>, J. Choi, J. Dongarra and
+D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7,
+pp. 543-570, 1994.
+
+<LI><I>Matrix Multiplication on the Intel Touchstone DELTA</I>,
+S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency:
+Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994.
+ 
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>A Three-Dimensional Approach to Parallel Matrix Multiplication</I>,
+R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal
+of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>A High Performance Parallel Strassen Implementation</I>,
+B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6,
+No. 1, pp. 3-12, 1996.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Parallel Implementation of BLAS: General Techniques for Level
+3 BLAS</I>, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and
+R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9,
+pp. 837-857, 1997.
+
+<LI><I>A Poly-Algorithm for Parallel Dense Matrix Multiplication on
+Two-Dimensional Process Grid Topologies</I>, J. Li, R. Falgout and
+A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5,
+pp. 345-389, 1997.
+
+<LI><I>SUMMA: Scalable Universal Matrix Multiplication Algorithm</I>,
+R. van de Geijn and J. Watts, Concurrency: Practice and Experience,
+Vol. 9, No. 4, pp. 255-274, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_trsolv">Parallel Triangular Solve</A></H3>
+
+<UL>
+ 
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>Parallel Solution of Triangular Systems on Distributed-Memory
+Multiprocessors</I>, M. Heath and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 558-588, 1988.
+
+<LI><I>A Parallel Triangular Solver for a Distributed-Memory
+Multiprocessor</I>, G. Li and T. Coleman, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>A New Method for Solving Triangular Systems on Distributed-Memory
+Message-Passing Multiprocessor</I>, G. Li and T. Coleman, SIAM Journal
+on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396,
+1989.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Parallel Triangular System Solving on a Mesh Network of
+Transputers</I>, R. Bisseling and J. van der Vorst, SIAM Journal
+on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html
new file mode 100755
index 000000000..9a7d8b8af
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html
@@ -0,0 +1,243 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Results</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR><TD ALIGN=LEFT VALIGN=LEFT>
+<IMG SRC    = "aprunner.gif" BORDER=0 HEIGHT=160 WIDTH=220>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<H2>HPL Performance Results</H2>
+
+<STRONG>
+The performance achieved by this software package  on a few machine
+configurations is shown below.  These results are only provided for
+illustrative  purposes.  By the time you read this,  those  systems
+have changed,  they may not even exist anymore  and  one can surely
+not exactly reproduce  the state  in which these machines were when
+those measurements have been obtained.  To obtain  accurate figures
+on your system, it is absolutely necessary to
+<A HREF = "software.html">download the software</A> and run it there.
+</STRONG>
+</TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "results.html#AMD_K7000">Athlon 4-nodes cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#I550p3000">Intel PIII 8-duals cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#compaq000">Compaq 64 nodes AlphaServer SC</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="AMD_K7000">4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs
+Switched - 2 NICs per node (channel bonding)</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.2 RedHat (Kernel 2.2.14)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPIch 1.2.1                            </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD><TD>09 / 00                                </TD></TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>1 x 4</TH>
+<TD ALIGN=CENTER> 1.28</TD>
+<TD ALIGN=CENTER> 1.73</TD>
+<TD ALIGN=CENTER> 1.89</TD>
+<TD ALIGN=CENTER> 1.95</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 2</TH>
+<TD ALIGN=CENTER> 1.17</TD>
+<TD ALIGN=CENTER> 1.68</TD>
+<TD ALIGN=CENTER> 1.88</TD>
+<TD ALIGN=CENTER> 1.93</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 1</TH>
+<TD ALIGN=CENTER> 0.81</TD>
+<TD ALIGN=CENTER> 1.43</TD>
+<TD ALIGN=CENTER> 1.70</TD>
+<TD ALIGN=CENTER> 1.80</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 4 nodes.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="I550p3000">8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.1 RedHat (Kernel 2.2.15)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPI GM (Version 1.2.3)                 </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF="http://icl.cs.utk.edu">UTK / ICL</A> - Torc cluster - 09 / 00</TD>
+</TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+<TH ALIGN=CENTER>15000</TH>
+<TH ALIGN=CENTER>20000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 4</TH>
+<TD ALIGN=CENTER> 1.76</TD>
+<TD ALIGN=CENTER> 2.32</TD>
+<TD ALIGN=CENTER> 2.51</TD>
+<TD ALIGN=CENTER> 2.58</TD>
+<TD ALIGN=CENTER> 2.72</TD>
+<TD ALIGN=CENTER> 2.73</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 4</TH>
+<TD ALIGN=CENTER> 2.27</TD>
+<TD ALIGN=CENTER> 3.94</TD>
+<TD ALIGN=CENTER> 4.46</TD>
+<TD ALIGN=CENTER> 4.68</TD>
+<TD ALIGN=CENTER> 5.00</TD>
+<TD ALIGN=CENTER> 5.16</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="compaq000">Compaq 64 nodes (4 ev67 667 Mhz processors per node)
+AlphaServer SC</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Tru64 Version 5               </TD></TR>
+<TR><TD>C compiler </TD><TD>cc Version 6.1                </TD></TR>
+<TR><TD>C flags    </TD><TD>-arch host -tune host -std -O5</TD></TR>
+<TR><TD>MPI        </TD><TD>-lmpi -lelan                  </TD></TR>
+<TR><TD>BLAS       </TD><TD>CXML                          </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF = "http://www.nccs.gov/">ORNL / NCCS</A>
+ - falcon - 09 / 00</TD></TR>
+</TABLE><P>
+</CENTER>
+
+In the table below, each row corresponds to a given number of cpus (or
+processors) and nodes.  The first row for example is denoted by 1 / 1,
+i.e.,  1 cpu / 1 node.  Rmax is given in Gflops, and the value of Nmax
+in fact corresponds to  351 Mb per cpu for all machine configurations.<BR><BR>
+
+<CENTER>
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER>    CPUS / NODES     </TH>
+<TH ALIGN=CENTER>       GRID          </TH>
+<TH ALIGN=CENTER>      N 1/2          </TH>
+<TH ALIGN=CENTER>       Nmax          </TH>
+<TH ALIGN=CENTER>    Rmax (Gflops)    </TH>
+<TH ALIGN=CENTER> Parallel Efficiency </TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   1 / 1    </TH>
+<TH ALIGN=CENTER>   1 x 1    </TH>
+<TD ALIGN=CENTER>     150    </TD>
+<TD ALIGN=CENTER>    6625    </TD>
+<TD ALIGN=CENTER>   1.136    </TD>
+<TD ALIGN=CENTER>   1.000    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   4 / 1    </TH>
+<TH ALIGN=CENTER>   2 x 2    </TH>
+<TD ALIGN=CENTER>     800    </TD>
+<TD ALIGN=CENTER>   13250    </TD>
+<TD ALIGN=CENTER>   4.360    </TD>
+<TD ALIGN=CENTER>   0.960    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  16 / 4    </TH>
+<TH ALIGN=CENTER>   4 x 4    </TH>
+<TD ALIGN=CENTER>    2300    </TD>
+<TD ALIGN=CENTER>   26500    </TD>
+<TD ALIGN=CENTER>   17.00    </TD>
+<TD ALIGN=CENTER>   0.935    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  64 / 16   </TH>
+<TH ALIGN=CENTER>   8 x 8    </TH>
+<TD ALIGN=CENTER>    5700    </TD>
+<TD ALIGN=CENTER>   53000    </TD>
+<TD ALIGN=CENTER>   67.50    </TD>
+<TD ALIGN=CENTER>   0.928    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER> 256 / 64   </TH>
+<TH ALIGN=CENTER>  16 x 16   </TH>
+<TD ALIGN=CENTER>   14000    </TD>
+<TD ALIGN=CENTER>  106000    </TD>
+<TD ALIGN=CENTER>   263.6    </TD>
+<TD ALIGN=CENTER>   0.906    </TD>
+</TR>
+</TABLE><P>
+</CENTER> 
+For Rmax shown in the table, the  parallel efficiency  per  cpu has been
+computed using the performance achieved by  HPL on 1 cpu.  That is fair:
+since the CXML matrix multiply routine was achieving at best 1.24 Gflops
+for large matrix operands on one cpu, it would have been difficult for a
+sequential  Linpack  benchmark  implementation to achieve much more than
+1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb
+per cpu for Nmax),  HPL  scales almost linearly as it should.
+
+<BR><BR>
+The authors acknowledge the use  of the Oak Ridge National Laboratory
+Compaq computer, funded by the Department of Energy's Office
+of Science and Energy Efficiency programs.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg
new file mode 100755
index 000000000..88d2c56af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg
new file mode 100755
index 000000000..0d7f076fd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html
new file mode 100755
index 000000000..00bb1a27e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html
@@ -0,0 +1,200 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Scalability Analysis</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Scalability Analysis</H2>
+
+The <A HREF = "scalability.html#model">machine model</A> used for the
+analysis is first described.  This crude model is then used  to first
+estimate  the  parallel running time  of  the various phases  of  the 
+algorithm namely
+<UL>
+<LI><A HREF="scalability.html#pfact">panel factorization and broadcast</A>,
+<LI><A HREF="scalability.html#updat">trailing submatrix update</A>, 
+<LI><A HREF="scalability.html#backs">backward substitution</A>. 
+</UL>
+Finally <A HREF="scalability.html#total">the  parallel efficiency</A>
+of the entire algorithm is estimated according to this machine model.
+We show that for a given set of parameters HPL is <STRONG>scalable</STRONG>
+not  only  with respect to the amount of computation,  but  also with
+respect to the communication volume.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME = "model">The Machine Model</A></H3>
+
+Distributed-memory computers consist of processors that are connected
+using  a message passing interconnection network.  Each processor has
+its own memory called the local memory,  which  is accessible only to
+that processor.  As the time to access a remote memory is longer than
+the time to access a local one,  such computers are often referred to
+as Non-Uniform Memory Access (NUMA) machines.<BR><BR>
+
+The interconnection network  of our machine model is static,  meaning
+that   it   consists  of  point-to-point  communication  links  among
+processors.  This  type  of  network  is also referred to as a direct
+network as opposed to dynamic networks.  The  latter  are constructed 
+from switches and communication links.  These links  are  dynamically
+connected  to one another by the switching elements to establish,  at
+run time, the paths between processor memories.<BR><BR>
+ 
+The  interconnection  network  of the two-dimensional  machine  model
+considered here is a static,  fully  connected physical topology.  It
+is also assumed  that  processors  can be treated  equally  in  terms
+of  local performance  and  that  the  communication rate between two
+processors depends on the processors considered.<BR><BR>
+
+Our model assumes  that  a processor can send or receive data on only
+one of its communication ports at a time  (assuming  it has more than
+one). In the literature,  this  assumption is also referred to as the
+one-port communication model.<BR><BR>
+ 
+The time spent to communicate  a message between two given processors
+is called the communication time Tc.   In  our machine model,  Tc  is
+approximated  by  a  linear  function  of  the  number  L  of  double
+precision (64-bits) items communicated.  Tc is the sum of the time to
+prepare the message for transmission (alpha) and the time  (beta * L)
+taken  by the message of length  L  to traverse  the network  to  its 
+destination, i.e.,<BR><BR>
+<CENTER>
+Tc = alpha + beta L.<BR><BR>
+</CENTER>
+
+Finally,   the   model  assumes  that  the  communication  links  are
+bi-directional,  that is,  the time  for two processors  to send each 
+other a message of length L is also Tc.  A processor  can send and/or
+receive  a message on only one of  its communication links at a time.
+In particular, a processor can send a message while receiving another
+message from the processor it is sending to at the same time.<BR><BR>
+ 
+Since this document is only concerned with regular local dense linear
+algebra  operations,  the time taken to perform  one  floating  point 
+operation  is  assumed  to  be  summarized by  three constants  gam1, 
+gam2 and gam3. These quantities are flop rate approximations of the
+vector-vector,  matrix-vector  and matrix-matrix operations for  each
+processor.  This  very  crude approximation summarizes all  the steps
+performed  by a processor  to achieve such a computation.  Obviously,
+such a model neglects all the phenomena  occurring  in  the processor
+components,  such as cache misses, pipeline startups, memory load  or
+store, floating point arithmetic and so on,  that  may  influence the
+value  of  these  constants  as  a function  of the  problem size for
+example.<BR><BR>
+ 
+Similarly,  the model  does  not make any assumption on the amount of
+physical memory per node.  It  is  assumed that if a process has been
+spawned on a processor,  one  has  ensured  that  enough  memory  was
+available  on that processor. In other words, swapping will not occur
+during the modeled computation.<BR><BR>
+ 
+<STRONG>
+This  machine  model  is  a very crude approximation that is designed
+specifically  to  illustrate  the cost of the dominant factors of our
+particular case.<BR><BR>
+</STRONG>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization and Broadcast</A></H3>
+
+Let us consider an  M-by-N  panel distributed over a P-process column.
+Because  of the recursive formulation of the panel factorization,  it
+is  reasonable to consider  that  the floating point operations  will
+be performed at matrix-matrix multiply "speed".  For  every column in
+the panel a binary-exchange is performed on 2*N data items. When this
+panel is broadcast,  what  matters  is the time that the next process
+column  will  spend  in this  communication operation.  Assuming  one
+chooses the <A HREF="algorithm.html#bcast">increasing-ring (modified)
+variant</A>,  only  one  message needs to be taken into account.  The
+execution  time  of the panel factorization and broadcast can thus be
+approximated by:<BR><BR>
+<CENTER>
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) +
+alpha + beta M N / P.<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="updat">Trailing Submatrix Update</A></H3>
+
+Let us consider  the  update phase  of an  N-by-N  trailing  submatrix
+distributed on a P-by-Q process grid.  From  a computational point of
+view one has to (triangular) solve N right-hand-sides  and  perform a 
+local rank-NB update of this trailing submatrix. Assuming one chooses
+the <A HREF="algorithm.html#update">long variant</A>,  the  execution
+time of the update operation can be approximated by:<BR><BR>
+<CENTER>
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) +
+alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.<BR><BR>
+</CENTER>
+The constant "3" in front of the "beta" term is obtained  by counting
+one for the (logarithmic) spread phase and two for the rolling phase;
+In the case of bi-directional links  this constant 3 should therefore
+be only a 2.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="backs">Backward Substitution</A></H3>
+
+The number of floating point operations performed during the backward
+substitution is given by  N^2 / (P*Q).  Because of the lookahead, the
+communication cost  can be approximated at each step  by two messages
+of length NB, i.e.,  the time  to  communicate  the NB-piece  of  the 
+solution vector from one diagonal block of the matrix to another.  It
+follows that the execution time of the backward substitution  can  be
+approximated by:<BR><BR>
+<CENTER>
+Tbacks( N, NB ) = gam2 N^2  / (P Q) + N ( alpha / NB + 2 beta ).<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="total">Putting it All Together</A></H3>
+
+The total execution time of the algorithm described above is given by<BR><BR>
+<CENTER>
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] +
+Tbacks( N, NB ).<BR><BR>
+</CENTER>
+That is, by considering only the dominant term in alpha, beta and
+gam3:<BR><BR>
+<CENTER>
+Thpl = 2 gam3 N^3  / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) +
+alpha N ((NB + 1) log(P) + P) / NB.<BR><BR>
+</CENTER>
+The serial execution time is given by Tser = 2 gam3 N^3  / 3. If we
+define the parallel efficiency  E  as the ratio  Tser / ( P Q Thpl ), we
+obtain:<BR><BR>
+<CENTER>
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) +
+3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).<BR><BR>
+</CENTER>
+This  last equality  shows  that when the memory usage per  processor
+N^2 / (P Q)  is maintained  constant, the parallel efficiency  slowly
+decreases  only  because of the alpha term.  The communication volume
+(the beta term) however remains constant.  Due to these results,  HPL
+is said to be <STRONG>scalable</STRONG> not only with respect  to the
+amount of computation,  but also  with  respect  to the communication
+volume.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html
new file mode 100755
index 000000000..34d82b2b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Software</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Software</H2>
+
+<H3>Download and Installation</H3>
+
+<OL>
+<LI>Download    the  <A HREF="hpl-2.3.tar.gz">tar-gzipped  file</A>,
+issue  then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar"  and  this
+should create an  hpl-2.3  directory  containing  the  distribution.
+We call this directory the top level directory.
+
+<LI>Create a  file  Make.&#60arch&#62  in  the  top-level directory.
+For  this purpose,  you  may  want  to  re-use  one contained in the 
+setup  directory.  This Make.&#60arch&#62 file  essentially contains
+the compilers, libraries, and their paths to be used on your system. 
+
+<LI>Type  "make arch=&#60arch&#62". This should create an executable
+in the bin/&#60arch&#62 directory called xhpl.  For example,  on our
+Linux  PII  cluster,  I create  a file called Make.Linux_PII in  the
+top-level  directory.  Then,  I  type  "make  arch=Linux_PII".  This
+creates  the executable file bin/Linux_PII/xhpl. 
+
+<LI>Quick check:  run  a few  tests  (assuming  you have 4 nodes for
+interactive use)  by  issuing  the  following  commands from the top
+level  directory:  "cd bin/&#60arch&#62 ;  mpirun -np 4 xhpl".  This
+should produce quite a bit of meaningful output on the screen.
+ 
+<LI>Most  of  the  performance parameters can be tuned, by modifying
+the input file bin/&#60arch&#62/HPL.dat. See the
+<A HREF = "tuning.html">tuning page</A>  or  the  TUNING file in the
+top-level directory.
+</OL>
+<HR NOSHADE>
+
+<H3>Compile Time Options</H3>
+
+At  the  end  of  the "model" Make.&#60arch&#62,  the  user is given
+the  opportunity  to override  some default  compile options of this
+software. The list of these options and their meaning is:<BR><BR>
+
+<CENTER>
+<TABLE WIDTH=80% BORDER>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_COPY_L</STRONG></TD>
+<TD ALIGN=LEFT>force the copy of the panel L before bcast</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_CBLAS</STRONG></TD>
+<TD ALIGN=LEFT>call the BLAS C interface</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_VSIPL</STRONG></TD>
+<TD ALIGN=LEFT>call the vsip library</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_DETAILED_TIMING</STRONG></TD>
+<TD ALIGN=LEFT>enable detailed timers</TD></TR>
+</TABLE><P>
+</CENTER>
+
+The user must choose between either the  BLAS  Fortran 77 interface,
+or the BLAS C interface, or the  VSIPL  library depending  on  which
+computational kernels are available on his system. Only one of these
+options should be selected.  If  you  choose  the  BLAS  Fortran  77
+interface,  it is necessary  to fill out  the machine-specific  C to
+Fortran 77 interface section of the  Make.&#60arch&#62  file.  To do
+this,  please  refer  to the Make.&#60arch&#62 examples contained in
+the setup directory.<BR><BR>
+
+By default HPL will:
+<UL>
+<LI>not copy L before broadcast,
+<LI>call the BLAS Fortran 77 interface,
+<LI>not display detailed timing information.
+</UL>
+
+As an example,  suppose one wants this software to copy the panel of
+columns  into  a contiguous buffer  before broadcasting.  It  should
+be  more efficient  to let  the software create the appropriate  MPI
+user-defined data type  since this may avoid the data copy.  So,  it
+is a strange idea,  but one insists.  To achieve this  one would add
+-DHPL_COPY_L  to  the definition of  HPL_OPTS at the end of the file
+Make.&#60arch&#62.  Issue   then  a  "make clean arch=&#60arch&#62 ; 
+make build arch=&#60arch&#62"  and  the executable  will be  rebuilt
+with that feature in.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg
new file mode 100755
index 000000000..56c255a3f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg
new file mode 100755
index 000000000..433e4c077
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html
new file mode 100755
index 000000000..fbbf17fb7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html
@@ -0,0 +1,476 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Tuning</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Tuning</H2>
+
+After  having built the executable hpl/bin/&#60arch&#62/xhpl,
+one may want to modify the input data file HPL.dat. This file
+should  reside  in  the  same  directory  as  the  executable
+hpl/bin/&#60arch&#62/xhpl.   An example   HPL.dat   file   is 
+provided by default. This file contains information about the
+problem sizes, machine configuration,  and algorithm features
+to be used by the executable.  It is  31  lines long. All the
+selected  parameters  will be printed in the output generated
+by the executable.<BR><BR>
+
+We first describe the meaning of each line of this input file
+below.  Finally,  <A HREF="tuning.html#tips">a   few   useful 
+experimental guide lines</A>  to set up the file are given at
+the end of this page.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="desc">Description of the HPL.dat File</A></H3>
+
+<STRONG>Line 1</STRONG>:  (unused) Typically  one  would  use
+this line for one's own purposes.  For example, it could be used
+to summarize the content of the input file.  By  default this 
+line reads:
+<TT><PRE>
+HPL Linpack benchmark input file
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 2</STRONG>:  (unused) same as line 1. By default
+this line reads:
+<TT><PRE>
+Innovative Computing Laboratory, University of Tennessee
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 3</STRONG>:  the  user  can   choose  where  the
+output  should  be  redirected to.  In the case of a file,  a
+name  is necessary, and this is  the line  where one wants to 
+specify it.  Only the first name on this line is significant.
+By default, the line reads:
+<TT><PRE>
+HPL.out  output file name (if any)
+</PRE></TT>
+ 
+This  means  that if  one chooses to redirect the output to a
+file, the file will be called "HPL.out". The rest of the line
+is unused, and this space can be used to put an informative comment on
+the meaning of this line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 4</STRONG>: This line specifies where the output
+should go.  The  line  is  formatted,  it  must  begin with a 
+positive integer,  the rest is insignificant. 3 choices  are
+possible  for  the  positive integer, 6 means that the output
+will go to the standard output,  7 means that the output will
+go to the standard error.  Any  other integer means that  the
+output should be redirected to a file,  whose name  has  been
+specified  in the line above. This line by default reads:
+<TT><PRE>
+6        device out (6=stdout,7=stderr,file)
+</PRE></TT>
+which  means  that  the  output generated  by  the executable
+should be redirected to the standard output.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 5</STRONG>: This  line  specifies  the number of
+problem sizes to be executed. This number should be less than
+or equal to 20.  The first  integer is significant,  the rest
+is ignored. If the line reads:
+<TT><PRE>
+3        # of problems sizes (N)
+</PRE></TT>
+this  means  that  the user is willing to run 3 problem sizes
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 6</STRONG>: This line specifies the problem sizes
+one wants to run.  Assuming  the  line  above  started with 3,
+the  3  first positive  integers  are significant, the rest is
+ignored. For example:
+<TT><PRE>
+3000 6000 10000    Ns
+</PRE></TT>
+means that one wants xhpl to run 3 (specified in line 5)
+problem sizes, namely 3000, 6000 and 10000.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 7</STRONG>: This line  specifies  the number  of
+block sizes to be run.  This  number should be less than  or
+equal to 20.  The first integer  is significant,  the rest is
+ignored. If the line reads:
+<TT><PRE>
+5        # of NBs
+</PRE></TT>
+this means that the user is willing to use 5 block sizes that
+will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 8</STRONG>:  This line specifies the block sizes
+one  wants  to run.  Assuming  the  line above started with 5,
+the  5  first positive integers  are  significant, the rest is 
+ignored. For example:
+<TT><PRE>
+80 100 120 140 160 NBs
+</PRE></TT>
+means  that  one  wants  xhpl  to use 5 (specified in line 7)
+block sizes, namely 80, 100, 120, 140 and 160.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 9</STRONG>:  This  line specifies  how  the  MPI
+processes  should be mapped  onto the nodes of your platform.
+There are currently two possible mappings,  namely  row-  and
+column-major. This feature is mainly useful  when these nodes
+are themselves multi-processor computers. A row-major mapping
+is recommended.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 10</STRONG>: This line specifies  the  number of
+process grids to be run.  This  number  should be less than
+or equal to 20. The first integer is significant, the rest is
+ignored. If the line reads:
+<TT><PRE>
+2        # of process grids (P x Q)
+</PRE></TT>
+this  means  that you are willing to try 2 process grid sizes 
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 11-12</STRONG>:  These  two  lines  specify  the  
+number of process rows  and  columns of each grid you want to
+run on.  Assuming the line above (10)  started with 2,  the 2
+first  positive integers of those two lines  are significant,
+the rest  is ignored. For example:
+<TT><PRE>
+1 2          Ps
+6 8          Qs
+</PRE></TT>
+means that one wants to run  xhpl  on  2  process grids (line
+10), namely 1-by-6 and 2-by-8. Note: In  this example,  it is
+required then  to  start  xhpl  on  at  least  16  nodes (max
+of Pi-by-Qi).  The runs on the two grids will be consecutive.
+If one was starting xhpl on more than 16 nodes, say 52,  only
+6 would be used for the first grid (1x6)  and  then 16  (2x8)
+would  be used for the second grid. The fact that you started
+the MPI job on 52 nodes, will not make  HPL  use all of them.
+In this example,  only 16 would be used.  If one wants to run 
+xhpl  with  52  processes  one needs  to specify a grid of 52
+processes, for example the following lines would do the job:
+<TT><PRE>
+4  2         Ps
+13 8         Qs
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 13</STRONG>: This line specifies  the  threshold
+to which the residuals should be compared with. The residuals
+should be of order 1, but are  in practice slightly less than
+this, typically 0.001.  This  line  is made of a real number,
+the rest is not significant. For example:
+<TT><PRE>
+16.0         threshold
+</PRE></TT>
+In practice,  a value of  16.0  will  cover  most cases.  For
+various reasons,  it  is possible  that some of the residuals
+become slightly larger, say for example 35.6.  xhpl will flag
+those runs  as  failed,  however  they  can be  considered as
+correct. A run should be considered as failed if the residual
+is a few orders of magnitude bigger than 1, for example 10^6 or
+more. Note:  if one was  to specify  a threshold of  0.0, all
+tests  would be flagged  as failed, even though the answer is
+likely  to  be  correct.  It is allowed to specify a negative 
+value for this threshold,  in which case  the checks  will be 
+by-passed,  no matter what the threshold value is, as soon as
+it  is  negative.  This  feature  allows  to  save  time when 
+performing a lot of experiments,  say for instance during the
+tuning phase. Example:
+<TT><PRE>
+-16.0        threshold
+</PRE></TT>
+ 
+<HR NOSHADE>
+The remaining lines allow one to specify algorithmic features.
+xhpl  will  run  all  possible combinations of those for each
+problem  size,  block size, process grid combination. This is
+handy  when one looks for an "optimal" set of parameters.  To
+understand a little bit better, let us first say a few words
+about  the algorithm implemented in HPL. Basically this is  a
+right-looking  version  with  row-partial pivoting. The panel
+factorization is matrix-matrix operation based and recursive,
+dividing the panel into  NDIV  subpanels  at each step.  This
+part  of  the   panel   factorization  is  denoted  below  by
+"recursive  panel  fact.  (RFACT)".  The recursion stops when
+the  current panel  is made of less  than or equal  to  NBMIN
+columns. At that point, xhpl uses a  matrix-vector  operation
+based  factorization  denoted   below  by  "PFACTs".  Classic
+recursion  would  then  use  NDIV=2,   NBMIN=1.   There   are
+essentially   3   numerically  equivalent  LU   factorization 
+algorithm  variants  (left-looking, Crout and right-looking).
+In HPL, one can choose  every one of those for the  RFACT, as
+well as the PFACT.  The following lines of HPL.dat  allow you
+to set those parameters.<BR><BR>
+<STRONG>Lines 14-21: (Example 1)</STRONG>
+<TT><PRE>
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+ 
+This  example  would  try all variants of PFACT, 4 values for
+NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+and 4, and all variants for RFACT.<BR><BR>
+<STRONG>Lines 14-21: (Example 2)</STRONG>
+<TT><PRE>
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+This example  would  try  2  variants  of  PFACT namely right
+looking and left looking, 2 values for NBMIN, namely 4 and 8,
+1 value for NDIV namely 2, and one variant for RFACT.<BR><BR>
+ 
+<HR NOSHADE>
+In the  main loop  of the algorithm,  the  current  panel  of
+columns is broadcast  in process rows  using  a virtual  ring
+topology. HPL offers various choices and one most likely wants
+to use the increasing ring modified encoded as 1. 3 and 4 are
+also good choices.<BR><BR>
+<STRONG>Lines 22-23: (Example 1)</STRONG>
+<TT><PRE>
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause HPL  to broadcast the current panel using the
+increasing ring modified topology.<BR><BR>
+<STRONG>Lines 22-23: (Example 2)</STRONG>
+<TT><PRE>
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause  HPL to broadcast the current panel using the
+increasing   ring  virtual  topology  and  the  long  message
+algorithm.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Lines 24-25</STRONG> allow to specify  the look-ahead
+depth used by HPL.  A depth of 0  means  that  the next panel
+is  factorized  after  the  update  by  the  current panel is
+completely finished.   A  depth of  1  means  that  the  next
+panel  is  immediately  factorized  after being updated.  The 
+update  by  the  current panel is then finished. A depth of k
+means that the k next panels are factorized immediately after
+being updated.  The  update  by  the  current  panel  is then 
+finished.  It  turns out that a depth of 1  seems to give the
+best results,  but  may need a large problem size  before one
+can  see  the performance  gain. So use 1, if you do not know
+better,  otherwise  you  may want  to  try 0.  Look-ahead  of
+depths 3  and  larger  will  probably  not  give  you  better
+results.<BR><BR>
+<STRONG>Lines 24-25: (Example 1):</STRONG>
+<TT><PRE>
+1       # of lookahead depth
+1       DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depth 1.<BR><BR>
+<STRONG>Lines 24-25: (Example 2):</STRONG>
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depths 0 and 1.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Lines 26-27</STRONG>  allow  to  specify  the  swapping
+algorithm  used  by  HPL for  all tests.  There  are  currently
+two  swapping  algorithms   available,  one  based  on  "binary
+exchange"  and  the   other  one   based  on   a  "spread-roll"
+procedure  (also  called   "long"  below).  For  large  problem
+sizes, this last one is likely to be more efficient.  The  user
+can also choose to mix both variants, that is "binary-exchange"
+for a number of columns less  than a threshold value,  and then
+the  "spread-roll" algorithm.  This  threshold  value  is  then 
+specified on Line 27.<BR><BR>
+<STRONG>Lines 26-27: (Example 1):</STRONG>
+<TT><PRE>
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm.  Note  that a threshold  is specified  in
+that example but not used by HPL.<BR><BR>
+<STRONG>Lines 26-27: (Example 2):</STRONG>
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm as soon as there are more than 60 columns
+in the row panel. Otherwise, the "binary-exchange"  algorithm
+will be used instead.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 28</STRONG>  allows  to specify whether the upper
+triangle  of  the  panel  of  columns  should   be  stored  in
+no-transposed  or transposed form. Example:
+<TT><PRE>
+0            L1 in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 29</STRONG> allows  to specify whether the panel 
+of rows  U  should be stored in  no-transposed  or transposed 
+form. Example:
+<TT><PRE>
+0            U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 30</STRONG> enables / disables the equilibration 
+phase. This option  will not be used unless you selected 1 or
+2 in Line 26. Example:
+<TT><PRE>
+1            Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 31</STRONG> allows  to  specify the alignment in
+memory for the memory  space  allocated  by  HPL.  On  modern
+machines, one probably wants to use  4,  8  or 16.  This  may 
+result in a tiny amount of memory wasted. Example:
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+
+<HR NOSHADE>
+<H3><A NAME="tips">Guide Lines</A></H3>
+
+<OL>
+<LI>Figure  out  a  good block size  for  the matrix multiply
+routine.  The best method  is to try a few out. If you happen
+to know  the block size  used  by the matrix-matrix  multiply
+routine,  a  small  multiple of that block size will do fine.
+This particular topic is discussed in the
+<A HREF="faqs.html#blsize">FAQs</A> section.<BR><BR>
+
+<LI>The process mapping  should  not matter  if  the nodes of
+your platform are single processor computers.  If these nodes
+are multi-processors, a row-major mapping is recommended.<BR><BR>
+
+<LI>HPL likes "square" or slightly flat process grids. Unless
+you  are using  a very small process grid, stay away from the 
+1-by-Q and P-by-1 process grids. This particular topic is also
+discussed in the <A HREF="faqs.html#grid">FAQs</A> section.<BR><BR>
+
+<LI>Panel factorization  parameters:  a  good  start  are the
+following for the lines 14-21:
+<TT><PRE>
+1       # of panel fact
+1       PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+
+<LI>Broadcast parameters: at this time it is far from obvious
+to me what the best setting is,  so I would probably try them
+all.  If  I  had  to guess  I would probably  start  with the 
+following for the lines 22-23:
+<TT><PRE>
+2       # of broadcast
+1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+The best broadcast depends  on your problem size and hardware
+performance. My take is that 4 or 5  may be  competitive  for
+machines  featuring  very  fast nodes  comparatively  to  the 
+network.<BR><BR>
+
+<LI>Look-ahead depth: as mentioned above 0 or 1 are likely to 
+be the best choices.  This also  depends  on the problem size
+and machine configuration, so I would try "no look-ahead (0)"
+and "look-ahead of depth 1 (1)". That is for lines 24-25:
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+
+<LI>Swapping: one can select only one of the three algorithms
+in the input file. Theoretically, mix (2) should win, however
+long (1) might just be good enough. The  difference should be
+small between those two assuming  a swapping threshold of the 
+order of the block size (NB) selected. If  this  threshold is
+very large, HPL will use bin_exch (0) most of the time and if
+it  is  very  small  (< NB) long (1)  will always be used. In 
+short  and  assuming  the  block size (NB)  used is say 60, I 
+would choose for the lines 26-27:
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold 
+</PRE></TT>
+I would also try the long variant.  For  a very  small number 
+of processes  in every column of the process grid  (say < 4),
+very little performance difference should be observable.<BR><BR>
+
+<LI>Local storage: I do not think Line 28 matters.  Pick 0 in
+doubt. Line 29 is more important.  It controls  how the panel
+of rows should be stored. No doubt 0 is better. The caveat is
+that in that case the matrix-multiply function is called with
+( Notrans, Trans, ... ), that is C := C - A B^T.   Unless the 
+computational  kernel  you are using  has  a very poor  (with
+respect to performance) implementation of that case,  and  is
+much more efficient with  ( Notrans, Notrans, ... ) just pick
+0 as well.  So, my choice:
+<TT><PRE>
+0       L1 in (0=transposed,1=no-transposed) form
+0       U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<LI>Equilibration: It  is hard to tell  whether equilibration
+should always be performed or not. Not knowing much about the
+random matrix generated  and because the overhead is so small
+compared to the possible gain, I turn it on all the time.
+<TT><PRE>
+1       Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<LI>For alignment, 4 should be plenty,  but just to be safe,
+one may want to pick 8 instead.
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+</OL>
+ 
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS
new file mode 100644
index 000000000..b08e25180
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS
@@ -0,0 +1,6 @@
+Antoine Petitet
+Clint Whaley rcwhaley@lsu.edu
+Jack Dongarra dongarra@icl.utk.edu
+Andy Cleary
+Piotr Luszczek luszczek@icl.utk.edu
+Julien Langou Julien.Langou@ucdenver.edu
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS
new file mode 100644
index 000000000..08d694014
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS
@@ -0,0 +1,9 @@
+==============================================================
+ List of the known problems with the HPL software
+
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+
+==============================================================
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT
new file mode 100644
index 000000000..08465d618
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT
@@ -0,0 +1,45 @@
+======================================================================
+ -- High Performance Computing Linpack Benchmark (HPL)                
+    HPL - 2.3 - December 2, 2018                        
+    Antoine P. Petitet                                                
+    University of Tennessee, Knoxville                                
+    Innovative Computing Laboratory                                 
+    (C) Copyright 2000-2008 All Rights Reserved                       
+                                                                      
+ -- Copyright notice and Licensing terms:                             
+                                                                      
+ Redistribution  and  use in  source and binary forms, with or without
+ modification, are  permitted provided  that the following  conditions
+ are met:                                                             
+                                                                      
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, this list of conditions and the following disclaimer.        
+                                                                      
+ 2. Redistributions in binary form must reproduce  the above copyright
+ notice, this list of conditions,  and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+                                                                      
+ 3. All  advertising  materials  mentioning  features  or  use of this
+ software must display the following acknowledgement:                 
+ This  product  includes  software  developed  at  the  University  of
+ Tennessee, Knoxville, Innovative Computing Laboratory.             
+                                                                      
+ 4. The name of the  University,  the name of the  Laboratory,  or the
+ names  of  its  contributors  may  not  be used to endorse or promote
+ products  derived   from   this  software  without  specific  written
+ permission.                                                          
+                                                                      
+ -- Disclaimer:                                                       
+                                                                      
+ THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+======================================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now  a run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 12/15/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15;  Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 to not use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost surely
+ 1/2; we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL
new file mode 100644
index 000000000..fec266c49
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL
@@ -0,0 +1,81 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ 1) Retrieve the tar file, then
+
+    gunzip hpl.tgz; tar -xvf hpl.tar
+
+ this  will create an  hpl  directory,  that we call below the
+ top-level directory.
+
+ 2) Create a file Make.<arch> in the  top-level directory. For
+ this purpose,  you  may  want  to re-use one contained in the
+ setup directory. This file essentially contains the compilers
+ and libraries with their paths to be used.
+
+ 3) Type "make arch=<arch>". This  should create an executable
+ in the bin/<arch> directory called xhpl.
+
+ For example, on our Linux PII cluster, I create a file called
+ Make.Linux_PII in the top-level directory. Then, I type
+    "make arch=Linux_PII" 
+ This creates the executable file bin/Linux_PII/xhpl.
+
+ 4) Quick check: run a few tests:
+
+    cd bin/<arch>
+    mpirun -np 4 xhpl
+
+ 5) Tuning: Most of the performance  parameters can be tuned,
+ by modifying the input file bin/HPL.dat. See the file TUNING
+ in the top-level directory.
+
+==============================================================
+
+ Compile time options:  At the end of the "model" Make.<arch>,
+ ---------------------  the  user  is given the opportunity to
+ compile the software with some specific compile options.  The
+ list of these options and their meanings is:
+
+    -DHPL_COPY_L
+       force the copy of the panel L before bcast;
+
+    -DHPL_CALL_CBLAS
+       call the cblas interface;
+
+    -DHPL_CALL_VSIPL
+       call the vsip  library;
+
+    -DHPL_DETAILED_TIMING
+       enable detailed timers;
+
+ The  user  must  choose  between  either  the BLAS Fortran 77
+ interface,  or the  BLAS  C  interface,  or the VSIPL library
+ depending on which computational kernels are available on his
+ system. Only one of these options should be selected.  If you
+ choose the BLAS Fortran 77 interface, it is necessary to fill
+ out the machine-specific C to Fortran 77 interface section of
+ the  Make.<arch>  file.  To  do this,  please  refer  to  the 
+ Make.<arch> examples contained in the setup directory.
+
+ By default HPL will:
+    *) not copy L before broadcast,
+    *) call the BLAS Fortran 77 interface,
+    *) not display detailed timing information.
+
+ As an example,  suppose  one wants  HPL  to copy the panel of
+ columns  into  a  contiguous buffer  before broadcasting.  In
+ theory,  it  would be more efficient to let  HPL  create  the
+ appropriate  MPI  user-defined data type since this may avoid 
+ the data copy. So, it is a strange idea, but one insists.  To
+ achieve this one would add -DHPL_COPY_L  to the definition of
+ HPL_OPTS  at the end of the file  Make.<arch>.  Issue  then a
+ "make clean arch=<arch>; make build arch=<arch>" and the xhpl
+ executable will be rebuilt with that feature in.
+==============================================================
+ 
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64
new file mode 100644
index 000000000..15d4ed82a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64
@@ -0,0 +1,236 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        := /bin/sh
+# Command names for basic file operations (plain literals, expanded once).
+CD           := cd
+CP           := cp
+LN_S         := ln -fs
+MKDIR        := mkdir -p
+RM           := /bin/rm -f
+TOUCH        := touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Platform name, assigned once without trailing blanks and exported to child processes.
+ARCH         = intel64
+export ARCH
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+# TOPdir defaults to the directory make runs in; the paths below derive from it.
+TOPdir       = $(CURDIR)
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Path of the HPL library archive for this platform.
+HPLlib       = $(LIBdir)/libhpl.a
+
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# ONEAPI_ROOT is not assigned in this file. MPlib links by name; the full-path form is $(MPdir)/lib/release/libmpi.so.
+OneAPIdir    = $(ONEAPI_ROOT)
+MPdir        = $(OneAPIdir)/mpi/latest/
+MPinc        = -I$(MPdir)/include/
+MPlib        = -lmpi
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# NOTE(review): LAdir and LAinc are not used by any active assignment in this file; LAlib hard-codes /opt/rocm for hipblas.
+LAdir        = $(OneAPIdir)/mkl/latest/lib/intel64/
+LAinc        = -I$(OneAPIdir)/mkl/latest/include/intel64/
+LAlib        = -L$(TOPdir)/src/cuda/ -ldgemm  -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++ -L/opt/rocm/hipblas/lib/ -lhipblas -I$(TOPdir)/src/cuda/
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+# NOTE(review): the list above tags -DAdd_ as [default] and -DAdd__ as the f2c convention; confirm -DAdd__ is intended here.
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# $(LAinc) and $(MPinc) are currently disabled in HPL_INCLUDES; append them to re-enable.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+# -DASYOUGO              enable timing information as you go (nonintrusive)
+# -DASYOUGO2             slightly intrusive timing information
+# -DASYOUGO2_DISPLAY     display detailed DGEMM information
+# -DENDEARLY             end the problem early  
+# -DFASTSWAP             insert to use DLASWP instead of HPL code
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+# Only the cblas interface option from the list above is enabled.
+# ----------------------------------------------------------------------
+# HPL_DEFS collects the F77/C defines, the compile-time options and the include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -g
+# CCNOOPT carries only the defines, without the optimization flags of CCFLAGS.
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the C compiler and its flags are reused for linking.
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is set to echo, so invoking it only prints its arguments.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+# Recursive make command: reuse the invoking make program and forward arch/TOPdir to sub-makes.
+# ----------------------------------------------------------------------
+MAKE := $(MAKE) VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top
new file mode 100644
index 000000000..57e2d3fa9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top
@@ -0,0 +1,238 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+arch             = UNKNOWN
+#
+include Make.$(arch)
+#
+## build ###############################################################
+# Build each library component in order; '&&' keeps $(MAKE) from running in the wrong directory when the cd fails.
+build_src        :
+	( $(CD) src/auxil/$(arch) &&       $(MAKE) )
+	( $(CD) src/blas/$(arch) &&        $(MAKE) )
+	( $(CD) src/comm/$(arch) &&        $(MAKE) )
+	( $(CD) src/grid/$(arch) &&        $(MAKE) )
+	( $(CD) src/panel/$(arch) &&       $(MAKE) )
+	( $(CD) src/pauxil/$(arch) &&      $(MAKE) )
+	( $(CD) src/pfact/$(arch) &&       $(MAKE) )
+	( $(CD) src/pgesv/$(arch) &&       $(MAKE) )
+	( $(CD) src/cuda/ &&               $(MAKE) )
+# Build each testing component in order; '&&' keeps $(MAKE) from running in the wrong directory when the cd fails.
+build_tst        :
+	( $(CD) testing/matgen/$(arch) &&  $(MAKE) )
+	( $(CD) testing/timer/$(arch) &&   $(MAKE) )
+	( $(CD) testing/pmatgen/$(arch) && $(MAKE) )
+	( $(CD) testing/ptimer/$(arch) &&  $(MAKE) )
+	( $(CD) testing/ptest/$(arch) &&   $(MAKE) )
+#( SPMS_make_cd`' testing/test/$(arch);      SPMS_make_make`' )
+#
+## startup #############################################################
+# Create the include/lib/bin directories for $(arch); the leading '-' makes make ignore a failing $(MKDIR).
+startup_dir      :
+	- $(MKDIR) include/$(arch)
+	- $(MKDIR) lib
+	- $(MKDIR) lib/$(arch)
+	- $(MKDIR) bin
+	- $(MKDIR) bin/$(arch)
+# Run this file's 'leaf' target once per source component (le=<component>); failures are ignored ('-').
+startup_src      :
+	- $(MAKE) -f Make.top leaf le=src/auxil       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/blas        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/comm        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/grid        arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/panel       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pauxil      arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pfact       arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=src/pgesv       arch=$(arch)
+# Run this file's 'leaf' target once per testing component (le=<component>); failures are ignored ('-').
+startup_tst      :
+	- $(MAKE) -f Make.top leaf le=testing/matgen  arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/timer   arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/ptimer  arch=$(arch)
+	- $(MAKE) -f Make.top leaf le=testing/ptest   arch=$(arch)
+#- SPMS_make_make`' -f Make.top leaf le=testing/test    arch=$(arch)
+#
+## refresh #############################################################
+# Install makes/Make.<component> as <component>/$(arch)/Makefile for every source component, using $(CP).
+refresh_src      :
+	- $(CP) makes/Make.auxil    src/auxil/$(arch)/Makefile
+	- $(CP) makes/Make.blas     src/blas/$(arch)/Makefile
+	- $(CP) makes/Make.comm     src/comm/$(arch)/Makefile
+	- $(CP) makes/Make.grid     src/grid/$(arch)/Makefile
+	- $(CP) makes/Make.panel    src/panel/$(arch)/Makefile
+	- $(CP) makes/Make.pauxil   src/pauxil/$(arch)/Makefile
+	- $(CP) makes/Make.pfact    src/pfact/$(arch)/Makefile
+	- $(CP) makes/Make.pgesv    src/pgesv/$(arch)/Makefile
+# Install makes/Make.<component> as testing/<component>/$(arch)/Makefile for every testing component, using $(CP).
+refresh_tst      :
+	- $(CP) makes/Make.matgen   testing/matgen/$(arch)/Makefile
+	- $(CP) makes/Make.timer    testing/timer/$(arch)/Makefile
+	- $(CP) makes/Make.pmatgen  testing/pmatgen/$(arch)/Makefile
+	- $(CP) makes/Make.ptimer   testing/ptimer/$(arch)/Makefile
+	- $(CP) makes/Make.ptest    testing/ptest/$(arch)/Makefile
+#- SPMS_make_cp`' makes/Make.test     testing/test/$(arch)/Makefile
+#
+## clean ###############################################################
+# Run 'clean' in each source component; best effort ('-'), and '&&' skips the clean when the cd fails.
+clean_src        :
+	- ( $(CD) src/auxil/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/blas/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/comm/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/grid/$(arch) &&       $(MAKE) clean )
+	- ( $(CD) src/panel/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/pauxil/$(arch) &&     $(MAKE) clean )
+	- ( $(CD) src/pfact/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/pgesv/$(arch) &&      $(MAKE) clean )
+	- ( $(CD) src/cuda/ &&              $(MAKE) clean )
+# Run 'clean' in each testing component; best effort ('-'), and '&&' skips the clean when the cd fails.
+clean_tst        :
+	- ( $(CD) testing/matgen/$(arch) &&  $(MAKE) clean )
+	- ( $(CD) testing/timer/$(arch) &&   $(MAKE) clean )
+	- ( $(CD) testing/pmatgen/$(arch) && $(MAKE) clean )
+	- ( $(CD) testing/ptimer/$(arch) &&  $(MAKE) clean )
+	- ( $(CD) testing/ptest/$(arch) &&   $(MAKE) clean )
+#- ( SPMS_make_cd`' testing/test/$(arch);     SPMS_make_make`' clean )
+#
+## clean_arch ##########################################################
+# Delete each source component's $(arch) directory, then 'clean' src/cuda ('&&' skips it when the cd fails).
+clean_arch_src   :
+	- $(RM) -r src/auxil/$(arch)
+	- $(RM) -r src/blas/$(arch)
+	- $(RM) -r src/comm/$(arch)
+	- $(RM) -r src/grid/$(arch)
+	- $(RM) -r src/panel/$(arch)
+	- $(RM) -r src/pauxil/$(arch)
+	- $(RM) -r src/pfact/$(arch)
+	- $(RM) -r src/pgesv/$(arch)
+	- ( $(CD) src/cuda &&       $(MAKE) clean )
+# Delete each testing component's $(arch) directory; the leading '-' makes make ignore a failing $(RM).
+clean_arch_tst   :
+	- $(RM) -r testing/matgen/$(arch)
+	- $(RM) -r testing/timer/$(arch)
+	- $(RM) -r testing/pmatgen/$(arch)
+	- $(RM) -r testing/ptimer/$(arch)
+	- $(RM) -r testing/ptest/$(arch)
+#- SPMS_make_rm`' -r testing/test/$(arch)
+#
+## clean_arch_all ######################################################
+# Run clean_arch_src and clean_arch_tst, then delete bin/, include/ and lib/ for $(arch).
+clean_arch_all   :
+	- $(MAKE) -f Make.top clean_arch_src arch=$(arch)
+	- $(MAKE) -f Make.top clean_arch_tst arch=$(arch)
+	- $(RM) -r bin/$(arch) include/$(arch) lib/$(arch)
+#
+## clean_guard #########################################################
+# Remove the *.grd files of each source component; '&&' keeps $(RM) from running elsewhere when the cd fails.
+clean_guard_src  :
+	- ( $(CD) src/auxil/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/blas/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/comm/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/grid/$(arch) &&      $(RM) *.grd )
+	- ( $(CD) src/panel/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/pauxil/$(arch) &&    $(RM) *.grd )
+	- ( $(CD) src/pfact/$(arch) &&     $(RM) *.grd )
+	- ( $(CD) src/pgesv/$(arch) &&     $(RM) *.grd )
+# Remove the *.grd files of each testing component; '&&' keeps $(RM) from running elsewhere when the cd fails.
+clean_guard_tst  :
+	- ( $(CD) testing/matgen/$(arch) &&  $(RM) *.grd )
+	- ( $(CD) testing/timer/$(arch) &&   $(RM) *.grd )
+	- ( $(CD) testing/pmatgen/$(arch) && $(RM) *.grd )
+	- ( $(CD) testing/ptimer/$(arch) &&  $(RM) *.grd )
+	- ( $(CD) testing/ptest/$(arch) &&   $(RM) *.grd )
+#- ( SPMS_make_cd`' testing/test/$(arch);    SPMS_make_rm`' *.grd )
+#
+## misc ################################################################
+# Create $(le)/$(arch) and link $(TOPdir)/Make.$(arch) into it as Make.inc; '&&' skips a step whose cd failed.
+leaf             :
+	- ( $(CD) $(le) && $(MKDIR) $(arch) )
+	- ( $(CD) $(le)/$(arch) && \
+            $(LN_S) $(TOPdir)/Make.$(arch) Make.inc )
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile
new file mode 100644
index 000000000..7ab3d9c54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile
@@ -0,0 +1,134 @@
+ # -- High Performance Computing Linpack Benchmark (HPL)                
+ #    Modifications Copyright (C) 2023 Intel Corporation
+ #                                                                      
+ # -- Copyright notice and Licensing terms:                             
+ #                                                                      
+ # Redistribution  and  use in  source and binary forms, with or without
+ # modification, are  permitted provided  that the following  conditions
+ # are met:                                                             
+ #                                                                      
+ # 1. Redistributions  of  source  code  must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.        
+ #                                                                      
+ # 2. Redistributions in binary form must reproduce  the above copyright
+ # notice, this list of conditions,  and the following disclaimer in the
+ # documentation and/or other materials provided with the distribution. 
+ #                                                                      
+ # 3. All  advertising  materials  mentioning  features  or  use of this
+ # software must display the following acknowledgement:                 
+ # This  product  includes  software  developed  at  the  University  of
+ # Tennessee, Knoxville, Innovative Computing Laboratory.             
+ #                                                                      
+ # 4. The name of the  University,  the name of the  Laboratory,  or the
+ # names  of  its  contributors  may  not  be used to endorse or promote
+ # products  derived   from   this  software  without  specific  written
+ # permission.                                                          
+ #                                                                      
+ # -- Disclaimer:                                                       
+ #                                                                      
+ # THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ # OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ # SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ---------------------------------------------------------------------
+ #
+ #SPDX-License-Identifier: BSD-4-Clause
+
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+#
+SHELL            = /bin/sh
+#
+arch             = intel64
+make             = 'make VERBOSE=1'
+#
+## Targets #############################################################
+.PHONY           : all install startup refresh build clean clean_arch clean_arch_all clean_guard
+all              : install
+# install needs startup, refresh and build to run one after another, so this
+# makefile is kept serial even under -j; sub-makes keep their own settings.
+.NOTPARALLEL     :
+install          : startup refresh build
+# Set up the $(arch) build tree via Make.top: directories, per-component leaves, then the refresh_* targets.
+startup          :
+	$(MAKE) -f Make.top startup_dir     arch=$(arch)
+	$(MAKE) -f Make.top startup_src     arch=$(arch)
+	$(MAKE) -f Make.top startup_tst     arch=$(arch)
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+# Re-run Make.top's refresh_src and refresh_tst targets for $(arch).
+refresh          :
+	$(MAKE) -f Make.top refresh_src     arch=$(arch)
+	$(MAKE) -f Make.top refresh_tst     arch=$(arch)
+# Build the sources first, then the tests, through Make.top (build_src, build_tst).
+build            :
+	$(MAKE) -f Make.top build_src       arch=$(arch)
+	$(MAKE) -f Make.top build_tst       arch=$(arch)
+# Run Make.top's clean_src and clean_tst targets for $(arch).
+clean            :
+	$(MAKE) -f Make.top clean_src       arch=$(arch)
+	$(MAKE) -f Make.top clean_tst       arch=$(arch)
+# Run Make.top's clean_arch_src and clean_arch_tst targets for $(arch).
+clean_arch       :
+	$(MAKE) -f Make.top clean_arch_src  arch=$(arch)
+	$(MAKE) -f Make.top clean_arch_tst  arch=$(arch)
+# Run Make.top's clean_arch_all target for $(arch).
+clean_arch_all   :
+	$(MAKE) -f Make.top clean_arch_all  arch=$(arch)
+# Run Make.top's clean_guard_src and clean_guard_tst targets for $(arch).
+clean_guard      :
+	$(MAKE) -f Make.top clean_guard_src arch=$(arch)
+	$(MAKE) -f Make.top clean_guard_tst arch=$(arch)
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am
new file mode 100644
index 000000000..1ad8c1b17
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src testing
+
+AM_CPPFLAGS = -I$(top_srcdir)/include
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in
new file mode 100644
index 000000000..76f0e2dd6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in
@@ -0,0 +1,772 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(am__DIST_COMMON)
+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+ configure.lineno config.status.lineno
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	cscope distdir distdir-am dist dist-all distcheck
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+CSCOPE = cscope
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+	$(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \
+	INSTALL NEWS README THANKS TODO compile config.guess \
+	config.sub depcomp install-sh missing
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+am__remove_distdir = \
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
+am__post_remove_distdir = $(am__remove_distdir)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+DIST_ARCHIVES = $(distdir).tar.gz
+GZIP_ENV = --best
+DIST_TARGETS = dist-gzip
+distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
+distcleancheck_listfiles = find . -type f -print
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = src testing
+AM_CPPFLAGS = -I$(top_srcdir)/include
+all: all-recursive
+
+.SUFFIXES:
+am--refresh: Makefile
+	@:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \
+	      $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    echo ' $(SHELL) ./config.status'; \
+	    $(SHELL) ./config.status;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	$(SHELL) ./config.status --recheck
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	$(am__cd) $(srcdir) && $(AUTOCONF)
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+$(am__aclocal_m4_deps):
+
+include/hplconfig.h: include/stamp-h1
+	@test -f $@ || rm -f include/stamp-h1
+	@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1
+
+include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status
+	@rm -f include/stamp-h1
+	cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h
+$(top_srcdir)/include/hplconfig.h.in:  $(am__configure_deps) 
+	($(am__cd) $(top_srcdir) && $(AUTOHEADER))
+	rm -f include/stamp-h1
+	touch $@
+
+distclean-hdr:
+	-rm -f include/hplconfig.h include/stamp-h1
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):  # strip "-recursive" from the goal, make that target in each subdirectory (DIST_SUBDIRS for distclean/maintainer-clean, SUBDIRS otherwise), then make "<target>-am" here unless "." was in the list
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)  # run ETAGS over this directory's tagged files, passing an include option for every subdirectory that already has a TAGS file; does nothing when there is no input at all
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscope: cscope.files
+	test ! -s cscope.files \
+	  || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
+clean-cscope:
+	-rm -f cscope.files
+cscope.files: clean-cscope cscopelist
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)  # append one path per tagged file to cscope.files in the top build directory: subdir-relative when the file exists here, source-directory-relative otherwise
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+	-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+	$(am__remove_distdir)
+	test -d "$(distdir)" || mkdir "$(distdir)"
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+	-test -n "$(am__skip_mode_fix)" \
+	|| find "$(distdir)" -type d ! -perm -755 \
+		-exec chmod u+rwx,go+rx {} \; -o \
+	  ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+	|| chmod -R a+r "$(distdir)"
+dist-gzip: distdir
+	tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
+	$(am__post_remove_distdir)
+
+dist-bzip2: distdir
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__post_remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
+	$(am__post_remove_distdir)
+
+dist-xz: distdir
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
+	$(am__post_remove_distdir)
+
+dist-tarZ: distdir
+	@echo WARNING: "Support for distribution archives compressed with" \
+		       "legacy program 'compress' is deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
+	$(am__post_remove_distdir)
+
+dist-shar: distdir
+	@echo WARNING: "Support for shar distribution archives is" \
+	               "deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
+	$(am__post_remove_distdir)
+
+dist-zip: distdir
+	-rm -f $(distdir).zip
+	zip -rq $(distdir).zip $(distdir)
+	$(am__post_remove_distdir)
+
+dist dist-all:  # build every DIST_TARGETS goal in a sub-make with am__post_remove_distdir overridden to a no-op, then run am__post_remove_distdir once here
+	$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
+	$(am__post_remove_distdir)
+
+# This target untars the dist file and tries a VPATH configuration.  Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+	case '$(DIST_ARCHIVES)' in \
+	*.tar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
+	*.tar.bz2*) \
+	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
+	*.tar.xz*) \
+	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
+	*.tar.Z*) \
+	  uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
+	*.shar.gz*) \
+	  eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
+	*.zip*) \
+	  unzip $(distdir).zip ;;\
+	esac
+	chmod -R a-w $(distdir)
+	chmod u+w $(distdir)
+	mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
+	chmod a-w $(distdir)
+	test -d $(distdir)/_build || exit 0; \
+	dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
+	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
+	  && am__cwd=`pwd` \
+	  && $(am__cd) $(distdir)/_build/sub \
+	  && ../../configure \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
+	    $(DISTCHECK_CONFIGURE_FLAGS) \
+	    --srcdir=../.. --prefix="$$dc_install_base" \
+	  && $(MAKE) $(AM_MAKEFLAGS) \
+	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
+	  && $(MAKE) $(AM_MAKEFLAGS) check \
+	  && $(MAKE) $(AM_MAKEFLAGS) install \
+	  && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+	  && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+	  && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
+	        distuninstallcheck \
+	  && chmod -R a-w "$$dc_install_base" \
+	  && ({ \
+	       (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
+	            distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
+	      } || { rm -rf "$$dc_destdir"; exit 1; }) \
+	  && rm -rf "$$dc_destdir" \
+	  && $(MAKE) $(AM_MAKEFLAGS) dist \
+	  && rm -rf $(DIST_ARCHIVES) \
+	  && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
+	  && cd "$$am__cwd" \
+	  || exit 1
+	$(am__post_remove_distdir)
+	@(echo "$(distdir) archives ready for distribution: "; \
+	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
+	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
+distuninstallcheck:  # fail when distuninstallcheck_dir is empty or cannot be entered, or when the file-listing command run inside it prints anything
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
+	   || { echo "ERROR: files left after uninstall:" ; \
+	        if test -n "$(DESTDIR)"; then \
+	          echo "  (check DESTDIR support)"; \
+	        fi ; \
+	        $(distuninstallcheck_listfiles) ; \
+	        exit 1; } >&2
+distcleancheck: distclean  # after distclean, fail when srcdir is "." (not a VPATH build) or when distcleancheck_listfiles still prints any file
+	@if test '$(srcdir)' = . ; then \
+	  echo "ERROR: distcleancheck can only run from a VPATH build" ; \
+	  exit 1 ; \
+	fi
+	@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
+	  || { echo "ERROR: files left in build directory after distclean:" ; \
+	       $(distcleancheck_listfiles) ; \
+	       exit 1; } >&2
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:  # re-invoke the install goal with INSTALL_PROGRAM set to INSTALL_STRIP_PROGRAM and INSTALL_STRIP_FLAG=-s; additionally pass STRIPPROG when STRIP is non-empty
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:  # remove CONFIG_CLEAN_FILES when set; remove CONFIG_CLEAN_VPATH_FILES only when srcdir is not "."; failures are ignored
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -rf $(top_srcdir)/autom4te.cache
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
+	am--refresh check check-am clean clean-cscope clean-generic \
+	cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \
+	dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \
+	distcheck distclean distclean-generic distclean-hdr \
+	distclean-tags distcleancheck distdir distuninstallcheck dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS
new file mode 100644
index 000000000..d6d59ee45
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS
@@ -0,0 +1,103 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ History
+
+ - 09/09/00 Public release of Version 1.0
+
+ - 09/27/00 A couple of mistakes in the  VSIPL  port have been
+ corrected.  The tar file as well as the web site were updated
+ on September 27th, 2000.  Note  that  these problems were not
+ affecting the BLAS version of the software in any way.
+
+ - 01/01/04 Version 1.0a
+ The  MPI  process grid  numbering  scheme  is now  a run-time
+ option.
+ The inlined assembly  timer  routine that caused the compila-
+ tion to fail when using  gcc  version 3.3  and above has been
+ removed from the package.
+ Various building problems on the T3E have been fixed;  Thanks
+ to Edward Anderson.
+
+ - 15/12/04 Version 1.0b
+ Weakness of the pseudo-random matrix generator found for pro-
+ blem sizes being powers of two and larger  than 2^15; Thanks
+ to Gregory Bauer. This problem has not been fixed. It is thus
+ currently recommended to  HPL  users willing to test matrices
+ of size larger than 2^15 to not use powers of two.
+
+ When the matrix size is such that one needs  > 16 GB  per MPI
+ rank,  the  intermediate  calculation  (mat.ld+1) * mat.nq in
+ HPL_pdtest.c  ends up  overflowing  because  it is done using
+ 32-bit arithmetic.  This issue has been fixed by  typecasting
+ to size_t; Thanks to John Baron.
+
+ - 09/10/08 Version 2.0
+
+ Piotr Luszczek changed to 64-bit RNG, modified files:
+ -- [M] include/hpl_matgen.h
+ -- [M] testing/matgen/HPL_ladd.c
+ -- [M] testing/matgen/HPL_lmul.c
+ -- [M] testing/matgen/HPL_rand.c
+ -- [M] testing/ptest/HPL_pdinfo.c
+
+ For a motivation for the change, see:
+    Dongarra and Langou, ``The Problem with the Linpack
+    Benchmark Matrix Generator'', LAWN 206, June 2008.
+
+ -- [M] testing/ptest/HPL_pdtest.c  --
+
+ Julien Langou changed the test for correctness from
+      ||Ax-b||_oo / ( eps * ||A||_1  * N            )
+      ||Ax-b||_oo / ( eps * ||A||_1  * ||x||_1      )
+      ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N )
+ to the normwise backward error
+      || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )
+ See:
+  Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'',
+  Society for Industrial and Applied Mathematics, Philadelphia, PA, USA,
+  Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002.
+
+ Note that in our case || b ||_oo is almost for sure
+ 1/2, we compute it anyway.
+
+ - 10/26/2012 Version 2.1
+
+ Piotr Luszczek introduced exact time stamping for HPL_pdgesv():
+ -- [M] dist/include/hpl_misc.h
+ -- [M] dist/testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek fixed out-of-bounds access in data spreading functions
+ and exact time stamping for HPL_pdgesv():
+ -- [M] dist/src/pgesv/HPL_spreadN.c
+ -- [M] dist/src/pgesv/HPL_spreadT.c
+ Thanks to Stephen Whalen from Cray.
+
+ - 02/24/2016 Version 2.2
+
+ Piotr Luszczek added continuous reporting of factorization progress
+ submitted by Intel and make scripts that use Intel software tools and
+ libraries and their Apple Mac OS X equivalents.
+
+ - 12/02/2018 Version 2.3
+
+ Piotr Luszczek removed deprecated MPI functions that are no longer
+ supported in some MPI implementations (for example Open MPI 4.0) and
+ replaced them with
+ modern equivalents in HPL_packL():
+ -- [M] src/comm/HPL_packL.c
+
+ Piotr Luszczek added one digit to the display of performance result
+ and changed display of scaled residual to scientific notation with
+ extra digits in HPL_pdtest():
+ -- [M] testing/ptest/HPL_pdtest.c
+
+ Piotr Luszczek added support for Autotools configuration packages
+ autoconf and automake:
+ -- [A] Makefile.am
+ -- [A] configure.ac
+ -- [A] acinclude.m4
+ -- [A] src/Makefile.am
+ -- [A] testing/Makefile.am
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS
new file mode 100644
index 000000000..1c5641ce4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS
@@ -0,0 +1 @@
+This software was improved with contribution of external developers.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO
new file mode 100644
index 000000000..1c2b36778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO
@@ -0,0 +1,16 @@
+==============================================================
+ High Performance Computing Linpack Benchmark (HPL)
+ HPL - 2.3 - December 2, 2018
+==============================================================
+
+ Done list in version 1.0b, December 15th, 2004
+ - Fixed problem with 32-bit integer overflow.
+   Thanks to John Baron.
+
+ Done list in version 1.0a, January 1st, 2004
+ - Added Row- or Column-major process mapping in data file
+ - Fixed compilation error for gcc 3.3 in walltime.
+ - Fixed building problems on the T3E;
+   Thanks to Edward Anderson.
+
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING
new file mode 100644
index 000000000..24707f1fc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING
@@ -0,0 +1,419 @@
+==============================================================
+ Performance Tuning and setting up the input data file HPL.dat
+ 
+ Current as of release HPL - 2.3 - December 2, 2018
+==============================================================
+ Check out  the website  www.netlib.org/benchmark/hpl  for the
+ latest information.
+
+ After  having  built  the executable hpl/bin/<arch>/xhpl, one
+ may want to modify the input  data  file  HPL.dat.  This file
+ should  reside  in  the  same  directory  as  the  executable 
+ hpl/bin/<arch>/xhpl.  An example  HPL.dat file is provided by
+ default.  This  file  contains  information about the problem
+ sizes,  machine configuration,  and  algorithm features to be
+ used by the executable. It is 31 lines long. All the selected
+ parameters  will  be  printed  in the output generated by the
+ executable.
+
+ At the end of this file,  there are a couple of experimental
+ guidelines that you may find useful.
+
+==============================================================
+ File HPL.dat (description):
+
+ Line 1: (unused) Typically  one  would  use this line for its 
+ own good. For example, it could be used to summarize the con-
+ tent of the input file. By default this line reads:
+ 
+ HPL Linpack benchmark input file
+ 
+ Line 2: (unused) same as line 1. By default this line reads:
+ 
+ Innovative Computing Laboratory, University of Tennessee
+ 
+ Line 3: the  user  can  choose where the output should be re-
+ directed to.  In the case of a file, a name is necessary, and
+ this  is  the  line  where one wants to specify it.  Only the
+ first name on this line is significant.  By default, the li-
+ ne reads:
+ 
+ HPL.out  output file name (if any)
+ 
+ This  means  that if  one chooses to redirect the output to a
+ file, the file will be called "HPL.out". The rest of the line
+ is unused;  this space can hold some  informative comment on
+ the meaning of this line.
+ 
+ Line 4: This line specifies  where the  output should go. The
+ line is formatted, it must be a positive integer, the rest is
+ insignificant.  3 choices are possible for the positive inte-
+ ger, 6 means that the output will go to the standard output, 7
+ means  that the  output will go to the standard error. Any o-
+ ther  integer  means  that  the  output  should be redirected
+ to a file,  which  name has been specified in the line above.
+ This line by default reads:
+ 
+ 6        device out (6=stdout,7=stderr,file)
+ 
+ which  means  that  the  output generated  by  the executable
+ should be redirected to the standard output.
+ 
+ Line 5: This line specifies the number of problem sizes to be
+ executed. This number should be less than or equal to 20. The
+ first  integer  is  significant,  the rest is ignored. If the 
+ line reads:
+ 
+ 3        # of problems sizes (N)
+ 
+ this  means  that  the user is willing to run 3 problem sizes
+ that will be specified in the next line.
+ 
+ Line 6:  This  line  specifies the problem sizes one wants to 
+ run.  Assuming  the  line  above  started with 3, the 3 first
+ positive  integers  are significant, the rest is ignored. For
+ example:
+ 
+ 3000 6000 10000    Ns
+ 
+ means that one wants xhpl to run 3 (specified in line 5) pro-
+ blem sizes, namely 3000, 6000 and 10000.
+ 
+ Line 7: This line  specifies  the number of block sizes to be
+ run.  This  number  should  be  less  than  or equal to  20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+ 
+ 5        # of NBs
+ 
+ this means that the user is willing to use 5 block sizes that
+ will be specified in the next line.
+ 
+ Line 8: This line specifies the block sizes one wants to run.
+ Assuming  the line above started with 5, the 5 first positive
+ integers are significant, the rest is ignored. For example:
+ 
+ 80 100 120 140 160 NBs
+ 
+ means  that  one  wants  xhpl  to use 5 (specified in line 7)
+ block sizes, namely 80, 100, 120, 140 and 160.
+
+ Line 9 specifies how the  MPI processes should be mapped onto
+ the nodes of your platform.  There are currently two possible
+ mappings, namely row- and column-major. This feature is main-
+ ly  useful  when these nodes  are  themselves multi-processor
+ computers. A row-major mapping is recommended.
+ 
+ Line 10: This line specifies  the  number of process grids to
+ be run.  This  number  should  be less than or equal to  20.
+ The first integer is significant, the rest is ignored. If the
+ line reads:
+
+ 2        # of process grids (P x Q)
+ 
+ this  means  that you are willing to try 2 process grid sizes 
+ that will be specified in the next line.
+ 
+ Line 11-12: These  two  lines specify  the  number of process
+ rows  and  columns of each grid you want to run on.  Assuming
+ the line above (10) started with 2,  the 2 first positive in-
+ tegers of those two lines are significant,  the rest is igno-
+ red. For example:
+ 
+ 1 2          Ps
+ 6 8          Qs
+ 
+ means  that one wants to run  xhpl  on  2 process grids (line
+ 10), namely 1 by 6 and 2 by 8.  Note:  In this example, it is
+ required then to start xhpl on at least 16 nodes  (max of P_i
+ xQ_i). The runs on the two grids will be consecutive.  If one
+ was starting xhpl on more than 16 nodes, say 52, only 6 would
+ be used for the first grid  (1x6) and then 16  (2x8) would be
+ used for the second grid.  The fact  that you started the MPI 
+ job on 52 nodes,  will not make HPL use all of them.  In this 
+ example, only 16 would be used. If one wants to run xhpl with
+ 52 processes one needs to specify a grid of 52 processes, for
+ example the following lines would do the job:
+ 
+ 4  2         Ps
+ 13 8         Qs
+ 
+ Line 13: This  line  specifies  the  threshold  the residuals
+ should be compared to.  The  residuals  should be of order 1,
+ but are in practice slightly less than this, typically 0.001.
+ This  line  is  made of a real number, the rest is insignifi-
+ cant. For example:
+ 
+ 16.0         threshold
+
+ In practice,  a value of 16.0 will cover most cases.  For va-
+ rious reasons,  it is possible that some of the residuals be-
+ come slightly larger, say for example 35.6.  xhpl  will  flag
+ those runs as failed,  however they can be considered as cor-
+ rect.  A run can be considered as failed if the residual is a
+ few orders of magnitude bigger than 1 for example 10^6 or mo-
+ re. Note: if one was to specify a threshold of 0.0, all tests
+ would be flagged  as failed, even though the answer is likely
+ to be correct.  It is allowed to specify a negative value for
+ this threshold,  in  which case the checks will be by-passed,
+ no matter what the value is, as soon as it is negative.  This
+ feature  allows to save time when performing a lot of experi-
+ ments, say for instance during the tuning phase. Example:
+ 
+ -16.0        threshold
+ 
+ The remaining lines allow one to specify algorithmic features.
+ xhpl  will  run  all  possible combinations of those for each
+ problem  size,  block size, process grid combination. This is
+ handy  when one looks for an "optimal" set of parameters.  To
+ understand  a little bit better,  let us first say a few words
+ about  the algorithm implemented in HPL. Basically this is  a
+ right-looking  version  with  row-partial pivoting. The panel
+ factorization is matrix-matrix operation based and recursive,
+ dividing the panel into  NDIV  subpanels  at each step.  This
+ part  of  the  panel   factorization   is  denoted  below  by
+ "recursive panel fact. (RFACT)". The recursion stops when the
+ current panel is made of less than or equal to NBMIN columns.
+ At  that  point,  xhpl  uses  a matrix-vector operation based
+ factorization denoted below by  "PFACTs".  Classic  recursion
+ would then use  NDIV=2,  NBMIN=1.  There  are  essentially  3
+ numerically  equivalent  LU  factorization algorithm variants
+ (left-looking, Crout  and  right-looking).  In  HPL,  one can 
+ choose  every one  of those  for the  RFACT,  as well as  the
+ PFACT. The following lines of HPL.dat allow you to set  those
+ parameters.
+ 
+ Lines 14-21: (Example 1)
+ 3       # of panel fact
+ 0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+ 4       # of recursive stopping criterium
+ 1 2 4 8 NBMINs (>= 1)
+ 3       # of panels in recursion
+ 2 3 4   NDIVs
+ 3       # of recursive panel fact.
+ 0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This  example  would  try all variants of PFACT, 4 values for
+ NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+ and 4, and all variants for RFACT.  Lines 14-21: (Example 2)
+
+ 2       # of panel fact
+ 2 0     PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+ 
+ This example would try  2 variants of PFACT namely right loo-
+ king and left looking, 2 values for NBMIN, namely 4 and 8,  1
+ value for NDIV namely 2, and one variant for RFACT.
+ 
+ In the  main loop of the algorithm,  the current panel of co-
+ lumns is broadcast in process rows  using a virtual  ring to-
+ pology. HPL offers various choices, and one most likely wants
+ to use the increasing ring modified encoded as 1.  4  is also
+ a good choice. Lines 22-23: (Example 1):
+
+ 1       # of broadcast
+ 1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause HPL  to broadcast the current panel using the
+ increasing ring modified topology. Lines 22-23: (Example 2):
+ 
+ 2       # of broadcast
+ 0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ 
+ This will cause  HPL to broadcast the current panel using the
+ increasing ring virtual topology and the long message algori-
+ thm.
+ 
+ Lines 24-25  allow  to  specify  the look-ahead depth used by
+ HPL. A depth of 0 means that the next panel is factorized af-
+ ter the update by the current panel is completely finished. A
+ depth of 1 means that the next panel is factorized immediate-
+ ly after being updated.  The  update by the current  panel is
+ then finished.  A depth of k means that the k next panels are
+ factorized immediately after being updated. The update by the
+ current  panel is then finished. It turns out that a depth of
+ 1  seems  to give the best results, but may need a large pro-
+ blem size  before one can see the performance gain. So use 1,
+ if you do not know better,  otherwise  you may want to try 0.
+ Look-ahead of depths 2  and larger will probably not give you
+ better results.  Lines 24-25: (Example 1):
+ 
+ 1       # of lookahead depth
+ 1       DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depth 1.
+ Lines 24-25: (Example 2):
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+ 
+ This will cause HPL to use a look-ahead of depths 0 and 1.
+
+ Lines 26-27  allow to specify  the swapping algorithm used by
+ HPL for all tests.  There  are  currently  two swapping algo-
+ rithms  available,  one  based  on "binary exchange"  and the
+ other one based on a  "spread-roll"  procedure  (also  called
+ "long" below). For large problem sizes, this last one is like-
+ ly to be more efficient. The user can also choose to mix both
+ variants, that is "binary-exchange"  for  a number of columns
+ less  than a threshold value, and then the  "spread-roll" al-
+ gorithm.  This threshold  value is then specified on Line 27.
+ Lines 26-27: (Example 1):
+
+ 1       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping algorithm.  Note  that a threshold  is specified in that
+ example but not used by HPL. Lines 26-27: (Example 2):
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold
+
+ This will cause HPL to use the "long" or  "spread-roll" swap-
+ ping  algorithm  as soon as there are more than 60 columns in
+ the row panel.  Otherwise,  the  "binary-exchange"  algorithm
+ will be used instead.
+
+ Line 28  allows  to specify whether the upper triangle of the
+ panel  of  columns  should  be  stored  in  no-transposed  or
+ transposed form. Example:
+
+ 0            L1 in (0=transposed,1=no-transposed) form
+
+ Line 29 allows to specify whether the panel of rows  U should
+ be stored in no-transposed or transposed form. Example:
+ 
+ 0            U  in (0=transposed,1=no-transposed) form
+
+ Line 30 enables/disables the equilibration phase. This option
+ will not be used unless you selected 1 or 2 in Line 26. Ex:
+
+ 1            Equilibration (0=no,1=yes)
+
+
+ Line 31  allows  to  specify  the alignment in memory for the
+ memory space allocated by HPL. On modern machines, one proba-
+ bly wants to use 4, 8 or 16. This may result in a tiny amount
+ of memory wasted. Example:
+ 
+ 4       memory alignment in double (> 0)
+
+==============================================================
+ Guidelines:
+
+ 1) Figure  out  a  good  block  size  for  the  matrix-matrix 
+ multiply routine. The best method is to try a few out. If you
+ happen  to know  the block size  used  by  the  matrix-matrix 
+ multiply routine, a small multiple of that block size will do
+ fine.
+
+ HPL  uses the block size NB for the data distribution as well
+ as  for   the  computational   granularity.   From   a   data 
+ distribution point of view,  the smaller  NB,  the better the
+ load balance.  You  definitely  want  to stay away  from very
+ large values of NB.  From a computation point of view,  a too
+ small value of  NB may limit the computational performance by
+ a large factor because almost no data reuse will occur in the
+ highest level of the memory hierarchy. The number of messages
+ will also increase.  Efficient  matrix-multiply  routines are 
+ often internally blocked.  Small multiples  of  this blocking
+ factor are likely to be good block sizes for HPL.  The bottom
+ line  is  that  "good"  block sizes  are almost always in the
+ [32..256] interval. The best values depend on the computation
+ / communication performance ratio of your system.  To  a much
+ lesser extent, the problem size  matters  as  well.  Say  for
+ example,  you empirically found that 44 was a good block size
+ with respect to performance.  88 or 132  are likely  to  give
+ slightly better  results for large problem sizes because of a
+ slightly higher flop rate.
+
+ 2)  The process mapping  should  not matter  if  the nodes of
+ your platform are single processor computers.  If these nodes
+ are multi-processors, a row-major mapping is recommended.
+
+ 3) HPL likes "square" or slightly flat process grids.  Unless
+ you  are using  a very small process grid, stay away from the 
+ 1-by-Q and P-by-1 process grids.
+
+ 4) Panel factorization parameters:  a good start are the fol-
+ lowing for the lines 14-21:
+
+ 1       # of panel fact
+ 1       PFACTs (0=left, 1=Crout, 2=Right)
+ 2       # of recursive stopping criterium
+ 4 8     NBMINs (>= 1)
+ 1       # of panels in recursion
+ 2       NDIVs
+ 1       # of recursive panel fact.
+ 2       RFACTs (0=left, 1=Crout, 2=Right)
+
+ 5) Broadcast parameters: at this time, it is far from obvious
+ to me what the best setting is,  so I would probably try them
+ all. If I had to guess I would probably start with the follo-
+ wing for the lines 22-23:
+ 
+ 2       # of broadcast
+ 1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+ The best broadcast  depends on your problem size and hardware
+ performance. My take is that 4 or 5  may be  competitive  for
+ machines  featuring  very  fast nodes  comparatively  to  the 
+ network.
+
+ 6) Look-ahead depth: as mentioned above  0 or 1 are likely to 
+ be the best choices.  This also  depends  on the problem size
+ and machine configuration, so I would try "no look-ahead (0)"
+ and "look-ahead of depth 1 (1)". That is for lines 24-25:
+ 
+ 2       # of lookahead depth
+ 0 1     DEPTHs (>=0)
+
+ 7) Swapping:  one  can select only one of the three algorithms
+ in the input file. Theoretically, mix (2) should win, however
+ long (1) might just be good enough. The  difference should be
+ small between those two assuming  a swapping threshold of the 
+ order of the block size (NB) selected. If  this  threshold is
+ very large, HPL will use bin_exch (0) most of the time and if
+ it  is  very  small  (< NB) long (1)  will always be used. In 
+ short  and  assuming  the  block size (NB)  used is say 60, I 
+ would choose for the lines 26-27:
+
+ 2       SWAP (0=bin-exch,1=long,2=mix)
+ 60      swapping threshold 
+
+ I would also try the long variant.  For  a very  small number 
+ of processes  in every column of the process grid  (say < 4),
+ very little performance difference should be observable.
+
+ 8) Local storage: I do not think Line 28 matters. Pick 0 if in
+ doubt.  Line 29 is more important.  It controls how the panel
+ of rows should be stored. No doubt 0 is better. The caveat is
+ that in that case the matrix-multiply function is called with
+ ( Notrans, Trans, ... ), that is C := C - A B^T.  Unless  the
+ computational  kernel  you  are  using  has a very poor (with
+ respect to performance)  implementation  of that case, and is
+ much more efficient with  ( Notrans, Notrans, ... ) just pick
+ 0 as well. So, my choice: 
+
+ 0       L1 in (0=transposed,1=no-transposed) form
+ 0       U  in (0=transposed,1=no-transposed) form
+
+ 9) Equilibration:  It  is hard to tell  whether equilibration
+ should always be performed or not. Not knowing much about the
+ random matrix generated and because the overhead is so small
+ compared to the possible gain, I turn it on all the time.
+
+ 1       Equilibration (0=no,1=yes)
+
+ 10) For  alignment, 4 should be plenty,  but just to be safe,
+ one may want to pick 8 instead.
+
+ 8       memory alignment in double (> 0)
+ 
+==============================================================
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4
new file mode 100644
index 000000000..4072a950f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4
@@ -0,0 +1,90 @@
+
+AC_DEFUN([HPL_BLAS], [
+dnl HPL_BLAS([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+AC_PREREQ(2.69)
+dnl Try each candidate BLAS link line below until the probed routine links; the winner is stored in BLAS_LIBS.
+hpl_blas_ok=no
+
+dnl FIXME: add --with-blas="<library spec>"
+
+current_LIBS="$LIBS"
+dnl Candidate table: nameN is the label shown, routN the symbol probed, libsN the flags put in LIBS.
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+for hpl_i in 1 2 3 4 5 6 7 8 9 10;
+do
+if test  x$hpl_blas_ok = xno; then
+  name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`"
+  rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`"
+  libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`"
+  AC_MSG_CHECKING([for [$]rout in [$]name])
+
+  LIBS="[$]libs"
+  AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"])
+  LIBS="$current_LIBS"
+
+  AC_MSG_RESULT($hpl_blas_ok)
+fi
+done
+rm hplvars.txt
+
+if test  x$hpl_blas_ok = xno; then
+dnl Last resort: link against -lopenblas alone, without the -lm of entry 1.
+AC_MSG_CHECKING([for dgemm_ in OpenBLAS])
+AC_CHECK_LIB(openblas, dgemm_, [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"])
+AC_MSG_RESULT($hpl_blas_ok)
+dnl NOTE(review): AC_CHECK_LIB reports its own checking line, so the output may look doubled - confirm.
+fi
+
+AC_SUBST(BLAS_LIBS)
+
+# Run ACTION-IF-FOUND, which defaults to defining HAVE_BLAS, or else ACTION-IF-NOT-FOUND:
+if test x"$hpl_blas_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])
+        :
+else
+        hpl_blas_ok=no
+        $2
+fi
+
+])dnl HPL_BLAS
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4
new file mode 100644
index 000000000..56c6bd753
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4
@@ -0,0 +1,1308 @@
+# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
+You have another version of autoconf.  It may work, but is not guaranteed to.
+If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically 'autoreconf'.])])
+
+# ===========================================================================
+#      https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]])
+#
+# DESCRIPTION
+#
+#   This macro tries to find out how to compile C programs that use MPI
+#   (Message Passing Interface), a standard API for parallel process
+#   communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to
+#   be used instead of the standard macro AC_PROG_CC and will replace the
+#   standard variable CC with the found compiler.
+#
+#   MPI-WANTED-TEST is used to test whether MPI is actually wanted by the
+#   user. If MPI-WANTED-TEST is omitted or if it succeeds, the macro will
+#   try to find out how to use MPI, if it fails, the macro will call
+#   AC_PROG_CC to find a standard C compiler instead.
+#
+#   When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found
+#   (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If
+#   ACTION-IF-FOUND is not set, the macro will define HAVE_MPI.
+#
+#   The following example demonstrates usage of the macro:
+#
+#     # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found.
+#     # If --with-mpi=yes is used, try to find MPI and fail if it isn't found.
+#     # If --with-mpi=no is used, use a standard C compiler instead.
+#     AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi],
+#         [compile with MPI (parallelization) support. If none is found,
+#         MPI is not used. Default: auto])
+#     ],,[with_mpi=auto])
+#     #
+#     AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[
+#       use_mpi=no
+#       if test x"$with_mpi" = xyes; then
+#         AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.])
+#       else
+#         AC_MSG_WARN([No MPI compiler found, won't use MPI.])
+#       fi
+#     ])
+#
+# LICENSE
+#
+#   Copyright (c) 2010,2011 Olaf Lenz <olenz@icp.uni-stuttgart.de>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_PROG_CC_MPI], [
+AC_PREREQ(2.50)
+
+# Select the compiler first.  This is split off into an extra macro to
+# ensure the right expansion order; that macro also records whether MPI
+# is wanted which the test below reads.
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])])
+
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno],
+  [ _ax_prog_cc_mpi_mpi_found=no ],
+  [
+    AC_LANG_PUSH([C])
+    # Test whether MPI_Init links: first with LIBS unchanged and then with
+    # -lmpi or -lmpich prepended - a successful flag stays in LIBS.
+    # AC_SEARCH_LIBS is avoided because it caches its outcome and thus
+    # disallows corresponding calls in the other AX_PROG_*_MPI macros.
+    for lib in NONE mpi mpich; do
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        AC_MSG_CHECKING([for function MPI_Init])
+      else
+        AC_MSG_CHECKING([for function MPI_Init in -l$lib])
+        LIBS="-l$lib $LIBS"
+      fi
+      AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])],
+        [ _ax_prog_cc_mpi_mpi_found=yes ],
+        [ _ax_prog_cc_mpi_mpi_found=no ])
+      AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found)
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break;
+      fi
+      LIBS=$save_LIBS
+    done
+
+    # Check that mpi.h compiles as well - if not then MPI counts as not found.
+    AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+      AC_MSG_CHECKING([for mpi.h])
+      AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <mpi.h>])],
+        [ AC_MSG_RESULT(yes)],
+        [ AC_MSG_RESULT(no)
+         _ax_prog_cc_mpi_mpi_found=no
+      ])
+    ])
+    AC_LANG_POP([C])
+])
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [
+        ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2])
+        :
+],[
+        $3
+        :
+])
+
+])dnl AX_PROG_CC_MPI
+
+dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI, split
+dnl off to ensure the right expansion order. It records whether MPI is wanted
+dnl and, if so, uses MPICC as CC (when CC is unset) or searches the tools below.
+dnl
+dnl Known MPI C compilers:
+dnl  mpicc
+dnl  mpixlc_r
+dnl  mpixlc
+dnl  hcc
+dnl  mpxlc_r
+dnl  mpxlc
+dnl  sxmpicc  NEC SX
+dnl  mpifcc   Fujitsu
+dnl  mpgcc
+dnl  mpcc
+dnl  cmpicc
+dnl  cc
+dnl
+AC_DEFUN([_AX_PROG_CC_MPI], [
+  AC_ARG_VAR(MPICC,[MPI C compiler command])
+  ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[
+    AC_MSG_CHECKING([whether to compile using MPI])
+    if $1; then
+      _ax_prog_cc_mpi_mpi_wanted=yes
+    else
+      _ax_prog_cc_mpi_mpi_wanted=no
+    fi
+    AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted)
+  ])
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    if test -z "$CC" && test -n "$MPICC"; then
+      CC="$MPICC"
+    else
+      AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc])
+    fi
+  fi
+  AC_PROG_CC
+])dnl _AX_PROG_CC_MPI
+
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_AUTOMAKE_VERSION(VERSION)
+# ----------------------------
+# Automake X.Y traces this macro to ensure aclocal.m4 has been generated
+# from the m4 files accompanying Automake X.Y.  It sets am__api_version and
+# is fatal for any VERSION but 1.16.1.  (Private: do not call outside this file.)
+AC_DEFUN([AM_AUTOMAKE_VERSION],
+[am__api_version='1.16'
+dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
+dnl require some minimum version.  Point them to the right macro.
+m4_if([$1], [1.16.1], [],
+      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
+])
+
+# _AM_AUTOCONF_VERSION(VERSION)
+# -----------------------------
+# aclocal traces this macro to find the Autoconf version; the macro itself
+# expands to nothing.  This is a private macro too.  Using m4_define
+# simplifies the logic in aclocal, which can simply ignore this definition.
+m4_define([_AM_AUTOCONF_VERSION], [])
+
+# AM_SET_CURRENT_AUTOMAKE_VERSION
+# -------------------------------
+# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
+# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
+[AM_AUTOMAKE_VERSION([1.16.1])dnl
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
+
+# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
+# $ac_aux_dir to '$srcdir/foo'.  In other projects, it is set to
+# '$srcdir', '$srcdir/..', or '$srcdir/../..'.
+#
+# Of course, Automake must honor this variable whenever it calls a
+# tool from the auxiliary directory.  The problem is that $srcdir (and
+# therefore $ac_aux_dir as well) can be either absolute or relative,
+# depending on how configure is run.  This is pretty annoying, since
+# it makes $ac_aux_dir quite unusable in subdirectories: in the top
+# source directory, any form will work fine, but in subdirectories a
+# relative path needs to be adjusted first.
+#
+# $ac_aux_dir/missing
+#    fails when called from a subdirectory if $ac_aux_dir is relative
+# $top_srcdir/$ac_aux_dir/missing
+#    fails if $ac_aux_dir is absolute,
+#    fails when called from a subdirectory in a VPATH build with
+#          a relative $ac_aux_dir
+#
+# The reason for the latter failure is that $top_srcdir and $ac_aux_dir
+# are both prefixed by $srcdir.  In an in-source build this is usually
+# harmless because $srcdir is '.', but things will break when you
+# start a VPATH build or use an absolute $srcdir.
+#
+# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
+# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
+#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
+# and then we would define $MISSING as
+#   MISSING="\${SHELL} $am_aux_dir/missing"
+# This will work as long as MISSING is not called from configure, because
+# unfortunately $(top_srcdir) has no meaning in configure.
+# However there are other variables, like CC, which are often used in
+# configure, and could therefore not use this "fixed" $ac_aux_dir.
+#
+# Another solution, used here, is to always expand $ac_aux_dir to an
+# absolute PATH.  The drawback is that using absolute paths prevents a
+# configured tree from being moved without reconfiguration.
+
+AC_DEFUN([AM_AUX_DIR_EXPAND],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+])
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional: NAME_TRUE is empty and NAME_FALSE is '#' when SHELL-CONDITION succeeds, and vice versa.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ([2.52])dnl
+ m4_if([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+       [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+
+# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be
+# written in clear, in which case automake, when reading aclocal.m4,
+# will think it sees a *use*, and therefore will trigger all its
+# C support machinery.  Also note that it means that autoscan, seeing
+# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
+
+
+# _AM_DEPENDENCIES(NAME)
+# ----------------------
+# See how the compiler implements dependency checking.
+# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ".
+# We try a few techniques and cache the chosen depmode in am_cv_NAME_dependencies_compiler_type.
+#
+# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
+# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
+# dependency, and given that the user is not expected to run this macro,
+# just rely on AC_PROG_CC.
+AC_DEFUN([_AM_DEPENDENCIES],
+[AC_REQUIRE([AM_SET_DEPDIR])dnl
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
+AC_REQUIRE([AM_MAKE_INCLUDE])dnl
+AC_REQUIRE([AM_DEP_TRACK])dnl
+
+m4_if([$1], [CC],   [depcc="$CC"   am_compiler_list=],
+      [$1], [CXX],  [depcc="$CXX"  am_compiler_list=],
+      [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
+      [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'],
+      [$1], [UPC],  [depcc="$UPC"  am_compiler_list=],
+      [$1], [GCJ],  [depcc="$GCJ"  am_compiler_list='gcc3 gcc'],
+                    [depcc="$$1"   am_compiler_list=])
+
+AC_CACHE_CHECK([dependency style of $depcc],
+               [am_cv_$1_dependencies_compiler_type],
+[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_$1_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
+  fi
+  am__universal=false
+  m4_case([$1], [CC],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac],
+    [CXX],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac])
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_$1_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_$1_dependencies_compiler_type=none
+fi
+])
+AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
+AM_CONDITIONAL([am__fastdep$1], [
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
+])
+
+
+# AM_SET_DEPDIR
+# -------------
+# Choose a directory name for dependency files: DEPDIR is "${am__leading_dot}deps".
+# This macro is AC_REQUIREd in _AM_DEPENDENCIES.
+AC_DEFUN([AM_SET_DEPDIR],
+[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
+])
+
+# AM_DEP_TRACK
+# ------------
+# Add the dependency-tracking configure switch and the AMDEP conditional.
+AC_DEFUN([AM_DEP_TRACK],
+[AC_ARG_ENABLE([dependency-tracking], [dnl
+AS_HELP_STRING(
+  [--enable-dependency-tracking],
+  [do not reject slow dependency extractors])
+AS_HELP_STRING(
+  [--disable-dependency-tracking],
+  [speeds up one-time build])])
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
+AC_SUBST([AMDEPBACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
+])
+
+# Generate code to set up dependency tracking.              -*- Autoconf -*-
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_OUTPUT_DEPENDENCY_COMMANDS
+# ------------------------------
+AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
+[{
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  AS_CASE([$CONFIG_FILES],
+          [*\'*], [eval set x "$CONFIG_FILES"],
+          [*], [set x $CONFIG_FILES])
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know of understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`AS_DIRNAME(["$am_mf"])`
+    am_filepart=`AS_BASENAME(["$am_mf"])`
+    AM_RUN_LOG([cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles]) || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).])
+  fi
+  AS_UNSET([am_dirpart])
+  AS_UNSET([am_filepart])
+  AS_UNSET([am_mf])
+  AS_UNSET([am_rc])
+  rm -f conftest-deps.mk
+}
+])# _AM_OUTPUT_DEPENDENCY_COMMANDS
+
+
+# AM_OUTPUT_DEPENDENCY_COMMANDS
+# -----------------------------
+# This macro should only be invoked once -- use via AC_REQUIRE.
+#
+# This code is only required when automatic dependency tracking is enabled
+# (it is skipped unless AMDEP_TRUE is empty).  It creates each '.Po' and
+# '.Plo' makefile fragment needed to bootstrap the dependency handling code.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
+[AC_CONFIG_COMMANDS([depfiles],
+     [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
+     [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
+
+# Do all the work for Automake.                             -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This macro actually does too much.  Some checks are only needed if
+# your package does certain things.  But this isn't really a big deal.
+
+dnl Redefine AC_PROG_CC as its previous definition followed by a call to _AM_PROG_CC_C_O.
+m4_define([AC_PROG_CC],
+m4_defn([AC_PROG_CC])
+[_AM_PROG_CC_C_O
+])
+
+# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
+# AM_INIT_AUTOMAKE([OPTIONS])
+# -----------------------------------------------
+# The call with PACKAGE and VERSION arguments is the old style
+# call (pre autoconf-2.50), which is being phased out.  PACKAGE
+# and VERSION should now be passed to AC_INIT and removed from
+# the call to AM_INIT_AUTOMAKE.
+# We support both call styles for the transition.  After
+# the next Automake release, Autoconf can make the AC_INIT
+# arguments mandatory, and then we can depend on a new Autoconf
+# release and drop the old call support.
+AC_DEFUN([AM_INIT_AUTOMAKE],
+[AC_PREREQ([2.65])dnl
+dnl Autoconf wants to disallow AM_ names.  We explicitly allow
+dnl the ones we care about.
+m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
+AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
+AC_REQUIRE([AC_PROG_INSTALL])dnl
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+AC_SUBST([CYGPATH_W])
+
+# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls.
+m4_ifval([$2],
+[AC_DIAGNOSE([obsolete],
+             [$0: two- and three-arguments forms are deprecated.])
+m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
+ AC_SUBST([PACKAGE], [$1])dnl
+ AC_SUBST([VERSION], [$2])],
+[_AM_SET_OPTIONS([$1])dnl
+dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
+m4_if(
+  m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]),
+  [ok:ok],,
+  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
+ AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
+ AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
+
+_AM_IF_OPTION([no-define],,
+[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package])
+ AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl
+
+# Some tools Automake needs.
+AC_REQUIRE([AM_SANITY_CHECK])dnl
+AC_REQUIRE([AC_ARG_PROGRAM])dnl
+AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}])
+AM_MISSING_PROG([AUTOCONF], [autoconf])
+AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}])
+AM_MISSING_PROG([AUTOHEADER], [autoheader])
+AM_MISSING_PROG([MAKEINFO], [makeinfo])
+AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([AC_PROG_MAKE_SET])dnl
+AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+	      [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+			     [_AM_PROG_TAR([v7])])])
+_AM_IF_OPTION([no-dependencies],,
+[AC_PROVIDE_IFELSE([AC_PROG_CC],
+		  [_AM_DEPENDENCIES([CC])],
+		  [m4_define([AC_PROG_CC],
+			     m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_CXX],
+		  [_AM_DEPENDENCIES([CXX])],
+		  [m4_define([AC_PROG_CXX],
+			     m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJC],
+		  [_AM_DEPENDENCIES([OBJC])],
+		  [m4_define([AC_PROG_OBJC],
+			     m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
+		  [_AM_DEPENDENCIES([OBJCXX])],
+		  [m4_define([AC_PROG_OBJCXX],
+			     m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
+])
+AC_REQUIRE([AM_SILENT_RULES])dnl
+dnl The testsuite driver may need to know about EXEEXT, so add the
+dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen.  This
+dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
+AC_CONFIG_COMMANDS_PRE(dnl
+[m4_provide_if([_AM_COMPILER_EXEEXT],
+  [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
+  fi
+fi
+dnl The trailing newline in this macro's definition is deliberate, for
+dnl backward compatibility and to allow trailing 'dnl'-style comments
+dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
+])
+
+dnl Hook into '_AC_COMPILER_EXEEXT' early (appending an m4_provide marker) to
+dnl learn its expansion.  Do not add the conditional here: the macro may be
+dnl further mangled by Autoconf and run in a shell conditional statement.
+m4_define([_AC_COMPILER_EXEEXT],
+m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
+
+# When config.status generates a header, we must update the stamp-h file,
+# which resides in the same directory as the generated config header.  Stamp
+# files are named stamp-hN, N being the header's position in $config_headers.
+
+# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
+# loop where config.status creates the headers, so we can generate
+# our stamp files there.
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Define $install_sh, unless already set, to run $am_aux_dir/install-sh via ${SHELL}.
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+AC_SUBST([install_sh])])
+
+# Copyright (C) 2003-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# Check whether the underlying file-system supports filenames
+# with a leading dot (MS-DOS, for instance, does not), and substitute
+# am__leading_dot with '.' when it does and with '_' otherwise.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+am__leading_dot=_
+if test -d .tst; then
+  am__leading_dot=.
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Check to see how 'make' treats includes.	            -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAKE_INCLUDE()
+# -----------------
+# Check whether make has an 'include' directive supporting the idioms our
+# dependency tracking needs; substitutes am__include and am__quote.
+AC_DEFUN([AM_MAKE_INCLUDE],
+[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
+  AS_CASE([$?:`cat confinc.out 2>/dev/null`],
+      ['0:this is the am__doit target'],
+      [AS_CASE([$s],
+          [BSD], [am__include='.include' am__quote='"'],
+          [am__include='include' am__quote=''])])
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+AC_MSG_RESULT([${_am_result}])
+AC_SUBST([am__include])dnl
+AC_SUBST([am__quote])])
+
+# Fake the existence of programs that GNU maintainers use.  -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MISSING_PROG(NAME, PROGRAM)
+# ------------------------------
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far and test if it is modern enough, i.e.
+# accepts '--is-lightweight'.  If so, set am_missing_run to use it, else to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling.                     -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME.  Presently that only means defining a flag for this option.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a space-separated list of Automake options.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC
+# to automatically call this.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND in a subshell, logging it and its output; save the exit status
+# in ac_status and return it.  (Adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane.    -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK
+# ---------------
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+	&& test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Enable less verbose build rules; with the default set to DEFAULT
+# ("yes" being less verbose, "no" or empty being verbose).
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in "make install-strip", and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of 'v7', 'ustar', or 'pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([acinclude.m4])
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile
new file mode 100755
index 000000000..99e50524b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile
@@ -0,0 +1,348 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+
+file_conv=
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file.  Currently only
+# supports Windows hosts: absolute non-UNC names are converted by a method
+# (mingw, cygwin, wine) chosen from 'uname -s' and cached in $file_conv.  If
+# that method is listed in the comma separated LAZY, no conversion is done.
+func_file_conv ()
+{
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+	# lazily determine how to convert abs files
+	case `uname -s` in
+	  MINGW*)
+	    file_conv=mingw
+	    ;;
+	  CYGWIN*)
+	    file_conv=cygwin
+	    ;;
+	  *)
+	    file_conv=wine
+	    ;;
+	esac
+      fi
+      case $file_conv/,$2, in
+	*,$file_conv,*)
+	  ;;
+	mingw/*)
+	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+	  ;;
+	cygwin/*)
+	  file=`cygpath -m "$file" || echo "$file"`
+	  ;;
+	wine/*)
+	  file=`winepath -w "$file" || echo "$file"`
+	  ;;
+      esac
+      ;;
+  esac
+}
+
+# func_cl_dashL linkdir
+# Add LINKDIR to cl's library search: append it to the ';'-separated
+# $lib_path and pass it to the linker via -LIBPATH.
+func_cl_dashL ()
+{
+  func_file_conv "$1"
+  case $lib_path in
+    '') lib_path=$file ;;
+    *)  lib_path="$lib_path;$file" ;;
+  esac
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl: scan the ';'-separated
+# directories of $lib_path and $LIB for LIBRARY.dll.lib (only while
+# $shared succeeds), LIBRARY.lib, then libLIBRARY.a.  $lib is set to
+# the first file found, or to LIBRARY.lib when there is none.
+func_cl_dashl ()
+{
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    IFS=$save_IFS
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      lib=$dir/$lib.dll.lib
+    elif test -f "$dir/$lib.lib"; then
+      lib=$dir/$lib.lib
+    elif test -f "$dir/lib$lib.a"; then
+      lib=$dir/lib$lib.a
+    else
+      # Nothing usable in this directory; try the next one.
+      continue
+    fi
+    found=yes
+    break
+  done
+  IFS=$save_IFS
+
+  if test "$found" != yes; then
+    lib=$lib.lib
+  fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:
+  linker_opts=
+  for arg
+  do
+    if test -n "$eat"; then
+      eat=
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"
+	      shift
+	      ;;
+	    *)
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  eat=1
+	  func_cl_dashL "$2"
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  shared=false
+	  ;;
+	-Wl,*)
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    shift
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  exit 1
+}
+
+eat=
+
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
+  icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac
+
+ofile=
+cfile=
+
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2
+	    ;;
+	  *)
+	    set x "$@" -o "$2"
+	    shift
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  shift
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file.  Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?
+
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess
new file mode 100755
index 000000000..256083a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess
@@ -0,0 +1,1476 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+#
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Only the first argument needs a look: every option
+# either prints and exits or ends option processing, so no loop is needed.
+if test $# -gt 0 ; then
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift ;;
+    - )      # Use stdin as input.
+       : ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+  esac
+fi
+
+# No operand is accepted once the option has been dealt with.
+test $# = 0 || {
+  echo "$me: too many arguments$help" >&2
+  exit 1
+}
+
+trap 'exit 1' 1 2 15
+
+# set_cc_for_build is shell code that later branches run on demand with
+# `eval "$set_cc_for_build"'.  It installs traps that remove $tmpfiles
+# and $tmp on exit and on signals 1 2 13 15; creates a private temporary
+# directory under ${TMPDIR=/tmp} (mktemp -d, else a $RANDOM-based name,
+# else an insecure $$-based one, with a warning); sets $dummy and
+# $tmpfiles; and chooses CC_FOR_BUILD: a preset value wins, then the
+# deprecated HOST_CC, then CC, then the first of cc gcc c89 c99 that
+# compiles a trivial file, else no_compiler_found.  Its last statement
+# empties the variable, so repeated evals do nothing.  Compiler-aided
+# detection is discouraged; tmp dir creation is inspired by Autoconf.
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > "$dummy.c" ;
+	for c in cc gcc c89 c99 ; do
+	  if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+	     CC_FOR_BUILD="$c"; break ;
+	  fi ;
+	done ;
+	if test x"$CC_FOR_BUILD" = x ; then
+	  CC_FOR_BUILD=no_compiler_found ;
+	fi
+	;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# On a Pyramid OSx run in the BSD universe, uname is found in /.attbin;
+# add that directory to the end of PATH when it holds a uname.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+(test -f /.attbin/uname) >/dev/null 2>&1 &&
+	{ PATH=$PATH:/.attbin ; export PATH ; }
+# Record the uname fields, falling back to "unknown" when uname fails.
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+case "$UNAME_SYSTEM" in
+Linux|GNU|GNU/*)
+	# If the system lacks a compiler, then just pick glibc.
+	# We could probably try harder.
+	LIBC=gnu
+	# Probe: preprocess the file below and keep only its "LIBC..." lines.
+	eval "$set_cc_for_build"
+	cat <<-EOF > "$dummy.c"
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`"
+	# With no compiler output the eval above is a no-op and LIBC stays gnu.
+	# If ldd exists, use it to detect musl libc.
+	if command -v ldd >/dev/null && \
+		ldd --version 2>&1 | grep -q ^musl
+	then
+	    LIBC=musl
+	fi
+	;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format.  This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name.  We always set it to "unknown".
+	sysctl="sysctl -n hw.machine_arch"
+	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
+	    "/sbin/$sysctl" 2>/dev/null || \
+	    "/usr/sbin/$sysctl" 2>/dev/null || \
+	    echo unknown)`
+	case "$UNAME_MACHINE_ARCH" in
+	    armeb) machine=armeb-unknown ;;
+	    arm*) machine=arm-unknown ;;
+	    sh3el) machine=shl-unknown ;;
+	    sh3eb) machine=sh-unknown ;;
+	    sh5el) machine=sh5le-unknown ;;
+	    earmv*)
+		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
+		machine="${arch}${endian}"-unknown
+		;;
+	    *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
+	esac
+	# The Operating System including object format, if it has switched
+	# to ELF recently (or will in the future) and ABI.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		os=netbsdelf
+		;;
+	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+		eval "$set_cc_for_build"
+		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+			| grep -q __ELF__
+		then
+		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+		    # Return netbsd for either.  FIX?
+		    os=netbsd
+		else
+		    os=netbsdelf
+		fi
+		;;
+	    *)
+		os=netbsd
+		;;
+	esac
+	# Determine ABI tags.
+	case "$UNAME_MACHINE_ARCH" in
+	    earm*)
+		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
+		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
+		;;
+	esac
+	# The OS release
+	# Debian GNU/NetBSD machines have a different userland, and
+	# thus, need a distinct triplet. However, they do not need
+	# kernel version information, so it can be replaced with a
+	# suitable tag, in the style of linux-gnu.
+	case "$UNAME_VERSION" in
+	    Debian*)
+		release='-gnu'
+		;;
+	    *)
+		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
+		;;
+	esac
+	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+	# contains redundant information, the shorter form:
+	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+	echo "$machine-${os}${release}${abi}"
+	exit ;;
+    *:Bitrig:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
+	exit ;;
+    *:OpenBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
+	exit ;;
+    *:LibertyBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
+	echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
+	exit ;;
+    *:MidnightBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
+	exit ;;
+    *:ekkoBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
+	exit ;;
+    *:SolidBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
+	exit ;;
+    macppc:MirBSD:*:*)
+	echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:MirBSD:*:*)
+	echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
+	exit ;;
+    *:Sortix:*:*)
+	echo "$UNAME_MACHINE"-unknown-sortix
+	exit ;;
+    *:Redox:*:*)
+	echo "$UNAME_MACHINE"-unknown-redox
+	exit ;;
+    mips:OSF1:*.*)
+        echo mips-dec-osf1
+        exit ;;
+    alpha:OSF1:*:*)
+	case $UNAME_RELEASE in
+	*4.0)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+		;;
+	*5.*)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		;;
+	esac
+	# According to Compaq, /usr/sbin/psrinfo has been available on
+	# OSF/1 and Tru64 systems produced since 1995.  I hope that
+	# covers most systems running today.  This code pipes the CPU
+	# types through head -n 1, so we only detect the type of CPU 0.
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case "$ALPHA_CPU_TYPE" in
+	    "EV4 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "EV4.5 (21064)")
+		UNAME_MACHINE=alpha ;;
+	    "LCA4 (21066/21068)")
+		UNAME_MACHINE=alpha ;;
+	    "EV5 (21164)")
+		UNAME_MACHINE=alphaev5 ;;
+	    "EV5.6 (21164A)")
+		UNAME_MACHINE=alphaev56 ;;
+	    "EV5.6 (21164PC)")
+		UNAME_MACHINE=alphapca56 ;;
+	    "EV5.7 (21164PC)")
+		UNAME_MACHINE=alphapca57 ;;
+	    "EV6 (21264)")
+		UNAME_MACHINE=alphaev6 ;;
+	    "EV6.7 (21264A)")
+		UNAME_MACHINE=alphaev67 ;;
+	    "EV6.8CB (21264C)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8AL (21264B)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.8CX (21264D)")
+		UNAME_MACHINE=alphaev68 ;;
+	    "EV6.9A (21264/EV69A)")
+		UNAME_MACHINE=alphaev69 ;;
+	    "EV7 (21364)")
+		UNAME_MACHINE=alphaev7 ;;
+	    "EV7.9 (21364A)")
+		UNAME_MACHINE=alphaev79 ;;
+	esac
+	# A Pn.n version is a patched version.
+	# A Vn.n version is a released version.
+	# A Tn.n version is a released field test version.
+	# A Xn.n version is an unreleased experimental baselevel.
+	# 1.2 uses "1.2" for uname -r.
+	echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`"
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
+    Amiga*:UNIX_System_V:4.0:*)
+	echo m68k-unknown-sysv4
+	exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-amigaos
+	exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+	echo "$UNAME_MACHINE"-unknown-morphos
+	exit ;;
+    *:OS/390:*:*)
+	echo i370-ibm-openedition
+	exit ;;
+    *:z/VM:*:*)
+	echo s390-ibm-zvmoe
+	exit ;;
+    *:OS400:*:*)
+	echo powerpc-ibm-os400
+	exit ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+	echo arm-acorn-riscix"$UNAME_RELEASE"
+	exit ;;
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
+	echo arm-unknown-riscos
+	exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+	echo hppa1.1-hitachi-hiuxmpp
+	exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+	if test "`(/bin/universe) 2>/dev/null`" = att ; then
+		echo pyramid-pyramid-sysv3
+	else
+		echo pyramid-pyramid-bsd
+	fi
+	exit ;;
+    NILE*:*:*:dcosx)
+	echo pyramid-pyramid-svr4
+	exit ;;
+    DRS?6000:unix:4.0:6*)
+	echo sparc-icl-nx6
+	exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+	case `/usr/bin/uname -p` in
+	    sparc) echo sparc-icl-nx7; exit ;;
+	esac ;;
+    s390x:SunOS:*:*)
+	echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4H:SunOS:5.*:*)
+	echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+	echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux"$UNAME_RELEASE"
+	exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+	eval "$set_cc_for_build"
+	SUN_ARCH=i386
+	# If there is a compiler, see if it is configured for 64-bit objects.
+	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+	# This test works for both compilers.
+	if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		SUN_ARCH=x86_64
+	    fi
+	fi
+	echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:6*:*)
+	# According to config.sub, this is the proper way to canonicalize
+	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+	# it's likely to be more like Solaris than SunOS4.
+	echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    sun4*:SunOS:*:*)
+	case "`/usr/bin/arch -k`" in
+	    Series*|S4*)
+		UNAME_RELEASE=`uname -v`
+		;;
+	esac
+	# Japanese Language versions have a version number like `4.1.3-JL'.
+	echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`"
+	exit ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos"$UNAME_RELEASE"
+	exit ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos"$UNAME_RELEASE"
+		;;
+	    sun4)
+		echo sparc-sun-sunos"$UNAME_RELEASE"
+		;;
+	esac
+	exit ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos"$UNAME_RELEASE"
+	exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+	echo m68k-atari-mint"$UNAME_RELEASE"
+	exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+	echo m68k-milan-mint"$UNAME_RELEASE"
+	exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+	echo m68k-hades-mint"$UNAME_RELEASE"
+	exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+	echo m68k-unknown-mint"$UNAME_RELEASE"
+	exit ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten"$UNAME_RELEASE"
+	exit ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix"$UNAME_RELEASE"
+	exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix"$UNAME_RELEASE"
+	exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
+	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
+	    { echo "$SYSTEM_NAME"; exit; }
+	echo mips-mips-riscos"$UNAME_RELEASE"
+	exit ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit ;;
+    AViiON:dgux:*:*)
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+	then
+	    if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \
+	       [ "$TARGET_BINARY_INTERFACE"x = x ]
+	    then
+		echo m88k-dg-dgux"$UNAME_RELEASE"
+	    else
+		echo m88k-dg-dguxbcs"$UNAME_RELEASE"
+	    fi
+	else
+	    echo i586-dg-dgux"$UNAME_RELEASE"
+	fi
+	exit ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`"
+	exit ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval "$set_cc_for_build"
+		sed 's/^		//' << EOF > "$dummy.c"
+		#include <sys/systemcfg.h>
+
+		main()
+			{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+			}
+EOF
+		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit ;;
+    *:AIX:*:[4567])
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	if [ -x /usr/bin/lslpp ] ; then
+		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
+			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+	else
+		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+	fi
+	echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
+	exit ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
+	echo romp-ibm-bsd4.4
+	exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+	echo romp-ibm-bsd"$UNAME_RELEASE"   # 4.3 with uname added to
+	exit ;;                             # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	case "$UNAME_MACHINE" in
+	    9000/31?)            HP_ARCH=m68000 ;;
+	    9000/[34]??)         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "$sc_cpu_version" in
+		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "$sc_kernel_bits" in
+			  32) HP_ARCH=hppa2.0n ;;
+			  64) HP_ARCH=hppa2.0w ;;
+			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
+			esac ;;
+		    esac
+		fi
+		if [ "$HP_ARCH" = "" ]; then
+		    eval "$set_cc_for_build"
+		    sed 's/^		//' << EOF > "$dummy.c"
+
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
+
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
+
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
+EOF
+		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	if [ "$HP_ARCH" = hppa2.0w ]
+	then
+	    eval "$set_cc_for_build"
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
+	    then
+		HP_ARCH=hppa2.0w
+	    else
+		HP_ARCH=hppa64
+	    fi
+	fi
+	echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
+	exit ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux"$HPUX_REV"
+	exit ;;
+    3050*:HI-UX:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+	     results, however.  */
+	  if (CPU_IS_PA_RISC (cpu))
+	    {
+	      switch (cpu)
+		{
+		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+		  default: puts ("hppa-hitachi-hiuxwe2"); break;
+		}
+	    }
+	  else if (CPU_IS_HP_MC68K (cpu))
+	    puts ("m68k-hitachi-hiuxwe2");
+	  else puts ("unknown-hitachi-hiuxwe2");
+	  exit (0);
+	}
+EOF
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
+		{ echo "$SYSTEM_NAME"; exit; }
+	echo unknown-hitachi-hiuxwe2
+	exit ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
+	echo hppa1.1-hp-bsd
+	exit ;;
+    9000/8??:4.3bsd:*:*)
+	echo hppa1.0-hp-bsd
+	exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+	echo hppa1.0-hp-mpeix
+	exit ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
+	echo hppa1.1-hp-osf
+	exit ;;
+    hp8??:OSF1:*:*)
+	echo hppa1.0-hp-osf
+	exit ;;
+    i*86:OSF1:*:*)
+	if [ -x /usr/sbin/sysversion ] ; then
+	    echo "$UNAME_MACHINE"-unknown-osf1mk
+	else
+	    echo "$UNAME_MACHINE"-unknown-osf1
+	fi
+	exit ;;
+    parisc*:Lites*:*:*)
+	echo hppa1.1-hp-lites
+	exit ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+	echo c1-convex-bsd
+	exit ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+	echo c34-convex-bsd
+	exit ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+	echo c38-convex-bsd
+	exit ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+	echo c4-convex-bsd
+	exit ;;
+    CRAY*Y-MP:*:*:*)
+	echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*[A-Z]90:*:*:*)
+	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
+	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+	      -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*TS:*:*:*)
+	echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*T3E:*:*:*)
+	echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*SV1:*:*:*)
+	echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    *:UNICOS/mp:*:*)
+	echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    5000:UNIX_System_V:4.*:*)
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+	echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
+	exit ;;
+    sparc*:BSD/OS:*:*)
+	echo sparc-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:BSD/OS:*:*)
+	echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
+	exit ;;
+    *:FreeBSD:*:*)
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case "$UNAME_PROCESSOR" in
+	    amd64)
+		UNAME_PROCESSOR=x86_64 ;;
+	    i386)
+		UNAME_PROCESSOR=i586 ;;
+	esac
+	echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    i*:CYGWIN*:*)
+	echo "$UNAME_MACHINE"-pc-cygwin
+	exit ;;
+    *:MINGW64*:*)
+	echo "$UNAME_MACHINE"-pc-mingw64
+	exit ;;
+    *:MINGW*:*)
+	echo "$UNAME_MACHINE"-pc-mingw32
+	exit ;;
+    *:MSYS*:*)
+	echo "$UNAME_MACHINE"-pc-msys
+	exit ;;
+    i*:PW*:*)
+	echo "$UNAME_MACHINE"-pc-pw32
+	exit ;;
+    *:Interix*:*)
+	case "$UNAME_MACHINE" in
+	    x86)
+		echo i586-pc-interix"$UNAME_RELEASE"
+		exit ;;
+	    authenticamd | genuineintel | EM64T)
+		echo x86_64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	    IA64)
+		echo ia64-unknown-interix"$UNAME_RELEASE"
+		exit ;;
+	esac ;;
+    i*:UWIN*:*)
+	echo "$UNAME_MACHINE"-pc-uwin
+	exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+	echo x86_64-unknown-cygwin
+	exit ;;
+    prep*:SunOS:5.*:*)
+	echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+	exit ;;
+    *:GNU:*:*)
+	# the GNU system
+	echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`"
+	exit ;;
+    *:GNU/*:*:*)
+	# other systems with GNU libc and userland
+	echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
+	exit ;;
+    i*86:Minix:*:*)
+	echo "$UNAME_MACHINE"-pc-minix
+	exit ;;
+    aarch64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    alpha:Linux:*:*)
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac
+	objdump --private-headers /bin/sh | grep -q ld.so.1
+	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    arm*:Linux:*:*)
+	eval "$set_cc_for_build"
+	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+	    | grep -q __ARM_EABI__
+	then
+	    echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	else
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
+	    else
+		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
+	    fi
+	fi
+	exit ;;
+    avr32*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    cris:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    crisv32:Linux:*:*)
+	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+	exit ;;
+    e2k:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    frv:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    hexagon:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    ia64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    k1om:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m32r*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    m68*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
+	eval "$set_cc_for_build"
+	sed 's/^	//' << EOF > "$dummy.c"
+	#undef CPU
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=${UNAME_MACHINE}el
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=${UNAME_MACHINE}
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
+	test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
+	;;
+    mips64el:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    openrisc*:Linux:*:*)
+	echo or1k-unknown-linux-"$LIBC"
+	exit ;;
+    or32:Linux:*:* | or1k*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    padre:Linux:*:*)
+	echo sparc-unknown-linux-"$LIBC"
+	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-"$LIBC"
+	exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+	# Look for CPU level
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
+	  PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
+	  *)    echo hppa-unknown-linux-"$LIBC" ;;
+	esac
+	exit ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-"$LIBC"
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-"$LIBC"
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-"$LIBC"
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-"$LIBC"
+	exit ;;
+    riscv32:Linux:*:* | riscv64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+	echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
+	exit ;;
+    sh64*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sh*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    tile*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    vax:Linux:*:*)
+	echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
+	exit ;;
+    x86_64:Linux:*:*)
+	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+	exit ;;
+    xtensa*:Linux:*:*)
+	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	exit ;;
+    i*86:DYNIX/ptx:4*:*)
+	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+	# earlier versions are messed up and put the nodename in both
+	# sysname and nodename.
+	echo i386-sequent-sysv4
+	exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
+	# I just have to hope.  -- rms.
+	# Use sysv4.2uw... so that sysv4* matches it.
+	echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
+	exit ;;
+    i*86:OS/2:*:*)
+	# If we were able to find `uname', then EMX Unix compatibility
+	# is probably installed.
+	echo "$UNAME_MACHINE"-pc-os2-emx
+	exit ;;
+    i*86:XTS-300:*:STOP)
+	echo "$UNAME_MACHINE"-unknown-stop
+	exit ;;
+    i*86:atheos:*:*)
+	echo "$UNAME_MACHINE"-unknown-atheos
+	exit ;;
+    i*86:syllable:*:*)
+	echo "$UNAME_MACHINE"-pc-syllable
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+	echo i386-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    i*86:*DOS:*:*)
+	echo "$UNAME_MACHINE"-pc-msdosdjgpp
+	exit ;;
+    i*86:*:4.*:*)
+	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
+	fi
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6: refine the CPU from uname -X, then emit release, system and version.
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac
+	echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
+	exit ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+	else
+		echo "$UNAME_MACHINE"-pc-sysv32
+	fi
+	exit ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configure will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+	  echo i860-unknown-sysv"$UNAME_RELEASE"  # Unknown i860-SVR4
+	fi
+	exit ;;
+    mini*:CTIX:SYS*5:*)
+	# "miniframe"
+	echo m68010-convergent-sysv
+	exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+	echo m68k-convergent-sysv
+	exit ;;
+    M680?0:D-NIX:5.3:*)
+	echo m68k-diab-dnix
+	exit ;;
+    M68*:*:R3V[5678]*:*)
+	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+	OS_REL=''
+	test -r /etc/.relid \
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+	OS_REL='.3'
+	test -r /etc/.relid \
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+	echo m68k-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    mc68030:UNIX_System_V:4.*:*)
+	echo m68k-atari-sysv4
+	exit ;;
+    TSUNAMI:LynxOS:2.*:*)
+	echo sparc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    rs6000:LynxOS:2.*:*)
+	echo rs6000-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+	echo powerpc-unknown-lynxos"$UNAME_RELEASE"
+	exit ;;
+    SM[BE]S:UNIX_SV:*:*)
+	echo mips-dde-sysv"$UNAME_RELEASE"
+	exit ;;
+    RM*:ReliantUNIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    RM*:SINIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    *:SINIX-*:*:*)
+	if uname -p 2>/dev/null >/dev/null ; then
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		echo "$UNAME_MACHINE"-sni-sysv4
+	else
+		echo ns32k-sni-sysv
+	fi
+	exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
+    *:UNIX_System_V:4*:FTX*)
+	# From Gerald Hewes <hewes@openmarket.com>.
+	# How about differentiating between stratus architectures? -djm
+	echo hppa1.1-stratus-sysv4
+	exit ;;
+    *:*:*:FTX*)
+	# From seanf@swdc.stratus.com.
+	echo i860-stratus-sysv4
+	exit ;;
+    i*86:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo "$UNAME_MACHINE"-stratus-vos
+	exit ;;
+    *:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo hppa1.1-stratus-vos
+	exit ;;
+    mc68*:A/UX:*:*)
+	echo m68k-apple-aux"$UNAME_RELEASE"
+	exit ;;
+    news*:NEWS-OS:6*:*)
+	echo mips-sony-newsos6
+	exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+	if [ -d /usr/nec ]; then
+		echo mips-nec-sysv"$UNAME_RELEASE"
+	else
+		echo mips-unknown-sysv"$UNAME_RELEASE"
+	fi
+	exit ;;
+    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
+	echo powerpc-be-beos
+	exit ;;
+    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
+	echo powerpc-apple-beos
+	exit ;;
+    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
+	echo i586-pc-beos
+	exit ;;
+    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
+	echo i586-pc-haiku
+	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    SX-ACE:SUPER-UX:*:*)
+	echo sxace-nec-superux"$UNAME_RELEASE"
+	exit ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Rhapsody:*:*)
+	echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
+	exit ;;
+    *:Darwin:*:*)
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	eval "$set_cc_for_build"
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
+	    if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_64BIT_ARCH >/dev/null
+		then
+		    case $UNAME_PROCESSOR in
+			i386) UNAME_PROCESSOR=x86_64 ;;
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		    esac
+		fi
+		# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
+		if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
+		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		       grep IS_PPC >/dev/null
+		then
+		    UNAME_PROCESSOR=powerpc
+		fi
+	    fi
+	elif test "$UNAME_PROCESSOR" = i386 ; then
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
+	    # that puts up a graphical alert prompting to install
+	    # developer tools.  Any system running Mac OS X 10.7 or
+	    # later (Darwin 11 and later) is required to have a 64-bit
+	    # processor. This is not true of the ARM version of Darwin
+	    # that Apple uses in portable devices.
+	    UNAME_PROCESSOR=x86_64
+	fi
+	echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
+	exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = x86; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
+	exit ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit ;;
+    NEO-*:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSE-*:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSR-*:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSV-*:NONSTOP_KERNEL:*:*)
+	echo nsv-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    NSX-*:NONSTOP_KERNEL:*:*)
+	echo nsx-tandem-nsk"$UNAME_RELEASE"
+	exit ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit ;;
+    DS/*:UNIX_System_V:*:*)
+	echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
+	exit ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = 386; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo "$UNAME_MACHINE"-unknown-plan9
+	exit ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit ;;
+    SEI:*:*:SEIUX)
+	echo mips-sei-seiux"$UNAME_RELEASE"
+	exit ;;
+    *:DragonFly:*:*)
+	echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "$UNAME_MACHINE" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`"
+	exit ;;
+    i*86:rdos:*:*)
+	echo "$UNAME_MACHINE"-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo "$UNAME_MACHINE"-pc-aros
+	exit ;;
+    x86_64:VMkernel:*:*)
+	echo "$UNAME_MACHINE"-unknown-esx
+	exit ;;
+    amd64:Isilon\ OneFS:*:*)
+	echo x86_64-unknown-onefs
+	exit ;;
+esac
+
+echo "$0: unable to guess system type" >&2
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM" in
+    mips:Linux | mips64:Linux)
+	# MIPS GNU/Linux is identified by running the C preprocessor, so hint at a missing compiler.
+	cat >&2 <<EOF
+
+NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
+the system type. Please install a C compiler and try again.
+EOF
+	;;
+esac
+# Print the generic failure report on stderr, including the raw uname data to send upstream.
+cat >&2 <<EOF
+
+This script (version $timestamp), has failed to recognize the
+operating system you are using. If your script is old, overwrite *all*
+copies of config.guess and config.sub with the latest versions from:
+
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+and
+  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+If $0 has already been updated, send the following data and any
+information you think might be pertinent to config-patches@gnu.org to
+provide the necessary information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = "$UNAME_MACHINE"
+UNAME_RELEASE = "$UNAME_RELEASE"
+UNAME_SYSTEM  = "$UNAME_SYSTEM"
+UNAME_VERSION = "$UNAME_VERSION"
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub
new file mode 100755
index 000000000..9ccf09a7a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub
@@ -0,0 +1,1801 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`  # script name with any leading directory stripped
+# Text printed by --help.
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+
+Canonicalize a configuration name.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+# Text printed by --version.
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+# Hint appended to error messages; the value starts with a newline.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Informational options print to stdout, then exit.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Lone dash: stop option parsing and leave it as the argument.
+       break ;;
+    -* )
+       # Unknown option: diagnose on stderr, like the argument-count errors.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    *local*)
+       # First pass through any local machine types.
+       echo "$1"
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Split $1 into basic_machine (CPU or CPU-COMPANY) and os (OS or KERNEL-OS, kept
+# with a leading dash).  Every valid KERNEL-OS pair must be listed in the first arm.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`  # last two dash-separated fields, or $1 unchanged
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
+  kopensolaris*-gnu* | cloudabi*-eabi* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  android-linux)  # reversed spelling: os becomes -linux-android and -unknown is appended to the machine
+    os=-linux-android
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
+  *)  # no known KERNEL-OS pair: treat only the last dash-separated field as the OS
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+    if [ "$basic_machine" != "$1" ]
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis | -knuth | -cray | -microblaze*)
+		os=
+		basic_machine=$1
+		;;
+	-bluegene*)
+		os=-cnk
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=
+		basic_machine=$1
+		;;
+	-scout)	# No-op arm; it must precede -sco*) below, whose glob would also match -scout.
+		;;
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos
+		basic_machine=$1
+		;;
+	-chorusrdb)
+		os=-chorusrdb
+		basic_machine=$1
+		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)	# e.g. -sco3.2.4 is rewritten to -sco3.2v4
+		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# 3.2v4 or newer: keep the version in $os; only the machine is normalized.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5v6*)
+		# Keep the sco5v6 suffix in $os as given; only the machine is normalized.
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)	# bare -isc defaults to -isc2.2; versioned -isc* is handled further down
+		os=-isc2.2
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
+	-lynx*)
+		os=-lynxos
+		;;
+	-ptx*)
+		basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'`
+		;;
+	-psos*)
+		os=-psos
+		;;
+	-mint | -mint[0-9]*)	# any -mint version collapses to plain -mint on m68k-atari
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| aarch64 | aarch64_be \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arceb \
+	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+	| avr | avr32 \
+	| ba \
+	| be32 | be64 \
+	| bfin \
+	| c4x | c8051 | clipper \
+	| d10v | d30v | dlx | dsp16xx \
+	| e2k | epiphany \
+	| fido | fr30 | frv | ft32 \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
+	| i370 | i860 | i960 | ia16 | ia64 \
+	| ip2k | iq2000 \
+	| k1om \
+	| le32 | le64 \
+	| lm32 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64octeon | mips64octeonel \
+	| mips64orion | mips64orionel \
+	| mips64r5900 | mips64r5900el \
+	| mips64vr | mips64vrel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa32r6 | mipsisa32r6el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64r6 | mipsisa64r6el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipsr5900 | mipsr5900el \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| moxie \
+	| mt \
+	| msp430 \
+	| nds32 | nds32le | nds32be \
+	| nios | nios2 | nios2eb | nios2el \
+	| ns16k | ns32k \
+	| open8 | or1k | or1knd | or32 \
+	| pdp10 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
+	| pru \
+	| pyramid \
+	| riscv32 | riscv64 \
+	| rl78 | rx \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+	| ubicom32 \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+	| visium \
+	| wasm32 \
+	| x86 | xc16x | xstormy16 | xtensa \
+	| z8k | z80)
+		basic_machine=$basic_machine-unknown
+		;;
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	leon|leon[3-9])
+		basic_machine=sparc-$basic_machine
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65)
+		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| aarch64-* | aarch64_be-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* | avr32-* \
+	| ba-* \
+	| be32-* | be64-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
+	| c8051-* | clipper-* | craynv-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| e2k-* | elxsi-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
+	| i*86-* | i860-* | i960-* | ia16-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| k1om-* \
+	| le32-* | le64-* \
+	| lm32-* \
+	| m32c-* | m32r-* | m32rle-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+	| microblaze-* | microblazeel-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64octeon-* | mips64octeonel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64r5900-* | mips64r5900el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa32r6-* | mipsisa32r6el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64r6-* | mipsisa64r6el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipsr5900-* | mipsr5900el-* \
+	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
+	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
+	| nios-* | nios2-* | nios2eb-* | nios2el-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
+	| or1k*-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+	| pru-* \
+	| pyramid-* \
+	| riscv32-* | riscv64-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
+	| tron-* \
+	| ubicom32-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
+	| visium-* \
+	| wasm32-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
+	| xstormy16-* | xtensa*-* \
+	| ymp-* \
+	| z8k-* | z80-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-pc
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aros)
+		basic_machine=i386-pc
+		os=-aros
+		;;
+	asmjs)
+		basic_machine=asmjs-unknown
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	bluegene*)
+		basic_machine=powerpc-ibm
+		os=-cnk
+		;;
+	c54x-*)
+		basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	cegcc)
+		basic_machine=arm-unknown
+		os=-cegcc
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16 | cr16-*)
+		basic_machine=cr16-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	dicos)
+		basic_machine=i686-pc
+		os=-dicos
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2*)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	e500v[12])
+		basic_machine=powerpc-unknown
+		os=$os"spe"
+		;;
+	e500v[12]-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=$os"spe"
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+	i*86v32)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	leon-*|leon[3-9]-*)
+		basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'`
+		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	microblaze*)
+		basic_machine=microblaze-xilinx
+		;;
+	mingw64)
+		basic_machine=x86_64-pc
+		os=-mingw64
+		;;
+	mingw32)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mips3*-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	moxiebox)
+		basic_machine=moxie-unknown
+		os=-moxiebox
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	ms1-*)
+		basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'`
+		;;
+	msys)
+		basic_machine=i686-pc
+		os=-msys
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next)
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	nsv-tandem)
+		basic_machine=nsv-tandem
+		;;
+	nsx-tandem)
+		basic_machine=nsx-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	openrisc | openrisc-*)
+		basic_machine=or32-unknown
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc | ppcbe)	basic_machine=powerpc-unknown
+		;;
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rdos | rdos64)
+		basic_machine=x86_64-pc
+		os=-rdos
+		;;
+	rdos32)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
+	simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tile*)
+		basic_machine=$basic_machine-unknown
+		os=-linux-gnu
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	x64)
+		basic_machine=x86_64-pc
+		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'`
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+
+# Here we handle the default manufacturer of certain CPU types.  It is in
+# some cases the only manufacturer, in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers ("digital..." becomes "dec", "commodore..." becomes "cbm").
+case $basic_machine in
+	*-digital*)
+		basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)
+		basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'`
+		;;
+	*)	# every other machine name is left as it is
+		;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+	# First match some system type aliases that might get confused
+	# with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-auroraux)
+		os=-auroraux
+		;;
+	-solaris1 | -solaris1.*)
+		os=`echo $os | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# es1800 is here to avoid being matched by es* (a different OS)
+	-es1800*)
+		os=-ose
+		;;
+	# Now accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST end in a * to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+	      | -sym* | -kopensolaris* | -plan9* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* | -aros* | -cloudabi* | -sortix* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \
+	      | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
+	      | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \
+	      | -midnightbsd*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os	# on machines other than x86-*/i*86-*, -qnx* becomes -nto-qnx*
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo $os | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -xray | -os68k* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)	# -macos* was accepted by the arm above, so only other -mac* spellings get here
+		os=`echo "$os" | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)	# any -linux* spelling not accepted earlier in this case
+		os=`echo $os | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+	-os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2)
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo $os | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+	-tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-dicos*)
+		os=-dicos
+		;;
+	-pikeos*)
+		# Until real need of OS specific support for
+		# particular features comes up, bare metal
+		# configurations are quite functional.
+		case $basic_machine in
+		    arm*)
+			os=-eabi
+			;;
+		    *)
+			os=-elf
+			;;
+		esac
+		;;
+	-nacl*)
+		;;
+	-ios)
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo $os | sed 's/[^-]*-//'`
+		echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or put another way, the most popular os provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+	score-*)
+		os=-elf
+		;;
+	spu-*)
+		os=-elf
+		;;
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	c8051-*)
+		os=-elf
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
+		;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+	mep-*)
+		os=-elf
+		;;
+	mips*-cisco)	# same default as the mips*-* arm that follows
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	pru-*)
+		os=-elf
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-ibm)
+		os=-aix
+		;;
+	*-knuth)
+		os=-mmixware
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next)
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)	# no machine-specific default matched
+		os=-none
+		;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer.  We pick the logical manufacturer.
+vendor=unknown	# kept when no OS pattern below selects a vendor
+case $basic_machine in
+	*-unknown)
+		case $os in
+			-riscix*)
+				vendor=acorn
+				;;
+			-sunos*)
+				vendor=sun
+				;;
+			-cnk*|-aix*)
+				vendor=ibm
+				;;
+			-beos*)
+				vendor=be
+				;;
+			-hpux*)
+				vendor=hp
+				;;
+			-mpeix*)
+				vendor=hp
+				;;
+			-hiux*)
+				vendor=hitachi
+				;;
+			-unos*)
+				vendor=crds
+				;;
+			-dgux*)
+				vendor=dg
+				;;
+			-luna*)
+				vendor=omron
+				;;
+			-genix*)
+				vendor=ns
+				;;
+			-mvs* | -opened*)
+				vendor=ibm
+				;;
+			-os400*)
+				vendor=ibm
+				;;
+			-ptx*)
+				vendor=sequent
+				;;
+			-tpf*)
+				vendor=ibm
+				;;
+			-vxsim* | -vxworks* | -windiss*)
+				vendor=wrs
+				;;
+			-aux*)
+				vendor=apple
+				;;
+			-hms*)
+				vendor=hitachi
+				;;
+			-mpw* | -macos*)
+				vendor=apple
+				;;
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+				vendor=atari
+				;;
+			-vos*)
+				vendor=stratus
+				;;
+		esac
+		basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"`	# swaps the first "unknown" for $vendor
+		;;
+esac
+
+echo "$basic_machine$os"	# print the canonicalized machine name followed by the OS suffix
+exit
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure
new file mode 100755
index 000000000..ed0b4faa0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure
@@ -0,0 +1,6161 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69 for hpl 2.3.
+#
+# Report bugs to <hpl@icl.utk.edu>.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible: zsh is put into sh emulation; other shells get "set -o posix" when "set -o" lists it.
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'  # "print -r" reproduced the test string held in $as_echo
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'  # "printf %s" reproduced the test string held in $as_echo
+  as_echo_n='printf %s'
+else  # neither print nor printf passed: build helper commands that are run through "sh -c"
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right: PATH_SEPARATOR is only probed when it is not already set.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:  # default; replaced by ";" below only if sh is found via '/bin;/bin' but not via '/bin:/bin'
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to an empty value.)
+IFS=" ""	$as_nl"  # $as_nl was assigned a literal newline earlier in this script
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;  # $0 contains a slash or backslash: use it as given
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is treated as the current directory
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then  # the resolved name must be an existing regular file
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances: set and export LC_ALL and LANGUAGE as "C".
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH: unset it, but only after a trial unset in a subshell succeeded.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255  # plain exit: the as_fn_exit function is only defined further down in this script
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then  # CONFIG_SHELL is empty: check whether the running shell is adequate
+  as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+  as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+  if (eval "$as_required") 2>/dev/null; then :  # run the required feature tests in a subshell
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :  # empty branch: the running shell passed both test sets
+
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  as_found=:
+  case $as_dir in #(
+	 /*)
+	   for as_base in sh bash ksh sh5; do
+	     # Try only shells that exist, to save several forks.
+	     as_shell=$as_dir/$as_base
+	     if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		    { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+		   if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  break 2
+fi
+fi
+	   done;;
+       esac
+  as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+	      { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+      if test "x$CONFIG_SHELL" != x; then :  # a candidate shell was selected above: re-execute this script under it
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+    if test x$as_have_required = xno; then :  # no shell passed the required tests: report it and exit 1
+  $as_echo "$0: This script requires a shell more modern than all"
+  $as_echo "$0: the shells that I found on your system."
+  if test x${ZSH_VERSION+set} = xset ; then
+    $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu
+$0: about your system, including any error possibly output
+$0: before this message. Then install a modern shell, or
+$0: manually run the script under such a shell if you do
+$0: have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}  # value of CONFIG_SHELL when it is set (even if empty), otherwise /bin/sh
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=  # assigned empty first; presumably so the unset below never targets a nonexistent variable
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR: assign it the empty string first, then unset it.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset  # $as_unset holds the name of the function defined above
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1  # the function's return status is what the caller then sees in $?
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e  # turn errexit off; presumably so the non-zero status set on the next line cannot end the shell early
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary (the path is read from the variable as_dir, not from $1).
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"  # prepend, so the list ends up ordered from outermost to innermost directory
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break  # stop climbing at the first ancestor that already exists
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"  # -f: regular file, -x: executable; the combined status is the function's result
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :  # probe, in a subshell, whether += appends
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2  # fallback without +=: reassign VAR as its old value followed by VALUE
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :   # probe in a subshell whether arithmetic expansion works
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else   # fallback: fork expr for every evaluation
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`   # expr status 1 (null or zero result) is mapped to success
+  }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1   # a STATUS of 0 is replaced by 1
+  if test "$4"; then   # LOG_FD supplied: write a line-numbered copy of the message there first
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2   # the stderr copy is always written
+  as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&   # expr must support a regex match with a capture group ...
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then   # ... and print the captured text exactly as 001
+  as_expr=expr
+else
+  as_expr=false   # the false command makes every later as_expr invocation fail
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then   # basename must accept -- and map / to /
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then   # same requirement for dirname
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO   # LINENO sampled on this line ...
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO   # ... and on the next one
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {   # samples must differ by exactly 1, else take the fallback below
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |   # pass 1: print every line, followed by its line number when it mentions LINENO
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&   # pass 2: substitute those numbers into the text and save the result
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec as it changes $[0], causing all sorts of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"   # sourced in the current shell rather than exec-ed
+  # Exit status is that of the last command.
+  exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)   # echo printed the -n itself, so probe the \c escape instead
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;   # output was exactly xy: the \c escape was consumed
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;   # echo did not print the -n, so the flag is usable
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file   # remove leftovers of the scratch names used by this probe
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then   # can a scratch file be created here at all?
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then   # symlink failed: try a hard link
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'   # scratch file could not be created: fall back to copying
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file   # clean up every scratch file ...
+rmdir conf$$.dir 2>/dev/null   # ... and the scratch directory
+
+if mkdir -p . 2>/dev/null; then   # probe whether mkdir accepts -p
+  as_mkdir_p='mkdir -p "$as_dir"'   # stored unexpanded; as_fn_mkdir_p runs it through eval
+else
+  test -d ./-p && rmdir ./-p   # remove a directory literally named -p if the probe left one behind
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null   # unless DJDIR is set: keep the original stdin on fd 7, then read stdin from /dev/null
+exec 6>&1   # fd 6 becomes a duplicate of stdout
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='hpl'
+PACKAGE_TARNAME='hpl'
+PACKAGE_VERSION='2.3'
+PACKAGE_STRING='hpl 2.3'
+PACKAGE_BUGREPORT='hpl@icl.utk.edu'
+PACKAGE_URL=''
+
+ac_unique_file="include/hpl.h"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+EGREP
+GREP
+CPP
+BLAS_LIBS
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__include
+DEPDIR
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+RANLIB
+OBJEXT
+EXEEXT
+CPPFLAGS
+LDFLAGS
+CFLAGS
+ac_ct_CC
+CC
+MPICC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL
+am__quote'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_dependency_tracking
+enable_silent_rules
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+MPICC
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=       # name of the variable still waiting for its value, if any
+ac_dashdash=   # set to yes by the -- arm further down
+for ac_option
+do   # a for without an in-list walks the positional parameters
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;   # everything after the first =
+  *=)   ac_optarg= ;;      # trailing = with nothing after it: empty value
+  *)    ac_optarg=yes ;;   # no = at all
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`   # text after the disable- prefix
+    # Reject names containing anything but alphanumerics, _, -, + and . (the last three become _ below).
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt   # original spelling, kept for the diagnostic
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;   # name appears on its own line in ac_user_opts: recognized
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;   # later entries are comma-separated
+    esac
+    eval enable_$ac_useropt=no ;;   # the variable is set even when the name was not recognized
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`   # text after the enable- prefix, up to any =
+    # Reject names containing anything but alphanumerics, _, -, + and . (the last three become _ below).
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt   # original spelling, kept for the diagnostic
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;   # name appears on its own line in ac_user_opts: recognized
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;   # later entries are comma-separated
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;   # value is the text after =, or yes when none was given
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --program-tra | --progr-tra | --program-tr | --program-t)   # --progr-tra is a historical misspelling, still accepted
+    ac_prev=program_transform_name ;;   # the value arrives as the next argument
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --program-tra=* | --progr-tra=* | --program-tr=* | --program-t=*)   # same abbreviations in OPTION=VALUE form
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`   # text after the with- prefix, up to any =
+    # Reject names containing anything but alphanumerics, _, -, + and . (the last three become _ below).
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt   # original spelling, kept for the diagnostic
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;   # name appears on its own line in ac_user_opts: recognized
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;   # later entries are comma-separated
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;   # value is the text after =, or yes when none was given
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`   # text after the without- prefix
+    # Reject names containing anything but alphanumerics, _, -, + and . (the last three become _ below).
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt   # original spelling, kept for the diagnostic
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;   # name appears on its own line in ac_user_opts: recognized
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;   # later entries are comma-separated
+    esac
+    eval with_$ac_useropt=no ;;   # the variable is set even when the name was not recognized
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  *=*)   # VAR=VALUE given on the command line
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`   # text before the first =
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;   # exported, so child processes see the setting
+
+  *)   # bare word: treated as a system type
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"   # each alias is assigned only if still unset
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then   # the last option was still waiting for its argument
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`   # turn the variable name back into option spelling
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;   # checking disabled: say nothing
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;   # any other value, including unset: warn only
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir
+do
+  eval ac_val=\$$ac_var   # indirect read of the variable named by ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`   # second alternative keeps values made only of slashes
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;   # starts with a slash, backslash or dollar sign, or with a drive letter and colon
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;   # NONE or empty is accepted only for the two prefix variables
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&   # pwd must succeed and print something
+ac_ls_di=`ls -di .` &&   # ls -di listing of the current directory
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||   # the same listing after cd-ing to the path pwd reported
+  as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||   # the two listings must be identical
+  as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes   # remembered for the error message further down
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_myself" : 'X\(//\)[^/]' \| \
+	 X"$as_myself" : 'X\(//\)$' \| \
+	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then   # marker file not readable there: fall back to the parent directory
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no   # srcdir was supplied by the caller
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."   # rewritten only so the message names both guesses
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;   # second alternative keeps a value made only of slashes
+esac
+for ac_var in $ac_precious_vars; do   # unquoted on purpose: the list is split on whitespace
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}      # the word set if the variable is set, even to empty; otherwise empty
+  eval ac_env_${ac_var}_value=\$${ac_var}          # its current value
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}   # same two facts again under the ac_cv_env_ prefix
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures hpl 2.3 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking ...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/hpl]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of hpl 2.3:";;
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-dependency-tracking
+                          do not reject slow dependency extractors
+  --disable-dependency-tracking
+                          speeds up one-time build
+  --enable-silent-rules   less verbose build output (undo: "make V=1")
+  --disable-silent-rules  verbose build output (undo: "make V=0")
+
+Some influential environment variables:
+  MPICC       MPI C compiler command
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  CPP         C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <hpl@icl.utk.edu>.
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue   # the leading : placeholder is skipped here
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||   # not found here: retry from the source directory
+      continue
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }   # record the failure and move on to the next subdirectory
+    # Check for guested configure.
+    if test -f "$ac_srcdir/configure.gnu"; then   # configure.gnu is tried before configure
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?   # keep the failing status; the loop continues
+    cd "$ac_pwd" || { ac_status=$?; break; }   # cannot return to the starting directory: stop iterating
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+  cat <<\_ACEOF
+hpl configure 2.3
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists (i.e. a call to it links), setting the cache variable VAR accordingly.
+ac_fn_c_check_func ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if eval \${$3+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# Reconcile the compiler and preprocessor verdicts for this header.
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ------------------------------ ##
+## Report this to hpl@icl.utk.edu ##
+## ------------------------------ ##"
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext and run the resulting executable, and return
+# whether this succeeded. Assumes that executables *can* be run.
+ac_fn_c_try_run ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=$ac_status
+fi
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    $as_echo "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+	    esac
+	    ;;
+	  -* ) ac_must_keep_next=true ;;
+	esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, clean up temporary files, and complete
+# config.log.  We remove comments because the quotes in there would
+# cause problems or look ugly anyway.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	$as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  # We do not want a PATH search for config.site.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;
+  esac
+elif test "x$prefix" != xNONE; then
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (or any special
+  # file, actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	# differences in whitespace do not lead to failure.
+	ac_old_val_w=`echo x $ac_old_val`
+	ac_new_val_w=`echo x $ac_new_val`
+	if test "$ac_old_val_w" != "$ac_new_val_w"; then
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	  ac_cache_corrupted=:
+	else
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+	  eval $ac_var=\$ac_old_val
+	fi
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+ac_config_headers="$ac_config_headers include/hplconfig.h"
+
+
+ac_aux_dir=
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+  if test -f "$ac_dir/install-sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
+done
+if test -z "$ac_aux_dir"; then
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
+
+
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+
+
+  _ax_prog_cc_mpi_mpi_wanted=yes
+  if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then
+    if test -z "$CC" && test -n "$MPICC"; then
+      CC="$MPICC"
+    else
+      if test -n "$ac_tool_prefix"; then
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+    fi
+  fi
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then # CC still empty: try ${ac_tool_prefix}cc, but only if a tool prefix is set
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first hit wins: leave the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then # CC still empty: look for a plain cc, refusing /usr/ucb/cc
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue # skip this candidate but remember that it was seen
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first accepted hit wins: leave both loops
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then # non-zero only when ac_cv_prog_CC is non-empty
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then # CC still empty: last resort is cl.exe, prefixed first and then unprefixed
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first hit wins: leave the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break # stop at the first candidate name that was found
+  done
+fi
+if test -z "$CC"; then # prefixed lookup found nothing (or no prefix is set): try the bare name
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first hit wins: leave the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break # stop at the first candidate name that was found
+done
+
+  if test "x$ac_ct_CC" = x; then # nothing found: leave CC empty
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in # matches only when cross compiling and not yet warned
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2 # first word of $ac_compile; it is expanded later by the eval below
+for ac_option in --version -v -V -qversion; do # every flag is tried; the status is logged, not acted on
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then # copy at most the first 10 lines of stderr, plus a truncation marker
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files # restored once this check has finished
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and gives us a first hint
+# of exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # $ac_link with the -o conftest* argument stripped
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in # names with these suffixes are never queued for removal
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files '' # the trailing empty word leaves ac_file empty when no candidate exists
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+	;;
+    [ab].out )
+	# We found the default executable, but exeext='' is most
+	# certainly right.
+	break;;
+    *.* )
+	if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+	then :; else
+	   ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	fi
+	# We set ac_cv_exeext here because the later test for it is not
+	# safe: cross compilers may not add the suffix if given an `-o'
+	# argument, so we may need to know it at that point already.
+	# Even if this section looks crufty: it has the advantage of
+	# actually working.
+	break;;
+    * )
+	break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+  ac_file=''
+fi
+if test -z "$ac_file"; then : # link failed or produced no recognisable output file
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in # the first existing file that is not a known by-product decides the suffix
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	  break;;
+    * ) break;;
+  esac
+done
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT # keep the internal variable in step with the exported EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out" # the test program above writes conftest.out
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then # an explicit yes skips the run test entirely
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no # the freshly linked program ran successfully here
+  else
+    if test "$cross_compiling" = maybe; then # undecided so far: a failed run settles it as yes
+	cross_compiling=yes
+    else
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then : # cache variable already set: skip the compile probe
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in # the first existing file that is not a known by-product decides the suffix
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT # keep the internal variable in step with the exported OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then : # cache variable already set: skip the compile probe
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then : # the probe is valid C only when __GNUC__ is defined
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC= # empty, not the word no, when the compiler is not GNU C
+fi
+ac_test_CFLAGS=${CFLAGS+set} # records whether CFLAGS was set on entry, even if empty
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then : # cache variable already set: skip the compile probes
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes # NOTE(review): presumably read by ac_fn_c_try_compile, defined elsewhere - confirm
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+else
+  CFLAGS="" # -g failed: check whether the same program compiles with empty CFLAGS
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  ac_c_werror_flag=$ac_save_c_werror_flag # even that failed: retry -g with the saved werror setting
+	 CFLAGS="-g"
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then # CFLAGS was set on entry: restore it untouched
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then : # cache variable already set: skip the compile probes
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg" # the candidate option is appended to the compiler command itself
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break # stop at the first option that compiled
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x) # the empty first candidate compiled, so no extra option is required
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then : # generated with an empty body: nothing happens here
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu # same five assignments as directly above, emitted a second time
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
+$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
+if ${am_cv_prog_cc_c_o+:} false; then : # cache variable already set: skip the compile probe
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes # assume success; either pass below may flip this to no
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+$as_echo "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC" # prefix CC with the compile script from $am_aux_dir
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+# Check for compiler
+# Needs to be split off into an extra macro to ensure right expansion
+# order.
+
+
+if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then : # MPI was not wanted: record not found without probing
+   _ax_prog_cc_mpi_mpi_found=no
+else
+
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+    # test whether MPI_Init is available
+    # We do not use AC_SEARCH_LIBS here, as it caches its outcome and
+    # thus disallows corresponding calls in the other AX_PROG_*_MPI
+    # macros.
+    for lib in NONE mpi mpich; do # NONE links with LIBS unchanged; the others prepend -l$lib
+      save_LIBS=$LIBS
+      if test x"$lib" = xNONE; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5
+$as_echo_n "checking for function MPI_Init... " >&6; }
+      else
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5
+$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; }
+        LIBS="-l$lib $LIBS"
+      fi
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+int
+main ()
+{
+return MPI_Init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+   _ax_prog_cc_mpi_mpi_found=yes
+else
+   _ax_prog_cc_mpi_mpi_found=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5
+$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; }
+      if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then
+        break; # leaves the loop before LIBS is restored, so a successful -l$lib stays in LIBS
+      fi
+      LIBS=$save_LIBS
+    done
+
+    # Check for header
+    if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5
+$as_echo_n "checking for mpi.h... " >&6; }
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <mpi.h>
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+         _ax_prog_cc_mpi_mpi_found=no
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then :
+
+
+$as_echo "#define HAVE_MPI 1" >>confdefs.h
+
+        :
+
+else
+
+
+        :
+
+fi
+
+
+
+if test -n "$ac_tool_prefix"; then # a tool prefix is set: try ${ac_tool_prefix}ranlib first
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_RANLIB+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first hit wins: leave the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then # ac_cv_prog_RANLIB is empty: look for an unprefixed ranlib
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # split $PATH on $PATH_SEPARATOR in the for loop
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2 # first hit wins: leave the extension loop and the PATH loop
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":" # no ranlib anywhere: fall back to the shell no-op command
+  else
+    case $cross_compiling:$ac_tool_warned in # matches only when cross compiling and not yet warned
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+
+# Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has completely different semantics
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then # a non-empty INSTALL on entry is kept as is
+if ${ac_cv_path_install+:} false; then : # cache variable already set: skip the PATH search
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=. # an empty PATH entry is treated as the current directory
+    # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+  ./ | .// | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+	  if test $ac_prog = install &&
+	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # AIX install.  It has an incompatible calling convention.
+	    :
+	  elif test $ac_prog = install &&
+	    grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # program-specific install script used by HP pwplus--don't use.
+	    :
+	  else
+	    rm -rf conftest.one conftest.two conftest.dir
+	    echo one > conftest.one
+	    echo two > conftest.two
+	    mkdir conftest.dir
+	    if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+	      test -s conftest.one && test -s conftest.two &&
+	      test -s conftest.dir/conftest.one &&
+	      test -s conftest.dir/conftest.two
+	    then
+	      ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+	      break 3 # leave the extension, program-name and PATH loops at once
+	    fi
+	  fi
+	fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test "${ac_cv_path_install+set}" = set; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+
+am__api_version='1.16'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+$as_echo_n "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \	]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do # at most two attempts, with a one second sleep in between
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+	&& test "$*" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then # newest file listed first, or no retries left
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file # subshell status: true when conftest.file sorted as the newest
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) & # runs in the background; its pid is kept in am_sleep_pid
+  am_sleep_pid=$!
+fi
+
+rm -f conftest.file
+
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
+
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
+$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
+$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if ${ac_cv_path_mkdir+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in mkdir gmkdir; do
+	 for ac_exec_ext in '' $ac_executable_extensions; do
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
+	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+	     'mkdir (GNU coreutils) '* | \
+	     'mkdir (coreutils) '* | \
+	     'mkdir (fileutils) '4.1*)
+	       ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+	       break 3;;
+	   esac
+	 done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  test -d ./--version && rmdir ./--version
+  if test "${ac_cv_path_mkdir+set}" = set; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+$as_echo "$MKDIR_P" >&6; }
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+	@echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  SET_MAKE=
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# Probe whether this filesystem accepts names with a leading dot, and
+# derive the dependency directory name (.deps or _deps) from the answer.
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+am__leading_dot=_
+if test -d .tst; then
+  am__leading_dot=.
+fi
+rmdir .tst 2>/dev/null
+DEPDIR="${am__leading_dot}deps"
+
+ac_config_commands="$ac_config_commands depfiles"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5
+$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; }
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
+   (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); }
+  case $?:`cat confinc.out 2>/dev/null` in #(
+  '0:this is the am__doit target') :
+    case $s in #(
+  BSD) :
+    am__include='.include' am__quote='"' ;; #(
+  *) :
+    am__include='include' am__quote='' ;;
+esac ;; #(
+  *) :
+     ;;
+esac
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
+$as_echo "${_am_result}" >&6; }
+
+# Check whether --enable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then :
+  enableval=$enable_dependency_tracking;
+fi
+
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+ if test "x$enable_dependency_tracking" != xno; then
+  AMDEP_TRUE=
+  AMDEP_FALSE='#'
+else
+  AMDEP_TRUE='#'
+  AMDEP_FALSE=
+fi
+
+
+# Check whether --enable-silent-rules was given.
+if test "${enable_silent_rules+set}" = set; then :
+  enableval=$enable_silent_rules;
+fi
+
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;
+esac
+am_make=${MAKE-make}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5
+$as_echo_n "checking whether $am_make supports nested variables... " >&6; }
+if ${am_cv_make_support_nested_variables+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if $as_echo 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5
+$as_echo "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# Give CYGPATH_W a default when the caller has not set it: plain echo,
+# upgraded to 'cygpath -w' if a working cygpath is on the PATH.
+if test -z "$CYGPATH_W"; then
+  CYGPATH_W=echo
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  fi
+fi
+
+
+# Define the identity of the package.
+ PACKAGE='hpl'
+ VERSION='2.3'
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE "$PACKAGE"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define VERSION "$VERSION"
+_ACEOF
+
+# Some tools Automake needs.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+mkdir_p='$(MKDIR_P)'
+
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
+
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
+
+
+
+
+
+depcc="$CC"   am_compiler_list=
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
+$as_echo_n "checking dependency style of $depcc... " >&6; }
+if ${am_cv_CC_dependencies_compiler_type+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5
+$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
+
+
+
+
+
+
+
+
+hpl_blas_ok=no
+
+
+current_LIBS="$LIBS"
+
+cat <<HPLEOF > hplvars.txt
+name1=OpenBLAS
+rout1=dgemm_
+libs1=-lopenblas -lm
+
+name2=Atlas Fortran BLAS
+rout2=dgemm_
+libs2=-lf77blas -latlas
+
+name3=Sequential Intel MKL LP64 (group)
+rout3=dgemm_
+libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread
+
+name4=Sequential Intel MKL LP64
+rout4=dgemm_
+libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+name5=AMD's ACML
+rout5=dgemm_
+libs5=-lacml -lm
+
+name6=Accelerate
+rout6=dgemm_
+libs6=-framework Accelerate
+
+name7=Apple VecLib
+rout7=dgemm_
+libs7=-framework vecLib
+
+name8=IBM ESSL
+rout8=dgemm_
+libs8=-lessl
+
+name9=NVIDIA nvblas
+rout9=dgemm_
+libs9=-lnvblas
+
+name10=Generic BLAS
+rout10=dgemm_
+libs10=-lblas
+
+HPLEOF
+# Try each BLAS candidate from hplvars.txt in order; skip the rest once one links.
+for hpl_i in 1 2 3 4 5 6 7 8 9 10; do
+if test "x$hpl_blas_ok" = xno; then
+  name=`sed -n "s/^name${hpl_i}=//p" hplvars.txt`
+  rout=`sed -n "s/^rout${hpl_i}=//p" hplvars.txt`
+  libs=`sed -n "s/^libs${hpl_i}=//p" hplvars.txt`
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5
+$as_echo_n "checking for $rout in $name... " >&6; }
+  # Keep the caller's LIBS on the probe link line, matching the final "$BLAS_LIBS $LIBS".
+  LIBS="$libs $current_LIBS"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $rout ();
+int
+main ()
+{
+return $rout ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  hpl_blas_ok=yes;BLAS_LIBS="$libs"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  LIBS="$current_LIBS"
+  # This body only runs while nothing has linked yet, so this is the verdict for this candidate.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+done
+rm -f hplvars.txt
+
+if test  x$hpl_blas_ok = xno; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5
+$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5
+$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; }
+if ${ac_cv_lib_openblas_dgemm_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lopenblas  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dgemm_ ();
+int
+main ()
+{
+return dgemm_ ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_openblas_dgemm_=yes
+else
+  ac_cv_lib_openblas_dgemm_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5
+$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; }
+if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then :
+  hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5
+$as_echo "$hpl_blas_ok" >&6; }
+fi
+
+
+
+# ACTION-IF-NOT-FOUND: abort configuration when no candidate supplied a
+# linkable BLAS.  NOTE(review): relies on as_fn_error (defined earlier in
+# configure) exiting, so the line after the guard runs only on success.
+if test x"$hpl_blas_ok" != xyes; then
+        hpl_blas_ok=no
+        as_fn_error $? "BLAS not found" "$LINENO" 5
+fi
+LIBS="$BLAS_LIBS $LIBS"
+
+
+
+
+for ac_func in dgemm_
+do :
+  ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_"
+if test "x$ac_cv_func_dgemm_" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DGEMM_ 1
+_ACEOF
+
+fi
+done
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if ${ac_cv_prog_CPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+		   (('a' <= (c) && (c) <= 'i') \
+		     || ('j' <= (c) && (c) <= 'r') \
+		     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+		  inttypes.h stdint.h unistd.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in mpi.h
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default"
+if test "x$ac_cv_header_mpi_h" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_MPI_H 1
+_ACEOF
+
+fi
+
+done
+
+
+ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile"
+
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    if test "x$cache_file" != "x/dev/null"; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+	cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+	  mv -f confcache "$cache_file"$$ &&
+	  mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+	  mv -f confcache "$cache_file" ;;
+	esac
+      fi
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5
+$as_echo_n "checking that generated files are newer than configure... " >&6; }
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
+$as_echo "done" >&6; }
+if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
+  as_fn_error $? "conditional \"AMDEP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+ if test -n "$EXEEXT"; then
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by hpl $as_me 2.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+      --header=FILE[:TEMPLATE]
+                   instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration commands:
+$config_commands
+
+Report bugs to <hpl@icl.utk.edu>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+hpl config.status 2.3
+configured by $0, generated by GNU Autoconf 2.69,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=?*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    $as_echo "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --he | --h)
+    # Conflict between --help and --header
+    as_fn_error $? "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;;
+    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+    "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;;
+
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp= ac_tmp=
+  trap 'exit_status=$?
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=[	 ]*/{
+h
+s///
+s/^/:/
+s/[	 ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[	 ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[	 ]*#[	 ]*define[	 ][	 ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[	 ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[	 ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  for (key in D) D_is_set[key] = 1
+  FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  prefix = substr(line, 1, index(line, defundef) - 1)
+  if (D_is_set[macro]) {
+    # Preserve the white space surrounding the "#".
+    print prefix "define", macro P[macro] D[macro]
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", prefix defundef, macro, "*/"
+      next
+    }
+  }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$ac_tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+	  $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+	`' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&2;}
+
+  rm -f "$ac_tmp/stdin"
+  case $ac_file in
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+  :H)
+  #
+  # CONFIG_HEADER
+  #
+  if test x"$ac_file" != x-; then
+    {
+      $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+$as_echo "$as_me: $ac_file is unchanged" >&6;}
+    else
+      rm -f "$ac_file"
+      mv "$ac_tmp/config.h" "$ac_file" \
+	|| as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    fi
+  else
+    $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
+  fi
+# Compute "$ac_file"'s index in $config_headers.
+_am_arg="$ac_file"
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
+$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$_am_arg" : 'X\(//\)[^/]' \| \
+	 X"$_am_arg" : 'X\(//\)$' \| \
+	 X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$_am_arg" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`/stamp-h$_am_stamp_count
+ ;;
+
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  case $CONFIG_FILES in #(
+  *\'*) :
+    eval set x "$CONFIG_FILES" ;; #(
+  *) :
+    set x $CONFIG_FILES ;; #(
+  *) :
+     ;;
+esac
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know have understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`$as_dirname -- "$am_mf" ||
+$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$am_mf" : 'X\(//\)[^/]' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$am_mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    am_filepart=`$as_basename -- "$am_mf" ||
+$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$am_mf" : 'X\(//\)$' \| \
+	 X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$am_mf" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    { echo "$as_me:$LINENO: cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles" >&5
+   (cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  Try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+  { am_dirpart=; unset am_dirpart;}
+  { am_filepart=; unset am_filepart;}
+  { am_mf=; unset am_mf;}
+  { am_rc=; unset am_rc;}
+  rm -f conftest-deps.mk
+}
+ ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac
new file mode 100644
index 000000000..eb91dc590
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac
@@ -0,0 +1,34 @@
+dnl Autoconf input for HPL: run autoreconf to regenerate configure from it.
+AC_PREREQ([2.69])
+
+AC_INIT([hpl], [2.3], [hpl@icl.utk.edu])
+AC_CONFIG_SRCDIR([include/hpl.h])
+AC_CONFIG_HEADERS([include/hplconfig.h])
+
+AX_PROG_CC_MPI
+AC_PROG_RANLIB
+AC_PROG_INSTALL
+
+AM_INIT_AUTOMAKE([subdir-objects])
+AM_PROG_CC_C_O
+
+dnl
+dnl AX_BLAS requires a Fortran compiler and detects Fortran libraries in
+dnl $FLIBS; it is kept here only for reference (HPL_BLAS is used instead):
+dnl
+dnl AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS")
+dnl
+
+dnl NOTE(review): assumes HPL_BLAS takes (if-found, if-not-found) like AX_BLAS.
+HPL_BLAS([LIBS="$BLAS_LIBS $LIBS"], [AC_MSG_ERROR([BLAS not found])])
+
+dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ...
+dnl FIXME: test for GSL CBLAS
+
+AC_CHECK_FUNCS([dgemm_])
+
+AC_CHECK_HEADERS([mpi.h])
+
+AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile])
+
+AC_OUTPUT
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp
new file mode 100755
index 000000000..65cbf7093
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp
@@ -0,0 +1,791 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+  '')
+    echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+    exit 1;
+    ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by 'PROGRAMS ARGS'.
+  object      Object file output by 'PROGRAMS ARGS'.
+  DEPDIR      directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+  libtool     Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit $?
+    ;;
+esac
+
+# Get the directory component of the given path ($1), and save it in the
+# global variable '$dir'.  The result is deliberately either empty or ends
+# with a '/', so that callers can write "$dir$base" with no separator.
+set_dir_from ()
+{
+  case $1 in
+    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
+      *) dir=;;
+  esac
+}
+
+# Get the suffix-stripped basename of the given path ($1), and save it in
+# the global variable '$base'.
+set_base_from ()
+{
+  base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
+}
+
+# If the compiler invocation did not actually create a dependency file,
+# write a dummy one (the single line "#dummy") to "$depfile", to avoid
+# errors with the Makefile "include basename.Plo" scheme.
+make_dummy_depfile ()
+{
+  echo "#dummy" > "$depfile"
+}
+
+# Common post-processing of the generated depfile (aix and tru64 modes).
+# Requires '$tmpdepfile', '$object', '$lower' and '$tab'; writes '$depfile'.
+aix_post_process_depfile ()
+{
+  # If the compiler actually managed to produce a dependency file,
+  # post-process it.
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form 'foo.o: dependency.h'.
+    # Do two passes, one to just change these to
+    #   $object: dependency.h
+    # and one to simply output
+    #   dependency.h:
+    # which is needed to avoid the deleted-header problem.
+    { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
+      sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
+    } > "$depfile"
+    rm -f "$tmpdepfile"
+  else
+    make_dummy_depfile
+  fi
+}
+
+# A tabulation character.
+tab='	'
+# A newline character.
+nl='
+'
+# Character ranges might be problematic outside the C locale.
+# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
+lower=abcdefghijklmnopqrstuvwxyz
+digits=0123456789
+alpha=${upper}${lower}
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+  # This is just like dashmstdout with a different argument.
+  dashmflag=-xM
+  depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+  # This is just like msvisualcpp but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+  # This is just like msvc7 but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+  # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+  gccflag=-qmakedep=gcc,-MF
+  depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am.  Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+  for arg
+  do
+    case $arg in
+    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+    *)  set fnord "$@" "$arg" ;;
+    esac
+    shift # fnord
+    shift # $arg
+  done
+  "$@"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  mv "$tmpdepfile" "$depfile"
+  ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).  Also, it might not be
+##   supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The second -e expression handles DOS-style file names with drive
+  # letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+## Some versions of gcc put a space before the ':'.  On the theory
+## that the space means something, we add a space to the output as
+## well.  hp depmode also adds that space, but also prefixes the VPATH
+## to the object.  Take care to not repeat it in the output.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+sgi)
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
+    echo "$object : \\" > "$depfile"
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like '#:fec' to the end of the
+    # dependency line.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
+      | tr "$nl" ' ' >> "$depfile"
+    echo >> "$depfile"
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+      >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+xlc)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  In older versions, this file always lives in the
+  # current directory.  Also, the AIX compiler puts '$object:' at the
+  # start of each line; $object doesn't have directory information.
+  # Version 6 uses the directory in both cases.
+  set_dir_from "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$base.u
+    tmpdepfile3=$dir.libs/$base.u
+    "$@" -Wc,-M
+  else
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$dir$base.u
+    tmpdepfile3=$dir$base.u
+    "$@" -M
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  aix_post_process_depfile
+  ;;
+
+tcc)
+  # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
+  # FIXME: That version was still under development at the time of writing.
+  #        Make sure that this statement remains true also for stable, released
+  #        versions.
+  # It will wrap lines (doesn't matter whether long or short) with a
+  # trailing '\', as in:
+  #
+  #   foo.o : \
+  #    foo.c \
+  #    foo.h \
+  #
+  # It will put a trailing '\' even on the last line, and will use leading
+  # spaces rather than leading tabs (at least since its commit 0394caf7
+  # "Emit spaces for -MD").
+  "$@" -MD -MF "$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
+  # We have to change lines of the first kind to '$object: \'.
+  sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
+  # And for each line of the second kind, we have to emit a 'dep.h:'
+  # dummy dependency, to avoid the deleted-header problem.
+  sed -n -e 's|^  *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+## The order of this option in the case statement is important, since the
+## shell code in configure will try each of these formats in the order
+## listed in this file.  A plain '-MD' option would be understood by many
+## compilers, so we must ensure this comes after the gcc and icc options.
+pgcc)
+  # Portland's C compiler understands '-MD'.
+  # Will always output deps to 'file.d' where file is the root name of the
+  # source file under compilation, even if file resides in a subdirectory.
+  # The object file name does not affect the name of the '.d' file.
+  # pgcc 10.2 will output
+  #    foo.o: sub/foo.c sub/foo.h
+  # and will wrap long lines using '\' :
+  #    foo.o: sub/foo.c ... \
+  #     sub/foo.h ... \
+  #     ...
+  set_dir_from "$object"
+  # Use the source, not the object, to determine the base name, since
+  # that's sadly what pgcc will do too.
+  set_base_from "$source"
+  tmpdepfile=$base.d
+
+  # For projects that build the same source file twice into different object
+  # files, the pgcc approach of using the *source* file root name can cause
+  # problems in parallel builds.  Use a locking strategy to avoid stomping on
+  # the same $tmpdepfile.
+  lockdir=$base.d-lock
+  trap "
+    echo '$0: caught signal, cleaning up...' >&2
+    rmdir '$lockdir'
+    exit 1
+  " 1 2 13 15
+  numtries=100
+  i=$numtries
+  while test $i -gt 0; do
+    # mkdir is a portable test-and-set.
+    if mkdir "$lockdir" 2>/dev/null; then
+      # This process acquired the lock.
+      "$@" -MD
+      stat=$?
+      # Release the lock.
+      rmdir "$lockdir"
+      break
+    else
+      # If the lock is being held by a different process, wait
+      # until the winning process is done or we timeout.
+      while test -d "$lockdir" && test $i -gt 0; do
+        sleep 1
+        i=`expr $i - 1`
+      done
+    fi
+    i=`expr $i - 1`
+  done
+  trap - 1 2 13 15
+  if test $i -le 0; then
+    echo "$0: failed to acquire lock after $numtries attempts" >&2
+    echo "$0: check lockdir '$lockdir'" >&2
+    exit 1
+  fi
+
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each line is of the form `foo.o: dependent.h',
+  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Do two passes, one to just change these to
+  # `$object: dependent.h' and one to simply `dependent.h:'.
+  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp2)
+  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+  # compilers, which have integrated preprocessors.  The correct option
+  # to use with these is +Maked; it writes dependencies to a file named
+  # 'foo.d', which lands next to the object file, wherever that
+  # happens to be.
+  # Much of this is similar to the tru64 case; see comments there.
+  set_dir_from  "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir.libs/$base.d
+    "$@" -Wc,+Maked
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    "$@" +Maked
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+     rm -f "$tmpdepfile1" "$tmpdepfile2"
+     exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  if test -f "$tmpdepfile"; then
+    sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
+    # Add 'dependent.h:' lines.
+    sed -ne '2,${
+               s/^ *//
+               s/ \\*$//
+               s/$/:/
+               p
+             }' "$tmpdepfile" >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile" "$tmpdepfile2"
+  ;;
+
+tru64)
+  # The Tru64 compiler uses -MD to generate dependencies as a side
+  # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
+  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+  # dependencies in 'foo.d' instead, so we check for that too.
+  # Subdirectories are respected.
+  set_dir_from  "$object"
+  set_base_from "$object"
+
+  if test "$libtool" = yes; then
+    # Libtool generates 2 separate objects for the 2 libraries.  These
+    # two compilations output dependencies in $dir.libs/$base.o.d and
+    # in $dir$base.o.d.  We have to check for both files, because
+    # one of the two compilations can be disabled.  We should prefer
+    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+    # automatically cleaned when .libs/ is deleted, while ignoring
+    # the former would cause a distcleancheck panic.
+    tmpdepfile1=$dir$base.o.d          # libtool 1.5
+    tmpdepfile2=$dir.libs/$base.o.d    # Likewise.
+    tmpdepfile3=$dir.libs/$base.d      # Compaq CCC V6.2-504
+    "$@" -Wc,-MD
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    tmpdepfile3=$dir$base.d
+    "$@" -MD
+  fi
+
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  # Same post-processing that is required for AIX mode.
+  aix_post_process_depfile
+  ;;
+
+msvc7)
+  if test "$libtool" = yes; then
+    showIncludes=-Wc,-showIncludes
+  else
+    showIncludes=-showIncludes
+  fi
+  "$@" $showIncludes > "$tmpdepfile"
+  stat=$?
+  grep -v '^Note: including file: ' "$tmpdepfile"
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The first sed program below extracts the file names and escapes
+  # backslashes for cygpath.  The second sed program outputs the file
+  # name when reading, but also accumulates all include files in the
+  # hold buffer in order to output them again at the end.  This only
+  # works with sed implementations that can handle large buffers.
+  sed < "$tmpdepfile" -n '
+/^Note: including file:  *\(.*\)/ {
+  s//\1/
+  s/\\/\\\\/g
+  p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+  s/.*/'"$tab"'/
+  G
+  p
+}' >> "$depfile"
+  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
+  rm -f "$tmpdepfile"
+  ;;
+
+msvc7msys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  test -z "$dashmflag" && dashmflag=-M
+  # Require at least two characters before searching for ':'
+  # in the target name.  This is to cope with DOS-style filenames:
+  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
+  "$@" $dashmflag |
+    sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this sed invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+
+makedepend)
+  "$@" || exit $?
+  # Remove any Libtool call
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # X makedepend
+  shift
+  cleared=no eat=no
+  for arg
+  do
+    case $cleared in
+    no)
+      set ""; shift
+      cleared=yes ;;
+    esac
+    if test $eat = yes; then
+      eat=no
+      continue
+    fi
+    case "$arg" in
+    -D*|-I*)
+      set fnord "$@" "$arg"; shift ;;
+    # Strip any option that makedepend may not understand.  Remove
+    # the object too, otherwise makedepend will parse it as a source file.
+    -arch)
+      eat=yes ;;
+    -*|$object)
+      ;;
+    *)
+      set fnord "$@" "$arg"; shift ;;
+    esac
+  done
+  obj_suffix=`echo "$object" | sed 's/^.*\././'`
+  touch "$tmpdepfile"
+  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  rm -f "$depfile"
+  # makedepend may prepend the VPATH from the source file name to the object.
+  # No need to regex-escape $object, excess matching of '.' is harmless.
+  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process the last invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed '1,2d' "$tmpdepfile" \
+    | tr ' ' "$nl" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  "$@" -E \
+    | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+             -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+    | sed '$ s: \\$::' > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  IFS=" "
+  for arg
+  do
+    case "$arg" in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+        set fnord "$@"
+        shift
+        shift
+        ;;
+    *)
+        set fnord "$@" "$arg"
+        shift
+        shift
+        ;;
+    esac
+  done
+  "$@" -E 2>/dev/null |
+  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+  echo "$tab" >> "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvcmsys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+none)
+  exec "$@"
+  ;;
+
+*)
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h
new file mode 100644
index 000000000..6d131963f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_H
+#define HPL_H
+/*
+ * ---------------------------------------------------------------------
+ * HPL default compile options that can be overridden in the Make.<arch>
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_NO_MPI_DATATYPE         /* Use MPI user-defined data type */
+#define HPL_USE_MPI_DATATYPE
+#endif
+ 
+#ifndef HPL_COPY_L  /* do not copy L, use MPI user-defined data types */
+#define HPL_NO_COPY_L
+#endif
+ 
+#ifndef HPL_DETAILED_TIMING         /* Do not enable detailed timings */
+#define HPL_NO_DETAILED_TIMING
+#endif
+ 
+#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
+#ifndef HPL_CALL_CBLAS                       /* there can be only one */
+#define HPL_CALL_FBLAS
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_timer.h"
+#include "hpl_matgen.h"
+#include "hpl_test.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+#include "hpl_ptest.h"
+
+#endif
+/*
+ * End of hpl.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h
new file mode 100644
index 000000000..861caf380
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_AUXIL_H
+#define HPL_AUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM;
+
+typedef enum
+{
+   HPL_MACH_EPS   = 900,                /* relative machine precision */
+   HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */
+   HPL_MACH_BASE  = 902,                /* base = base of the machine */
+   HPL_MACH_PREC  = 903,                          /* prec  = eps*base */
+   HPL_MACH_MLEN  = 904,   /* number of (base) digits in the mantissa */
+   HPL_MACH_RND   = 905,        /* 1.0 if rounding occurs in addition */
+   HPL_MACH_EMIN  = 906,   /* min exponent before (gradual) underflow */
+   HPL_MACH_RMIN  = 907,        /* underflow threshold base**(emin-1) */
+   HPL_MACH_EMAX  = 908,          /* largest exponent before overflow */
+   HPL_MACH_RMAX  = 909  /* overflow threshold - (base**emax)*(1-eps) */
+ 
+} HPL_T_MACH;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_fprintf
+STDC_ARGS( (
+   FILE *,
+   const char *,
+   ...
+) );
+void                             HPL_warn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_abort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_dlacpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlatcpy
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dlaprnt
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_dlange
+STDC_ARGS( (
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+double                           HPL_dlamch
+STDC_ARGS( (
+   const HPL_T_MACH
+) );
+
+#endif
+/*
+ * End of hpl_auxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h
new file mode 100644
index 000000000..2a510471a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h
@@ -0,0 +1,630 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_BLAS_H
+#define HPL_BLAS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+
+
+/*
+ * ---------------------------------------------------------------------
+ * typedef definitions
+ * ---------------------------------------------------------------------
+ */
+enum HPL_ORDER
+{  HplRowMajor = 101,  HplColumnMajor  = 102 };
+enum HPL_TRANS
+{  HplNoTrans  = 111,  HplTrans        = 112,  HplConjTrans    = 113 };
+enum HPL_UPLO
+{  HplUpper    = 121,  HplLower        = 122 };
+enum HPL_DIAG
+{  HplNonUnit  = 131,  HplUnit         = 132 };
+enum HPL_SIDE
+{  HplLeft     = 141,  HplRight        = 142 }; 
+
+
+#ifdef HPL_CALL_CBLAS
+
+
+/*
+ * ---------------------------------------------------------------------
+ * The C interface of the BLAS is available ...
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    CBLAS_INDEX         int
+ 
+#define    CBLAS_ORDER         HPL_ORDER
+#define    CblasRowMajor       HplRowMajor
+#define    CblasColMajor       HplColMajor
+ 
+#define    CBLAS_TRANSPOSE     HPL_TRANS
+#define    CblasNoTrans        HplNoTrans
+#define    CblasTrans          HplTrans
+#define    CblasConjTrans      HplConjTrans
+ 
+#define    CBLAS_UPLO          HPL_UPLO
+#define    CblasUpper          HplUpper
+#define    CblasLower          HplLower
+ 
+#define    CBLAS_DIAG          HPL_DIAG
+#define    CblasNonUnit        HplNonUnit
+#define    CblasUnit           HplUnit
+ 
+#define    CBLAS_SIDE          HPL_SIDE
+#define    CblasLeft           HplLeft
+#define    CblasRight          HplRight
+/*
+ * ---------------------------------------------------------------------
+ * CBLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+CBLAS_INDEX       cblas_idamax
+STDC_ARGS(
+(  const int,       const double *,  const int ) );
+void              cblas_dswap
+STDC_ARGS(
+(  const int,       double *,        const int,       double *,
+   const int ) );
+void              cblas_dcopy
+STDC_ARGS(
+(  const int,       const double *,  const int,       double *,
+   const int ) );
+void              cblas_daxpy
+STDC_ARGS(
+(  const int,       const double,    const double *,  const int,
+   double *,        const int ) );
+void              cblas_dscal
+STDC_ARGS(
+(  const int,       const double,    double *,        const int ) );
+
+void              cblas_dgemv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const int,       const int,       const double,    const double *,
+   const int,       const double *,  const int,       const double,
+   double *,        const int ) );
+
+void              cblas_dger
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const int,       const int,
+   const double,    const double *,  const int,       const double *,
+   const int,       double *,        const int ) );
+void              cblas_dtrsv
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_UPLO,
+   const enum CBLAS_TRANSPOSE,       const enum CBLAS_DIAG,
+   const int,       const double *,  const int,       double *,
+   const int ) );
+
+void              cblas_dgemm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void              cblas_dtrsm
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+void             dpcpp_dgemm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_TRANSPOSE,       const int,       const int,
+   const int,       const double,    const double *,  const int,
+   const double *,  const int,       const double,    double *,
+   const int ) );
+
+void             dpcpp_dtrsm 
+STDC_ARGS(
+(  const enum CBLAS_ORDER,           const enum CBLAS_SIDE,
+   const enum CBLAS_UPLO,            const enum CBLAS_TRANSPOSE,
+   const enum CBLAS_DIAG,            const int,       const int,
+   const double,    const double *,  const int,       double *,
+   const int ) );
+/*
+ * ---------------------------------------------------------------------
+ * HPL C BLAS macro definition
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_dswap           cblas_dswap
+#define    HPL_dcopy           cblas_dcopy
+#define    HPL_daxpy           cblas_daxpy
+#define    HPL_dscal           cblas_dscal
+#define    HPL_idamax          cblas_idamax
+
+#define    HPL_dgemv           cblas_dgemv
+#define    HPL_dtrsv           cblas_dtrsv
+#define    HPL_dger            cblas_dger
+
+//#define    HPL_dgemm           cblas_dgemm
+//#define    HPL_dtrsm           cblas_dtrsm
+#define    HPL_dgemm           dpcpp_dgemm
+#define    HPL_dtrsm           dpcpp_dtrsm  
+
+#endif
+
+//#define    HPL_hello           sss_gemm 
+
+#ifdef HPL_CALL_FBLAS
+/*
+ * ---------------------------------------------------------------------
+ * Use the Fortran 77 interface of the BLAS ...
+ * ---------------------------------------------------------------------
+ * Defaults: Add_, F77_INTEGER=int, StringSunStyle
+ * ---------------------------------------------------------------------
+ */
+#ifndef NoChange
+#ifndef UpCase
+#ifndef Add__
+#ifndef Add_
+
+#define Add_
+
+#endif
+#endif
+#endif
+#endif
+
+#ifndef F77_INTEGER
+#define    F77_INTEGER         int
+#else
+#define    HPL_USE_F77_INTEGER_DEF
+#endif
+
+#ifndef StringCrayStyle
+#ifndef StringStructVal
+#ifndef StringStructPtr
+#ifndef StringSunStyle
+
+#define StringSunStyle
+
+#endif
+#endif
+#endif
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 <-> C interface
+ * ---------------------------------------------------------------------
+ *
+ * These macros identify how Fortran routines will be called.
+ *
+ * Add_     : the Fortran compiler expects the name of C functions to be
+ * in all lower case and to have an underscore postfixed to it (Suns, Intel
+ * compilers expect this).
+ *
+ * NoChange : the Fortran compiler expects the name of C functions to be
+ * in all lower case (IBM RS6K compilers do this).
+ *
+ * UpCase   : the Fortran compiler expects the name of C functions to be
+ * in all upper case. (Cray compilers expect this).
+ *
+ * Add__    : the Fortran compiler in use is f2c, a Fortran to C conver-
+ * ter.
+ */
+#ifdef NoChange
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm(...)
+ */
+#define    F77dswap               dswap
+#define    F77dscal               dscal
+#define    F77dcopy               dcopy
+#define    F77daxpy               daxpy
+#define    F77idamax              idamax
+
+#define    F77dgemv               dgemv
+#define    F77dtrsv               dtrsv
+#define    F77dger                dger
+
+#define    F77dgemm               dgemm
+#define    F77dtrsm               dtrsm
+
+#endif
+
+#ifdef UpCase
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          DGEMM(...)
+ */
+#ifdef CRAY_BLAS
+                                                                                
+#define    F77dswap               SSWAP
+#define    F77dscal               SSCAL
+#define    F77dcopy               SCOPY
+#define    F77daxpy               SAXPY
+#define    F77idamax              ISAMAX
+                                                                                
+#define    F77dgemv               SGEMV
+#define    F77dtrsv               STRSV
+#define    F77dger                SGER
+                                                                                
+#define    F77dgemm               SGEMM
+#define    F77dtrsm               STRSM
+                                                                                
+#else
+
+#define    F77dswap               DSWAP
+#define    F77dscal               DSCAL
+#define    F77dcopy               DCOPY
+#define    F77daxpy               DAXPY
+#define    F77idamax              IDAMAX
+
+#define    F77dgemv               DGEMV
+#define    F77dtrsv               DTRSV
+#define    F77dger                DGER
+
+#define    F77dgemm               DGEMM
+#define    F77dtrsm               DTRSM
+
+#endif
+
+#endif
+
+#ifdef Add_
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+
+#endif
+
+#ifdef Add__
+/*
+ * These defines  set  up  the  naming scheme required to have a FORTRAN
+ * routine called by a C routine  with the following  FORTRAN to C inter-
+ * face:
+ *
+ *          FORTRAN DECLARATION            C CALL
+ *          SUBROUTINE DGEMM(...)          dgemm_(...)
+ */
+#define    F77dswap               dswap_
+#define    F77dscal               dscal_
+#define    F77dcopy               dcopy_
+#define    F77daxpy               daxpy_
+#define    F77idamax              idamax_
+ 
+#define    F77dgemv               dgemv_
+#define    F77dtrsv               dtrsv_
+#define    F77dger                dger_
+ 
+#define    F77dgemm               dgemm_
+#define    F77dtrsm               dtrsm_
+//#define    F77hello               sss_gemm
+ 
+#endif
+//#define    F77hello               sss_gemm
+/*
+ * ---------------------------------------------------------------------
+ * Typedef definitions and conversion utilities
+ * ---------------------------------------------------------------------
+ */
+#ifdef StringCrayStyle
+
+#include <fortran.h>
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            _fcd
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(_fcdtocp(c) ))
+#define    HPL_C2F_CHAR(c)     (_cptofcd(&(c), 1))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructVal
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*((c).cp))     /* char that (c).cp points to */
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringStructPtr
+                      /* Type of character argument in a FORTRAN call */
+typedef struct { char *cp; F77_INTEGER len; } F77_CHAR;
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*((c)->cp))   /* char that (c)->cp points to */
+
+#define    F77_CHAR_DECL       F77_CHAR *        /* input CHARACTER*1 */
+
+#endif
+/* ------------------------------------------------------------------ */
+#ifdef StringSunStyle
+                      /* Type of character argument in a FORTRAN call */
+#define    F77_CHAR            char *
+                                    /* Character conversion utilities */
+#define    HPL_F2C_CHAR(c)     (*(c))
+#define    HPL_C2F_CHAR(c)     (&(c))
+
+#define    F77_CHAR_DECL       F77_CHAR          /* input CHARACTER*1 */
+#define    F77_1_CHAR          , F77_INTEGER
+#define    F77_2_CHAR          F77_1_CHAR F77_1_CHAR
+#define    F77_3_CHAR          F77_2_CHAR F77_1_CHAR
+#define    F77_4_CHAR          F77_3_CHAR F77_1_CHAR
+
+#endif
+/* ------------------------------------------------------------------ */
+
+#ifndef F77_1_CHAR
+#define    F77_1_CHAR
+#define    F77_2_CHAR
+#define    F77_3_CHAR
+#define    F77_4_CHAR
+#endif
+
+#define    F77_INT_DECL        const F77_INTEGER *   /* input integer */
+#define    F77_SIN_DECL        const double *         /* input scalar */
+#define    F77_VIN_DECL        const double *         /* input vector */
+#define    F77_VINOUT_DECL     double *        /* input/output vector */
+#define    F77_MIN_DECL        const double *         /* input matrix */
+#define    F77_MINOUT_DECL     double *        /* input/output matrix */
+ 
+#ifdef CRAY_PVP_ENV                      /* Type of FORTRAN functions */
+#define    F77_VOID_FUN        extern fortran void      /* subroutine */
+#define    F77_INT_FUN         extern fortran int /* integer function */
+#else
+#define    F77_VOID_FUN        extern void              /* subroutine */
+#define    F77_INT_FUN         extern int         /* integer function */
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * Fortran 77 BLAS function prototypes
+ * ---------------------------------------------------------------------
+ */
+F77_VOID_FUN    F77dswap
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dscal
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_VOID_FUN    F77dcopy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77daxpy
+STDC_ARGS(
+(  F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_VINOUT_DECL, F77_INT_DECL ) );
+F77_INT_FUN     F77idamax
+STDC_ARGS(
+(  F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL ) );
+
+F77_VOID_FUN    F77dgemv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,
+   F77_SIN_DECL,    F77_VINOUT_DECL, F77_INT_DECL     F77_1_CHAR ) );
+F77_VOID_FUN    F77dger
+STDC_ARGS(
+(  F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_VIN_DECL,
+   F77_INT_DECL,    F77_VIN_DECL,    F77_INT_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL ) );
+F77_VOID_FUN    F77dtrsv
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_VINOUT_DECL, F77_INT_DECL
+   F77_3_CHAR ) );
+
+F77_VOID_FUN    F77dgemm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_INT_DECL,    F77_INT_DECL,
+   F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,    F77_INT_DECL,
+   F77_MIN_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MINOUT_DECL,
+   F77_INT_DECL     F77_2_CHAR ) );
+F77_VOID_FUN    F77dtrsm
+STDC_ARGS(
+(  F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,   F77_CHAR_DECL,
+   F77_INT_DECL,    F77_INT_DECL,    F77_SIN_DECL,    F77_MIN_DECL,
+   F77_INT_DECL,    F77_MINOUT_DECL, F77_INT_DECL     F77_4_CHAR ) );
+
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * HPL BLAS Function prototypes
+ * ---------------------------------------------------------------------
+ */
+#ifndef HPL_CALL_CBLAS
+
+int                              HPL_idamax
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int
+) );
+void                             HPL_daxpy
+STDC_ARGS( (
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dcopy
+STDC_ARGS( (
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dscal
+STDC_ARGS( (
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dswap
+STDC_ARGS( (
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_dger
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dtrsv
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+void                             HPL_dgemm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_TRANS,
+   const enum HPL_TRANS,
+   const int,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   const double *,
+   const int,
+   const double,
+   double *,
+   const int
+) );
+void                             HPL_hello
+STDC_ARGS( (
+) );
+#endif
+void                             HPL_dtrsm
+STDC_ARGS( (
+   const enum HPL_ORDER,
+   const enum HPL_SIDE,
+   const enum HPL_UPLO,
+   const enum HPL_TRANS,
+   const enum HPL_DIAG,
+   const int,
+   const int,
+   const double,
+   const double *,
+   const int,
+   double *,
+   const int
+) );
+
+/* HPL_dtrsm is declared outside the #ifndef HPL_CALL_CBLAS section. */
+
+#endif
+/*
+ * hpl_blas.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h
new file mode 100644
index 000000000..e3ba51a57
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h
@@ -0,0 +1,161 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_COMM_H
+#define HPL_COMM_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_1RING         = 401,                        /* Increasing ring */
+   HPL_1RING_M       = 402,             /* Increasing ring (modified) */
+   HPL_2RING         = 403,                      /* Increasing 2-ring */
+   HPL_2RING_M       = 404,           /* Increasing 2-ring (modified) */
+   HPL_BLONG         = 405,                         /* long broadcast */
+   HPL_BLONG_M       = 406               /* long broadcast (modified) */
+} HPL_T_TOP;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_FAILURE            0
+#define    HPL_SUCCESS            1
+#define    HPL_KEEP_TESTING       2
+/*
+ * ---------------------------------------------------------------------
+ * comm function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_send
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_recv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_sdrv
+STDC_ARGS( (
+   double *,
+   int,
+   int,
+   double *,
+   int,
+   int,
+   int,
+   MPI_Comm
+) );
+int                              HPL_binit
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_bcast
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *
+) );
+int                              HPL_bwait
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+int                              HPL_packL
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_copyL
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blong STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel *        ) );
+ 
+int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) );
+int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel *        ) );
+
+#endif
+/*
+ * End of hpl_comm.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h
new file mode 100644
index 000000000..ce671cf2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h
@@ -0,0 +1,87 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GESV_H
+#define HPL_GESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
+   HPL_CROUT         = 302,                  /* Crout lu fact variant */
+   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
+} HPL_T_FACT;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void              HPL_dgesv
+STDC_ARGS(
+(  const int,       const int,       const int,       const HPL_T_FACT,
+   const HPL_T_FACT,                 const int,       double *,
+   const int,       int * ) );
+void              HPL_ipid
+STDC_ARGS(
+(  const int,       double *,        int *,           int *,
+   int *,           int *,           int *,           int *,
+   const int,       const int,       const int,       const int,
+   const int ) );
+
+#endif
+/*
+ * End of hpl_gesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h
new file mode 100644
index 000000000..1895a5ed4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h
@@ -0,0 +1,212 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_GRID_H
+#define HPL_GRID_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum { HPL_INT       = 100, HPL_DOUBLE       = 101 } HPL_T_TYPE;
+ 
+typedef enum
+{
+   HPL_ROW_MAJOR     = 201,
+   HPL_COLUMN_MAJOR  = 202
+} HPL_T_ORDER;
+
+typedef struct HPL_S_grid
+{
+   MPI_Comm        all_comm;                     /* grid communicator */
+   MPI_Comm        row_comm;                      /* row communicator */
+   MPI_Comm        col_comm;                   /* column communicator */
+   HPL_T_ORDER     order;        /* ordering of the procs in the grid */
+   int             iam;                        /* my rank in the grid */
+   int             myrow;                /* my row number in the grid */
+   int             mycol;             /* my column number in the grid */
+   int             nprow;          /* the total # of rows in the grid */
+   int             npcol;       /* the total # of columns in the grid */
+   int             nprocs;        /* the total # of procs in the grid */
+   int             row_ip2;          /* largest power of two <= nprow */
+   int             row_hdim;     /* row_ip2 procs hypercube dimension */
+   int             row_ip2m1;      /* largest power of two <= nprow-1 */
+   int             row_mask;        /* row_ip2m1 procs hypercube mask */
+   int             col_ip2;          /* largest power of two <= npcol */
+   int             col_hdim;     /* col_ip2 procs hypercube dimension */
+   int             col_ip2m1;      /* largest power of two <= npcol-1 */
+   int             col_mask;        /* col_ip2m1 procs hypercube mask */
+} HPL_T_grid;
+
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_OP)
+(  const int,       const void *,    void *,          const HPL_T_TYPE );
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_2_MPI_TYPE( typ ) /* HPL_INT->MPI_INT, else MPI_DOUBLE */ \
+                         ( ( (typ) == HPL_INT ? MPI_INT : MPI_DOUBLE ) )
+/*
+ * The following macros perform common modulo operations;  All functions
+ * except MPosMod assume arguments are < d (i.e., arguments are themsel-
+ * ves within modulo range).
+ */
+      /* increment with mod; expands to a bare `if': brace when nested */
+#define    MModInc(I, d)       if(++(I) == (d)) (I) = 0
+      /* decrement with mod; expands to a bare `if': brace when nested */
+#define    MModDec(I, d)       if(--(I) == -1) (I) = (d)-1
+                                                   /* positive modulo */
+#define    MPosMod(I, d)       ( (I) - ((I)/(d))*(d) )
+                                                   /* add two numbers */
+#define    MModAdd(I1, I2, d) \
+           ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) )
+                                                        /* add 1 to # */
+#define    MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 )
+                                              /* subtract two numbers */
+#define    MModSub(I1, I2, d) \
+           ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) )
+                                                      /* sub 1 from # */
+#define    MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 )
+/*
+ * ---------------------------------------------------------------------
+ * grid function prototypes
+ * ---------------------------------------------------------------------
+ */
+int                              HPL_grid_init
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_ORDER,
+   const int,
+   const int,
+   HPL_T_grid *
+) );
+int                              HPL_grid_exit
+STDC_ARGS( (
+   HPL_T_grid *
+) );
+
+int                              HPL_grid_info
+STDC_ARGS( (
+   const HPL_T_grid *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_pnum
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int
+) );
+
+int                              HPL_barrier
+STDC_ARGS( (
+   MPI_Comm
+) );
+int                              HPL_broadcast
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   const int,
+   MPI_Comm
+) );
+int                              HPL_all_reduce
+STDC_ARGS( (
+   void *,
+   const int,
+   const HPL_T_TYPE,
+   const HPL_T_OP ,
+   MPI_Comm
+) );
+
+void                             HPL_max
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_min
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+void                             HPL_sum
+STDC_ARGS( (
+   const int,
+   const void *,
+   void *,
+   const HPL_T_TYPE
+) );
+
+#endif
+/*
+ * End of hpl_grid.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h
new file mode 100644
index 000000000..de6503eea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MATGEN_H
+#define HPL_MATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MULT0         1284865837
+#define    HPL_MULT1         1481765933
+#define    HPL_IADD0         1
+#define    HPL_IADD1         0
+#define    HPL_DIVFAC        2147483648.0               /* 2^31 */
+#define    HPL_POW16         65536.0                    /* 2^16 */
+#define    HPL_HALF          0.5
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes (argument lists give types only)
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dmatgen
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+void                             HPL_lmul
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_ladd    /* same list as HPL_lmul */
+STDC_ARGS( (
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_xjumpm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_setran
+STDC_ARGS( (
+   const int,
+   int *
+) );
+void                             HPL_jumpit
+STDC_ARGS( (
+   int *,
+   int *,
+   int *,
+   int *
+) );
+double                           HPL_rand STDC_ARGS( ( void ) );
+
+#endif
+/*
+ * End of hpl_matgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h
new file mode 100644
index 000000000..ea421a403
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h
@@ -0,0 +1,110 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_MISC_H
+#define HPL_MISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#ifdef __STDC__
+#define STDC_HEADERS           /* defined whenever __STDC__ is */
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef STDC_HEADERS
+#include <stdarg.h>
+#define STDC_ARGS(p)           p   /* keep the argument list p */
+#else
+#include <varargs.h>
+#define STDC_ARGS(p)           ()  /* drop p, leave empty parens */
+#endif
+
+#ifdef HPL_CALL_VSIPL
+#include <vsip.h>
+#endif
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_rone             1.0     /* one,  as a double constant */
+#define    HPL_rtwo             2.0     /* two,  as a double constant */
+#define    HPL_rzero            0.0     /* zero, as a double constant */
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions (arguments can be expanded repeatedly)
+ * ---------------------------------------------------------------------
+ */
+#define    Mabs( a_ )          ( ( (a_) <   0  ) ? -(a_) : (a_) )
+#define    Mmin( a_, b_ )      ( ( (a_) < (b_) ) ?  (a_) : (b_) )
+#define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
+/* Assuming integers and b > 0: Mfloor, Miceil round a/b down, up; Mceil is a ceiling for a >= 0. */
+#define    Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b))))
+#define    Mceil(a,b)           ( ( (a)+(b)-1 ) / (b) )
+#define    Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b))))
+/* Mupcase, Mlowcase remap only the codes 97..122 and 65..90 (ASCII letters). */
+#define    Mupcase(C)          (((C)>96 && (C)<123) ? (C) & 0xDF : (C))
+#define    Mlowcase(C)         (((C)>64 && (C)< 91) ? (C) | 32   : (C))
+/*
+ * Mptr returns a pointer to a_( i_, j_ ), i.e. a_ + i_ + j_ * lda_, with
+ * i_, j_ and lda_ each cast to size_t before the arithmetic.
+ */
+#define    Mptr( a_, i_, j_, lda_ ) \
+   ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) )
+/*
+ * Round (size_t)(ptr_) up to a multiple of al_; the result is an integer
+ */
+#define    HPL_PTR( ptr_, al_ ) \
+                      ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) 
+#endif
+/*
+ * End of hpl_misc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h
new file mode 100644
index 000000000..d5ba2939c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PANEL_H
+#define HPL_PANEL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures: HPL_T_panel (per-panel pointers, sizes, bcast state)
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_panel
+{
+   struct HPL_S_grid   * grid;             /* ptr to the process grid */
+   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
+   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
+   double              * A;              /* ptr to trailing part of A */
+   double              * WORK;                          /* work space */
+   double              * L2;                              /* ptr to L */
+   double              * L1;       /* ptr to jb x jb upper block of A */
+   double              * DPIV;    /* ptr to replicated jb pivot array */
+   double              * DINFO;      /* ptr to replicated scalar info */
+   double              * U;                               /* ptr to U */
+   int                 * IWORK;     /* integer workspace for swapping */
+   void                * * * buffers[2];   /* buffers for panel bcast */
+   int                 counts [2];          /* counts for panel bcast */
+   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
+   MPI_Request         request[1];        /* requests for panel bcast */
+   MPI_Status          status [1];          /* status for panel bcast */
+   int                 nb;            /* distribution blocking factor */
+   int                 jb;                             /* panel width */
+   int                 m;   /* global # of rows of trailing part of A */
+   int                 n;   /* global # of cols of trailing part of A */
+   int                 ia;  /* global row index of trailing part of A */
+   int                 ja;  /* global col index of trailing part of A */
+   int                 mp;   /* local # of rows of trailing part of A */
+   int                 nq;   /* local # of cols of trailing part of A */
+   int                 ii;   /* local row index of trailing part of A */
+   int                 jj;   /* local col index of trailing part of A */
+   int                 lda;           /* local leading dim of array A */
+   int                 prow;  /* proc. row owning 1st row of trail. A */
+   int                 pcol;  /* proc. col owning 1st col of trail. A */
+   int                 msgid;           /* message id for panel bcast */
+   int                 ldl2;         /* local leading dim of array L2 */
+   int                 len;      /* length of the buffer to broadcast */
+#ifdef HPL_CALL_VSIPL            /* members below exist only if defined */
+   vsip_block_d        * Ablock;                           /* A block */
+   vsip_block_d        * L1block;                         /* L1 block */
+   vsip_block_d        * L2block;                         /* L2 block */
+   vsip_block_d        * Ublock;                           /* U block */
+#endif                                              /* HPL_CALL_VSIPL */
+} HPL_T_panel;
+
+/*
+ * ---------------------------------------------------------------------
+ * panel function prototypes
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pgesv.h"
+/* HPL_pdpanel_new and HPL_pdpanel_init differ only in their last argument. */
+void                             HPL_pdpanel_new
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel * *
+) );
+void                             HPL_pdpanel_init
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int,
+   const int,
+   HPL_T_pmat *,
+   const int,
+   const int,
+   const int,
+   HPL_T_panel *
+) );
+int                              HPL_pdpanel_disp
+STDC_ARGS( (
+   HPL_T_panel * *
+) );
+int                              HPL_pdpanel_free
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+
+#endif
+/*
+ * End of hpl_panel.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h
new file mode 100644
index 000000000..1fd0ee457
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h
@@ -0,0 +1,505 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PAUXIL_H
+#define HPL_PAUXIL_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Mindxg2p returns the process coordinate owning the entry globally in-
+ * dexed by ig_.
+ */
+#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
+           { /* assigns proc_, which must be an lvalue */              \
+              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
+                  ( (nprocs_) > 1 ) )                                  \
+              { /* proc_ = (src_+1+(ig_-inb_)/nb_) % nprocs_ */        \
+                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
+              }                                                        \
+              else /* ig_ < inb_, src_ < 0 or nprocs_ <= 1 */          \
+              {                                                        \
+                 proc_ = (src_);                                       \
+              }                                                        \
+           }
+
+#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
+           { /* assigns il_; proc_ is never referenced */              \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
+              else                                                     \
+              {                                                        \
+                 int i__, j__; /* temporaries local to this block */   \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*( j__ - i__ ) +                           \
+                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
+                         (ig_) - (inb_) : (ig_) );                     \
+              }                                                        \
+           }
+
+#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
+           { /* assigns both il_ and proc_ */                          \
+              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
+                  ( (nprocs_) == 1 ) )                                 \
+              { il_ = (ig_); proc_ = (src_); }                         \
+              else                                                     \
+              {                                                        \
+                 int i__, j__; /* temporaries local to this block */   \
+                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
+                 il_ = (nb_)*(j__-i__) +                               \
+                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
+                         (ig_) - (inb_) : (ig_) );                     \
+                 proc_  = (src_) + 1 + i__;                            \
+                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); /* mod */ \
+              }                                                        \
+           }
+/*
+ * Mindxl2g computes the global index ig_ corresponding to the local
+ * index il_ in process proc_.
+ */
+#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
+           { /* assigns ig_, which must be an lvalue */                \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 if( (proc_) == (src_) )                               \
+                 {                                                     \
+                    if( (il_) < (inb_) ) ig_ = (il_);                  \
+                    else                 ig_ = (il_) +                 \
+                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
+                 }                                                     \
+                 else if( (proc_) < (src_) )                           \
+                 {                                                     \
+                    ig_ = (il_) + (inb_) +                             \
+                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
+                                   (proc_)-(src_)-1+(nprocs_) );       \
+                 }                                                     \
+                 else /* proc_ > src_ */                               \
+                 {                                                     \
+                    ig_ =  (il_) + (inb_) +                            \
+                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
+                           (proc_)-(src_)-1 );                         \
+                 }                                                     \
+              }                                                        \
+              else /* src_ < 0 or nprocs_ <= 1 */                      \
+              {                                                        \
+                 ig_ = (il_); /* indices coincide */                   \
+              }                                                        \
+           }
+/*
+ * MnumrocI computes the # of local indexes  np_ residing in the process
+ * of coordinate  proc_  corresponding to the interval of global indexes
+ * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
+ * src_,  and that the indexes are distributed from src_ using the para-
+ * meters inb_, nb_ and nprocs_.
+ */
+#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
+           { /* assigns np_, which must be an lvalue */                \
+              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
+              {                                                        \
+                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
+                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
+                 { /* i_ >= inb_ */                                    \
+                    nblk__ = (-inb__) / (nb_) + 1;                     \
+                    src__  = (src_) + nblk__;                          \
+                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
+                    inb__ += nblk__*(nb_);                             \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == src__ ) np_ = (n_);              \
+                       else                   np_ = 0;                 \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != src__ )                       \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+                 else /* i_ < inb_ */                                  \
+                 {                                                     \
+                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
+                    {                                                  \
+                       if( (proc_) == (src_) ) np_ = (n_);             \
+                       else                    np_ = 0;                \
+                    }                                                  \
+                    else                                               \
+                    {                                                  \
+                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
+                          mydist__ += (nprocs_);                       \
+                       nblk__    = n__ / (nb_) + 1;                    \
+                       mydist__ -= nblk__ -                            \
+                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
+                       if( mydist__ < 0 )                              \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = (nb_) + (nb_) * quot__;             \
+                          else                                         \
+                             np_ = inb__ + (nb_) * quot__;             \
+                       }                                               \
+                       else if( mydist__ > 0 )                         \
+                       {                                               \
+                          np_ = (nb_) * quot__;                        \
+                       }                                               \
+                       else                                            \
+                       {                                               \
+                          if( (proc_) != (src_) )                      \
+                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
+                          else                                         \
+                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
+                       }                                               \
+                    }                                                  \
+                 }                                                     \
+              }                                                        \
+              else /* src_ < 0 or nprocs_ <= 1 */                      \
+              {                                                        \
+                 np_ = (n_);                                           \
+              }                                                        \
+           }
+
+#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
+           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_indxg2lp
+STDC_ARGS( (
+   int *,
+   int *,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2l
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxg2p
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_indxl2g
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+void                             HPL_infog2l
+STDC_ARGS( (
+   int,
+   int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+int                              HPL_numroc
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+int                              HPL_numrocI
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int,
+   const int
+) );
+
+void                             HPL_dlaswp00N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp10N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp01N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp01T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp02N
+STDC_ARGS( (
+   const int,
+   const int,
+   const double *,
+   const int,
+   double *,
+   double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp03N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp03T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int
+) );
+void                             HPL_dlaswp04N
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp04T
+STDC_ARGS( (
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp05T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   const double *,
+   const int,
+   const int *,
+   const int *
+) );
+void                             HPL_dlaswp06N
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+void                             HPL_dlaswp06T
+STDC_ARGS( (
+   const int,
+   const int,
+   double *,
+   const int,
+   double *,
+   const int,
+   const int *
+) );
+
+void                             HPL_pabort
+STDC_ARGS( (
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pwarn
+STDC_ARGS( (
+   FILE *,
+   int,
+   const char *,
+   const char *,
+   ...
+) );
+void                             HPL_pdlaprnt
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int,
+   const char *
+) );
+double                           HPL_pdlamch
+STDC_ARGS( (
+   MPI_Comm,
+   const HPL_T_MACH
+) );
+double                           HPL_pdlange
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const HPL_T_NORM,
+   const int,
+   const int,
+   const int,
+   const double *,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pauxil.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h
new file mode 100644
index 000000000..09eee79ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PFACT_H
+#define HPL_PFACT_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef void (*HPL_T_PFA_FUN)
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_RFA_FUN)
+(  HPL_T_panel *,   const int,       const int,       const int,
+   double * );
+typedef void (*HPL_T_UPD_FUN)
+(  HPL_T_panel *,   int *,           HPL_T_panel *,   const int ); 
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_dlocmax
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_dlocswpN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_dlocswpT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdmxswp
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdrpancrN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpancrT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanllT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlN
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+void                             HPL_pdrpanrlT
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int,
+   const int,
+   double *
+) );
+
+void                             HPL_pdfact
+STDC_ARGS( (
+   HPL_T_panel *
+) );
+ 
+#endif
+/*
+ * End of hpl_pfact.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h
new file mode 100644
index 000000000..3ca576c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h
@@ -0,0 +1,346 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PGESV_H
+#define HPL_PGESV_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_grid.h"
+#include "hpl_comm.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pfact.h"
+/*
+ * ---------------------------------------------------------------------
+ * #typedefs and data structures
+ * ---------------------------------------------------------------------
+ */
+typedef enum
+{
+   HPL_SWAP00        = 451,                      /* Use HPL_pdlaswp00 */
+   HPL_SWAP01        = 452,                      /* Use HPL_pdlaswp01 */
+   HPL_SW_MIX        = 453, /* Use HPL_pdlaswp00_ for small number of */
+                            /* columns, and HPL_pdlaswp01_ otherwise. */
+   HPL_NO_SWP        = 499
+} HPL_T_SWAP;
+
+typedef struct HPL_S_palg
+{
+   HPL_T_TOP           btopo;               /* row broadcast topology */
+   int                 depth;                     /* look-ahead depth */
+   int                 nbdiv;            /* recursive division factor */
+   int                 nbmin;         /* recursion stopping criterion */
+   HPL_T_FACT          pfact;                   /* panel fact variant */
+   HPL_T_FACT          rfact;               /* recursive fact variant */
+   HPL_T_PFA_FUN       pffun;              /* panel fact function ptr */
+   HPL_T_RFA_FUN       rffun;          /* recursive fact function ptr */
+   HPL_T_UPD_FUN       upfun;                      /* update function */
+   HPL_T_SWAP          fswap;                   /* Swapping algorithm */
+   int                 fsthr;                   /* Swapping threshold */
+   int                 equil;                        /* Equilibration */
+   int                 align;              /* data alignment constant */
+} HPL_T_palg;
+
+typedef struct HPL_S_pmat
+{
+#ifdef HPL_CALL_VSIPL
+   vsip_block_d        * block;
+#endif
+   double              * A;            /* pointer to local piece of A */
+   double              * X;             /* pointer to solution vector */
+   int                 n;                      /* global problem size */
+   int                 nb;                         /* blocking factor */
+   int                 ld;                 /* local leading dimension */
+   int                 mp;                    /* local number of rows */
+   int                 nq;                 /* local number of columns */
+   int                 info;                    /* computational flag */
+} HPL_T_pmat;
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    MSGID_BEGIN_PFACT   1001              /* message id ranges */
+#define    MSGID_END_PFACT     2000
+#define    MSGID_BEGIN_FACT    2001
+#define    MSGID_END_FACT      3000
+#define    MSGID_BEGIN_PTRSV   3001
+#define    MSGID_END_PTRSV     4000
+ 
+#define    MSGID_BEGIN_COLL    9001
+#define    MSGID_END_COLL     10000
+/*
+ * ---------------------------------------------------------------------
+ * #define macros definitions
+ * ---------------------------------------------------------------------
+ */
+#define    MNxtMgid( id_, beg_, end_ ) \
+                             (( (id_)+1 > (end_) ?  (beg_) : (id_)+1 ))
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pipid
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   int *
+) );
+void                             HPL_plindx0
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdlaswp00N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp00T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_perm
+STDC_ARGS( (
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_logsort
+STDC_ARGS( (
+   const int,
+   const int,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx10
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_plindx1
+STDC_ARGS( (
+   HPL_T_panel *,
+   const int,
+   const int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_spreadN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_spreadT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_SIDE,
+   const int,
+   double *,
+   const int,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_equil
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const enum HPL_TRANS,
+   const int,
+   double *,
+   const int,
+   int *,
+   const int *,
+   const int *,
+   int *
+) );
+void                             HPL_rollN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_rollT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int,
+   double *,
+   const int,
+   const int *,
+   const int *,
+   const int *
+) );
+void                             HPL_pdlaswp01N
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdlaswp01T
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdupdateNN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateNT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTN
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+void                             HPL_pdupdateTT
+STDC_ARGS( (
+   HPL_T_panel *,
+   int *,
+   HPL_T_panel *,
+   const int
+) );
+
+void                             HPL_pdgesv0
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK1
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesvK2
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+void                             HPL_pdgesv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_palg *,
+   HPL_T_pmat *
+) );
+ 
+void                             HPL_pdtrsv
+STDC_ARGS( (
+   HPL_T_grid *,
+   HPL_T_pmat *
+) );
+
+#endif
+/*
+ * End of hpl_pgesv.h
+ */ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h
new file mode 100644
index 000000000..1091b0f60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h
@@ -0,0 +1,77 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMATGEN_H
+#define HPL_PMATGEN_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_matgen.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdmatgen
+STDC_ARGS( (
+   const HPL_T_grid *,
+   const int,
+   const int,
+   const int,
+   double *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_pmatgen.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h
new file mode 100644
index 000000000..23550d47b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h
@@ -0,0 +1,59 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PMISC_H
+#define HPL_PMISC_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "mpi.h"
+
+#endif
+/*
+ * End of hpl_pmisc.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h
new file mode 100644
index 000000000..5777bd536
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTEST_H
+#define HPL_PTEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+#include "hpl_panel.h"
+#include "hpl_pgesv.h"
+
+#include "hpl_ptimer.h"
+#include "hpl_pmatgen.h"
+/*
+ * ---------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------
+ */
+typedef struct HPL_S_test   /* passed by pointer to HPL_pdinfo / HPL_pdtest */
+{
+   double              epsil;                      /* machine epsilon */
+   double              thrsh;                            /* threshold */
+   FILE *              outfp;       /* output stream (only in proc 0) */
+   int                 kfail;                    /* # of tests failed */
+   int                 kpass;                    /* # of tests passed */
+   int                 kskip;                   /* # of tests skipped */
+   int                 ktest;                /* total number of tests */
+} HPL_T_test;
+
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants for testing only
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_LINE_MAX         256
+#define    HPL_MAX_PARAM         20
+#define    HPL_ISEED            100
+/*
+ * ---------------------------------------------------------------------
+ * global timers for timing analysis only
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_DETAILED_TIMING
+#define    HPL_TIMING_BEG        11 /* timer 0 reserved, used by main */
+#define    HPL_TIMING_N           6 /* number of timers defined below */
+#define    HPL_TIMING_RPFACT     11 /* starting from here, contiguous */
+#define    HPL_TIMING_PFACT      12
+#define    HPL_TIMING_MXSWP      13
+#define    HPL_TIMING_UPDATE     14
+#define    HPL_TIMING_LASWP      15
+#define    HPL_TIMING_PTRSV      16 /* = HPL_TIMING_BEG + HPL_TIMING_N - 1 */
+#endif /* HPL_DETAILED_TIMING */
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void                             HPL_pdinfo   /* all 27 parameters are unnamed pointers */
+STDC_ARGS( (
+   HPL_T_test *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_ORDER *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *,
+   HPL_T_FACT *,
+   int *,
+   HPL_T_TOP *,
+   int *,
+   int *,
+   HPL_T_SWAP *,
+   int *,
+   int *,
+   int *,
+   int *,
+   int *
+) );
+void                             HPL_pdtest   /* three pointers, then two const int values */
+STDC_ARGS( (
+   HPL_T_test *,
+   HPL_T_grid *,
+   HPL_T_palg *,
+   const int,
+   const int
+) );
+
+#endif
+/*
+ * End of hpl_ptest.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h
new file mode 100644
index 000000000..43c8fe33a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h
@@ -0,0 +1,96 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_PTIMER_H
+#define HPL_PTIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NPTIMER             64 /* presumably # of timer slots -- TODO confirm */
+#define    HPL_PTIMER_STARTFLAG   5.0 /* NOTE(review): meaning not visible in this header */
+#define    HPL_PTIMER_ERROR      -1.0 /* presumably an error result -- TODO confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum   /* selector taken by HPL_ptimer_inquire and HPL_ptimer_combine */
+{  HPL_WALL_PTIME = 101, HPL_CPU_PTIME  = 102 } HPL_T_PTIME;
+
+typedef enum   /* operation selector taken by HPL_ptimer_combine */
+{ HPL_AMAX_PTIME  = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME  = 203 }
+HPL_T_PTIME_OP;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_ptimer_cputime   STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_walltime  STDC_ARGS(     ( void      ) );
+
+void            HPL_ptimer           STDC_ARGS(     ( const int ) );
+void            HPL_ptimer_boot      STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_combine   /* only prototype here naming a parameter (comm) */
+STDC_ARGS(
+(  MPI_Comm comm,   const HPL_T_PTIME_OP,             const HPL_T_PTIME,
+   const int,       const int,       double * ) );
+void            HPL_ptimer_disable   STDC_ARGS(     ( void      ) );
+void            HPL_ptimer_enable    STDC_ARGS(     ( void      ) );
+double          HPL_ptimer_inquire   /* takes an HPL_T_PTIME selector and an int */
+STDC_ARGS(
+(  const HPL_T_PTIME,                const int ) );
+
+#endif
+/*
+ * End of hpl_ptimer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h
new file mode 100644
index 000000000..1eedc97e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h
@@ -0,0 +1,80 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TEST_H
+#define HPL_TEST_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+#include "hpl_blas.h"
+#include "hpl_auxil.h"
+#include "hpl_gesv.h"
+
+#include "hpl_matgen.h"
+#include "hpl_timer.h"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_dinfo   /* all 14 parameters are unnamed pointers */
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   HPL_T_FACT *,    int *,           int *,           int *, 
+   int *,           int *,           HPL_T_FACT *,    int *,
+   double *,        double * ) );
+void            HPL_dtest   /* FILE *, 8 by-value arguments, then 3 int * */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   HPL_T_FACT,      HPL_T_FACT,      const int,       const double,
+   const double,    int *,           int *,           int * ) );
+
+#endif
+/*
+ * End of hpl_test.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h
new file mode 100644
index 000000000..4c91700ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_TIMER_H
+#define HPL_TIMER_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_misc.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_NTIMER              64 /* presumably # of timer slots -- TODO confirm */
+#define    HPL_TIMER_STARTFLAG    5.0 /* NOTE(review): meaning not visible in this header */
+#define    HPL_TIMER_ERROR       -1.0 /* presumably an error result -- TODO confirm */
+/*
+ * ---------------------------------------------------------------------
+ * type definitions
+ * ---------------------------------------------------------------------
+ */
+typedef enum   /* selector taken as first argument by HPL_timer_inquire */
+{  HPL_WALL_TIME = 101, HPL_CPU_TIME  = 102 } HPL_T_TIME;
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+double          HPL_timer_cputime    STDC_ARGS(     ( void      ) );
+double          HPL_timer_walltime   STDC_ARGS(     ( void      ) );
+
+void            HPL_timer            STDC_ARGS(     ( const int ) );
+void            HPL_timer_boot       STDC_ARGS(     ( void      ) );
+void            HPL_timer_enable     STDC_ARGS(     ( void      ) );
+void            HPL_timer_disable    STDC_ARGS(     ( void      ) );
+double          HPL_timer_inquire
+STDC_ARGS(
+(  const HPL_T_TIME,                 const int ) );
+
+#endif
+/*
+ * End of hpl_timer.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h
new file mode 100644
index 000000000..a96956497
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h
@@ -0,0 +1,135 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+#ifndef HPL_UNITS_H
+#define HPL_UNITS_H
+/*
+ * ---------------------------------------------------------------------
+ * Include files
+ * ---------------------------------------------------------------------
+ */
+#include "hpl_pmisc.h"
+#include "hpl_pauxil.h"
+/*
+ * ---------------------------------------------------------------------
+ * #define macro constants
+ * ---------------------------------------------------------------------
+ */
+#define    HPL_MAXROUT       50
+#define    HPL_MAXRNAME      15   /* row width of HPL_unit_info's char [][HPL_MAXRNAME] argument */
+
+#define    HPL_TRUE         'T'   /* character flag, not a C boolean */
+#define    HPL_FALSE        'F'   /* nonzero char: compare explicitly, never test as a truth value */
+
+#define    HPL_INDXG2P_ROUT   "HPL_indxg2p"   /* each *_ROUT name is at most 11 chars, < HPL_MAXRNAME */
+#define    HPL_INDXG2L_ROUT   "HPL_indxg2l"
+#define    HPL_INDXL2G_ROUT   "HPL_indxl2g"
+#define    HPL_NUMROC_ROUT    "HPL_numroc"
+#define    HPL_NUMROCI_ROUT   "HPL_numrocI"
+/*
+ * ---------------------------------------------------------------------
+ * Function prototypes
+ * ---------------------------------------------------------------------
+ */
+void            HPL_unit_info
+STDC_ARGS(
+(  FILE * *,        int *,           int *,           int *,
+   int *,           int *,           int *,           int *,
+   int *,           int *,           int *,           char [][HPL_MAXRNAME],
+   int [] ) );
+ 
+void            HPL_unit_indxg2l
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2l
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxl2g
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxl2g
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_indxg2p
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+int             HPL_chek_indxg2p
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+ 
+void            HPL_unit_numroc   /* NOTE(review): no HPL_chek_numroc is declared in this header -- confirm intended */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       long *,          long * ) );
+void            HPL_unit_numrocI   /* one more const int than HPL_unit_numroc */
+STDC_ARGS(
+(  FILE *,          const int,       const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+int             HPL_chek_numrocI   /* second argument is a const char *; returns int */
+STDC_ARGS(
+(  FILE *,          const char *,    const int,       const int,
+   const int,       const int,       const int,       const int,
+   const int,       const int,       long *,          long * ) );
+
+#endif
+/*
+ * End of hpl_units.h
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in
new file mode 100644
index 000000000..b4b3b9a35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in
@@ -0,0 +1,67 @@
+/* include/hplconfig.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if you have a BLAS library. */
+#undef HAVE_BLAS
+
+/* Define to 1 if you have the `dgemm_' function. */
+#undef HAVE_DGEMM_
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have the MPI library. */
+#undef HAVE_MPI
+
+/* Define to 1 if you have the <mpi.h> header file. */
+#undef HAVE_MPI_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Version number of package */
+#undef VERSION
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh
new file mode 100755
index 000000000..8175c640f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh
@@ -0,0 +1,518 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2018-03-11.20; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+tab='	'
+nl='
+'
+IFS=" $tab$nl"
+
+# Set DOITPROG to "echo" to test this script.
+
+doit=${DOITPROG-}
+doit_exec=${doit:-exec}
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp}
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_mkdir=
+
+# Desired mode of installed file.
+mode=0755
+
+chgrpcmd=
+chmodcmd=$chmodprog
+chowncmd=
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd=
+
+src=
+dst=
+dir_arg=
+dst_arg=
+
+copy_on_change=false
+is_target_a_directory=possibly
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+   or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+     --help     display this help and exit.
+     --version  display version info and exit.
+
+  -c            (ignored)
+  -C            install only if different (preserve the last data modification time)
+  -d            create directories instead of installing files.
+  -g GROUP      $chgrpprog installed files to GROUP.
+  -m MODE       $chmodprog installed files to MODE.
+  -o USER       $chownprog installed files to USER.
+  -s            $stripprog installed files.
+  -t DIRECTORY  install into DIRECTORY.
+  -T            report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+  RMPROG STRIPPROG
+"
+# Parse leading options; "--" or the first non-option argument ends the loop.
+while test $# -ne 0; do
+  case $1 in
+    -c) ;;
+
+    -C) copy_on_change=true;;
+
+    -d) dir_arg=true;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift;;
+
+    --help) echo "$usage"; exit $?;;
+
+    -m) mode=$2
+        case $mode in
+          *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
+            echo "$0: invalid mode: $mode" >&2
+            exit 1;;
+        esac
+        shift;;
+
+    -o) chowncmd="$chownprog $2"
+        shift;;
+
+    -s) stripcmd=$stripprog;;
+
+    -t)
+        is_target_a_directory=always
+        dst_arg=$2
+        # Protect names problematic for 'test' and other utilities.
+        case $dst_arg in
+          -* | [=\(\)!]) dst_arg=./$dst_arg;;
+        esac
+        shift;;
+
+    -T) is_target_a_directory=never;;
+
+    --version) echo "$0 $scriptversion"; exit $?;;
+
+    --) shift
+        break;;
+
+    -*) echo "$0: invalid option: $1" >&2
+        exit 1;;
+
+    *)  break;;
+  esac
+  shift
+done
+
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then
+  if test -n "$dst_arg"; then
+    echo "$0: target directory not allowed when installing a directory." >&2
+    exit 1
+  fi
+fi
+
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+  # When -d is used, all remaining arguments are directories to create.
+  # When -t is used, the destination is already specified.
+  # Otherwise, the last argument is the destination.  Remove it from $@.
+  for arg
+  do
+    if test -n "$dst_arg"; then
+      # $@ is not empty: it contains at least $arg.
+      set fnord "$@" "$dst_arg"
+      shift # fnord
+    fi
+    shift # arg
+    dst_arg=$arg
+    # Protect names problematic for 'test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
+  done
+fi
+
+if test $# -eq 0; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call 'install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
+
+if test -z "$dir_arg"; then
+  if test $# -gt 1 || test "$is_target_a_directory" = always; then
+    if test ! -d "$dst_arg"; then
+      echo "$0: $dst_arg: Is not a directory." >&2
+      exit 1
+    fi
+  fi
+fi
+
+if test -z "$dir_arg"; then
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1
+  trap "ret=130; $do_exit" 2
+  trap "ret=141; $do_exit" 13
+  trap "ret=143; $do_exit" 15
+
+  # Set umask so as not to create temps with too-generous modes.
+  # However, 'strip' requires both read and write access to temps.
+  case $mode in
+    # Optimize common cases.
+    *644) cp_umask=133;;
+    *755) cp_umask=22;;
+
+    *[0-7])
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw='% 200'
+      fi
+      cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
+    *)
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw=,u+rw
+      fi
+      cp_umask=$mode$u_plus_rw;;
+  esac
+fi
+
+for src
+do
+  # Protect names problematic for 'test' and other utilities.
+  case $src in
+    -* | [=\(\)!]) src=./$src;;
+  esac
+
+  if test -n "$dir_arg"; then
+    dst=$src
+    dstdir=$dst
+    test -d "$dstdir"
+    dstdir_status=$?
+  else
+
+    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dst_arg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+    dst=$dst_arg
+
+    # If destination is a directory, append the input filename.
+    if test -d "$dst"; then
+      if test "$is_target_a_directory" = never; then
+        echo "$0: $dst_arg: Is a directory" >&2
+        exit 1
+      fi
+      dstdir=$dst
+      dstbase=`basename "$src"`
+      case $dst in
+	*/) dst=$dst$dstbase;;
+	*)  dst=$dst/$dstbase;;
+      esac
+      dstdir_status=0
+    else
+      dstdir=`dirname "$dst"`
+      test -d "$dstdir"
+      dstdir_status=$?
+    fi
+  fi
+
+  case $dstdir in
+    */) dstdirslash=$dstdir;;
+    *)  dstdirslash=$dstdir/;;
+  esac
+
+  obsolete_mkdir_used=false
+
+  if test $dstdir_status != 0; then
+    case $posix_mkdir in
+      '')
+        # Create intermediate dirs using mode 755 as modified by the umask.
+        # This is like FreeBSD 'install' as of 1997-10-28.
+        umask=`umask`
+        case $stripcmd.$umask in
+          # Optimize common cases.
+          *[2367][2367]) mkdir_umask=$umask;;
+          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+          *[0-7])
+            mkdir_umask=`expr $umask + 22 \
+              - $umask % 100 % 40 + $umask % 20 \
+              - $umask % 10 % 4 + $umask % 2
+            `;;
+          *) mkdir_umask=$umask,go-w;;
+        esac
+
+        # With -d, create the new directory with the user-specified mode.
+        # Otherwise, rely on $mkdir_umask.
+        if test -n "$dir_arg"; then
+          mkdir_mode=-m$mode
+        else
+          mkdir_mode=
+        fi
+
+        posix_mkdir=false
+        case $umask in
+          *[123567][0-7][0-7])
+            # POSIX mkdir -p sets u+wx bits regardless of umask, which
+            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+            ;;
+          *)
+            # Note that $RANDOM variable is not portable (e.g. dash);  Use it
+            # here however when possible just to lower collision chance.
+            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+
+            trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+            # Because "mkdir -p" follows existing symlinks and we likely work
+            # directly in world-writeable /tmp, make sure that the '$tmpdir'
+            # directory is successfully created first before we actually test
+            # 'mkdir -p' feature.
+            if (umask $mkdir_umask &&
+                $mkdirprog $mkdir_mode "$tmpdir" &&
+                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
+            then
+              if test -z "$dir_arg" || {
+                   # Check for POSIX incompatibilities with -m.
+                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+                   # other-writable bit of parent directory when it shouldn't.
+                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+                   test_tmpdir="$tmpdir/a"
+                   ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
+                   case $ls_ld_tmpdir in
+                     d????-?r-*) different_mode=700;;
+                     d????-?--*) different_mode=755;;
+                     *) false;;
+                   esac &&
+                   $mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
+                     ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
+                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+                   }
+                 }
+              then posix_mkdir=:
+              fi
+              rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
+            else
+              # Remove any dirs left behind by ancient mkdir implementations.
+              rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
+            fi
+            trap '' 0;;
+        esac;;
+    esac
+
+    if
+      $posix_mkdir && (
+        umask $mkdir_umask &&
+        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+      )
+    then :
+    else
+
+      # The umask is ridiculous, or mkdir does not conform to POSIX,
+      # or it failed possibly due to a race condition.  Create the
+      # directory the slow way, step by step, checking for races as we go.
+
+      case $dstdir in
+        /*) prefix='/';;
+        [-=\(\)!]*) prefix='./';;
+        *)  prefix='';;
+      esac
+
+      oIFS=$IFS
+      IFS=/
+      set -f
+      set fnord $dstdir
+      shift
+      set +f
+      IFS=$oIFS
+
+      prefixes=
+      # Walk $dstdir one component at a time; empty components are skipped.
+      for d
+      do
+        test X"$d" = X && continue
+        # Missing dir: retry 'mkdir -p' with umask $mkdir_umask set, or queue it.
+        prefix=$prefix$d
+        if test -d "$prefix"; then
+          prefixes=
+        else
+          if $posix_mkdir; then
+            (umask $mkdir_umask &&
+             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+            # Don't fail if two instances are running concurrently.
+            test -d "$prefix" || exit 1
+          else
+            case $prefix in
+              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+              *) qprefix=$prefix;;
+            esac
+            prefixes="$prefixes '$qprefix'"
+          fi
+        fi
+        prefix=$prefix/
+      done
+
+      if test -n "$prefixes"; then
+        # Don't fail if two instances are running concurrently.
+        (umask $mkdir_umask &&
+         eval "\$doit_exec \$mkdirprog $prefixes") ||
+          test -d "$dstdir" || exit 1
+        obsolete_mkdir_used=true
+      fi
+    fi
+  fi
+
+  if test -n "$dir_arg"; then
+    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
+      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+  else
+
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=${dstdirslash}_inst.$$_
+    rmtmp=${dstdirslash}_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+    # Copy the file name to the temp name.
+    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $cpprog $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+    # If -C, don't bother to copy if it wouldn't change the file.
+    if $copy_on_change &&
+       old=`LC_ALL=C ls -dlL "$dst"     2>/dev/null` &&
+       new=`LC_ALL=C ls -dlL "$dsttmp"  2>/dev/null` &&
+       set -f &&
+       set X $old && old=:$2:$4:$5:$6 &&
+       set X $new && new=:$2:$4:$5:$6 &&
+       set +f &&
+       test "$old" = "$new" &&
+       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+    then
+      rm -f "$dsttmp"
+    else
+      # Rename the file to the real destination.
+      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+      # The rename failed, perhaps because mv can't rename something else
+      # to itself, or perhaps because mv is so ancient that it does not
+      # support -f.
+      {
+        # Now remove or move aside any old file at destination location.
+        # We try this two ways since rm can't unlink itself on some
+        # systems and the destination file might be busy for other
+        # reasons.  In this case, the final cleanup might fail but the new
+        # file should still install successfully.
+        {
+          test ! -f "$dst" ||
+          $doit $rmcmd -f "$dst" 2>/dev/null ||
+          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+          } ||
+          { echo "$0: cannot unlink or rename $dst" >&2
+            (exit 1); exit 1
+          }
+        } &&
+
+        # Now rename the file to the real destination.
+        $doit $mvcmd "$dsttmp" "$dst"
+      }
+    fi || exit 1
+
+    trap '' 0
+  fi
+done
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+# Every object rule below lists $(INCdep) as a prerequisite.
+## Object files ########################################################
+# HPL_au1obj is listed separately; its rule below uses $(CCNOOPT).
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean   # names of actions, not files to be built
+lib     : lib.grd
+# lib.grd is a stamp: archive the objects into $(HPLlib), then touch it.
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; HPL_dlamch.o uses $(CCNOOPT), not $(CCFLAGS).
+HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlacpy.c
+HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlatcpy.c
+HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_fprintf.c
+HPL_warn.o             : ../HPL_warn.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_warn.c
+HPL_abort.o            : ../HPL_abort.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_abort.c
+HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaprnt.c
+HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlange.c
+HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT)  ../HPL_dlamch.c
+#
+# ######################################################################
+# Remove the object files and *.grd stamp files from the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+# Every object rule below lists $(INCdep) as a prerequisite.
+## Object files ########################################################
+# HPL_blaobj is the set of objects the lib.grd rule archives into $(HPLlib).
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean   # names of actions, not files to be built
+lib     : lib.grd
+# lib.grd is a stamp: archive the objects into $(HPLlib), then touch it.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object: compile ../<name>.c with $(CCFLAGS).
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+# Remove the object files and *.grd stamp files from the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_pmisc.h   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h
+# Every object rule below lists $(INCdep) as a prerequisite.
+## Object files ########################################################
+# HPL_comobj is the set of objects the lib.grd rule archives into $(HPLlib).
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean   # names of actions, not files to be built
+lib     : lib.grd
+# lib.grd is a stamp: archive the objects into $(HPLlib), then touch it.
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object: compile ../<name>.c with $(CCFLAGS).
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1ring.c
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_1rinM.c
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2ring.c
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_2rinM.c
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blong.c
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_blonM.c
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_packL.c
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_copyL.c
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_binit.c
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bcast.c
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_bwait.c
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_send.c
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_recv.c
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sdrv.c
+#
+# ######################################################################
+# Remove the object files and *.grd stamp files from the current directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv
new file mode 100644
index 000000000..2a8722559
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv
@@ -0,0 +1,83 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers named as prerequisites by every object rule in this makefile.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_gesv.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_gesobj       = \
+   HPL_dgesv.o            HPL_ipid.o
+#
+## Targets #############################################################
+# all -> lib -> lib.grd: the first two names only forward to the stamp.
+all     : lib 
+# lib is satisfied once the lib.grd stamp file is up to date.
+lib     : lib.grd
+# Archive the objects into $(HPLlib), run $(RANLIB), then touch the stamp.
+lib.grd : $(HPL_gesobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; source or $(INCdep) changes trigger a rebuild.
+HPL_dgesv.o            : ../HPL_dgesv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgesv.c
+HPL_ipid.o             : ../HPL_ipid.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ipid.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory via $(RM).
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers named as prerequisites by every object rule in this makefile.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_griobj       = \
+   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
+   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
+   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
+   HPL_max.o              HPL_sum.o
+#
+## Targets #############################################################
+# all -> lib -> lib.grd: the first two names only forward to the stamp.
+all     : lib 
+# lib is satisfied once the lib.grd stamp file is up to date.
+lib     : lib.grd
+# Archive the objects into $(HPLlib), run $(RANLIB), then touch the stamp.
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; source or $(INCdep) changes trigger a rebuild.
+HPL_grid_init.o        : ../HPL_grid_init.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_init.c
+HPL_pnum.o             : ../HPL_pnum.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pnum.c
+HPL_grid_info.o        : ../HPL_grid_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_info.c
+HPL_grid_exit.o        : ../HPL_grid_exit.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_exit.c
+HPL_broadcast.o        : ../HPL_broadcast.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_broadcast.c
+HPL_reduce.o           : ../HPL_reduce.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_reduce.c
+HPL_all_reduce.o       : ../HPL_all_reduce.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_all_reduce.c
+HPL_barrier.o          : ../HPL_barrier.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_barrier.c
+HPL_min.o              : ../HPL_min.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_min.c
+HPL_max.o              : ../HPL_max.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_max.c
+HPL_sum.o              : ../HPL_sum.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sum.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory via $(RM).
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers named as prerequisites by every object rule in this makefile.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h  \
+   $(INCdir)/hpl_matgen.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_matobj       = \
+   HPL_dmatgen.o          HPL_ladd.o             HPL_lmul.o             \
+   HPL_xjumpm.o           HPL_jumpit.o           HPL_rand.o             \
+   HPL_setran.o
+#
+## Targets #############################################################
+# all -> lib -> lib.grd: the first two names only forward to the stamp.
+all     : lib 
+# lib is satisfied once the lib.grd stamp file is up to date.
+lib     : lib.grd
+# Archive the objects into $(HPLlib), run $(RANLIB), then touch the stamp.
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; source or $(INCdep) changes trigger a rebuild.
+HPL_dmatgen.o          : ../HPL_dmatgen.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dmatgen.c
+HPL_ladd.o             : ../HPL_ladd.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ladd.c
+HPL_lmul.o             : ../HPL_lmul.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_lmul.c
+HPL_xjumpm.o           : ../HPL_xjumpm.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_xjumpm.c
+HPL_jumpit.o           : ../HPL_jumpit.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_jumpit.c
+HPL_rand.o             : ../HPL_rand.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rand.c
+HPL_setran.o           : ../HPL_setran.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_setran.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory via $(RM).
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers named as prerequisites by every object rule in this makefile.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h  $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_panobj       = \
+   HPL_pdpanel_new.o      HPL_pdpanel_init.o     HPL_pdpanel_disp.o     \
+   HPL_pdpanel_free.o
+#
+## Targets #############################################################
+# all -> lib -> lib.grd: the first two names only forward to the stamp.
+all     : lib 
+# lib is satisfied once the lib.grd stamp file is up to date.
+lib     : lib.grd
+# Archive the objects into $(HPLlib), run $(RANLIB), then touch the stamp.
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; source or $(INCdep) changes trigger a rebuild.
+HPL_pdpanel_new.o      : ../HPL_pdpanel_new.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_new.c
+HPL_pdpanel_init.o     : ../HPL_pdpanel_init.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_init.c
+HPL_pdpanel_disp.o     : ../HPL_pdpanel_disp.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_disp.c
+HPL_pdpanel_free.o     : ../HPL_pdpanel_free.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdpanel_free.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory via $(RM).
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers named as prerequisites by every object rule in this makefile.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_pauobj       = \
+   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
+   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
+   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
+   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
+   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
+   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
+   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
+   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+# all -> lib -> lib.grd: the first two names only forward to the stamp.
+all     : lib 
+# lib is satisfied once the lib.grd stamp file is up to date.
+lib     : lib.grd
+# Archive the objects into $(HPLlib), run $(RANLIB), then touch the stamp.
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; source or $(INCdep) changes trigger a rebuild.
+HPL_indxg2l.o          : ../HPL_indxg2l.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2l.c
+HPL_indxg2lp.o         : ../HPL_indxg2lp.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2lp.c
+HPL_indxg2p.o          : ../HPL_indxg2p.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2p.c
+HPL_indxl2g.o          : ../HPL_indxl2g.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxl2g.c
+HPL_infog2l.o          : ../HPL_infog2l.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_infog2l.c
+HPL_numroc.o           : ../HPL_numroc.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_numroc.c
+HPL_numrocI.o          : ../HPL_numrocI.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_numrocI.c
+HPL_dlaswp00N.o        : ../HPL_dlaswp00N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp00N.c
+HPL_dlaswp10N.o        : ../HPL_dlaswp10N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp10N.c
+HPL_dlaswp01N.o        : ../HPL_dlaswp01N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp01N.c
+HPL_dlaswp01T.o        : ../HPL_dlaswp01T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp01T.c
+HPL_dlaswp02N.o        : ../HPL_dlaswp02N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp02N.c
+HPL_dlaswp03N.o        : ../HPL_dlaswp03N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp03N.c
+HPL_dlaswp03T.o        : ../HPL_dlaswp03T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp03T.c
+HPL_dlaswp04N.o        : ../HPL_dlaswp04N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp04N.c
+HPL_dlaswp04T.o        : ../HPL_dlaswp04T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp04T.c
+HPL_dlaswp05N.o        : ../HPL_dlaswp05N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp05N.c
+HPL_dlaswp05T.o        : ../HPL_dlaswp05T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp05T.c
+HPL_dlaswp06N.o        : ../HPL_dlaswp06N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp06N.c
+HPL_dlaswp06T.o        : ../HPL_dlaswp06T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp06T.c
+HPL_pwarn.o            : ../HPL_pwarn.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pwarn.c
+HPL_pabort.o           : ../HPL_pabort.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pabort.c
+HPL_pdlaprnt.o         : ../HPL_pdlaprnt.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaprnt.c
+HPL_pdlamch.o          : ../HPL_pdlamch.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlamch.c
+HPL_pdlange.o          : ../HPL_pdlange.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlange.c
+#
+# ######################################################################
+# Delete every *.o and *.grd file in the current directory via $(RM).
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# INCdir, CC, CCFLAGS, HPLlib and the archiver macros are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# INCdep: headers listed as a prerequisite of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+# HPL_pfaobj: objects that the lib.grd rule archives into $(HPLlib).
+HPL_pfaobj       = \
+   HPL_dlocmax.o          HPL_dlocswpN.o         HPL_dlocswpT.o         \
+   HPL_pdmxswp.o          HPL_pdpancrN.o         HPL_pdpancrT.o         \
+   HPL_pdpanllN.o         HPL_pdpanllT.o         HPL_pdpanrlN.o         \
+   HPL_pdpanrlT.o         HPL_pdrpanllN.o        HPL_pdrpanllT.o        \
+   HPL_pdrpancrN.o        HPL_pdrpancrT.o        HPL_pdrpanrlN.o        \
+   HPL_pdrpanrlT.o        HPL_pdfact.o
+#
+## Targets #############################################################
+#
+all              : lib
+.PHONY           : all lib
+lib              : lib.grd
+# lib.grd is a stamp file: it is touched after $(HPLlib) has been updated from the objects.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; $< is the first prerequisite, i.e. the ../<name>.c source.
+HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdfact.o : ../HPL_pdfact.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+.PHONY           : clean   # clean names an action, not a file: always run its recipe
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# INCdir, CC, CCFLAGS, HPLlib and the archiver macros are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# INCdep: headers listed as a prerequisite of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# HPL_pgeobj: objects that the lib.grd rule archives into $(HPLlib).
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after $(HPLlib) has been updated from the objects.
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; $< is the first prerequisite, i.e. the ../<name>.c source.
+HPL_pipid.o : ../HPL_pipid.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx0.o : ../HPL_plindx0.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_perm.o : ../HPL_perm.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_logsort.o : ../HPL_logsort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx10.o : ../HPL_plindx10.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_plindx1.o : ../HPL_plindx1.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_spreadN.o : ../HPL_spreadN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_spreadT.o : ../HPL_spreadT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rollN.o : ../HPL_rollN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_rollT.o : ../HPL_rollT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_equil.o : ../HPL_equil.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+.PHONY           : clean   # clean names an action, not a file: always run its recipe
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# INCdir, CC, CCFLAGS, HPLlib and the archiver macros are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# INCdep: headers listed as a prerequisite of the object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+# HPL_pmaobj: the object that the lib.grd rule archives into $(HPLlib).
+HPL_pmaobj       = \
+   HPL_pdmatgen.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after $(HPLlib) has been updated from the objects.
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# $< is the first prerequisite, i.e. the ../HPL_pdmatgen.c source.
+HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+.PHONY           : clean   # clean names an action, not a file: always run its recipe
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# INCdir, BINdir, CC, CCFLAGS, LINKER and HPLlib are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# INCdep: headers listed as a prerequisite of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h   \
+   $(INCdir)/hpl_gesv.h   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h  \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+   $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+# xhpl: output path passed to the linker by the dexe.grd rule.
+xhpl             = $(BINdir)/xhpl
+#
+## Object files ########################################################
+# HPL_pteobj: objects that the dexe.grd rule links into $(xhpl).
+HPL_pteobj       = \
+   HPL_pddriver.o         HPL_pdinfo.o           HPL_pdtest.o
+#
+## Targets #############################################################
+#
+all     : dexe
+.PHONY  : all dexe
+dexe    : dexe.grd
+# Copy HPL.dat from the parent directory into $(BINdir).
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Link $(xhpl), bring $(BINdir)/HPL.dat up to date via a sub-make, then touch the dexe.grd stamp.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# One explicit rule per object; $< is the first prerequisite, i.e. the ../<name>.c source.
+HPL_pddriver.o : ../HPL_pddriver.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_pdtest.o : ../HPL_pdtest.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+.PHONY           : clean   # clean names an action, not a file: always run its recipe
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# INCdir, CC, CCFLAGS, HPLlib and the archiver macros are not set in this file; presumably Make.inc provides them.
+# ######################################################################
+# INCdep: headers listed as a prerequisite of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+# HPL_ptiobj: objects that the lib.grd rule archives into $(HPLlib).
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched after $(HPLlib) has been updated from the objects.
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; $< is the first prerequisite, i.e. the ../<name>.c source.
+HPL_ptimer.o : ../HPL_ptimer.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+.PHONY           : clean   # clean names an action, not a file: always run its recipe
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test
new file mode 100644
index 000000000..514d445b8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test
@@ -0,0 +1,93 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h \
+   $(INCdir)/hpl_auxil.h  $(INCdir)/hpl_gesv.h  $(INCdir)/hpl_matgen.h \
+   $(INCdir)/hpl_timer.h  $(INCdir)/hpl_test.h
+#
+## Executable names ####################################################
+# Path of the executable produced by the dexe.grd link step.
+xlinpack         = $(BINdir)/xlinpack
+#
+## Object files ########################################################
+#
+# Objects that the dexe.grd rule links into $(xlinpack).
+HPL_tesobj       = HPL_ddriver.o HPL_dinfo.o HPL_dtest.o
+#
+## Targets #############################################################
+#
+# all and dexe both resolve to dexe.grd; LINPACK.dat is copied from the
+# parent directory into $(BINdir) when it is missing or out of date.
+all     : dexe
+dexe    : dexe.grd
+$(BINdir)/LINPACK.dat : ../LINPACK.dat
+	$(CP) $< $(BINdir)
+# Link the test driver; $(HPL_LIBS) is expected to be defined by Make.inc.
+dexe.grd: $(HPL_tesobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/LINPACK.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Compile each object from its ../*.c source; $< is that first prerequisite.
+HPL_ddriver.o          : ../HPL_ddriver.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dinfo.o            : ../HPL_dinfo.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dtest.o            : ../HPL_dtest.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+# Remove objects and *.grd files here; phony so a file named clean cannot mask it.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+#
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_timobj       = HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o
+#
+## Targets #############################################################
+#
+# all and lib both resolve to the lib.grd target, which archives the
+# objects into $(HPLlib), runs $(RANLIB) on it and then touches itself.
+all     : lib
+lib     : lib.grd
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# Compile each object from its ../*.c source; $< is that first prerequisite.
+HPL_timer.o            : ../HPL_timer.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_timer_cputime.o    : ../HPL_timer_cputime.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_timer_walltime.o   : ../HPL_timer_walltime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+# Remove objects and *.grd files here; phony so a file named clean cannot mask it.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units
new file mode 100644
index 000000000..1c447f204
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units
@@ -0,0 +1,112 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = $(INCdir)/hpl_misc.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_pauxil.h \
+   $(INCdir)/hpl_units.h
+#
+## Executable names ####################################################
+# Path of the executable produced by the dexe.grd link step (no trailing blanks).
+xunits           = $(BINdir)/xunits
+#
+## Object files ########################################################
+#
+# Objects that the dexe.grd rule links into $(xunits), in link order.
+HPL_uniobj       = HPL_unit_driver.o   HPL_unit_info.o     HPL_unit_indxg2l.o \
+   HPL_chek_indxg2l.o  HPL_unit_indxg2p.o  HPL_chek_indxg2p.o \
+   HPL_unit_indxl2g.o  HPL_chek_indxl2g.o  HPL_unit_numroc.o \
+   HPL_unit_numrocI.o  HPL_chek_numrocI.o
+#
+## Targets #############################################################
+#
+# all and dexe both resolve to dexe.grd; UNITS.dat is copied from the
+# parent directory into $(BINdir) when it is missing or out of date.
+all              : dexe
+dexe             : dexe.grd
+$(BINdir)/UNITS.dat : ../UNITS.dat
+	$(CP) $< $(BINdir)
+# Link the unit-test driver; $(HPL_LIBS) is expected to be defined by Make.inc.
+dexe.grd         : $(HPL_uniobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/UNITS.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Compile each object from its ../*.c source; $< is that first prerequisite.
+HPL_unit_driver.o      : ../HPL_unit_driver.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_info.o        : ../HPL_unit_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_indxg2l.o     : ../HPL_unit_indxg2l.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_chek_indxg2l.o     : ../HPL_chek_indxg2l.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_indxg2p.o     : ../HPL_unit_indxg2p.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_chek_indxg2p.o     : ../HPL_chek_indxg2p.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_indxl2g.o     : ../HPL_unit_indxl2g.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_chek_indxl2g.o     : ../HPL_chek_indxl2g.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_numroc.o      : ../HPL_unit_numroc.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_unit_numrocI.o     : ../HPL_unit_numrocI.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_chek_numrocI.o     : ../HPL_chek_numrocI.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+# Remove objects and *.grd files here; phony so a file named clean cannot mask it.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3
new file mode 100644
index 000000000..c6a2c7a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3
@@ -0,0 +1,52 @@
+.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_abort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_abort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_abort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_abort( __LINE__, __FILE__, "Halt.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3
new file mode 100644
index 000000000..70ec6c4ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3
@@ -0,0 +1,49 @@
+.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_all_reduce \- All reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_all_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_all_reduce\fR
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/global output) void *
+On entry,  BUFFER  points to  the  buffer to be combined.  On
+exit, this array contains the combined data and  is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3
new file mode 100644
index 000000000..ffee7f291
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3
@@ -0,0 +1,27 @@
+.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_barrier \- Barrier operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_barrier(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_barrier\fR
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3
new file mode 100644
index 000000000..54eb54b25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3
@@ -0,0 +1,31 @@
+.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bcast \- Perform the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bcast(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bcast\fR
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+IFLAG   (output)                int *
+On exit,  IFLAG  indicates  whether  or not the broadcast has
+occurred.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3
new file mode 100644
index 000000000..083776ab6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3
@@ -0,0 +1,23 @@
+.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_binit \- Initialize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_binit(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_binit\fR
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3
new file mode 100644
index 000000000..317d374cf
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3
@@ -0,0 +1,49 @@
+.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_broadcast(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_broadcast\fR
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be broadcast. On
+exit, this array contains the broadcast data and is identical
+on all processes in the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffer operands.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the source process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3
new file mode 100644
index 000000000..0dac6fe58
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3
@@ -0,0 +1,24 @@
+.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_bwait \- Finalize the row broadcast.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_bwait(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_bwait\fR
+waits  for  the  row  broadcast  of  the current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3
new file mode 100644
index 000000000..d60619a06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3
@@ -0,0 +1,28 @@
+.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_copyL \- Copy the current panel into a contiguous workspace.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_copyL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_copyL\fR
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3
new file mode 100644
index 000000000..50bd0b0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3
@@ -0,0 +1,76 @@
+.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_daxpy \- y := y + alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_daxpy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_daxpy\fR
+scales the vector x by alpha and adds it to y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the scaled entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3
new file mode 100644
index 000000000..f2759ced9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3
@@ -0,0 +1,69 @@
+.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dcopy \- y := x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dcopy(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dcopy\fR
+copies the vector x into the vector y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dcopy( 3, x, 1, y, 1 );
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3
new file mode 100644
index 000000000..57c69f78c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3
@@ -0,0 +1,160 @@
+.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSA\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANSB\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&C\fR,
+\fB\&const int\fR
+\fI\&LDC\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemm\fR
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANSA  (local input)           const enum HPL_TRANS
+On entry, TRANSA  specifies the form of  op(A)  to be used in
+the matrix-matrix operation as follows:
+   TRANSA==HplNoTrans    : op( A ) = A,                     
+   TRANSA==HplTrans      : op( A ) = A^T,                   
+   TRANSA==HplConjTrans  : op( A ) = A^T.                   
+.TP 8
+TRANSB  (local input)           const enum HPL_TRANS
+On entry, TRANSB  specifies the form of  op(B)  to be used in
+the matrix-matrix operation as follows:
+   TRANSB==HplNoTrans    : op( B ) = B,                     
+   TRANSB==HplTrans      : op( B ) = B^T,                   
+   TRANSB==HplConjTrans  : op( B ) = B^T.                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the  number  of rows  of the  matrix
+op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the number  of columns of the matrix
+op(B)  and  the number of columns of the matrix  C. N must be
+at least zero.
+.TP 8
+K       (local input)           const int
+On entry,  K  specifies  the  number of columns of the matrix
+op(A) and the number of rows of the matrix op(B).  K  must be
+at least  zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero  then the elements of the matrices A and B
+need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  is an array of dimension (LDA,ka),  where ka is
+k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+the array  A must contain the matrix A, otherwise the leading
+k  by  m  part of the array  A  must  contain the  matrix  A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA  specifies the first dimension of A as declared
+in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+LDA must be at least max(1,m), otherwise LDA must be at least
+max(1,k).
+.TP 8
+B       (local input)           const double *
+On entry, B is an array of dimension (LDB,kb),  where  kb  is
+n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+the array  B must contain the matrix B, otherwise the leading
+n  by  k  part of the array  B  must  contain  the matrix  B.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB  specifies the first dimension of B as declared
+in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+LDB must be at least max(1,k), otherwise LDB must be at least
+max(1,n).
+.TP 8
+BETA    (local input)           const double
+On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+supplied  as  zero  then  the  elements of the matrix C  need
+not be set on input.
+.TP 8
+C       (local input/output)    double *
+On entry,  C  is an array of dimension (LDC,n). Before entry,
+the  leading m by n part  of  the  array  C  must contain the
+matrix C,  except when beta is zero, in which case C need not
+be set on entry. On exit, the array  C  is overwritten by the
+m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+.TP 8
+LDC     (local input)           const int
+On entry, LDC  specifies the first dimension of C as declared
+in  the   calling  (sub)  program.   LDC  must  be  at  least
+max(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2], c[2*2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+.br
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+.br
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+.br
+   printf("  [%f,%f]\en", c[0], c[2]);
+.br
+   printf("c=[%f,%f]\en", c[1], c[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dtrsm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3
new file mode 100644
index 000000000..f85db57fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3
@@ -0,0 +1,128 @@
+.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dgemv \- y := beta * y + alpha * op(A) * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dgemv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&const double\fR
+\fI\&BETA\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dgemv\fR
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies the  operation to be performed as
+follows:   
+   TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+   TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  A and X  need not be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+BETA    (local input)           const double
+On entry, BETA  specifies the scalar beta.    When  BETA  is
+supplied as zero then  Y  need not be set on input.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+Before entry with BETA non-zero, the incremented array Y must
+contain the vector  y.  On exit,  Y  is  overwritten  by  the
+updated vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+.br
+              a, 2, x, 1, -1.0, y, 1 );
+.br
+   printf("y=[%f,%f]\en", y[0], y[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3
new file mode 100644
index 000000000..da9ddf495
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3
@@ -0,0 +1,108 @@
+.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dger \- A := alpha * x * y^T + A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dger(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dger\fR
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of  the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero then  X and Y  need not be set on input.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input)           double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.TP 8
+A       (local input/output)    double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n.  Before  entry, the leading m by n part  of the
+array  A  must contain the matrix coefficients. On exit, A is
+overwritten by the updated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2], y[2];
+.br
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+.br
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+.br
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+.br
+             a, 2 );
+.br
+   printf("a=[%f,%f,%f,%f]\en", a[0], a[1], a[2], a[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemv \ (3),
+.BR HPL_dtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3
new file mode 100644
index 000000000..8da8b1316
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3
@@ -0,0 +1,72 @@
+.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlacpy \- B := A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlacpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlacpy\fR
+copies an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of rows of the arrays A and
+B. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies  the number of columns of the arrays A
+and B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlatcpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3
new file mode 100644
index 000000000..9bf41b68a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3
@@ -0,0 +1,76 @@
+.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlamch(\fR
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlamch\fR
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+CMACH   (local input)           const HPL_T_MACH
+Specifies the value to be returned by HPL_dlamch             
+   = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+   = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+   = HPL_MACH_BASE,  HPL_dlamch := base                      
+   = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+   = HPL_MACH_MLEN,  HPL_dlamch := t                         
+   = HPL_MACH_RND,   HPL_dlamch := rnd                       
+   = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+   = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+   = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+   = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double eps;
+.br
+   eps = HPL_dlamch( HPL_MACH_EPS );
+.br
+   printf("eps=%18.8e\en", eps);
+.br
+   exit(0); return(0);
+.br
+}
+.SH REFERENCES
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3
new file mode 100644
index 000000000..ffbab554f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3
@@ -0,0 +1,73 @@
+.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_dlange(\fR
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+NORM    (local input)           const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an  array of dimension  (LDA,N), that
+contains the matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], norm;
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+.br
+   printf("norm=%f\en", norm);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3
new file mode 100644
index 000000000..8fdd89b8c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3
@@ -0,0 +1,70 @@
+.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaprnt \- Print the matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaprnt(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaprnt\fR
+prints to standard error an M-by-N matrix A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of rows of A. M must be at
+least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the number of columns of A. N must be
+at least zero.
+.TP 8
+A       (local input)           double *
+On entry, A  points to an array of dimension (LDA,N).
+.TP 8
+IA      (local input)           const int
+On entry, IA specifies the starting row index to be printed.
+.TP 8
+JA      (local input)           const int
+On entry,  JA  specifies  the  starting  column index  to be
+printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+CMATNM  (local input)           const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3
new file mode 100644
index 000000000..efe3580b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3
@@ -0,0 +1,60 @@
+.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp00N \- performs a series of row interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp00N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp00N\fR
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M specifies the number of rows of the array A to be
+interchanged. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies  the number of columns of the array A.
+N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an array of dimension (LDA,N) to which
+the row interchanges will be  applied.  On exit, the permuted
+matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry,  IPIV  is  an  array of size  M  that  contains the
+pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+implies that local rows k and l are to be interchanged.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3
new file mode 100644
index 000000000..662913e54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3
@@ -0,0 +1,88 @@
+.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01N \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01N\fR
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The  destination of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the positions indicated by positive values of LINDXAU.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3
new file mode 100644
index 000000000..738507755
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3
@@ -0,0 +1,89 @@
+.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp01T \- copies rows of A into itself and into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp01T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp01T\fR
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The  destination of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+moved within A or copied into U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+moved within A or copied into U. N must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be moved within A or
+copied into U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M). The rows
+of A specified by  LINDXA  are copied within this array  U at
+the  positions indicated by positive values of LINDXAU.  The
+rows of A are stored as columns in U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local  row indexes  of  A  that should be moved within  A  or
+copied into U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local  row indexes of  U  where the rows of  A  should be
+copied at. This array also contains the  local row offsets in
+A where some of the rows of A should be moved to.  A positive
+value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+should be copied into U at the position LINDXAU[i]; otherwise
+the row  LINDXA[i]  of  A  should be moved  at  the  position
+-LINDXAU[i] within A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3
new file mode 100644
index 000000000..600449c68
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3
@@ -0,0 +1,85 @@
+.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp02N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&W0\fR,
+\fB\&double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp02N\fR
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+copied into W. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the length of rows of A that should be
+copied into W. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,N). The rows
+of this array specified by LINDXA should be copied into W.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+W0      (local input/output)    double *
+On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local output)          double *
+On entry, W  is an array of size (LDW,M). On exit, W contains
+the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+in W(:,i).
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied into W.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M  that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the rows of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3
new file mode 100644
index 000000000..1ba0b3208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03N \- copy rows of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03N\fR
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,N).  Columns
+of W are copied as rows within this array U at  the positions
+specified in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3
new file mode 100644
index 000000000..d8bd11ec1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3
@@ -0,0 +1,75 @@
+.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp03T \- copy columns of W into U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp03T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp03T\fR
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies  the  number  of columns of  W  stored
+contiguously that should be copied into U. M must be at least
+zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  length of columns of  W  stored
+contiguously that should be copied into U. N must be at least
+zero.
+.TP 8
+U       (local input/output)    double *
+On entry, U points to an array of dimension (LDU,M).  Columns
+of W are copied within the array U at the positions specified
+in W0.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M),  that contains data
+to be copied into U. For i in [0..M),  entries W(:,i)  should
+be copied into the row or column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3
new file mode 100644
index 000000000..9f12d79ab
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3
@@ -0,0 +1,106 @@
+.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04N(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04N\fR
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of rows of U that should be
+copied into  A  and replaced by columns of  W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies the number of columns of W that should
+be copied into rows of U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points to  an array of dimension (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M1).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the row W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes of A into which rows of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the local  row indexes of  U that should be copied into A and
+replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3
new file mode 100644
index 000000000..448334148
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3
@@ -0,0 +1,107 @@
+.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp04T(\fR
+\fB\&const int\fR
+\fI\&M0\fR,
+\fB\&const int\fR
+\fI\&M1\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&W0\fR,
+\fB\&const double *\fR
+\fI\&W\fR,
+\fB\&const int\fR
+\fI\&LDW\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp04T\fR
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+.SH ARGUMENTS
+.TP 8
+M0      (local input)           const int
+On entry, M0 specifies the number of columns of U that should
+be copied into A and replaced by columns of W.  M0 must be at
+least zero.
+.TP 8
+M1      (local input)           const int
+On entry, M1 specifies  the number of columns of W that will
+be copied into U. M1 must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies the length of the columns of  U  that
+will be copied into rows of A. N must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M0).
+.TP 8
+W0      (local input)           const double *
+On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+the destination offset  in U where the columns of W should be
+copied.
+.TP 8
+W       (local input)           const double *
+On entry, W  is an array of size (LDW,M0+M1),  that  contains
+data to be copied into U.  For i in [M0..M0+M1),  the entries
+W(:,i) are copied into the column W0(i*LDW) of U.
+.TP 8
+LDW     (local input)           const int
+On entry, LDW specifies the leading dimension of the array W.
+LDW must be at least MAX(1,N+1).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA  is an array of dimension  M0 containing the
+local row indexes of A into which columns of U are copied.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension M0 that  contains
+the  local column indexes of  U  that should be copied into A
+and replaced by the columns of W.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3
new file mode 100644
index 000000000..371dd0b92
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05N \- copy rows of U into A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05N\fR
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of U that should be
+copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of U that should
+be copied into A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points to an array of dimension  (LDU,N).  This
+array contains the rows that are to be copied into A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local row indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3
new file mode 100644
index 000000000..5d70a7a16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3
@@ -0,0 +1,77 @@
+.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp05T \- copy columns of U into rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp05T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR,
+\fB\&const int *\fR
+\fI\&LINDXAU\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp05T\fR
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the columns of U that will
+be copied into rows of A. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U indicated by LINDXAU.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input)           const double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns that are to be copied into rows of
+A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be copied from U.
+.TP 8
+LINDXAU (local input)           const int *
+On entry, LINDXAU  is an array of dimension  M that  contains
+the local column indexes of U that should be copied in A.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3
new file mode 100644
index 000000000..7fa19d41a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06N \- swap rows of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06N\fR
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with rows of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with rows of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+rows of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,N).  This
+array contains the rows of U that are to be swapped with rows
+of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,M).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3
new file mode 100644
index 000000000..41fa3d6ee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3
@@ -0,0 +1,72 @@
+.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp06T \- swap columns of U with rows of A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp06T(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&LINDXA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp06T\fR
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry, M  specifies the number of rows of A that should be
+swapped with columns of U. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the rows of A that should
+be swapped with columns of U. N must be at least zero.
+.TP 8
+A       (local output)          double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+the  rows of this array specified by  LINDXA  are replaced by
+columns of U.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+U       (local input/output)    double *
+On entry,  U  points  to an array of dimension (LDU,*).  This
+array contains the columns of  U  that are to be swapped with
+rows of A.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the leading dimension of the array U.
+LDU must be at least MAX(1,N).
+.TP 8
+LINDXA  (local input)           const int *
+On entry, LINDXA is an array of dimension M that contains the
+local row indexes of A that should be swapped with U.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3
new file mode 100644
index 000000000..23465895c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3
@@ -0,0 +1,59 @@
+.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlaswp10N \- performs a series of column interchanges.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlaswp10N(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int *\fR
+\fI\&IPIV\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlaswp10N\fR
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the array A. M
+must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of the array A. N
+must be at least zero.
+.TP 8
+A       (local input/output)    double *
+On entry, A  points to an  array of  dimension (LDA,N).  This
+array contains the columns onto which the interchanges should
+be applied. On exit, A contains the permuted matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,M).
+.TP 8
+IPIV    (local input)           const int *
+On entry, IPIV is an array of size N containing the pivoting information.
+.SH SEE ALSO
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05N \ (3),
+.BR HPL_dlaswp05T \ (3),
+.BR HPL_dlaswp06N \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3
new file mode 100644
index 000000000..dc940e321
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3
@@ -0,0 +1,70 @@
+.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlatcpy \- B := A^T
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlatcpy(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlatcpy\fR
+copies the transpose of an array A into an array B.
+.SH ARGUMENTS
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the number of  rows of the array B and
+the number of columns of A. M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the number of  rows of the array A and
+the number of columns of B. N must be at least zero.
+.TP 8
+A       (local input)           const double *
+On entry, A points to an array of dimension (LDA,M).
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least MAX(1,N).
+.TP 8
+B       (local output)          double *
+On entry, B points to an array of dimension (LDB,N). On exit,
+B is overwritten with the transpose of A.
+.TP 8
+LDB     (local input)           const int
+On entry, LDB specifies the leading dimension of the array B.
+LDB must be at least MAX(1,M).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+.br
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dlacpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3
new file mode 100644
index 000000000..f68f887c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3
@@ -0,0 +1,69 @@
+.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocmax \- finds the maximum entry in matrix column.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocmax(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocmax\fR
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1,  WORK[0:2]  is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of rows of the column
+of A on which we operate.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is  a workarray of size at least 4.  On exit,
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.
+.SH SEE ALSO
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3
new file mode 100644
index 000000000..367e37e36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpN \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpN\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3
new file mode 100644
index 000000000..f864de535
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3
@@ -0,0 +1,62 @@
+.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dlocswpT \- locally swaps rows within panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dlocswpT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dlocswpT\fR
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+WORK[0] contains  the  local  maximum  absolute value scalar,
+WORK[1] contains  the corresponding local row index,  WORK[2]
+contains the corresponding global row index, and  WORK[3]  is
+the coordinate of process owning this max.  The N0 length max
+row is stored in WORK[4:4+N0-1];  Note  that this is also the
+JJth row  (or column) of L1. The remaining part of this array
+is used as workspace.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3
new file mode 100644
index 000000000..c287fb0fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3
@@ -0,0 +1,55 @@
+.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dmatgen \- random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dmatgen(\fR
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dmatgen\fR
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+M       (input)                 const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (input)                 const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+A       (output)                double *
+On entry, A points to an array of dimension (LDA,N). On exit,
+this  array  contains   the   coefficients  of  the  randomly
+generated matrix.
+.TP 8
+LDA     (input)                 const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,M).
+.TP 8
+ISEED   (input)                 const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3
new file mode 100644
index 000000000..8f42a10f5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3
@@ -0,0 +1,62 @@
+.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dscal \- x = alpha * x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dscal(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dscal\fR
+scales the vector x by alpha.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied as zero, then the entries of the incremented array X
+need not be set on input.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are  scaled
+by the scalar alpha.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   HPL_dscal( 3, 2.0, x, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3
new file mode 100644
index 000000000..a398f795a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3
@@ -0,0 +1,73 @@
+.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dswap \- y <-> x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dswap(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR,
+\fB\&double *\fR
+\fI\&Y\fR,
+\fB\&const int\fR
+\fI\&INCY\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dswap\fR
+swaps the vectors x and y.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vectors  x  and  y. N
+must be at least zero.
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+On exit, the entries of the incremented array  X  are updated
+with the entries of the incremented array Y.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.TP 8
+Y       (local input/output)    double *
+On entry,  Y  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+On exit, the entries of the incremented array  Y  are updated
+with the entries of the incremented array X.
+.TP 8
+INCY    (local input)           const int
+On entry, INCY specifies the increment for the elements of Y.
+INCY must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3], y[3];
+.br
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+.br
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+.br
+   HPL_dswap( 3, x, 1, y, 1 );
+.br
+   printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
+.br
+   printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3
new file mode 100644
index 000000000..ad099eb83
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3
@@ -0,0 +1,152 @@
+.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsm \- B := A^{-1} * B  or  B := B * A^{-1}.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsm(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double\fR
+\fI\&ALPHA\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&B\fR,
+\fB\&const int\fR
+\fI\&LDB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsm\fR
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+SIDE    (local input)           const enum HPL_SIDE
+On entry, SIDE  specifies  whether  op(A) appears on the left
+or right of X as follows:
+   SIDE==HplLeft    op( A ) * X = alpha * B,
+   SIDE==HplRight   X * op( A ) = alpha * B.
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry, TRANS  specifies  the form of  op(A)  to be used in
+the matrix-matrix operation as follows:
+   TRANS==HplNoTrans    : op( A ) = A,
+   TRANS==HplTrans      : op( A ) = A^T,
+   TRANS==HplConjTrans  : op( A ) = A^T.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+M       (local input)           const int
+On entry,  M  specifies  the number of rows of the  matrix B.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of columns of the matrix B.
+N must be at least zero.
+.TP 8
+ALPHA   (local input)           const double
+On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+supplied  as  zero then the elements of the matrix B need not
+be set on input.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+k by k upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower on  entry,
+the  leading k by k lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+are not referenced either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+.TP 8
+B       (local input/output)    double *
+On entry,  B  points  to an array of size equal to or greater
+than LDB * n.  Before entry, the leading  m by n  part of the
+array B must contain the matrix B, except when alpha is zero,
+in which case B need not be set on entry.  On exit, the array
+B is overwritten by the m by n solution matrix.
+.TP 8
+LDB     (local input)           const int
+On entry,  LDB  specifies  the  leading  dimension  of  B  as
+declared  in  the  calling  (sub) program.  LDB  must  be  at
+least MAX(1,m).
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], b[2*2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+.br
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+.br
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+.br
+              a, 2, b, 2 );
+.br
+   printf("  [%f,%f]\en", b[0], b[2]);
+.br
+   printf("b=[%f,%f]\en", b[1], b[3]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dgemm \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3
new file mode 100644
index 000000000..5df37c78b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3
@@ -0,0 +1,121 @@
+.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_dtrsv \- x := A^{-1} x.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_dtrsv(\fR
+\fB\&const enum HPL_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const enum HPL_UPLO\fR
+\fI\&UPLO\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const enum HPL_DIAG\fR
+\fI\&DIAG\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_dtrsv\fR
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+.SH ARGUMENTS
+.TP 8
+ORDER   (local input)           const enum HPL_ORDER
+On entry, ORDER  specifies the storage format of the operands
+as follows:                                                  
+   ORDER = HplRowMajor,                                      
+   ORDER = HplColumnMajor.                                   
+.TP 8
+UPLO    (local input)           const enum HPL_UPLO
+On  entry,   UPLO   specifies  whether  the  upper  or  lower
+triangular  part  of the array  A  is to be referenced.  When
+UPLO==HplUpper, only  the upper triangular part of A is to be
+referenced, otherwise only the lower triangular part of A is 
+to be referenced. 
+.TP 8
+TRANS   (local input)           const enum HPL_TRANS
+On entry,  TRANS  specifies  the equations  to  be  solved as
+follows:
+   TRANS==HplNoTrans     A   * x = b,
+   TRANS==HplTrans       A^T * x = b.
+.TP 8
+DIAG    (local input)           const enum HPL_DIAG
+On entry,  DIAG  specifies  whether  A  is unit triangular or
+not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+and otherwise, A is not assumed to be unit triangular.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the order of the matrix A. N must be at
+least zero.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points  to an array of size equal to or greater
+than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+n by n upper triangular  part of the array A must contain the
+upper triangular  matrix and the  strictly  lower  triangular
+part of A is not referenced.  When  UPLO==HplLower  on entry,
+the  leading n by n lower triangular part of the array A must
+contain the lower triangular matrix  and  the  strictly upper
+triangular part of A is not referenced.
+ 
+Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+are not referenced either,  but are assumed to be unity.
+.TP 8
+LDA     (local input)           const int
+On entry,  LDA  specifies  the  leading  dimension  of  A  as
+declared  in  the  calling  (sub) program.  LDA  must  be  at
+least MAX(1,n).
+.TP 8
+X       (local input/output)    double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+Before entry,  the  incremented array  X  must contain  the n
+element right-hand side vector b. On exit,  X  is overwritten
+with the solution vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double a[2*2], x[2];
+.br
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+.br
+   x[0] = 2.0; x[1] = 1.0;
+.br
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+.br
+              HplNonUnit, 2, a, 2, x, 1 );
+.br
+   printf("x=[%f,%f]\en", x[0], x[1]);
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_dger \ (3),
+.BR HPL_dgemv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3
new file mode 100644
index 000000000..817780e44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3
@@ -0,0 +1,91 @@
+.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_equil \- Equilibrate U and forward the column panel L.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_equil(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_TRANS\fR
+\fI\&TRANS\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_equil\fR
+equilibrates  the  local  pieces  of U, so that on exit to
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be equilibrated) information.
+.TP 8
+TRANS   (global input)          const enum HPL_TRANS
+On entry, TRANS specifies whether  U  is stored in transposed
+or non-transposed form.
+.TP 8
+N       (local input)           const int
+On entry, N  specifies the number of rows or columns of  U. N
+must be at least 0.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+non-transposed form, and MAX(1,N) otherwise.
+.TP 8
+IPLEN   (global input)          int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry, IPMAPM1  is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0..NPROW), IPMAPM1[IPMAP[i]] = i.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension NPROW+1.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3
new file mode 100644
index 000000000..8a81c0bfb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3
@@ -0,0 +1,44 @@
+.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_fprintf \- fprintf + fflush wrapper.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_fprintf(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_fprintf\fR
+is a wrapper around fprintf flushing the output stream.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_fprintf( stdout, "Hello World.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_warn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3
new file mode 100644
index 000000000..dab8067e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3
@@ -0,0 +1,25 @@
+.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_exit \- Exit process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_exit(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_exit\fR
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid to be released.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3
new file mode 100644
index 000000000..53c6a214b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3
@@ -0,0 +1,52 @@
+.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_info \- Retrieve grid information.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_info(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&int *\fR
+\fI\&NPROW\fR,
+\fB\&int *\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&MYROW\fR,
+\fB\&int *\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_info\fR
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NPROW   (global output)         int *
+On exit,   NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global output)         int *
+On exit,   NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+MYROW   (global output)         int *
+On exit,  MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (global output)         int *
+On exit,  MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3
new file mode 100644
index 000000000..7792a522d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3
@@ -0,0 +1,55 @@
+.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_grid_init \- Create a process grid.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_grid_init(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_ORDER\fR
+\fI\&ORDER\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_grid_init\fR
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+On entry,  COMM  is  the  MPI  communicator  identifying  the
+initial  collection  of  processes out of which  the  grid is
+formed.
+.TP 8
+ORDER   (global input)          const HPL_T_ORDER
+On entry, ORDER specifies how the processes should be ordered
+in the grid as follows:
+   ORDER = HPL_ROW_MAJOR    row-major    ordering;
+   ORDER = HPL_COLUMN_MAJOR column-major ordering;
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid to be created. NPROW must be at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid to be created. NPCOL must be at least one.
+.TP 8
+GRID    (local input/output)    HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information to be initialized.
+.SH SEE ALSO
+.BR HPL_pnum \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3
new file mode 100644
index 000000000..c00292a02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3
@@ -0,0 +1,59 @@
+.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|).
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_idamax(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const double *\fR
+\fI\&X\fR,
+\fB\&const int\fR
+\fI\&INCX\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_idamax\fR
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+.SH ARGUMENTS
+.TP 8
+N       (local input)           const int
+On entry, N specifies the length of the vector x. N  must  be
+at least zero.
+.TP 8
+X       (local input)           const double *
+On entry,  X  is an incremented array of dimension  at  least
+( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+.TP 8
+INCX    (local input)           const int
+On entry, INCX specifies the increment for the elements of X.
+INCX must not be zero.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   double x[3];
+.br
+   int    imax;
+.br
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+.br
+   imax = HPL_idamax( 3, x, 1 );
+.br
+   printf("imax=%d\en", imax);
+.br
+   exit(0);
+.br
+   return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_daxpy \ (3),
+.BR HPL_dcopy \ (3),
+.BR HPL_dscal \ (3),
+.BR HPL_dswap \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3
new file mode 100644
index 000000000..32c4d9e07
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3
@@ -0,0 +1,53 @@
+.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2l \- Map a global index into a local one.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2l(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2l\fR
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3
new file mode 100644
index 000000000..ca2004031
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3
@@ -0,0 +1,66 @@
+.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2lp \- Map a global index into a local one and a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_indxg2lp(\fR
+\fB\&int *\fR
+\fI\&IL\fR,
+\fB\&int *\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2lp\fR
+computes the local index of a matrix entry pointed to by
+the global  index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+.SH ARGUMENTS
+.TP 8
+IL      (output)                int *
+On exit, IL specifies the local index corresponding to IG. IL
+is at least zero.
+.TP 8
+PROC    (output)                int *
+On exit,  PROC  is the  coordinate of the process  owning the
+entry specified by the global index IG. PROC is at least zero
+and less than NPROCS.
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry, if SRCPROC = -1, the data  is not  distributed  but
+replicated,  in  which  case  this  routine returns IG in all
+processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3
new file mode 100644
index 000000000..5e0273feb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3
@@ -0,0 +1,52 @@
+.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxg2p \- Map a global index into a process coordinate.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxg2p(\fR
+\fB\&const int\fR
+\fI\&IG\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxg2p\fR
+computes the process coordinate  which possesses the entry
+of a matrix specified by a global index IG.
+.SH ARGUMENTS
+.TP 8
+IG      (input)                 const int
+On entry, IG specifies the global index of the matrix  entry.
+IG must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3
new file mode 100644
index 000000000..ba6da53a7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3
@@ -0,0 +1,59 @@
+.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_indxl2g \- Map an index-process pair into a global index.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_indxl2g(\fR
+\fB\&const int\fR
+\fI\&IL\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_indxl2g\fR
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+.SH ARGUMENTS
+.TP 8
+IL      (input)                 const int
+On entry, IL specifies the local  index of the matrix  entry.
+IL must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC  specifies the coordinate of the process whose
+local array row or column is to be determined. PROC  must  be
+at least zero and strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3
new file mode 100644
index 000000000..c07f276d5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3
@@ -0,0 +1,126 @@
+.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_infog2l \- global to local index translation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_infog2l(\fR
+\fB\&int\fR
+\fI\&I\fR,
+\fB\&int\fR
+\fI\&J\fR,
+\fB\&const int\fR
+\fI\&IMB\fR,
+\fB\&const int\fR
+\fI\&MB\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&RSRC\fR,
+\fB\&const int\fR
+\fI\&CSRC\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR,
+\fB\&const int\fR
+\fI\&NPROW\fR,
+\fB\&const int\fR
+\fI\&NPCOL\fR,
+\fB\&int *\fR
+\fI\&II\fR,
+\fB\&int *\fR
+\fI\&JJ\fR,
+\fB\&int *\fR
+\fI\&PROW\fR,
+\fB\&int *\fR
+\fI\&PCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_infog2l\fR
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed to by I, J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          int
+On entry,  I  specifies  the  global  row index of the matrix
+entry. I must be at least zero.
+.TP 8
+J       (global input)          int
+On entry,  J  specifies the global column index of the matrix
+entry. J must be at least zero.
+.TP 8
+IMB     (global input)          const int
+On entry,  IMB  specifies  the size of the first row block of
+the global matrix. IMB must be at least one.
+.TP 8
+MB      (global input)          const int
+On entry,  MB specifies the blocking factor used to partition
+and  distribute the rows of the matrix A.  MB  must be larger
+than one.
+.TP 8
+INB     (global input)          const int
+On entry, INB specifies the size of the first column block of
+the global matrix. INB must be at least one.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the columns of the matrix A. NB must be larger
+than one.
+.TP 8
+RSRC    (global input)          const int
+On entry,  RSRC  specifies  the row coordinate of the process
+that possesses the row  I.  RSRC  must  be at least zero  and
+strictly less than NPROW.
+.TP 8
+CSRC    (global input)          const int
+On entry, CSRC specifies the column coordinate of the process
+that possesses the column J. CSRC  must be at least zero  and
+strictly less than NPCOL.
+.TP 8
+MYROW   (local input)           const int
+On entry, MYROW  specifies my  row process  coordinate in the
+grid. MYROW is greater than or equal  to zero  and  less than
+NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry, MYCOL specifies my column process coordinate in the
+grid. MYCOL is greater than or equal  to zero  and  less than
+NPCOL.
+.TP 8
+NPROW   (global input)          const int
+On entry,  NPROW  specifies the number of process rows in the
+grid. NPROW is at least one.
+.TP 8
+NPCOL   (global input)          const int
+On entry,  NPCOL  specifies  the number of process columns in
+the grid. NPCOL is at least one.
+.TP 8
+II      (local output)          int *
+On exit, II  specifies the  local  starting  row index of the
+submatrix. On exit, II is at least 0.
+.TP 8
+JJ      (local output)          int *
+On exit, JJ  specifies the local starting column index of the
+submatrix. On exit, JJ is at least 0.
+.TP 8
+PROW    (global output)         int *
+On exit, PROW is the row coordinate of the process owning the
+entry specified by the global index I.  PROW is at least zero
+and less than NPROW.
+.TP 8
+PCOL    (global output)         int *
+On exit, PCOL  is the column coordinate of the process owning
+the entry specified by the global index J.  PCOL  is at least
+zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3
new file mode 100644
index 000000000..66e77ac32
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3
@@ -0,0 +1,48 @@
+.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_jumpit \- jump into the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_jumpit(\fR
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_jumpit\fR
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+.SH ARGUMENTS
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant A.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the constant C.
+.TP 8
+IRANN   (local input)           int *
+On entry,  IRANN  is an array of dimension 2,  that contains 
+the 16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.  On exit, this
+array contains respectively the 16-lower and  15-higher bits
+of the encoding of X(m).
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3
new file mode 100644
index 000000000..9fd6805d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3
@@ -0,0 +1,41 @@
+.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ladd \- Adds two long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ladd(\fR
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ladd\fR
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+.SH ARGUMENTS
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3
new file mode 100644
index 000000000..8be7380e0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3
@@ -0,0 +1,42 @@
+.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_lmul \- multiplies 2 long positive integers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_lmul(\fR
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&J\fR,
+\fB\&int *\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_lmul\fR
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the intrinsic modulo function
+is inlined.
+.SH ARGUMENTS
+.TP 8
+K       (local input)           int *
+On entry, K is an integer array of dimension 2 containing the
+encoded long integer K.
+.TP 8
+J       (local input)           int *
+On entry, J is an integer array of dimension 2 containing the
+encoded long integer J.
+.TP 8
+I       (local output)          int *
+On entry, I is an integer array of dimension 2. On exit, this
+array contains the encoded long integer result.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3
new file mode 100644
index 000000000..e7e80062a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3
@@ -0,0 +1,65 @@
+.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_logsort \- Sort the processes in logarithmic order.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_logsort(\fR
+\fB\&const int\fR
+\fI\&NPROCS\fR,
+\fB\&const int\fR
+\fI\&ICURROC\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_logsort\fR
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain the logarithmic sorted processes id with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words, the largest pieces
+of U will be sent a minimal number of times.
+.SH ARGUMENTS
+.TP 8
+NPROCS  (global input)          const int
+On entry, NPROCS  specifies the number of process rows in the
+process grid. NPROCS is at least one.
+.TP 8
+ICURROC (global input)          const int
+On entry, ICURROC is the source process row.
+.TP 8
+IPLEN   (global input/output)   int *
+On entry, IPLEN is an array of dimension NPROCS+1,  such that
+IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+rows of U  in the processes before process IPMAP[i] after the
+sort,  with  the convention that  IPLEN[NPROCS] is  the total
+number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+IPLEN[i] is  the  number of rows of A that should be moved to
+the process IPMAP[i].  IPLEN  is such that the number of rows
+of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myroc] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROCS)
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3
new file mode 100644
index 000000000..16d8aecc6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3
@@ -0,0 +1,43 @@
+.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_max \- Combine (max) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_max(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_max\fR
+combines (max) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3
new file mode 100644
index 000000000..a816d61b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3
@@ -0,0 +1,43 @@
+.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_min \- Combine (min) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_min(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_min\fR
+combines (min) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3
new file mode 100644
index 000000000..34c8acfa9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3
@@ -0,0 +1,60 @@
+.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numroc \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numroc(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numroc\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numrocI \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3
new file mode 100644
index 000000000..1891f1ac9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3
@@ -0,0 +1,66 @@
+.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_numrocI \- Compute the local number of row/columns.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_numrocI(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&I\fR,
+\fB\&const int\fR
+\fI\&INB\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const int\fR
+\fI\&PROC\fR,
+\fB\&const int\fR
+\fI\&SRCPROC\fR,
+\fB\&const int\fR
+\fI\&NPROCS\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_numrocI\fR
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies the number of rows/columns being dealt
+out. N must be at least zero.
+.TP 8
+I       (input)                 const int
+On entry, I  specifies the global index of the matrix  entry.
+I must be at least zero.
+.TP 8
+INB     (input)                 const int
+On entry,  INB  specifies  the size of the first block of the
+global matrix. INB must be at least one.
+.TP 8
+NB      (input)                 const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+PROC    (input)                 const int
+On entry, PROC specifies  the coordinate of the process whose
+local portion is determined.  PROC must be at least zero  and
+strictly less than NPROCS.
+.TP 8
+SRCPROC (input)                 const int
+On entry,  SRCPROC  specifies  the coordinate of the  process
+that possesses the first row or column of the matrix. SRCPROC
+must be at least zero and strictly less than NPROCS.
+.TP 8
+NPROCS  (input)                 const int
+On entry,  NPROCS  specifies the total number of process rows
+or columns over which the matrix is distributed.  NPROCS must
+be at least one.
+.SH SEE ALSO
+.BR HPL_indxg2l \ (3),
+.BR HPL_indxg2lp \ (3),
+.BR HPL_indxg2p \ (3),
+.BR HPL_indxl2g \ (3),
+.BR HPL_numroc \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3
new file mode 100644
index 000000000..044e87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3
@@ -0,0 +1,40 @@
+.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pabort \- halts execution.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pabort(\fR
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pabort\fR
+displays an error message on stderr and halts execution.
+.SH ARGUMENTS
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3),
+.BR HPL_pwarn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3
new file mode 100644
index 000000000..c79019c37
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3
@@ -0,0 +1,42 @@
+.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_packL \- Form the MPI structure for the row ring broadcasts.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_packL(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&INDEX\fR,
+\fB\&const int\fR
+\fI\&LEN\fR,
+\fB\&const int\fR
+\fI\&IBUF\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_packL\fR
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+PANEL   (input/output)          HPL_T_panel *
+On entry,  PANEL  points to the  current panel data structure
+being broadcast.
+.TP 8
+INDEX   (input)                 const int
+On entry,  INDEX  points  to  the  first entry of the  packed
+buffer being broadcast.
+.TP 8
+LEN     (input)                 const int
+On entry, LEN is the length of the packed buffer.
+.TP 8
+IBUF    (input)                 const int
+On entry, IBUF  specifies the panel buffer/count/type entries
+that should be initialized.
+.SH SEE ALSO
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3
new file mode 100644
index 000000000..30e55b62e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3
@@ -0,0 +1,15 @@
+.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+main \- HPL main timing program.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&main();\fR
+.SH DESCRIPTION
+\fB\&main\fR
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+.SH SEE ALSO
+.BR HPL_pdinfo \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3
new file mode 100644
index 000000000..e3db5fb8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3
@@ -0,0 +1,64 @@
+.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdfact \- recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdfact(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdfact\fR
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows one to vary
+the recursive stopping criterion in terms of the number of columns in
+the panel, and NDIV allows one to specify the number of subpanels each
+panel should be divided into. Usually a value of 2 will be chosen.
+Finally, PFACT is a function pointer specifying the non-recursive
+algorithm to be used on at most NBMIN columns. One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3
new file mode 100644
index 000000000..ab4b62c4e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3
@@ -0,0 +1,40 @@
+.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv \- Solve A x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdtrsv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3
new file mode 100644
index 000000000..180f191f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesv0 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesv0(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesv0\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3
new file mode 100644
index 000000000..64cee67ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3
@@ -0,0 +1,46 @@
+.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK1 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK1(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK1\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3
new file mode 100644
index 000000000..9f389b9dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3
@@ -0,0 +1,47 @@
+.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdgesvK2 \- Factor an N x N+1 matrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdgesvK2(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdgesvK2\fR
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdfact \ (3),
+.BR HPL_binit \ (3),
+.BR HPL_bcast \ (3),
+.BR HPL_bwait \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pdupdateTT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3
new file mode 100644
index 000000000..eed541159
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3
@@ -0,0 +1,212 @@
+.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdinfo \- Read input parameter file.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdinfo(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&int *\fR
+\fI\&NS\fR,
+\fB\&int *\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&NBS\fR,
+\fB\&int *\fR
+\fI\&NB\fR,
+\fB\&HPL_T_ORDER *\fR
+\fI\&PMAPPIN\fR,
+\fB\&int *\fR
+\fI\&NPQS\fR,
+\fB\&int *\fR
+\fI\&P\fR,
+\fB\&int *\fR
+\fI\&Q\fR,
+\fB\&int *\fR
+\fI\&NPFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&PF\fR,
+\fB\&int *\fR
+\fI\&NBMS\fR,
+\fB\&int *\fR
+\fI\&NBM\fR,
+\fB\&int *\fR
+\fI\&NDVS\fR,
+\fB\&int *\fR
+\fI\&NDV\fR,
+\fB\&int *\fR
+\fI\&NRFS\fR,
+\fB\&HPL_T_FACT *\fR
+\fI\&RF\fR,
+\fB\&int *\fR
+\fI\&NTPS\fR,
+\fB\&HPL_T_TOP *\fR
+\fI\&TP\fR,
+\fB\&int *\fR
+\fI\&NDHS\fR,
+\fB\&int *\fR
+\fI\&DH\fR,
+\fB\&HPL_T_SWAP *\fR
+\fI\&FSWAP\fR,
+\fB\&int *\fR
+\fI\&TSWAP\fR,
+\fB\&int *\fR
+\fI\&L1NOTRAN\fR,
+\fB\&int *\fR
+\fI\&UNOTRAN\fR,
+\fB\&int *\fR
+\fI\&EQUIL\fR,
+\fB\&int *\fR
+\fI\&ALIGN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdinfo\fR
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+.SH ARGUMENTS
+.TP 8
+TEST    (global output)         HPL_T_test *
+On entry, TEST  points to a testing data structure.  On exit,
+the fields of this data structure are initialized as follows:
+TEST->outfp  specifies the output file where the results will
+be printed.  It is only defined and used by  the process 0 of
+the grid.  TEST->thrsh specifies the threshold value for the
+test ratio.  TEST->epsil is the relative machine precision of
+the distributed computer.  Finally  the test counters, kfail,
+kpass, kskip, ktest are initialized to zero.
+.TP 8
+NS      (global output)         int *
+On exit,  NS  specifies the number of different problem sizes
+to be tested. NS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+N       (global output)         int *
+On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+the first NS entries of this array contain the  problem sizes
+to run the code with.
+.TP 8
+NBS     (global output)         int *
+On exit,  NBS  specifies the number of different distribution
+blocking factors to be tested. NBS must be less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NB      (global output)         int *
+On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+the first NBS entries of this array contain the values of the
+various distribution blocking factors, to run the code with.
+.TP 8
+PMAPPIN (global output)         HPL_T_ORDER *
+On exit, PMAPPIN specifies the process mapping onto the nodes
+of the  MPI machine configuration.  PMAPPIN  defaults  to
+row-major ordering.
+.TP 8
+NPQS    (global output)         int *
+On exit, NPQS  specifies the  number of different values that
+can be used for P and Q, i.e., the number of process grids to
+run  the  code with.  NPQS must be  less  than  or  equal  to
+HPL_MAX_PARAM.
+.TP 8
+P       (global output)         int *
+On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of P,
+the number of process rows of the  NPQS grids to run the code
+with.
+.TP 8
+Q       (global output)         int *
+On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+the first NPQS entries of this array contain the values of Q,
+the number of process columns of the  NPQS  grids to  run the
+code with.
+.TP 8
+NPFS    (global output)         int *
+On exit, NPFS  specifies the  number of different values that
+can be used for PF : the panel factorization algorithm to run
+the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+PF      (global output)         HPL_T_FACT *
+On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NPFS  entries  of this array  contain  the various
+panel factorization algorithms to run the code with.
+.TP 8
+NBMS    (global output)         int *
+On exit,  NBMS  specifies  the  number  of  various recursive
+stopping criteria  to be tested.  NBMS  must be  less than or
+equal to HPL_MAX_PARAM.
+.TP 8
+NBM     (global output)         int *
+On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NBMS entries of this array contain the values
+of the various recursive stopping criteria to be tested.
+.TP 8
+NDVS    (global output)         int *
+On exit,  NDVS  specifies  the number  of various numbers  of
+panels in recursion to be tested.  NDVS is less than or equal
+to HPL_MAX_PARAM.
+.TP 8
+NDV     (global output)         int *
+On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDVS entries of this array contain the values
+of the various numbers of panels in recursion to be tested.
+.TP 8
+NRFS    (global output)         int *
+On exit, NRFS  specifies the  number of different values that
+can be used for RF : the recursive factorization algorithm to
+be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+.TP 8
+RF      (global output)         HPL_T_FACT *
+On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+the first  NRFS  entries  of  this array contain  the various
+recursive factorization algorithms to run the code with.
+.TP 8
+NTPS    (global output)         int *
+On exit, NTPS  specifies the  number of different values that
+can be used for the  broadcast topologies  to be tested. NTPS
+is less than or equal to HPL_MAX_PARAM.
+.TP 8
+TP      (global output)         HPL_T_TOP *
+On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+the  first NTPS  entries of this  array  contain  the various
+broadcast (along rows) topologies to run the code with.
+.TP 8
+NDHS    (global output)         int *
+On exit, NDHS  specifies the  number of different values that
+can be used for the  lookahead depths to be  tested.  NDHS is
+less than or equal to HPL_MAX_PARAM.
+.TP 8
+DH      (global output)         int *
+On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+exit, the first NDHS entries of this array contain the values
+of lookahead depths to run the code with.  Such a value is either
+0 (no lookahead) or greater than zero.
+.TP 8
+FSWAP   (global output)         HPL_T_SWAP *
+On exit, FSWAP specifies the swapping algorithm to be used in
+all tests.
+.TP 8
+TSWAP   (global output)         int *
+On exit,  TSWAP  specifies the swapping threshold as a number
+of columns when the mixed swapping algorithm was chosen.
+.TP 8
+L1NOTRAN (global output)        int *
+On exit, L1NOTRAN specifies whether the upper triangle of the
+panels of columns  should  be stored  in  no-transposed  form
+(L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+.TP 8
+UNOTRAN (global output)         int *
+On exit, UNOTRAN  specifies whether the panels of rows should
+be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+form (UNOTRAN=0) during their broadcast.
+.TP 8
+EQUIL   (global output)         int *
+On exit,  EQUIL  specifies  whether  equilibration during the
+swap-broadcast  of  the  panel of rows  should  be  performed
+(EQUIL=1) or not (EQUIL=0).
+.TP 8
+ALIGN   (global output)         int *
+On exit,  ALIGN  specifies the alignment  of  the dynamically
+allocated buffers in double precision words. ALIGN is greater
+than zero.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3
new file mode 100644
index 000000000..7ce46c23e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3
@@ -0,0 +1,53 @@
+.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlamch \- determines machine-specific arithmetic constants.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlamch(\fR
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR,
+\fB\&const HPL_T_MACH\fR
+\fI\&CMACH\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlamch\fR
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps),  the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin) - base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax)  - (base**emax)*(1-eps).
+.SH ARGUMENTS
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.TP 8
+CMACH   (global input)          const HPL_T_MACH
+Specifies the value to be returned by HPL_pdlamch            
+   = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+   = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+   = HPL_MACH_BASE,  HPL_pdlamch := base                     
+   = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+   = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+   = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+   = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+   = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+   = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+   = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+ 
+where                                                        
+ 
+   eps   = relative machine precision,                       
+   sfmin = safe minimum,                                     
+   base  = base of the machine,                              
+   prec  = eps*base,                                         
+   t     = number of digits in the mantissa,                 
+   rnd   = 1.0 if rounding occurs in addition,               
+   emin  = minimum exponent before underflow,                
+   rmin  = underflow threshold,                              
+   emax  = largest exponent before overflow,                 
+   rmax  = overflow threshold.
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3
new file mode 100644
index 000000000..30593401b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3
@@ -0,0 +1,68 @@
+.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlange \- Compute ||A||.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_pdlange(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const HPL_T_NORM\fR
+\fI\&NORM\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&const double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlange\fR
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+NORM    (global input)          const HPL_T_NORM
+On entry,  NORM  specifies  the  value to be returned by this
+function as described above.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           const double *
+On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+that contains the local pieces of the distributed matrix A.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.SH SEE ALSO
+.BR HPL_pdlaprnt \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3
new file mode 100644
index 000000000..feb010a67
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3
@@ -0,0 +1,72 @@
+.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaprnt \- Print a distributed matrix A.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaprnt(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&IAROW\fR,
+\fB\&const int\fR
+\fI\&IACOL\fR,
+\fB\&const char *\fR
+\fI\&CMATNM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaprnt\fR
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies the number of rows of the coefficient
+matrix A. M must be at least zero.
+.TP 8
+N       (global input)          const int
+On  entry,   N   specifies  the  number  of  columns  of  the
+coefficient matrix A. N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix. NB must be larger than one.
+.TP 8
+A       (local input)           double *
+On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+This array contains the coefficient matrix to be printed.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+IAROW   (global input)          const int
+On entry,  IAROW  specifies the row process coordinate owning
+the  first row of A.  IAROW  must be  larger than or equal to
+zero and less than NPROW.
+.TP 8
+IACOL   (global input)          const int
+On entry,  IACOL  specifies  the  column  process  coordinate
+owning the  first column  of A. IACOL  must be larger than or
+equal to zero and less than NPCOL.
+.TP 8
+CMATNM  (global input)          const char *
+On entry, CMATNM is the name of the matrix to be printed.
+.SH SEE ALSO
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3
new file mode 100644
index 000000000..3875400e3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03N \ (3),
+.BR HPL_dlaswp04N \ (3),
+.BR HPL_dlaswp05N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3
new file mode 100644
index 000000000..39901ba4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3
@@ -0,0 +1,65 @@
+.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp00T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp00T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be broadcast and swapped) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx0 \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp02N \ (3),
+.BR HPL_dlaswp03T \ (3),
+.BR HPL_dlaswp04T \ (3),
+.BR HPL_dlaswp05T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3
new file mode 100644
index 000000000..1ee14c0a8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01N(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01N\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNN \ (3),
+.BR HPL_pdupdateTN \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadN \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollN \ (3),
+.BR HPL_dlaswp00N \ (3),
+.BR HPL_dlaswp01N \ (3),
+.BR HPL_dlaswp06N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3
new file mode 100644
index 000000000..e5c5de024
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3
@@ -0,0 +1,69 @@
+.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdlaswp01T(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdlaswp01T\fR
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to  be swapped and broadcast starting at
+the current position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdupdateNT \ (3),
+.BR HPL_pdupdateTT \ (3),
+.BR HPL_pipid \ (3),
+.BR HPL_plindx1 \ (3),
+.BR HPL_plindx10 \ (3),
+.BR HPL_spreadT \ (3),
+.BR HPL_equil \ (3),
+.BR HPL_rollT \ (3),
+.BR HPL_dlaswp10N \ (3),
+.BR HPL_dlaswp01T \ (3),
+.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3
new file mode 100644
index 000000000..5b4675c6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3
@@ -0,0 +1,67 @@
+.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmatgen \- Parallel random matrix generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmatgen(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR,
+\fB\&double *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&LDA\fR,
+\fB\&const int\fR
+\fI\&ISEED\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmatgen\fR
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+M       (global input)          const int
+On entry,  M  specifies  the number  of rows of the matrix A.
+M must be at least zero.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the number of columns of the matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.TP 8
+A       (local output)          double *
+On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+On exit, this array contains the coefficients of the randomly
+generated matrix.
+.TP 8
+LDA     (local input)           const int
+On entry, LDA specifies the leading dimension of the array A.
+LDA must be at least max(1,LocP(M)).
+.TP 8
+ISEED   (global input)          const int
+On entry, ISEED  specifies  the  seed  number to generate the
+matrix A. ISEED must be at least zero.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_drand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3
new file mode 100644
index 000000000..41c604373
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3
@@ -0,0 +1,78 @@
+.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdmxswp \- swaps and broadcasts the pivot row.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdmxswp(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&II\fR,
+\fB\&const int\fR
+\fI\&JJ\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdmxswp\fR
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of the matrix
+column on which this function operates.
+.TP 8
+II      (local input)           const int
+On entry, II  specifies the row offset where the column to be
+operated on starts with respect to the panel.
+.TP 8
+JJ      (local input)           const int
+On entry, JJ  specifies the column offset where the column to
+be operated on starts with respect to the panel.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+It  is assumed that  HPL_dlocmax  was called  prior  to  this
+routine to  initialize  the first four entries of this array.
+On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+Note that this is also the  JJth  row  (or column) of L1. The
+remaining part is used as a temporary array.
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3
new file mode 100644
index 000000000..2e94a36a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrN \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3
new file mode 100644
index 000000000..035e60d60
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpancrT \- Crout panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpancrT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3
new file mode 100644
index 000000000..94a212ced
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_disp \- Deallocate a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_disp(\fR
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_disp\fR
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3
new file mode 100644
index 000000000..cfad40c3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3
@@ -0,0 +1,24 @@
+.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pdpanel_free(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_free\fR
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points  to  the  panel data  structure from
+which the resources should be deallocated.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3
new file mode 100644
index 000000000..cbb0e7e3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_init \- Initialize the panel resources.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_init(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_init\fR
+initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry,  JB  specifies  the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.SH SEE ALSO
+.BR HPL_pdpanel_new \ (3),
+.BR HPL_pdpanel_disp \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3
new file mode 100644
index 000000000..ed9fe1053
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3
@@ -0,0 +1,76 @@
+.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanel_new(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&JB\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&A\fR,
+\fB\&const int\fR
+\fI\&IA\fR,
+\fB\&const int\fR
+\fI\&JA\fR,
+\fB\&const int\fR
+\fI\&TAG\fR,
+\fB\&HPL_T_panel * *\fR
+\fI\&PANEL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanel_new\fR
+creates and initializes a panel data structure.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters.
+.TP 8
+M       (local input)           const int
+On entry, M specifies the global number of rows of the panel.
+M must be at least zero.
+.TP 8
+N       (local input)           const int
+On entry,  N  specifies  the  global number of columns of the
+panel and trailing submatrix. N must be at least zero.
+.TP 8
+JB      (global input)          const int
+On entry,  JB  specifies  the number of columns of the panel.
+JB must be at least zero.
+.TP 8
+A       (local input/output)    HPL_T_pmat *
+On entry, A points to the data structure containing the local
+array information.
+.TP 8
+IA      (global input)          const int
+On entry,  IA  is  the global row index identifying the panel
+and trailing submatrix. IA must be at least zero.
+.TP 8
+JA      (global input)          const int
+On entry, JA is the global column index identifying the panel
+and trailing submatrix. JA must be at least zero.
+.TP 8
+TAG     (global input)          const int
+On entry, TAG is the row broadcast message id.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel * *
+On entry,  PANEL  points  to  the  address  of the panel data
+structure to create and initialize.
+.SH SEE ALSO
+.BR HPL_pdpanel_init \ (3),
+.BR HPL_pdpanel_disp \ (3),
+.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3
new file mode 100644
index 000000000..eca1f4a34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllN \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllN\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3
new file mode 100644
index 000000000..a18d52c61
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanllT\fR
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3
new file mode 100644
index 000000000..cae2b5b5b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3
@@ -0,0 +1,82 @@
+.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlN \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlN\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the main loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlT \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3
new file mode 100644
index 000000000..434444bf7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3
@@ -0,0 +1,81 @@
+.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdpanrlT \- Right-looking panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdpanrlT\fR
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that one iteration of the main loop is unrolled.  The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows the current
+column to be brought through cache only once at each step. The current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3
new file mode 100644
index 000000000..fc6dd25f8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrN \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrN\fR
+recursively  factorizes  a panel of columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3
new file mode 100644
index 000000000..ea0a57bc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpancrT \- Crout recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpancrT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpancrT\fR
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3
new file mode 100644
index 000000000..29b6db40a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllN \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllN\fR
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3
new file mode 100644
index 000000000..18db5c1fb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanllT \- Left-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanllT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanllT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3
new file mode 100644
index 000000000..441560c14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlN \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlN\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlT \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3
new file mode 100644
index 000000000..e5bd9d110
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3
@@ -0,0 +1,79 @@
+.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdrpanrlT \- Right-looking recursive panel factorization.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdrpanrlT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&M\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&ICOFF\fR,
+\fB\&double *\fR
+\fI\&WORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdrpanrlT\fR
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+M       (local input)           const int
+On entry,  M specifies the local number of rows of sub(A).
+.TP 8
+N       (local input)           const int
+On entry,  N specifies the local number of columns of sub(A).
+.TP 8
+ICOFF   (global input)          const int
+On entry, ICOFF specifies the row and column offset of sub(A)
+in A.
+.TP 8
+WORK    (local workspace)       double *
+On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+.SH SEE ALSO
+.BR HPL_dlocmax \ (3),
+.BR HPL_dlocswpN \ (3),
+.BR HPL_dlocswpT \ (3),
+.BR HPL_pdmxswp \ (3),
+.BR HPL_pdpancrN \ (3),
+.BR HPL_pdpancrT \ (3),
+.BR HPL_pdpanllN \ (3),
+.BR HPL_pdpanllT \ (3),
+.BR HPL_pdpanrlN \ (3),
+.BR HPL_pdpanrlT \ (3),
+.BR HPL_pdrpancrN \ (3),
+.BR HPL_pdrpancrT \ (3),
+.BR HPL_pdrpanllN \ (3),
+.BR HPL_pdrpanllT \ (3),
+.BR HPL_pdrpanrlN \ (3),
+.BR HPL_pdfact \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3
new file mode 100644
index 000000000..eaaff2bff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3
@@ -0,0 +1,63 @@
+.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtest \- Perform one test.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtest(\fR
+\fB\&HPL_T_test *\fR
+\fI\&TEST\fR,
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_palg *\fR
+\fI\&ALGO\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const int\fR
+\fI\&NB\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtest\fR
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+.SH ARGUMENTS
+.TP 8
+TEST    (global input)          HPL_T_test *
+On entry,  TEST  points  to a testing data structure:  outfp
+specifies the output file where the results will be printed.
+It is only defined and used by the process  0  of the  grid.
+thrsh  specifies  the  threshold  value  for the test ratio.
+Concretely, a test is declared "PASSED"  if and only if  the
+following inequality is satisfied:
+||Ax-b||_oo / ( epsil *
+                ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                 N )  < thrsh.
+epsil  is the  relative machine precision of the distributed
+computer. Finally the test counters, kfail, kpass, kskip and
+ktest are updated as follows:  if the test passes,  kpass is
+incremented by one;  if the test fails, kfail is incremented
+by one; if the test is skipped, kskip is incremented by one.
+ktest is left unchanged.
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+ALGO    (global input)          HPL_T_palg *
+On entry,  ALGO  points to  the data structure containing the
+algorithmic parameters to be used for this test.
+.TP 8
+N       (global input)          const int
+On entry,  N specifies the order of the coefficient matrix A.
+N must be at least zero.
+.TP 8
+NB      (global input)          const int
+On entry,  NB specifies the blocking factor used to partition
+and distribute the matrix A. NB must be larger than one.
+.SH SEE ALSO
+.BR HPL_pddriver \ (3),
+.BR HPL_pdinfo \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3
new file mode 100644
index 000000000..5d2d14dcd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3
@@ -0,0 +1,49 @@
+.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdtrsv \- Solve triu( A ) x = b.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdtrsv(\fR
+\fB\&HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&HPL_T_pmat *\fR
+\fI\&AMAT\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdtrsv\fR
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns  implemented  in terms of  synchronous communication point to
+point primitives.  The  lookahead of depth 1 is used to minimize  the
+critical path. This entire operation is essentially ``latency'' bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+AMAT    (local input/output)    HPL_T_pmat *
+On entry,  AMAT  points  to the data structure containing the
+local array information.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3
new file mode 100644
index 000000000..e20929a27
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNN\fR
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using
+the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3
new file mode 100644
index 000000000..276c2ceda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateNT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateNT\fR
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using
+the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3
new file mode 100644
index 000000000..091859d01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTN\fR
+broadcasts (forwards) the panel PBCST and simultaneously applies the
+row interchanges and updates part of the trailing submatrix (using
+the panel PANEL).
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3
new file mode 100644
index 000000000..34502c6ef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3
@@ -0,0 +1,48 @@
+.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pdupdateTT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&NN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pdupdateTT\fR
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local output)          int *
+On exit,  IFLAG  indicates  whether or not  the broadcast has
+been completed when PBCST is not NULL on entry. In that case,
+IFLAG is left unchanged.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be updated) information.
+.TP 8
+NN      (local input)           const int
+On entry, NN specifies  the  local  number  of columns of the
+trailing  submatrix  to be updated  starting  at the  current
+position. NN must be at least zero.
+.SH SEE ALSO
+.BR HPL_pdgesv \ (3),
+.BR HPL_pdgesv0 \ (3),
+.BR HPL_pdgesvK1 \ (3),
+.BR HPL_pdgesvK2 \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3
new file mode 100644
index 000000000..9476b5eff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3
@@ -0,0 +1,50 @@
+.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_perm \- Combine 2 index arrays - Generate the permutation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_perm(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_perm\fR
+combines  two  index  arrays  and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU. Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutation  producing  the  same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+.SH ARGUMENTS
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the length of the arrays  LINDXA  and
+LINDXAU. N should be at least zero.
+.TP 8
+LINDXA  (global input/output)   int *
+On entry,  LINDXA  is an array of dimension N  containing the
+source indexes. On exit,  LINDXA  contains the combined index
+array.
+.TP 8
+LINDXAU (global input/output)   int *
+On entry,  LINDXAU is an array of dimension N  containing the
+target indexes.  On exit,  LINDXAU  contains  the sequence of
+permutation,  that  should be applied  in increasing order to
+permute the underlying array U in place.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension N.
+.SH SEE ALSO
+.BR HPL_plindx1 \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3
new file mode 100644
index 000000000..6a8f5f277
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3
@@ -0,0 +1,79 @@
+.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pipid \- Simplify the pivot vector.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pipid(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&int *\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pipid\fR
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global output)         int *
+On exit, K specifies the number of entries in  IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global output)         int *
+On entry, IPID is an array of length 4*N.  On exit, the first
+K entries of that array contain the src and final destination
+resulting  from  the  application of the  N  interchanges  as
+specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+in [0..N).
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3
new file mode 100644
index 000000000..2b889947a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3
@@ -0,0 +1,168 @@
+.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx0 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx0(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&LLEN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx0\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA < N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after each other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would rather be cumbersome
+to  figure it on  the fly  during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+LINDXA  (local output)          int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (local output)          int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+LLEN    (global output)         int *
+On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+contains how many rows every process has.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3
new file mode 100644
index 000000000..7d4f8feba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3
@@ -0,0 +1,106 @@
+.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx1 \- Compute local swapping index arrays.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx1(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPA\fR,
+\fB\&int *\fR
+\fI\&LINDXA\fR,
+\fB\&int *\fR
+\fI\&LINDXAU\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR,
+\fB\&int *\fR
+\fI\&PERMU\fR,
+\fB\&int *\fR
+\fI\&IWORK\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx1\fR
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPA     (global output)         int *
+On exit,  IPA  specifies  the number of rows that the current
+process row has that either belong to U  or should be swapped
+with remote rows of A.
+.TP 8
+LINDXA  (global output)         int *
+On entry, LINDXA  is an array of dimension 2*N. On exit, this
+array contains the local indexes of the rows of A I have that
+should be copied into U.
+.TP 8
+LINDXAU (global output)         int *
+On entry, LINDXAU is an array of dimension 2*N. On exit, this
+array contains  the local destination  information encoded as
+follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in  the  processes  before  process  IPMAP[i]  after the sort
+with the convention that IPLEN[nprow]  is the total number of
+rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+local number of rows of A that should be moved to the process
+IPMAP[i]. IPLEN is such that the number of rows of the source
+process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+remaining  entries  of  this  array  are  sorted  so that the
+quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW)
+.TP 8
+PERMU   (global output)         int *
+On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+contains  a sequence of permutations,  that should be applied
+in increasing order to permute in place the row panel U.
+.TP 8
+IWORK   (workspace)             int *
+On entry, IWORK is a workarray of dimension 2*JB.
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3
new file mode 100644
index 000000000..d22d64f36
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3
@@ -0,0 +1,68 @@
+.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_plindx10 \- Compute the logarithmic maps for the spreading.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_plindx10(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&K\fR,
+\fB\&const int *\fR
+\fI\&IPID\fR,
+\fB\&int *\fR
+\fI\&IPLEN\fR,
+\fB\&int *\fR
+\fI\&IPMAP\fR,
+\fB\&int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_plindx10\fR
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+.SH ARGUMENTS
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel information.
+.TP 8
+K       (global input)          const int
+On entry, K specifies the number of entries in IPID.  K is at
+least 2*N, and at most 4*N.
+.TP 8
+IPID    (global input)          const int *
+On entry,  IPID  is an array of length K. The first K entries
+of that array contain the src and final destination resulting
+from the application of the interchanges.
+.TP 8
+IPLEN   (global output)         int *
+On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+this array is such that  IPLEN[i]  is the number of rows of A
+in the processes before process IPMAP[i] after the sort, with
+the convention that IPLEN[nprow] is the total number of rows.
+In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+rows of  A  that should be moved for each process.  IPLEN  is
+such that the number of rows of the source process row can be
+computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+this  array are sorted  so  that  the quantities IPLEN[i+1] -
+IPLEN[i] are logarithmically sorted.
+.TP 8
+IPMAP   (global output)         int *
+On entry, IPMAP is an array of dimension NPROW. On exit, this
+array contains  the logarithmic mapping of the processes.  In
+other words, IPMAP[myrow] is the corresponding sorted process
+coordinate.
+.TP 8
+IPMAPM1 (global output)         int *
+On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+this  array  contains  the inverse of the logarithmic mapping
+contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+[0.. NPROW)
+.SH SEE ALSO
+.BR HPL_pdlaswp00N \ (3),
+.BR HPL_pdlaswp00T \ (3),
+.BR HPL_pdlaswp01N \ (3),
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3
new file mode 100644
index 000000000..38956c5a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3
@@ -0,0 +1,38 @@
+.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pnum \- Rank determination.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_pnum(\fR
+\fB\&const HPL_T_grid *\fR
+\fI\&GRID\fR,
+\fB\&const int\fR
+\fI\&MYROW\fR,
+\fB\&const int\fR
+\fI\&MYCOL\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pnum\fR
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+.SH ARGUMENTS
+.TP 8
+GRID    (local input)           const HPL_T_grid *
+On entry,  GRID  points  to the data structure containing the
+process grid information.
+.TP 8
+MYROW   (local input)           const int
+On entry,  MYROW  specifies the row coordinate of the process
+whose rank is to be determined. MYROW must be greater than or
+equal to zero and less than NPROW.
+.TP 8
+MYCOL   (local input)           const int
+On entry,  MYCOL  specifies  the  column  coordinate  of  the
+process whose rank is to be determined. MYCOL must be greater
+than or equal to zero and less than NPCOL.
+.SH SEE ALSO
+.BR HPL_grid_init \ (3),
+.BR HPL_grid_info \ (3),
+.BR HPL_grid_exit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3
new file mode 100644
index 000000000..550703aee
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3
@@ -0,0 +1,35 @@
+.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_ptimer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be disabled  by calling HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3
new file mode 100644
index 000000000..a93a1c208
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_cputime\fR
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_ptimer_walltime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3
new file mode 100644
index 000000000..37e5e8c54
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_ptimer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_ptimer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_ptimer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_ptimer_cputime \ (3),
+.BR HPL_ptimer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3
new file mode 100644
index 000000000..14e4a65d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3
@@ -0,0 +1,45 @@
+.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_pwarn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_pwarn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_pwarn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has  occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH SEE ALSO
+.BR HPL_pabort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3
new file mode 100644
index 000000000..8b1918fea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3
@@ -0,0 +1,28 @@
+.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rand \- random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_rand();\fR
+.SH DESCRIPTION
+\fB\&HPL_rand\fR
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3
new file mode 100644
index 000000000..d9136c14b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3
@@ -0,0 +1,49 @@
+.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_recv \- Receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_recv(\fR
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&SRC\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_recv\fR
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+SRC     (local input)           int
+On entry, SRC  specifies the rank of the  sending  process in
+the communication space defined by COMM.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_sendrecv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3
new file mode 100644
index 000000000..c48f04ded
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3
@@ -0,0 +1,56 @@
+.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_reduce \- Reduce operation.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_reduce(\fR
+\fB\&void *\fR
+\fI\&BUFFER\fR,
+\fB\&const int\fR
+\fI\&COUNT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR,
+\fB\&const HPL_T_OP \fR
+\fI\&OP\fR,
+\fB\&const int\fR
+\fI\&ROOT\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_reduce\fR
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array,  so the
+original data is corrupted in all processes but the accumulating one.
+.SH ARGUMENTS
+.TP 8
+BUFFER  (local input/output)    void *
+On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+exit,  and  in process of rank  ROOT  this array contains the
+reduced data.  This  buffer  is also used as workspace during
+the operation in the other processes of the group.
+.TP 8
+COUNT   (global input)          const int
+On entry,  COUNT  indicates the number of entries in  BUFFER.
+COUNT must be at least zero.
+.TP 8
+DTYPE   (global input)          const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.TP 8
+OP      (global input)          const HPL_T_OP 
+On entry, OP is a pointer to the local combine function.
+.TP 8
+ROOT    (global input)          const int
+On entry, ROOT is the coordinate of the accumulating process.
+.TP 8
+COMM    (global/local input)    MPI_Comm
+The MPI communicator identifying the process collection.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3
new file mode 100644
index 000000000..eac4deb66
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3
@@ -0,0 +1,77 @@
+.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollN \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollN\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the number of columns of  U.  N must be
+at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,IPLEN[NPROW]).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3
new file mode 100644
index 000000000..bab5bdffd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3
@@ -0,0 +1,77 @@
+.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_rollT \- Roll U and forward the column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_rollT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_rollT\fR
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be rolled) information.
+.TP 8
+N       (local input)           const int
+On entry, N specifies the local number of rows of  U.  N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U in each process row.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least  MAX(1,N).
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process row.
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3
new file mode 100644
index 000000000..a11252d6a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3
@@ -0,0 +1,67 @@
+.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sdrv \- Send and receive a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_sdrv(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&double *\fR
+\fI\&RBUF\fR,
+\fB\&int\fR
+\fI\&RCOUNT\fR,
+\fB\&int\fR
+\fI\&RTAG\fR,
+\fB\&int\fR
+\fI\&PARTNER\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sdrv\fR
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages  of  length  less than  or  equal to zero  are not sent  nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number  of double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG  specifies the message tag to be used for the
+sending communication operation.
+.TP 8
+RBUF    (local output)          double *
+On entry, RBUF specifies the starting address of buffer to be
+received.
+.TP 8
+RCOUNT  (local input)           int
+On entry,  RCOUNT  specifies  the number  of double precision
+entries in RBUF. RCOUNT must be at least zero.
+.TP 8
+RTAG    (local input)           int
+On entry,  RTAG  specifies the message tag to be used for the
+receiving communication operation.
+.TP 8
+PARTNER (local input)           int
+On entry,  PARTNER  specifies  the rank of the  collaborative
+process in the communication space defined by COMM.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_send \ (3),
+.BR HPL_recv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3
new file mode 100644
index 000000000..48ffc5d62
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3
@@ -0,0 +1,49 @@
+.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_send \- Send a message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&int\fR
+\fB\&HPL_send(\fR
+\fB\&double *\fR
+\fI\&SBUF\fR,
+\fB\&int\fR
+\fI\&SCOUNT\fR,
+\fB\&int\fR
+\fI\&DEST\fR,
+\fB\&int\fR
+\fI\&STAG\fR,
+\fB\&MPI_Comm\fR
+\fI\&COMM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_send\fR
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+.SH ARGUMENTS
+.TP 8
+SBUF    (local input)           double *
+On entry, SBUF specifies the starting address of buffer to be
+sent.
+.TP 8
+SCOUNT  (local input)           int
+On entry,  SCOUNT  specifies  the number of  double precision
+entries in SBUF. SCOUNT must be at least zero.
+.TP 8
+DEST    (local input)           int
+On entry, DEST specifies the rank of the receiving process in
+the communication space defined by COMM.
+.TP 8
+STAG    (local input)           int
+On entry,  STAG specifies the message tag to be used for this
+communication operation.
+.TP 8
+COMM    (local input)           MPI_Comm
+The MPI communicator identifying the communication space.
+.SH SEE ALSO
+.BR HPL_recv \ (3),
+.BR HPL_sdrv \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3
new file mode 100644
index 000000000..e9a9433ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3
@@ -0,0 +1,37 @@
+.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_setran \- Manage the random number generator.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_setran(\fR
+\fB\&const int\fR
+\fI\&OPTION\fR,
+\fB\&int *\fR
+\fI\&IRAN\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_setran\fR
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2),  irand  (resp. ias and ics) is set to the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+.SH ARGUMENTS
+.TP 8
+OPTION  (local input)           const int
+On entry, OPTION  is an integer that specifies the operations
+to be performed on the random generator as specified above.
+.TP 8
+IRAN    (local input/output)    int *
+On entry,  IRAN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of a random number.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_xjumpm \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3
new file mode 100644
index 000000000..452b8da34
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadN \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadN(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadN\fR
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies  the  local number of columns of U. N
+must be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,IPLEN[nprow]).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01N \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3
new file mode 100644
index 000000000..54f7dda31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3
@@ -0,0 +1,96 @@
+.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_spreadT \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_spreadT(\fR
+\fB\&HPL_T_panel *\fR
+\fI\&PBCST\fR,
+\fB\&int *\fR
+\fI\&IFLAG\fR,
+\fB\&HPL_T_panel *\fR
+\fI\&PANEL\fR,
+\fB\&const enum HPL_SIDE\fR
+\fI\&SIDE\fR,
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&double *\fR
+\fI\&U\fR,
+\fB\&const int\fR
+\fI\&LDU\fR,
+\fB\&const int\fR
+\fI\&SRCDIST\fR,
+\fB\&const int *\fR
+\fI\&IPLEN\fR,
+\fB\&const int *\fR
+\fI\&IPMAP\fR,
+\fB\&const int *\fR
+\fI\&IPMAPM1\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_spreadT\fR
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+.SH ARGUMENTS
+.TP 8
+PBCST   (local input/output)    HPL_T_panel *
+On entry,  PBCST  points to the data structure containing the
+panel (to be broadcast) information.
+.TP 8
+IFLAG   (local input/output)    int *
+On entry, IFLAG  indicates  whether or not  the broadcast has
+already been completed.  If not,  probing will occur, and the
+outcome will be contained in IFLAG on exit.
+.TP 8
+PANEL   (local input/output)    HPL_T_panel *
+On entry,  PANEL  points to the data structure containing the
+panel (to be spread) information.
+.TP 8
+SIDE    (global input)          const enum HPL_SIDE
+On entry, SIDE specifies whether the local piece of U located
+in process IPMAP[SRCDIST] should be spread to the right or to
+the left. This feature is used by the equilibration process.
+.TP 8
+N       (global input)          const int
+On entry,  N  specifies the local number of rows of U. N must
+be at least zero.
+.TP 8
+U       (local input/output)    double *
+On entry,  U  is an array of dimension (LDU,*) containing the
+local pieces of U.
+.TP 8
+LDU     (local input)           const int
+On entry, LDU specifies the local leading dimension of U. LDU
+should be at least MAX(1,N).
+.TP 8
+SRCDIST (local input)           const int
+On entry,  SRCDIST  specifies the source process that spreads
+its piece of U.
+.TP 8
+IPLEN   (global input)          const int *
+On entry, IPLEN is an array of dimension NPROW+1.  This array
+is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+in each process before process IPMAP[i], with the  convention
+that IPLEN[nprow] is the total number of rows. In other words
+IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+should be moved to process IPMAP[i].
+.TP 8
+IPMAP   (global input)          const int *
+On entry, IPMAP is an array of dimension  NPROW.  This  array
+contains  the  logarithmic mapping of the processes. In other
+words, IPMAP[myrow]  is the absolute coordinate of the sorted
+process.
+.TP 8
+IPMAPM1 (global input)          const int *
+On entry,  IPMAPM1 is an array of dimension NPROW. This array
+contains  the inverse of the logarithmic mapping contained in
+IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+.SH SEE ALSO
+.BR HPL_pdlaswp01T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3
new file mode 100644
index 000000000..a3c4e2190
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3
@@ -0,0 +1,44 @@
+.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_sum \- Combine (sum) two buffers.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_sum(\fR
+\fB\&const int\fR
+\fI\&N\fR,
+\fB\&const void *\fR
+\fI\&IN\fR,
+\fB\&void *\fR
+\fI\&INOUT\fR,
+\fB\&const HPL_T_TYPE\fR
+\fI\&DTYPE\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_sum\fR
+combines (sum) two buffers.
+.SH ARGUMENTS
+.TP 8
+N       (input)                 const int
+On entry, N  specifies  the  length  of  the  buffers  to  be
+combined. N must be at least zero.
+.TP 8
+IN      (input)                 const void *
+On entry, IN points to the input-only buffer to be combined.
+.TP 8
+INOUT   (input/output)          void *
+On entry, INOUT  points  to  the  input-output  buffer  to be
+combined.  On exit,  the  entries of this array  contain  the
+combined results.
+.TP 8
+DTYPE   (input)                 const HPL_T_TYPE
+On entry,  DTYPE  specifies the type of the buffers operands.
+.SH SEE ALSO
+.BR HPL_broadcast \ (3),
+.BR HPL_reduce \ (3),
+.BR HPL_all_reduce \ (3),
+.BR HPL_barrier \ (3),
+.BR HPL_min \ (3),
+.BR HPL_max \ (3),
+.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3
new file mode 100644
index 000000000..61f3f7cb1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3
@@ -0,0 +1,35 @@
+.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer \- Timer facility.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_timer(\fR
+\fB\&const int\fR
+\fI\&I\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_timer\fR
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be  disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+.SH ARGUMENTS
+.TP 8
+I       (global input)          const int
+On entry, I specifies the timer to stop/start.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3
new file mode 100644
index 000000000..1f8987ca2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3
@@ -0,0 +1,23 @@
+.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_cputime \- Return the CPU time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_cputime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_cputime\fR
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+.SH SEE ALSO
+.BR HPL_timer_walltime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3
new file mode 100644
index 000000000..9a6e898e7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3
@@ -0,0 +1,14 @@
+.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_timer_walltime \- Return the elapsed (wall-clock) time.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&double\fR
+\fB\&HPL_timer_walltime();\fR
+.SH DESCRIPTION
+\fB\&HPL_timer_walltime\fR
+returns the elapsed (wall-clock) time.
+.SH SEE ALSO
+.BR HPL_timer_cputime \ (3),
+.BR HPL_timer \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3
new file mode 100644
index 000000000..6b051acb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3
@@ -0,0 +1,59 @@
+.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_warn \- displays an error message.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_warn(\fR
+\fB\&FILE *\fR
+\fI\&STREAM\fR,
+\fB\&int\fR
+\fI\&LINE\fR,
+\fB\&const char *\fR
+\fI\&SRNAME\fR,
+\fB\&const char *\fR
+\fI\&FORM\fR,
+\fB\&...\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_warn\fR
+displays an error message.
+.SH ARGUMENTS
+.TP 8
+STREAM  (local input)           FILE *
+On entry, STREAM specifies the output stream.
+.TP 8
+LINE    (local input)           int
+On entry,  LINE  specifies the line  number in the file where
+the  error  has occurred.  When  LINE  is not a positive line
+number, it is ignored.
+.TP 8
+SRNAME  (local input)           const char *
+On entry, SRNAME  should  be the name of the routine  calling
+this error handler.
+.TP 8
+FORM    (local input)           const char *
+On entry, FORM specifies the format, i.e., how the subsequent
+arguments are converted for output.
+.TP 8
+        (local input)           ...
+On entry,  ...  is the list of arguments to be printed within
+the format string.
+.SH EXAMPLE
+\fI\&#include "hpl.h"\fR
+ 
+int main(int argc, char *argv[])
+.br
+{
+.br
+   HPL_warn( stderr, __LINE__, __FILE__,
+.br
+             "Demo.\en" );
+.br
+   exit(0); return(0);
+.br
+}
+.SH SEE ALSO
+.BR HPL_abort \ (3),
+.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3
new file mode 100644
index 000000000..df3e0a954
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3
@@ -0,0 +1,77 @@
+.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions"
+.SH NAME
+HPL_xjumpm \- Compute constants to jump in the random sequence.
+.SH SYNOPSIS
+\fB\&#include "hpl.h"\fR
+ 
+\fB\&void\fR
+\fB\&HPL_xjumpm(\fR
+\fB\&const int\fR
+\fI\&JUMPM\fR,
+\fB\&int *\fR
+\fI\&MULT\fR,
+\fB\&int *\fR
+\fI\&IADD\fR,
+\fB\&int *\fR
+\fI\&IRANN\fR,
+\fB\&int *\fR
+\fI\&IRANM\fR,
+\fB\&int *\fR
+\fI\&IAM\fR,
+\fB\&int *\fR
+\fI\&ICM\fR
+\fB\&);\fR
+.SH DESCRIPTION
+\fB\&HPL_xjumpm\fR
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+.SH ARGUMENTS
+.TP 8
+JUMPM   (local input)           const int
+On entry,  JUMPM  specifies  the  number  of entries  in  the
+sequence to jump over. When JUMPM is less than or equal to zero,
+A and C are not computed, IRANM is set to IRANN corresponding
+to a jump of size zero.
+.TP 8
+MULT    (local input)           int *
+On entry, MULT is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  a  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IADD    (local input)           int *
+On entry, IADD is an array of dimension 2,  that contains the
+16-lower  and 15-higher bits of the constant  c  to jump from
+X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8
+IRANN   (local input)           int *
+On entry, IRANN is an array of dimension 2, that contains the
+16-lower and 15-higher bits of the encoding of X(n).
+.TP 8
+IRANM   (local output)          int *
+On entry,  IRANM  is an array of dimension 2.   On exit, this
+array  contains respectively  the 16-lower and 15-higher bits
+of the encoding of X(n+JUMPM).
+.TP 8
+IAM     (local output)          int *
+On entry, IAM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+sequence. IAM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits  of this constant  A. When  JUMPM  is less than or
+equal to zero, this array is not referenced.
+.TP 8
+ICM     (local output)          int *
+On entry, ICM is an array of dimension 2. On exit, when JUMPM
+is  greater  than  zero,  this  array  contains  the  encoded
+constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+sequence. ICM(0:1)  contains  respectively  the  16-lower and
+15-higher  bits  of this constant  C. When  JUMPM  is less than or
+equal to zero, this array is not referenced.
+.SH SEE ALSO
+.BR HPL_ladd \ (3),
+.BR HPL_lmul \ (3),
+.BR HPL_setran \ (3),
+.BR HPL_jumpit \ (3),
+.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing
new file mode 100755
index 000000000..625aeb118
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing
@@ -0,0 +1,215 @@
+#! /bin/sh
+# Common wrapper for a few potentially missing GNU programs.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try '$0 --help' for more information"
+  exit 1
+fi
+
+case $1 in
+
+  --is-lightweight)
+    # Used by our autoconf macros to check whether the available missing
+    # script is modern enough.
+    exit 0
+    ;;
+
+  --run)
+    # Back-compat with the calling convention used by older automake.
+    shift
+    ;;
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
+to PROGRAM being missing or too old.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+
+Supported PROGRAM values:
+  aclocal   autoconf  autoheader   autom4te  automake  makeinfo
+  bison     yacc      flex         lex       help2man
+
+Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
+'g' are ignored when checking the name.
+
+Send bug reports to <bug-automake@gnu.org>."
+    exit $?
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit $?
+    ;;
+
+  -*)
+    echo 1>&2 "$0: unknown '$1' option"
+    echo 1>&2 "Try '$0 --help' for more information"
+    exit 1
+    ;;
+
+esac
+
+# Run the given program, remember its exit status.
+"$@"; st=$?
+
+# If it succeeded, we are done.
+test $st -eq 0 && exit 0
+
+# Also exit now if it failed (or wasn't found), and '--version' was
+# passed; such an option is passed most likely to detect whether the
+# program is present and works.
+case $2 in --version|--help) exit $st;; esac
+
+# Exit code 63 means version mismatch.  This often happens when the user
+# tries to use an ancient version of a tool on a file that requires a
+# minimum version.
+if test $st -eq 63; then
+  msg="probably too old"
+elif test $st -eq 127; then
+  # Program was missing.
+  msg="missing on your system"
+else
+  # Program was found and executed, but failed.  Give up.
+  exit $st
+fi
+
+perl_URL=https://www.perl.org/
+flex_URL=https://github.com/westes/flex
+gnu_software_URL=https://www.gnu.org/software
+
+program_details ()  # print the owning package and prerequisite URLs for program $1
+{
+  case $1 in  # only the Automake and Autoconf tool names are described
+    aclocal|automake)
+      echo "The '$1' program is part of the GNU Automake package:"
+      echo "<$gnu_software_URL/automake>"
+      echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/autoconf>"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+    autoconf|autom4te|autoheader)
+      echo "The '$1' program is part of the GNU Autoconf package:"
+      echo "<$gnu_software_URL/autoconf/>"
+      echo "It also requires GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+  esac  # any other name matches no pattern, so nothing is printed
+}
+
+give_advice ()  # print advice about missing/outdated program $1 (reads global $msg)
+{
+  # Normalize the name: strip one leading 'gnu-', 'gnu' or 'g' prefix.
+  normalized_program=`echo "$1" | sed '
+    s/^gnu-//; t
+    s/^gnu//; t
+    s/^g//; t'`
+
+  printf '%s\n' "'$1' is $msg."
+
+  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
+  case $normalized_program in
+    autoconf*)
+      echo "You should only need it if you modified 'configure.ac',"
+      echo "or m4 files included by it."
+      program_details 'autoconf'
+      ;;
+    autoheader*)
+      echo "You should only need it if you modified 'acconfig.h' or"
+      echo "$configure_deps."
+      program_details 'autoheader'
+      ;;
+    automake*)
+      echo "You should only need it if you modified 'Makefile.am' or"
+      echo "$configure_deps."
+      program_details 'automake'
+      ;;
+    aclocal*)
+      echo "You should only need it if you modified 'acinclude.m4' or"
+      echo "$configure_deps."
+      program_details 'aclocal'
+      ;;
+   autom4te*)
+      echo "You might have modified some maintainer files that require"
+      echo "the 'autom4te' program to be rebuilt."
+      program_details 'autom4te'
+      ;;
+    bison*|yacc*)
+      echo "You should only need it if you modified a '.y' file."
+      echo "You may want to install the GNU Bison package:"
+      echo "<$gnu_software_URL/bison/>"
+      ;;
+    lex*|flex*)
+      echo "You should only need it if you modified a '.l' file."
+      echo "You may want to install the Fast Lexical Analyzer package:"
+      echo "<$flex_URL>"
+      ;;
+    help2man*)
+      echo "You should only need it if you modified a dependency" \
+           "of a man page."
+      echo "You may want to install the GNU Help2man package:"
+      echo "<$gnu_software_URL/help2man/>"
+    ;;
+    makeinfo*)
+      echo "You should only need it if you modified a '.texi' file, or"
+      echo "any other file indirectly affecting the aspect of the manual."
+      echo "You might want to install the Texinfo package:"
+      echo "<$gnu_software_URL/texinfo/>"
+      echo "The spurious makeinfo call might also be the consequence of"
+      echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
+      echo "want to install GNU make:"
+      echo "<$gnu_software_URL/make/>"
+      ;;
+    *)  # unrecognized tool: generic advice, quoting the un-normalized name $1
+      echo "You might have modified some files without having the proper"
+      echo "tools for further handling them.  Check the 'README' file, it"
+      echo "often tells you about the needed prerequisites for installing"
+      echo "this package.  You may also peek at any GNU archive site, in"
+      echo "case some other package contains this missing '$1' program."
+      ;;
+  esac
+}
+
+give_advice "$1" | sed -e '1s/^/WARNING: /' \
+                       -e '2,$s/^/         /' >&2
+
+# Propagate the correct exit status (expected to be 127 for a program
+# not found, 63 for a program that failed due to version mismatch).
+exit $st
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
new file mode 100644
index 000000000..056fd81ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = FreeBSD_PIV_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpich
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = /usr/bin/f77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = /usr/bin/ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS
new file mode 100644
index 000000000..af3f5da5f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS
@@ -0,0 +1,179 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Kept equal to this file's Make.<arch> suffix; BINdir/LIBdir derive from it.
+ARCH         = HPUX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPIinc tells the C compiler where to find the MPI header files, MPIlib
+# is defined to be the name of the MPI library to be used. The variables
+# MPIdir and MPIplat are only used for defining MPIinc and MPIlib.
+#
+MPIdir       = $(HOME)/local/mpi
+MPIplat      = $(MPIdir)/hpux/ch_p4
+#
+MPIinc       = -I$(MPIdir)/include -I$(MPIplat)/include
+MPIlib       = $(MPIplat)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - BLAS library -------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+BLASlib      = /usr/lib/pa1.1/libblas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate 
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a 
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form: 
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses 
+#                       Cray  fcd  (fortran  character  descriptor)  for 
+#                       interoperation. 
+#
+F2CDEFS      = -DNoChange -DF77_INTEGER=int -DStringSunStyle 
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_LIBS lists every library passed to the linker: HPL, BLAS, then MPI.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc)
+HPL_LIBS     = $(HPLlib) $(BLASlib) $(MPIlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS F77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+# 
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = cc
+LINKFLAGS    = -Aa
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS
new file mode 100644
index 000000000..984236be2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = I860_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        =
+MPinc        =
+MPlib        = -lmpi
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        =
+LAinc        =
+LAlib        = -lkmath
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = cc
+CCNOOPT      = $(HPL_DEFS) -nx
+CCFLAGS      = $(HPL_DEFS) -O4 -nx
+#
+LINKER       = f77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS
new file mode 100644
index 000000000..d78bcf09f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS
@@ -0,0 +1,181 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the recipe shell; the rest name basic file commands.
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH also names the per-platform include, bin and lib subdirectories.
+ARCH         = IRIX_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is rooted at TOPdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Full path of the HPL static archive (value ends in a blank).
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Two include roots plus a full-path static archive, all under MPdir.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/IRIX64/ch_p4/include
+MPlib        = $(MPdir)/IRIX64/ch_p4/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAdir and LAinc stay empty; the BLAS is requested with -lblas only.
+LAdir        =
+LAinc        =
+LAlib        = -lblas
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Picks the [default] option in each of the three categories above.
+F2CDEFS      = -DAdd_ -DStringSunStyle -DF77_INTEGER=int
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the variables set above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty here, so the defaults listed above stay in effect.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below both start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT keeps the defines and -64 of CCFLAGS but drops -O3 and tuning.
+CC           = cc
+CCNOOPT      = $(HPL_DEFS) -64
+CCFLAGS      = $(HPL_DEFS) -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \
+               -LNO:blocking=OFF -LOPT:alias=typed
+# Linking uses the same cc and the same flags as compilation.
+LINKER       = cc
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
new file mode 100644
index 000000000..624306902
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the recipe shell; the rest name basic file commands.
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH also names the per-platform include, bin and lib subdirectories.
+ARCH         = Linux_ATHLON_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is rooted at TOPdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Full path of the HPL static archive (value ends in a blank).
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI headers and static archive both live under the fixed MPdir prefix.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Two static archives from LAdir: libcblas.a first, then libatlas.a.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: HPL_OPTS below selects the cblas interface instead.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the variables set above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Switches from the Fortran 77 BLAS default to the cblas interface.
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below both start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# gcc is pinned by absolute path; CCNOOPT is the defines only.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Linking uses the same gcc and the same flags as compilation.
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
new file mode 100644
index 000000000..07985f781
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the recipe shell; the rest name basic file commands.
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH also names the per-platform include, bin and lib subdirectories.
+ARCH         = Linux_ATHLON_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Every HPL path below is rooted at TOPdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# Full path of the HPL static archive (value ends in a blank).
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPI headers and static archive both live under the fixed MPdir prefix.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Two static archives from LAdir: libf77blas.a first, then libatlas.a.
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_ATHLON
+LAinc        =
+LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# -DAdd__ is the f2c name space option above; the other two are defaults.
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the variables set above.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty here, so the defaults listed above stay in effect.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS is what CCNOOPT and CCFLAGS below both start from.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# gcc is pinned by absolute path; CCNOOPT is the defines only.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+# Linking goes through g77 but reuses the C compile flags.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# RANLIB is set to echo rather than to a ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
new file mode 100644
index 000000000..ddf3fb4b6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Linux_ATHLON_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (no trailing blank: it would be kept in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - MPI directories - library ------------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the Fortran 77 BLAS interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_VSIPL
+# 
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+LINKER       = /usr/bin/gcc
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64
new file mode 100644
index 000000000..47661c25d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64
@@ -0,0 +1,193 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Linux_Intel64
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (no trailing blank: it would be kept in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Disabled here: CC (mpiicc) presumably provides the MPI paths itself.
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc is the directory holding the Linear Algebra library headers (it
+# is passed as -I$(LAinc) in HPL_INCLUDES), LAlib lists the libraries to
+# link. LAdir is only used for defining LAinc and LAlib; it must be the
+# MKL root, i.e. the directory that contains include/ and lib/intel64/.
+LAdir        = $(MKLROOT)
+ifndef  LAinc
+LAinc        = $(LAdir)/include
+endif
+ifndef  LAlib
+LAlib        = -L$(LAdir)/lib/intel64 \
+               -Wl,--start-group \
+               $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
+               $(LAdir)/lib/intel64/libmkl_intel_thread.a \
+               $(LAdir)/lib/intel64/libmkl_core.a \
+               -Wl,--end-group -lpthread -ldl
+endif
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC       = mpiicc
+CCNOOPT  = $(HPL_DEFS)
+OMP_DEFS = -openmp
+CCFLAGS  = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS
new file mode 100644
index 000000000..535a0e214
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = Linux_PII_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+#
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL library archive (no trailing blank: it would be kept in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
new file mode 100644
index 000000000..31fc9ea74
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell used by make, followed by the basic file utilities.
+SHELL = /bin/sh
+#
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; BINdir, LIBdir and the include path are keyed on it.
+ARCH = Linux_PII_CBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; binaries and libraries live in per-ARCH subdirs.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc holds the -I flags for the message-passing headers and MPlib the
+# library to link; MPdir is only a helper for spelling out those two.
+# All are empty here; presumably the mpicc/mpif77 wrappers supply them.
+#
+MPdir =
+MPinc =
+MPlib =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the -I flags for the linear algebra headers (none set
+# here) and LAlib the archives to link: libcblas.a plus libatlas.a.
+# LAdir is only a helper used to spell out LAlib.
+#
+LAdir = ${HOME}/netlib/ARCHIVES/Linux_PII
+LAinc =
+LAlib = ${LAdir}/libcblas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: this file selects the cblas interface via HPL_OPTS.
+F2CDEFS =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search flags, then the list of libraries handed to the linker.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Selected here: the cblas interface instead of the Fortran 77 default.
+HPL_OPTS = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+# Combined -D / -I flags; reused by CCNOOPT and CCFLAGS further down.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = mpicc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# Linking goes through the Fortran driver so that the Fortran internals
+# referenced by the BLAS library can be found.
+#
+LINKER = mpif77
+LINKFLAGS = ${CCFLAGS}
+# Archive tools; RANLIB is mapped to echo (presumably to skip indexing).
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS
new file mode 100644
index 000000000..5ed9aac12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell used by make, followed by the basic file utilities.
+SHELL = /bin/sh
+#
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; BINdir, LIBdir and the include path are keyed on it.
+ARCH = Linux_PII_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; binaries and libraries live in per-ARCH subdirs.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc holds the -I flags for the message-passing headers and MPlib the
+# library to link (here the libmpich.a archive under MPdir). MPdir is
+# only a helper for spelling out those two.
+#
+MPdir = /usr/local/mpi
+MPinc = -I${MPdir}/include
+MPlib = ${MPdir}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the -I flags for the linear algebra headers (none set
+# here) and LAlib the archives to link: libf77blas.a plus libatlas.a.
+# LAdir is only a helper used to spell out LAlib.
+#
+LAdir = ${HOME}/netlib/ARCHIVES/Linux_PII
+LAinc =
+LAlib = ${LAdir}/libf77blas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: f2c naming (-DAdd__), INTEGER as C int, Sun-style strings.
+F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search flags, then the list of libraries handed to the linker.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Nothing selected: the defaults listed above apply.
+HPL_OPTS =
+#
+# ----------------------------------------------------------------------
+# Combined -D / -I flags; reused by CCNOOPT and CCFLAGS further down.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = /usr/bin/gcc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# Linking goes through the Fortran driver so that the Fortran internals
+# referenced by the BLAS library can be found.
+#
+LINKER = /usr/bin/g77
+LINKFLAGS = ${CCFLAGS}
+# Archive tools; RANLIB is mapped to echo (presumably to skip indexing).
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
new file mode 100644
index 000000000..a2416396c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell used by make, followed by the basic file utilities.
+SHELL = /bin/sh
+#
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; BINdir, LIBdir and the include path are keyed on it.
+ARCH = Linux_PII_FBLAS_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Tree rooted at TOPdir; binaries and libraries live in per-ARCH subdirs.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path; no trailing blank, since make keeps it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc holds the -I flags for the message-passing headers and MPlib the
+# library to link; MPdir is only a helper for spelling out those two.
+# All are empty here; presumably the mpicc/mpif77 wrappers supply them.
+#
+MPdir =
+MPinc =
+MPlib =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the -I flags for the linear algebra headers (none set
+# here) and LAlib the archives to link: libf77blas.a plus libatlas.a.
+# LAdir is only a helper used to spell out LAlib.
+#
+LAdir = ${HOME}/netlib/ARCHIVES/Linux_PII
+LAinc =
+LAlib = ${LAdir}/libf77blas.a ${LAdir}/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Chosen: one trailing underscore, INTEGER as C int, Sun-style strings.
+F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Header search flags, then the list of libraries handed to the linker.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Nothing selected: the defaults listed above apply.
+HPL_OPTS =
+#
+# ----------------------------------------------------------------------
+# Combined -D / -I flags; reused by CCNOOPT and CCFLAGS further down.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = mpicc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# Linking goes through the Fortran driver so that the Fortran internals
+# referenced by the BLAS library can be found.
+#
+LINKER = mpif77
+LINKFLAGS = ${CCFLAGS}
+# Archive tools; RANLIB is mapped to echo (presumably to skip indexing).
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL
new file mode 100644
index 000000000..0f690a1b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for the basic file operations; RM forces with -f.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and selects the per-arch paths below.
+ARCH         = Linux_PII_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir, here the hpl directory in the user's HOME.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): HPLlib ends in a trailing space; make keeps it.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Links the libmpich.a archive found under the MPdir prefix.
+MPdir        = /usr/local/mpi
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Links the libvsip_c.a archive found under the LAdir prefix.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: this file selects VSIPL via HPL_OPTS, not an F77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES: -I search paths; HPL_LIBS: HPL, LA and MPI libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Replaces the default BLAS Fortran 77 interface with the vsip library.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS adds optimization and warnings.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# The linker is g77, given (like CC) by absolute path.
+LINKER       = /usr/bin/g77
+LINKFLAGS    = $(CCFLAGS)
+# NOTE(review): RANLIB is echo, presumably making that step a no-op.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
new file mode 100644
index 000000000..fee265e46
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for the basic file operations; RM forces with -f.
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and selects the per-arch paths below.
+ARCH         = Linux_PII_VSIPL_gm
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir, here the hpl directory in the user's HOME.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): HPLlib ends in a trailing space; make keeps it.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Left empty; presumably mpicc/mpif77 (set below) supply the MPI paths.
+MPdir        =
+MPinc        =
+MPlib        =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Links the libvsip_c.a archive found under the LAdir prefix.
+LAdir        = /home/software/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty: this file selects VSIPL via HPL_OPTS, not an F77 BLAS.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# HPL_INCLUDES: -I search paths; HPL_LIBS: HPL, LA and MPI libraries.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Replaces the default BLAS Fortran 77 interface with the vsip library.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS adds optimization and warnings.
+CC           = mpicc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = mpif77
+LINKFLAGS    = $(CCFLAGS)
+# NOTE(review): RANLIB is echo, presumably making that step a no-op.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate
new file mode 100644
index 000000000..d1ce69b64
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate
@@ -0,0 +1,183 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# File-operation commands; ln -fs, mkdir -p and rm -f tolerate reruns.
+CD           = cd
+CP           = cp
+LN_S         = ln -fs
+MKDIR        = mkdir -p
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names this configuration and selects the per-arch paths below.
+ARCH         = MacOSX_Accelerate
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# Everything lives under TOPdir, here the hpl directory in the user's HOME.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# NOTE(review): HPLlib ends in a trailing space; make keeps it.
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+# MPdir        = /opt/intel/mpi/4.1.0
+# MPinc        = -I$(MPdir)/include64
+# MPlib        = $(MPdir)/lib64/libmpi.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# No paths needed: the library is named by the -framework option.
+LAdir        =
+LAinc        =
+LAlib        = -framework Accelerate
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Picks the option marked [default] in each of the three categories.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# NOTE(review): LAinc/MPinc are omitted and MPlib is never set (empty).
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Turns on detailed timers; HPL_PROGRESS_REPORT is not described above.
+HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
+#
+# ----------------------------------------------------------------------
+# HPL_DEFS bundles the F77 interface macros, options and include paths.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS only adds -O3.
+CC       = mpicc-openmpi-mp
+CCNOOPT  = $(HPL_DEFS)
+CCFLAGS  = $(HPL_DEFS) -O3
+#
+# On some platforms,  it is necessary  to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+# Here the C compiler driver doubles as the linker.
+LINKER       = $(CC)
+LINKFLAGS    = $(CCFLAGS)
+# Uses a real ranlib, and ar with c (create) added to r (replace).
+ARCHIVER     = ar
+ARFLAGS      = cr
+RANLIB       = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS
new file mode 100644
index 000000000..628f2c152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL = /bin/sh
+# Utility commands, held in variables so they can be overridden.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWR2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin and lib get an ARCH subdirectory.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it in HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir =
+MPinc =
+MPlib =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the compiler flags locating the Linear Algebra headers and
+# LAlib names the library to link.  LAdir exists only as a helper for
+# defining those two; it and LAinc are left empty here.
+#
+LAdir =
+LAinc =
+LAlib = -lesslp2
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL include paths and link libraries -------------------------------
+# ----------------------------------------------------------------------
+# -I flags cover INCdir and its ARCH subdirectory, then LAinc and MPinc.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional compile-time switches is enabled)
+# ----------------------------------------------------------------------
+# Definitions and include paths used by both CCNOOPT and CCFLAGS.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = mpcc_r
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1
+# Linking uses a separate driver and appends the compile flags.
+LINKER = mpxlf_r
+LINKFLAGS = -bmaxdata:0x70000000 ${CCFLAGS}
+# RANLIB is set to echo, so invoking it merely prints its arguments.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS
new file mode 100644
index 000000000..bba468803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL = /bin/sh
+# Utility commands, held in variables so they can be overridden.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWR3_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin and lib get an ARCH subdirectory.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it in HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir =
+MPinc =
+MPlib =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the compiler flags locating the Linear Algebra headers and
+# LAlib names the library to link.  LAdir exists only as a helper for
+# defining those two; it and LAinc are left empty here.
+#
+LAdir =
+LAinc =
+LAlib = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL include paths and link libraries -------------------------------
+# ----------------------------------------------------------------------
+# -I flags cover INCdir and its ARCH subdirectory, then LAinc and MPinc.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional compile-time switches is enabled)
+# ----------------------------------------------------------------------
+# Definitions and include paths used by both CCNOOPT and CCFLAGS.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = /usr/vac/bin/xlc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt
+# NOTE(review): plain xlc/xlf with empty MPinc/MPlib - confirm MPI is found.
+LINKER = /usr/bin/xlf
+LINKFLAGS = -bmaxdata:0x70000000 ${CCFLAGS}
+# RANLIB is set to echo, so invoking it merely prints its arguments.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS
new file mode 100644
index 000000000..2a0fb2ec6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL = /bin/sh
+# Utility commands, held in variables so they can be overridden.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = PWRPC_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# All paths derive from TOPdir; bin and lib get an ARCH subdirectory.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# No trailing blank after the value: make would keep it in HPLlib.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir = /usr/local/mpi
+MPinc = -I${MPdir}/include
+MPlib = ${MPdir}/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc holds the compiler flags locating the Linear Algebra headers and
+# LAlib names the library to link.  LAdir exists only as a helper for
+# defining those two; it and LAinc are left empty here.
+#
+LAdir =
+LAinc =
+LAlib = -lessl
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL include paths and link libraries -------------------------------
+# ----------------------------------------------------------------------
+# -I flags cover INCdir and its ARCH subdirectory, then LAinc and MPinc.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS =
+# (empty: none of the optional compile-time switches is enabled)
+# ----------------------------------------------------------------------
+# Definitions and include paths used by both CCNOOPT and CCFLAGS.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC = mpcc_r
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -O3 -qarch=ppc -qtune=604 -qmaxmem=-1
+# Linking uses a separate driver and appends the compile flags.
+LINKER = mpxlf_r
+LINKFLAGS = -bmaxdata:0x70000000 ${CCFLAGS}
+# RANLIB is set to echo, so invoking it merely prints its arguments.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
new file mode 100644
index 000000000..1ade2d8aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations (RM uses -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; used in BINdir, LIBdir and HPL_INCLUDES.
+ARCH         = SUN4SOL2-g_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base; include/, bin/ARCH and lib/ARCH are derived from it.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (no trailing blank: make would keep it in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here MPdir is a per-user tree; libmpich.a is linked by full path.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAdir and LAinc are empty; the library is named only by -xlic_lib.
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Selected: the [default] choice in each of the three categories above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I order: INCdir, INCdir/ARCH, LA, MPI.  Link order: HPL, LA, MPI.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty: none of the optional -DHPL_* flags above is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Combined flags; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS adds only -g (no optimization).
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+# The link command is f77 prefixed by "purify -best-effort".
+LINKER       = purify -best-effort f77
+LINKFLAGS    =
+# RANLIB is echo, so invoking it runs no real ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
new file mode 100644
index 000000000..1cbb371fd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations (RM uses -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; used in BINdir, LIBdir and HPL_INCLUDES.
+ARCH         = SUN4SOL2-g_VSIPL
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base; include/, bin/ARCH and lib/ARCH are derived from it.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (no trailing blank: make would keep it in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here MPdir is a per-user tree; libmpich.a is linked by full path.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# VSIPL tree under $(HOME)/local; headers via -I, archive by full path.
+LAdir        = $(HOME)/local/TASP_VSIPL_Core_Plus
+LAinc        = -I$(LAdir)/include
+LAlib        = $(LAdir)/lib/libvsip_c.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Left empty; this configuration sets -DHPL_CALL_VSIPL in HPL_OPTS below.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I order: INCdir, INCdir/ARCH, LA, MPI.  Link order: HPL, LA, MPI.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Selects the vsip library instead of the default BLAS F77 interface.
+HPL_OPTS     = -DHPL_CALL_VSIPL
+#
+# ----------------------------------------------------------------------
+# Combined flags; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS adds only -g (no optimization).
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -g
+# The link command is cc prefixed by "purify -best-effort".
+LINKER       = purify -best-effort cc
+LINKFLAGS    =
+# RANLIB is echo, so invoking it runs no real ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
new file mode 100644
index 000000000..a1d5d6315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# SHELL is the shell make uses to run recipe lines.
+SHELL        = /bin/sh
+# Command names for basic file operations (RM uses -f).
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; used in BINdir, LIBdir and HPL_INCLUDES.
+ARCH         = SUN4SOL2_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base; include/, bin/ARCH and lib/ARCH are derived from it.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# HPL archive path (no trailing blank: make would keep it in the value).
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Here MPdir is a per-user tree; libmpich.a is linked by full path.
+MPdir        = $(HOME)/local/mpi
+MPinc        = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include
+MPlib        = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAdir and LAinc are empty; the library is named only by -xlic_lib.
+LAdir        =
+LAinc        =
+LAlib        = -xlic_lib=sunperf
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Selected: the [default] choice in each of the three categories above.
+F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# -I order: INCdir, INCdir/ARCH, LA, MPI.  Link order: HPL, LA, MPI.
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# Empty: none of the optional -DHPL_* flags above is enabled.
+HPL_OPTS     =
+#
+# ----------------------------------------------------------------------
+# Combined flags; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT is HPL_DEFS alone; CCFLAGS adds the optimization/target flags.
+CC           = cc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa
+# LINKFLAGS repeats CCFLAGS' -dalign, -native, -xarch and -xO5 for f77.
+LINKER       = f77
+LINKFLAGS    = -dalign -native -xarch=v8plusa -xO5
+# RANLIB is echo, so invoking it runs no real ranlib program.
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS
new file mode 100644
index 000000000..fe12cae9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS
@@ -0,0 +1,187 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe command lines.
+SHELL = /bin/sh
+# Names of the basic file utilities.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also used as the per-arch subdirectory name below.
+ARCH = T3E_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base of every path; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# libhpl.a path; no trailing blank, since make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# Left empty on this platform: no MPI include path or library is named.
+MPdir =
+MPinc =
+MPlib =
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Left empty on this platform: no BLAS include path or library is named.
+LAdir =
+LAinc =
+LAlib =
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is passed for each
+#                       Fortran 77 string,  and  the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# Cray choices for the three categories, then two extra definitions.
+F2CDEFS = -DUpCase -DF77_INTEGER=long \
+          -DStringCrayStyle -DCRAY_BLAS -DHPL_USE_TIMES
+#
+# When UpCase is defined,  CRAY_BLAS redefines the BLAS routines used in
+# HPL to be prefixed with an S. In the Cray programming environment, the
+# default INTEGER and REAL size is 64 bits.  This  is  reflected  in the
+# Cray Scientific Library as well,  so SGEMM is the 64-bit matrix multi-
+# ply.
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No optional switch is enabled, so the defaults listed above apply.
+HPL_OPTS =
+#
+# ----------------------------------------------------------------------
+# Shared definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT holds only the shared definitions, CCFLAGS adds -O3.
+CC = cc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -O3
+# Link command and its flags.
+LINKER = f77
+LINKFLAGS = -O3,unroll2,pipeline2
+# Archive tools. RANLIB is echo -- presumably a no-op stand-in; confirm.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = echo
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS
new file mode 100644
index 000000000..3d8062061
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe command lines.
+SHELL = /bin/sh
+# Names of the basic file utilities.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also used as the per-arch subdirectory name below.
+ARCH = Tru64_FBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base of every path; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# libhpl.a path; no trailing blank, since make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# MPICH tree under /usr/local/mpi, using its alpha/ch_p4 subdirectory.
+MPdir = /usr/local/mpi
+MPinc = -I${MPdir}/include -I${MPdir}/alpha/ch_p4/include
+MPlib = ${MPdir}/alpha/ch_p4/lib/libmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only LAlib is set; HPL_LIBS below picks up -lcxml from it.
+LAdir =
+LAinc =
+LAlib = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is passed for each
+#                       Fortran 77 string,  and  the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# One option per category: Add_ naming, int INTEGER, Sun-style strings.
+F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No optional switch is enabled, so the defaults listed above apply.
+HPL_OPTS =
+#
+# ----------------------------------------------------------------------
+# Shared definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT holds only the shared definitions, CCFLAGS adds
+CC = cc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -arch host -tune host -std -O5
+# Link command and its flags.
+LINKER = f77
+LINKFLAGS = -nofor_main -O5 -arch host -tune host
+# Archive tools; unlike some other configurations, a real ranlib is named.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan
new file mode 100644
index 000000000..f9550412c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+# Shell that make uses to run recipe command lines.
+SHELL = /bin/sh
+# Names of the basic file utilities.
+CD = cd
+CP = cp
+LN_S = ln -s
+MKDIR = mkdir
+RM = /bin/rm -f
+TOUCH = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# Configuration name; also used as the per-arch subdirectory name below.
+ARCH = Tru64_FBLAS_elan
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is the base of every path; bin/ and lib/ are split per ARCH.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+# libhpl.a path; no trailing blank, since make would keep it in the value.
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+# No MPI directory is set; MPlib names -lmpi and -lelan directly.
+MPdir =
+MPinc =
+MPlib = -lmpi -lelan
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# Only LAlib is set; HPL_LIBS below picks up -lcxml from it.
+LAdir =
+LAinc =
+LAlib = -lcxml
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure is passed for each
+#                       Fortran 77 string,  and  the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+# One option per category: Add_ naming, int INTEGER, Sun-style strings.
+F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+# Include flags and library list built from the settings above.
+HPL_INCLUDES = -I${INCdir} -I${INCdir}/${ARCH} ${LAinc} ${MPinc}
+HPL_LIBS = ${HPLlib} ${LAlib} ${MPlib}
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+# No optional switch is enabled, so the defaults listed above apply.
+HPL_OPTS =
+#
+# ----------------------------------------------------------------------
+# Shared definitions; CCNOOPT and CCFLAGS below both start from these.
+HPL_DEFS = ${F2CDEFS} ${HPL_OPTS} ${HPL_INCLUDES}
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# C compiler; CCNOOPT holds only the shared definitions, CCFLAGS adds
+CC = cc
+CCNOOPT = ${HPL_DEFS}
+CCFLAGS = ${HPL_DEFS} -arch host -tune host -std -O5
+# Link command and its flags.
+LINKER = f77
+LINKFLAGS = -nofor_main -O5 -arch host -tune host
+# Archive tools; unlike some other configurations, a real ranlib is named.
+ARCHIVER = ar
+ARFLAGS = r
+RANLIB = ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in
new file mode 100644
index 000000000..8cbbd8242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in
@@ -0,0 +1,180 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = @SHELL@
+#
+CD           = @CD@
+CP           = @CP@
+LN_S         = @LN_S@
+MKDIR        = @MKDIR@
+RM           = @RM@
+TOUCH        = @TOUCH@
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+#
+ARCH         = @ARCH@
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# TOPdir is a fixed default (make_generic does not substitute it); edit as needed.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a 
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the  C  compiler where to find the Message Passing library
+# header files,  MPlib  is defined  to be the name of  the library to be 
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = @MPDIR@
+MPinc        = @MPINC@
+MPlib        = @MPLIB@
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the  C  compiler where to find the Linear Algebra  library
+# header files,  LAlib  is defined  to be the name of  the library to be 
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+#
+LAdir        = @LADIR@
+LAinc        = @LAINC@
+LAlib        = @LALIB@
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section  if and only if  you are not planning to use
+# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
+# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
+# options.  **One and only one**  option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_              : all lower case and a suffixed underscore  (Suns,
+#                       Intel, ...),                           [default]
+# -DNoChange          : all lower case (IBM RS6000),
+# -DUpCase            : all upper case (Cray),
+# -DAdd__             : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
+# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle    : The string address is passed at the string loca-
+#                       tion on the stack, and the string length is then
+#                       passed as  an  F77_INTEGER  after  all  explicit
+#                       stack arguments,                       [default]
+# -DStringStructPtr   : The address of a structure  is passed  for  each
+#                       Fortran 77  string,  and the structure is of the
+#                       form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal   : A structure is passed by value for each  Fortran
+#                       77 string,  and  the  structure is  of the form:
+#                       struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
+#                       Cray  fcd  (fortran  character  descriptor)  for
+#                       interoperation.
+#
+F2CDEFS      = @F2CDEFS@
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L           force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS       call the cblas interface;
+# -DHPL_CALL_VSIPL       call the vsip  library;
+# -DHPL_DETAILED_TIMING  enable detailed timers;
+#
+# By default HPL will:
+#    *) not copy L before broadcast,
+#    *) call the BLAS Fortran 77 interface,
+#    *) not display detailed timing information.
+#
+HPL_OPTS     =
+# 
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+#
+CC           = @CC@
+CCNOOPT      = $(HPL_DEFS) @CCNOOPT@
+CCFLAGS      = $(HPL_DEFS) @CCFLAGS@
+#
+LINKER       = @LINKER@
+LINKFLAGS    = @LINKFLAGS@
+#
+ARCHIVER     = @ARCHIVER@
+ARFLAGS      = @ARFLAGS@
+RANLIB       = @RANLIB@
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic
new file mode 100644
index 000000000..68cf74a3a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+#
+# Configure script to create Make.UNKNOWN from  Make.UNKNOWN.in  for the
+# HPL distribution, so users without a real Unix system can have a gene-
+# ric  Make.UNKNOWN  to  edit  for  their needs. This script substitutes
+# generic program names and commonly used option values for the @NAME@
+# placeholders in Make.UNKNOWN.in and writes the result to Make.UNKNOWN.
+#
+########################################################################
+# Each -e below fills in one @NAME@ placeholder; '%' is the s delimiter.
+sed -e 's%@SHELL@%/bin/sh%' \
+    -e 's%@CD@%cd%' \
+    -e 's%@CP@%cp%' \
+    -e 's%@LN_S@%ln -s%' \
+    -e 's%@MKDIR@%mkdir%' \
+    -e 's%@RM@%/bin/rm -f%' \
+    -e 's%@TOUCH@%touch%' \
+    -e 's%@ARCH@%UNKNOWN%' \
+    -e 's%@CC@%mpicc%' \
+    -e 's%@CCNOOPT@%%' \
+    -e 's%@CCFLAGS@%%' \
+    -e 's%@LINKER@%mpif77%' \
+    -e 's%@LINKFLAGS@%%' \
+    -e 's%@ARCHIVER@%ar%' \
+    -e 's%@ARFLAGS@%r%' \
+    -e 's%@RANLIB@%echo%' \
+    -e 's%@MPDIR@%%' \
+    -e 's%@MPINC@%%' \
+    -e 's%@MPLIB@%%' \
+    -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \
+    -e 's%@LADIR@%%' \
+    -e 's%@LAINC@%%' \
+    -e 's%@LALIB@%-lblas%' \
+    Make.UNKNOWN.in > Make.UNKNOWN
+#
+########################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am
new file mode 100644
index 000000000..2e6d3d454
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am
@@ -0,0 +1,42 @@
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+
+lib_LIBRARIES = libhpl.a
+## NOTE(review): some files below appear more than once (e.g. blas/HPL_dscal.c, pauxil/HPL_numrocI.c).
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
+blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \
+blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
+comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
+grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \
+pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \
+pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \
+pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \
+pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \
+pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \
+pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in
new file mode 100644
index 000000000..139ecbad0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in
@@ -0,0 +1,1355 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LIBRARIES = $(lib_LIBRARIES)
+AR = ar
+ARFLAGS = cru
+AM_V_AR = $(am__v_AR_@AM_V@)
+am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
+am__v_AR_0 = @echo "  AR      " $@;
+am__v_AR_1 = 
+libhpl_a_AR = $(AR) $(ARFLAGS)
+libhpl_a_LIBADD =
+am__dirstamp = $(am__leading_dot)dirstamp
+am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \
+	auxil/HPL_fprintf.$(OBJEXT) auxil/HPL_dlacpy.$(OBJEXT) \
+	auxil/HPL_dlamch.$(OBJEXT) blas/HPL_dscal.$(OBJEXT) \
+	blas/HPL_dtrsm.$(OBJEXT) blas/HPL_dtrsv.$(OBJEXT) \
+	blas/HPL_idamax.$(OBJEXT) blas/HPL_dgemv.$(OBJEXT) \
+	blas/HPL_dscal.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \
+	blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \
+	blas/HPL_dgemv.$(OBJEXT) blas/HPL_dger.$(OBJEXT) \
+	comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \
+	comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \
+	comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \
+	comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \
+	comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \
+	comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \
+	comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \
+	grid/HPL_sum.$(OBJEXT) grid/HPL_grid_info.$(OBJEXT) \
+	grid/HPL_grid_init.$(OBJEXT) grid/HPL_all_reduce.$(OBJEXT) \
+	grid/HPL_broadcast.$(OBJEXT) grid/HPL_grid_exit.$(OBJEXT) \
+	grid/HPL_max.$(OBJEXT) grid/HPL_min.$(OBJEXT) \
+	grid/HPL_all_reduce.$(OBJEXT) grid/HPL_barrier.$(OBJEXT) \
+	panel/HPL_pdpanel_disp.$(OBJEXT) \
+	panel/HPL_pdpanel_free.$(OBJEXT) \
+	panel/HPL_pdpanel_init.$(OBJEXT) \
+	panel/HPL_pdpanel_new.$(OBJEXT) pauxil/HPL_pdlamch.$(OBJEXT) \
+	pauxil/HPL_pdlange.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \
+	pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \
+	pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \
+	pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \
+	pauxil/HPL_dlaswp02N.$(OBJEXT) pauxil/HPL_dlaswp03N.$(OBJEXT) \
+	pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \
+	pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \
+	pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \
+	pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \
+	pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT) \
+	pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \
+	pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \
+	pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \
+	pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \
+	pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \
+	pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \
+	pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \
+	pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocmax.$(OBJEXT) \
+	pfact/HPL_dlocswpN.$(OBJEXT) pfact/HPL_dlocswpT.$(OBJEXT) \
+	pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdpanllN.$(OBJEXT) \
+	pfact/HPL_pdpanllT.$(OBJEXT) pfact/HPL_pdpanrlN.$(OBJEXT) \
+	pfact/HPL_pdpanrlT.$(OBJEXT) pfact/HPL_pdrpancrN.$(OBJEXT) \
+	pfact/HPL_pdrpancrT.$(OBJEXT) pfact/HPL_pdrpanllN.$(OBJEXT) \
+	pfact/HPL_pdrpanllT.$(OBJEXT) pfact/HPL_pdrpanrlN.$(OBJEXT) \
+	pfact/HPL_pdrpanrlT.$(OBJEXT) pauxil/HPL_pabort.$(OBJEXT) \
+	pauxil/HPL_pdlamch.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdupdateNN.$(OBJEXT) \
+	pgesv/HPL_pdupdateNT.$(OBJEXT) pgesv/HPL_pdupdateTN.$(OBJEXT) \
+	pgesv/HPL_pdupdateTT.$(OBJEXT) pgesv/HPL_equil.$(OBJEXT) \
+	pgesv/HPL_pipid.$(OBJEXT) pgesv/HPL_plindx0.$(OBJEXT) \
+	pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_plindx1.$(OBJEXT) \
+	pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \
+	pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \
+	pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \
+	pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \
+	pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \
+	pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \
+	pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \
+	pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT)
+libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \
+	auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \
+	auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \
+	blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \
+	blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \
+	blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \
+	blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \
+	comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \
+	comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \
+	comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \
+	comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \
+	comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \
+	comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \
+	comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \
+	grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \
+	grid/$(DEPDIR)/HPL_grid_exit.Po \
+	grid/$(DEPDIR)/HPL_grid_info.Po \
+	grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \
+	grid/$(DEPDIR)/HPL_min.Po grid/$(DEPDIR)/HPL_reduce.Po \
+	grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_free.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_init.Po \
+	panel/$(DEPDIR)/HPL_pdpanel_new.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \
+	pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \
+	pauxil/$(DEPDIR)/HPL_indxg2p.Po \
+	pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \
+	pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \
+	pauxil/$(DEPDIR)/HPL_pdlamch.Po \
+	pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \
+	pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \
+	pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \
+	pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdpanrlT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpancrT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanllT.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \
+	pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \
+	pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \
+	pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \
+	pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \
+	pgesv/$(DEPDIR)/HPL_pdtrsv.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \
+	pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \
+	pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \
+	pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \
+	pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \
+	pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(libhpl_a_SOURCES)
+DIST_SOURCES = $(libhpl_a_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+# Configure-time substitutions.  Every variable below receives the value
+# substituted for the like-named placeholder.  The grouping is purely
+# organisational (by apparent role); the assignments are independent of
+# one another.
+
+# Compiler toolchain, flags and object naming.
+CC = @CC@
+ac_ct_CC = @ac_ct_CC@
+MPICC = @MPICC@
+CPP = @CPP@
+CCDEPMODE = @CCDEPMODE@
+DEFS = @DEFS@
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+BLAS_LIBS = @BLAS_LIBS@
+LIBOBJS = @LIBOBJS@
+LTLIBOBJS = @LTLIBOBJS@
+OBJEXT = @OBJEXT@
+EXEEXT = @EXEEXT@
+DEPDIR = @DEPDIR@
+RANLIB = @RANLIB@
+STRIP = @STRIP@
+
+# Autotools programs and automake internals.
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+MAKEINFO = @MAKEINFO@
+AMTAR = @AMTAR@
+am__tar = @am__tar@
+am__untar = @am__untar@
+am__include = @am__include@
+am__quote = @am__quote@
+am__leading_dot = @am__leading_dot@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+SET_MAKE = @SET_MAKE@
+
+# Shell and general-purpose utilities.
+SHELL = @SHELL@
+AWK = @AWK@
+GREP = @GREP@
+EGREP = @EGREP@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+CYGPATH_W = @CYGPATH_W@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+MKDIR_P = @MKDIR_P@
+mkdir_p = @mkdir_p@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+install_sh = @install_sh@
+
+# Package identity and system aliases.
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+VERSION = @VERSION@
+program_transform_name = @program_transform_name@
+build_alias = @build_alias@
+host_alias = @host_alias@
+target_alias = @target_alias@
+
+# Source tree and build tree locations.
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+builddir = @builddir@
+top_builddir = @top_builddir@
+top_build_prefix = @top_build_prefix@
+abs_srcdir = @abs_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+abs_builddir = @abs_builddir@
+abs_top_builddir = @abs_top_builddir@
+
+# Installation directories.
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+libdir = @libdir@
+includedir = @includedir@
+oldincludedir = @oldincludedir@
+datarootdir = @datarootdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+localedir = @localedir@
+mandir = @mandir@
+infodir = @infodir@
+docdir = @docdir@
+htmldir = @htmldir@
+dvidir = @dvidir@
+pdfdir = @pdfdir@
+psdir = @psdir@
+# Add $(top_srcdir)/../include to the header search path of every compile.
+AM_CPPFLAGS = -I$(top_srcdir)/../include
+# The one static library built in this directory.
+lib_LIBRARIES = libhpl.a
+# Sources of libhpl.a, grouped by subdirectory, each file listed exactly
+# once (the list used to repeat many entries two or three times).
+# NOTE(review): this file is generated; mirror the clean-up in
+# Makefile.am so that a regeneration does not bring the duplicates back.
+libhpl_a_SOURCES = \
+auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \
+blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \
+blas/HPL_dgemv.c blas/HPL_daxpy.c blas/HPL_dcopy.c blas/HPL_dgemm.c \
+blas/HPL_dger.c \
+comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \
+comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \
+comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \
+grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \
+grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \
+grid/HPL_min.c grid/HPL_barrier.c \
+panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \
+pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c pauxil/HPL_pabort.c \
+pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c \
+pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \
+pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \
+pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \
+pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \
+pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \
+pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \
+pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \
+pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \
+pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \
+pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c \
+pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c \
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \
+pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \
+pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \
+pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c \
+pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \
+pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \
+pgesv/HPL_pdtrsv.c pgesv/HPL_logsort.c pgesv/HPL_perm.c
+
+# Default goal; all real work is delegated to all-am.
+all: all-am
+
+# Clear the suffix list, then register only the suffixes used here.
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+# Regenerate Makefile.in when Makefile.am or a configure dependency is
+# newer.  For a changed prerequisite that appears in am__configure_deps
+# the top-level am--refresh target is run: on success the recipe exits 0
+# if the target now exists, or leaves the loop to run automake below if
+# it does not; on failure it exits 1.  Otherwise automake is run from
+# the top source directory for src/Makefile.
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/Makefile
+# Regenerate this Makefile.  If config.status itself is among the newer
+# prerequisites, run the top-level am--refresh target; otherwise rerun
+# config.status for just this file.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+# config.status, configure and aclocal.m4 are all rebuilt by the
+# am--refresh target of the top-level build directory.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+# Rule with neither prerequisites nor recipe for the aclocal inputs.
+# NOTE(review): presumably present so that a missing input file does not
+# stop make with "no rule to make target" -- confirm.
+$(am__aclocal_m4_deps):
+# Install the static libraries into $(DESTDIR)$(libdir).
+# Step 1: collect the entries of lib_LIBRARIES that exist as regular
+# files (the list is emptied when libdir is empty); if any remain,
+# create the destination directory and copy them with INSTALL_DATA.
+# Step 2: run RANLIB on each installed copy from inside the destination
+# directory.  The file name used there is the shell variable f.
+# NOTE(review): f is presumably set by the am__strip_dir fragment, which
+# is defined outside this part of the file -- confirm.
+install-libLIBRARIES: $(lib_LIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; }
+	@$(POST_INSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  if test -f $$p; then \
+	    $(am__strip_dir) \
+	    echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \
+	    ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \
+	  else :; fi; \
+	done
+
+# Remove the installed libraries: reduce each entry to its base name and
+# hand the list, together with the destination directory, to the
+# am__uninstall_files_from_dir fragment (defined elsewhere).
+uninstall-libLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir)
+
+# Delete the built libraries from the build directory; the leading "-"
+# makes make ignore a failure of this command.
+clean-libLIBRARIES:
+	-test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
+# Stamp files recording that auxil/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+auxil/$(am__dirstamp):
+	@$(MKDIR_P) auxil
+	@: > $@
+auxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) auxil/$(DEPDIR)
+	@: > $@
+# Every auxil object requires both stamps.
+auxil/HPL_dlatcpy.$(OBJEXT) auxil/HPL_fprintf.$(OBJEXT) \
+auxil/HPL_dlacpy.$(OBJEXT) auxil/HPL_dlamch.$(OBJEXT): \
+	auxil/$(am__dirstamp) auxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that blas/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+blas/$(am__dirstamp):
+	@$(MKDIR_P) blas
+	@: > $@
+blas/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) blas/$(DEPDIR)
+	@: > $@
+# Every blas object requires both stamps.
+blas/HPL_dscal.$(OBJEXT) blas/HPL_dtrsm.$(OBJEXT) \
+blas/HPL_dtrsv.$(OBJEXT) blas/HPL_idamax.$(OBJEXT) \
+blas/HPL_dgemv.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \
+blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \
+blas/HPL_dger.$(OBJEXT): \
+	blas/$(am__dirstamp) blas/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that comm/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+comm/$(am__dirstamp):
+	@$(MKDIR_P) comm
+	@: > $@
+comm/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) comm/$(DEPDIR)
+	@: > $@
+# Every comm object requires both stamps.
+comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \
+comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \
+comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \
+comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \
+comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \
+comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \
+comm/HPL_packL.$(OBJEXT): \
+	comm/$(am__dirstamp) comm/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that grid/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+grid/$(am__dirstamp):
+	@$(MKDIR_P) grid
+	@: > $@
+grid/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) grid/$(DEPDIR)
+	@: > $@
+# Every grid object requires both stamps.
+grid/HPL_reduce.$(OBJEXT) grid/HPL_sum.$(OBJEXT) \
+grid/HPL_grid_info.$(OBJEXT) grid/HPL_grid_init.$(OBJEXT) \
+grid/HPL_all_reduce.$(OBJEXT) grid/HPL_broadcast.$(OBJEXT) \
+grid/HPL_grid_exit.$(OBJEXT) grid/HPL_max.$(OBJEXT) \
+grid/HPL_min.$(OBJEXT) grid/HPL_barrier.$(OBJEXT): \
+	grid/$(am__dirstamp) grid/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that panel/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+panel/$(am__dirstamp):
+	@$(MKDIR_P) panel
+	@: > $@
+panel/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) panel/$(DEPDIR)
+	@: > $@
+# Every panel object requires both stamps.
+panel/HPL_pdpanel_disp.$(OBJEXT) panel/HPL_pdpanel_free.$(OBJEXT) \
+panel/HPL_pdpanel_init.$(OBJEXT) panel/HPL_pdpanel_new.$(OBJEXT): \
+	panel/$(am__dirstamp) panel/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that pauxil/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+pauxil/$(am__dirstamp):
+	@$(MKDIR_P) pauxil
+	@: > $@
+pauxil/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pauxil/$(DEPDIR)
+	@: > $@
+# The pauxil objects listed here require both stamps.
+pauxil/HPL_pdlamch.$(OBJEXT) pauxil/HPL_pdlange.$(OBJEXT) \
+pauxil/HPL_indxg2p.$(OBJEXT) pauxil/HPL_numroc.$(OBJEXT) \
+pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \
+pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \
+pauxil/HPL_dlaswp02N.$(OBJEXT) pauxil/HPL_dlaswp03N.$(OBJEXT) \
+pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \
+pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \
+pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \
+pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \
+pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT): \
+	pauxil/$(am__dirstamp) pauxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that pfact/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+pfact/$(am__dirstamp):
+	@$(MKDIR_P) pfact
+	@: > $@
+pfact/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pfact/$(DEPDIR)
+	@: > $@
+# Every pfact object requires both stamps.
+pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \
+pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \
+pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \
+pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \
+pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \
+pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \
+pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \
+pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocswpN.$(OBJEXT) \
+pfact/HPL_dlocswpT.$(OBJEXT): \
+	pfact/$(am__dirstamp) pfact/$(DEPDIR)/$(am__dirstamp)
+# One further pauxil object; it requires the pauxil stamps.
+pauxil/HPL_pabort.$(OBJEXT): \
+	pauxil/$(am__dirstamp) pauxil/$(DEPDIR)/$(am__dirstamp)
+# Stamp files recording that pgesv/ and its dependency directory have
+# been created: each recipe makes the directory, then writes an empty
+# stamp file.
+pgesv/$(am__dirstamp):
+	@$(MKDIR_P) pgesv
+	@: > $@
+pgesv/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pgesv/$(DEPDIR)
+	@: > $@
+# Every pgesv object requires both stamps.
+pgesv/HPL_pdgesv0.$(OBJEXT) pgesv/HPL_pdgesv.$(OBJEXT) \
+pgesv/HPL_pdgesvK1.$(OBJEXT) pgesv/HPL_pdgesvK2.$(OBJEXT) \
+pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \
+pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \
+pgesv/HPL_equil.$(OBJEXT) pgesv/HPL_pipid.$(OBJEXT) \
+pgesv/HPL_plindx0.$(OBJEXT) pgesv/HPL_plindx10.$(OBJEXT) \
+pgesv/HPL_plindx1.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \
+pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \
+pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \
+pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \
+pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \
+pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT): \
+	pgesv/$(am__dirstamp) pgesv/$(DEPDIR)/$(am__dirstamp)
+
+
+# Build the archive from scratch: remove any previous copy (a failure of
+# the removal is ignored), archive the objects plus any LIBADD entries,
+# then run RANLIB on the result.
+libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) \
+		$(EXTRA_libhpl_a_DEPENDENCIES)
+	$(AM_V_at)-rm -f $@
+	$(AM_V_AR)$(libhpl_a_AR) $@ $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD)
+	$(AM_V_at)$(RANLIB) $@
+
+# Remove object files from this directory and from every source
+# subdirectory; errors from rm are ignored.
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT) auxil/*.$(OBJEXT) blas/*.$(OBJEXT) \
+	  comm/*.$(OBJEXT) grid/*.$(OBJEXT) panel/*.$(OBJEXT) \
+	  pauxil/*.$(OBJEXT) pfact/*.$(OBJEXT) pgesv/*.$(OBJEXT)
+
+# Remove any *.tab.c files from this directory.
+distclean-compile:
+	-rm -f *.tab.c
+
+# Per-object dependency fragments (.Po files), one for each source of
+# the library.  Every line carries a configure-time prefix and ends with
+# the am--include-marker comment; keep the lines themselves untouched.
+# NOTE(review): presumably the prefix is resolved so that these includes
+# are active only when dependency tracking is enabled -- confirm.
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker
+
+# Create a placeholder for each dependency file named in
+# am__depfiles_remade: make its directory, write a one-line "# dummy"
+# file under a temporary name, then rename it into place so a partially
+# written file never appears under the final name.
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+# Convenience target that creates all of the placeholders above.
+am--depfiles: $(am__depfiles_remade)
+
+# Suffix rule: compile a .c file into a .o file.  Each recipe line
+# carries a configure-time prefix, giving three alternative forms:
+#  - am__fastdepCC_TRUE lines: compile with -MT/-MD/-MP/-MF so that the
+#    dependency output goes to DIR/$(DEPDIR)/NAME.Tpo, and rename it to
+#    NAME.Po only after the compile has succeeded;
+#  - AMDEP_TRUE + am__fastdepCC_FALSE lines: pass source, object,
+#    DEPDIR and the dependency mode through the environment to depcomp;
+#  - the last line: the plain compile command.
+# NOTE(review): presumably exactly one form is left active once the
+# prefixes have been resolved -- confirm.
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+# Same rule for the .obj object suffix; the source path is passed
+# through $(CYGPATH_W) before it reaches the compiler.
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+# Build an ID database with mkid from the unique list of tagged files.
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+# "tags" and "TAGS" both end up at tags-am.
+tags: tags-am
+TAGS: tags
+
+# Run $(ETAGS) over the unique tagged files.  Nothing is run when
+# ETAGS_ARGS, the positional parameters and the file list are all empty.
+# Any positional parameters are passed ahead of the file list.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+# "ctags" and "CTAGS" both end up at ctags-am.
+ctags: ctags-am
+
+CTAGS: ctags
+# Run $(CTAGS) over the unique tagged files, unless both CTAGS_ARGS and
+# the file list are empty.
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+# Run gtags from the top source directory, passing it the absolute path
+# of the top build directory.
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+# Append one path per tagged file to $(top_builddir)/cscope.files: files
+# found relative to the current directory are written as subdir/file,
+# all others relative to the source directory.
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+# Remove the tag databases produced by the targets above.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Bring BUILT_SOURCES up to date first, then re-invoke make for the
+# actual copy step.
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+# Copy every entry of DISTFILES into $(distdir):
+#  - leading srcdir/ and top_srcdir/ prefixes are stripped from the
+#    names (the sed expressions first escape regex metacharacters in
+#    those directory names);
+#  - the sub-directories needed under $(distdir) are created up front;
+#  - directories are copied recursively, from the source tree as well
+#    when it differs from the build tree, and directories already
+#    present in $(distdir) are made accessible to their owner;
+#  - a regular file is copied only if it is not already present.
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+# install-am: after all-am, build install-exec-am and install-data-am in a single sub-make.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+# install-strip: re-run "install" with $(INSTALL_STRIP_PROGRAM); when $(STRIP) is set, INSTALL_PROGRAM_ENV also carries STRIPPROG.
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+# distclean-generic: remove the CONFIG_CLEAN lists (VPATH list only when srcdir is not ".") and all dirstamp files; errors are ignored.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f auxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f auxil/$(am__dirstamp)
+	-rm -f blas/$(DEPDIR)/$(am__dirstamp)
+	-rm -f blas/$(am__dirstamp)
+	-rm -f comm/$(DEPDIR)/$(am__dirstamp)
+	-rm -f comm/$(am__dirstamp)
+	-rm -f grid/$(DEPDIR)/$(am__dirstamp)
+	-rm -f grid/$(am__dirstamp)
+	-rm -f panel/$(DEPDIR)/$(am__dirstamp)
+	-rm -f panel/$(am__dirstamp)
+	-rm -f pauxil/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pauxil/$(am__dirstamp)
+	-rm -f pfact/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pfact/$(am__dirstamp)
+	-rm -f pgesv/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pgesv/$(am__dirstamp)
+# maintainer-clean-generic: only prints a two-line warning; it removes nothing itself.
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLIBRARIES mostlyclean-am
+# distclean: after distclean-am, delete every listed .Po file under <dir>/$(DEPDIR) and finally the Makefile; rm failures are ignored.
+distclean: distclean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+# Forwarding stubs: each public target depends on its -am twin; the -am targets here are empty except install-exec-am (install-libLIBRARIES).
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+# maintainer-clean: after maintainer-clean-am, delete the same .Po files and Makefile that the distclean recipe lists; rm failures are ignored.
+maintainer-clean: maintainer-clean-am
+		-rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlamch.Po
+	-rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po
+	-rm -f auxil/$(DEPDIR)/HPL_fprintf.Po
+	-rm -f blas/$(DEPDIR)/HPL_daxpy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dcopy.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dgemv.Po
+	-rm -f blas/$(DEPDIR)/HPL_dger.Po
+	-rm -f blas/$(DEPDIR)/HPL_dscal.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsm.Po
+	-rm -f blas/$(DEPDIR)/HPL_dtrsv.Po
+	-rm -f blas/$(DEPDIR)/HPL_idamax.Po
+	-rm -f comm/$(DEPDIR)/HPL_1rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_1ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_2rinM.Po
+	-rm -f comm/$(DEPDIR)/HPL_2ring.Po
+	-rm -f comm/$(DEPDIR)/HPL_bcast.Po
+	-rm -f comm/$(DEPDIR)/HPL_binit.Po
+	-rm -f comm/$(DEPDIR)/HPL_blonM.Po
+	-rm -f comm/$(DEPDIR)/HPL_blong.Po
+	-rm -f comm/$(DEPDIR)/HPL_bwait.Po
+	-rm -f comm/$(DEPDIR)/HPL_packL.Po
+	-rm -f comm/$(DEPDIR)/HPL_recv.Po
+	-rm -f comm/$(DEPDIR)/HPL_sdrv.Po
+	-rm -f comm/$(DEPDIR)/HPL_send.Po
+	-rm -f grid/$(DEPDIR)/HPL_all_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_barrier.Po
+	-rm -f grid/$(DEPDIR)/HPL_broadcast.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_exit.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_info.Po
+	-rm -f grid/$(DEPDIR)/HPL_grid_init.Po
+	-rm -f grid/$(DEPDIR)/HPL_max.Po
+	-rm -f grid/$(DEPDIR)/HPL_min.Po
+	-rm -f grid/$(DEPDIR)/HPL_reduce.Po
+	-rm -f grid/$(DEPDIR)/HPL_sum.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po
+	-rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numroc.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pabort.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po
+	-rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdfact.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po
+	-rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_equil.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_logsort.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_perm.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_pipid.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_rollT.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po
+	-rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+# mostlyclean, pdf and ps forward to their -am twins; pdf-am and ps-am are empty; uninstall-am forwards to uninstall-libLIBRARIES.
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLIBRARIES
+# NOTE(review): .MAKE is not a GNU make special target; presumably kept for other make implementations - confirm.
+.MAKE: install-am install-strip
+# Targets listed under .PHONY are command names rather than files, so they are always considered out of date.
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am \
+	install-libLIBRARIES install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am uninstall-libLIBRARIES
+# .PRECIOUS: make must not delete the Makefile if the recipe rebuilding it is interrupted.
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c
new file mode 100644
index 000000000..bf0c5e727
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c
@@ -0,0 +1,129 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_abort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_abort( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_abort displays an error message on stderr and halts execution.
+ * The formatted text is truncated to fit cline; exit( 0 ) ends the run.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded */
+   va_end( argptr );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR in function", SRNAME, cline );
+   else
+      HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+   exit( 0 );
+/*
+ * End of HPL_abort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c
new file mode 100644
index 000000000..ec71180eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c
@@ -0,0 +1,343 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LACPY_M_DEPTH
+ * #define    HPL_LACPY_M_DEPTH       32
+ * #define    HPL_LACPY_LOG2_M_DEPTH   5
+ * #endif
+ * #ifndef HPL_LACPY_N_DEPTH
+ * #define    HPL_LACPY_N_DEPTH        4
+ * #define    HPL_LACPY_LOG2_N_DEPTH   2
+ * #endif
+ */
+#ifndef HPL_LACPY_M_DEPTH
+#define    HPL_LACPY_M_DEPTH        4
+#define    HPL_LACPY_LOG2_M_DEPTH   2
+#endif
+#ifndef HPL_LACPY_N_DEPTH
+#define    HPL_LACPY_N_DEPTH        2
+#define    HPL_LACPY_LOG2_N_DEPTH   1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlacpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlacpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlacpy copies an array A into an array B.
+ * With HPL_LACPY_USE_COPY each column is copied by one HPL_dcopy call.
+ *
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of rows of the arrays A and
+ *         B. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies  the number of columns of the arrays A
+ *         and B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LACPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+   const double               * A0 = A,              * A1 = A +     LDA;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+   const double               * A0 = A,              * A1 = A +     LDA,
+                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = ( (unsigned int)(LDA) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
+                              incA0 = (unsigned int)(LDA) - M,
+                              incB0 = (unsigned int)(LDB) - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LACPY_USE_COPY
+   for( j = 0; j < N; j++ ) HPL_dcopy( M, A + (size_t)(j) * LDA, 1, B + (size_t)(j) * LDB, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
+                                      HPL_LACPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
+                                      HPL_LACPY_LOG2_N_DEPTH );
+/* mu, nu: M and N rounded down to multiples of the unrolling depths */
+   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 0] = A0[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
+#endif
+
+#if ( HPL_LACPY_M_DEPTH >  1 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 1] = A0[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
+         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
+         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
+         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
+         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
+         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
+         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
+         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
+         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
+         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
+         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
+         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
+#endif
+
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
+         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
+         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
+         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
+         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
+         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
+         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
+         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
+         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
+         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
+         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
+         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
+         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
+         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
+         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
+         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
+#endif
+
+#endif
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
+         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
+         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
+         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
+#endif
+      }
+/* Remaining M - mu rows of the current group of columns */
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+         *B0 = *A0; B0++; A0++;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
+         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
+#endif
+      }
+
+#if   ( HPL_LACPY_N_DEPTH ==  1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  2 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+#elif ( HPL_LACPY_N_DEPTH ==  4 )
+      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
+      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
+#endif
+   }
+/* Remaining N - nu columns are copied one at a time through A0 / B0 */
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
+           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
+      {
+         B0[ 0] = A0[ 0];
+#if ( HPL_LACPY_M_DEPTH >  1 )
+         B0[ 1] = A0[ 1];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  2 )
+         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  4 )
+         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
+#endif
+#if ( HPL_LACPY_M_DEPTH >  8 )
+         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
+         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
+#endif
+#if ( HPL_LACPY_M_DEPTH > 16 )
+         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
+         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
+         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
+         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
+#endif
+      }
+      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlacpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c
new file mode 100644
index 000000000..c685f0d5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c
@@ -0,0 +1,876 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static function prototypes -- helpers defined further down this file
+ * ---------------------------------------------------------------------
+ */
+static void     HPL_dlamc1
+STDC_ARGS(
+(  int *,           int *,           int *,           int * ) );
+static void     HPL_dlamc2
+STDC_ARGS(
+(  int *,           int *,           int *,           double *,
+   int *,           double *,        int *,           double * ) );
+static double   HPL_dlamc3
+STDC_ARGS(
+(  const double,    const double ) );
+static void     HPL_dlamc4
+STDC_ARGS(
+(  int *,           const double,    const int ) );
+static void     HPL_dlamc5
+STDC_ARGS(
+(  const int,       const int,       const int,       const int,
+   int *,           double * ) );
+static double   HPL_dipow
+STDC_ARGS(
+(  const double,    const int ) );
+
+#ifdef STDC_HEADERS
+double HPL_dlamch
+(
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_dlamch
+( CMACH )
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamch  returns the  machine-specific  arithmetic constant selec-
+ * ted by CMACH:  the relative machine precision (eps), the safe minimum
+ * (sfmin) such that  1 / sfmin  does not overflow,  the base of the ma-
+ * chine (base),  the precision (prec),  the number of (base) digits  in
+ * the mantissa (t),  whether rounding occurs in addition  (rnd=1.0, and
+ * 0.0 otherwise),  the  minimum  exponent  before  (gradual)  underflow
+ * (emin),  the underflow threshold (rmin),  the largest exponent before
+ * overflow (emax), or the overflow threshold (rmax).
+ *
+ * The constants are obtained  from  HPL_dlamc2  on the first call only;
+ * they are kept in static variables and reused by every later call.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * CMACH   (local input)                 const HPL_T_MACH
+ *         Selects the value returned by HPL_dlamch:
+ *            HPL_MACH_EPS    eps    relative machine precision,
+ *            HPL_MACH_SFMIN  sfmin  safe minimum,
+ *            HPL_MACH_BASE   base   base of the machine,
+ *            HPL_MACH_PREC   prec   eps*base,
+ *            HPL_MACH_MLEN   t      number of digits in the mantissa,
+ *            HPL_MACH_RND    rnd    1.0 if rounding occurs in addition,
+ *            HPL_MACH_EMIN   emin   minimum exponent before underflow,
+ *            HPL_MACH_RMIN   rmin   underflow threshold,
+ *            HPL_MACH_EMAX   emax   largest exponent before overflow,
+ *            HPL_MACH_RMAX   rmax   overflow threshold.
+ *         Any other value selects eps.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   static int                 first=1;
+   static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
+                              rmax, prec;
+   int                        beta=0, imax=0, imin=0, it=0, lrnd=0;
+   double                     invmax, value;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+/*
+ * First call: probe the arithmetic once and cache the results.
+ */
+      first = 0;
+      HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax );
+      base = (double)(beta);  t    = (double)(it);
+      emin = (double)(imin);  emax = (double)(imax);
+/*
+ * eps is recomputed from base and t; it is halved when addition rounds.
+ */
+      if( lrnd == 0 )
+      { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it );            }
+      else
+      { rnd = HPL_rone;  eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; }
+      prec = eps * base;
+/*
+ * Use 1/rmax plus a bit when it is not below rmin,  to avoid the possi-
+ * bility of rounding causing overflow when computing  1/sfmin.
+ */
+      invmax = HPL_rone / rmax;
+      if( invmax >= rmin ) sfmin = invmax * ( HPL_rone + eps );
+      else                 sfmin = rmin;
+   }
+
+   if(      CMACH == HPL_MACH_EPS   ) value = eps;
+   else if( CMACH == HPL_MACH_SFMIN ) value = sfmin;
+   else if( CMACH == HPL_MACH_BASE  ) value = base;
+   else if( CMACH == HPL_MACH_PREC  ) value = prec;
+   else if( CMACH == HPL_MACH_MLEN  ) value = t;
+   else if( CMACH == HPL_MACH_RND   ) value = rnd;
+   else if( CMACH == HPL_MACH_EMIN  ) value = emin;
+   else if( CMACH == HPL_MACH_RMIN  ) value = rmin;
+   else if( CMACH == HPL_MACH_EMAX  ) value = emax;
+   else if( CMACH == HPL_MACH_RMAX  ) value = rmax;
+   else                               value = eps;
+
+   return( value );
+/*
+ * End of HPL_dlamch
+ */
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc1
+(
+   int                        * BETA,
+   int                        * T,
+   int                        * RND,
+   int                        * IEEE1
+)
+#else
+static void HPL_dlamc1
+( BETA, T, RND, IEEE1 )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * IEEE1, * RND, * T;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
+ * and IEEE1.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
+ * based on the function ENVRON  by Malcolm and incorporated suggestions
+ * by Gentleman and Marovich. See
+ *
+ * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+ * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
+ *
+ * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+ * properties of  floating point arithmetic units.,  Comms. of  the ACM,
+ * 17, 276-277 (1974).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition.  This may not be a  reliable guide to the
+ *         way in which the machine performs its arithmetic.
+ *
+ * IEEE1   (local output)              int *
+ *         Specifies  whether  rounding  appears  to be done in the IEEE
+ *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b, c, f, one, qtr, savec, t1, t2;
+   static int                 first=1, lbeta, lieee1, lrnd, lt;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; one = HPL_rone;
+/*
+ * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
+ * RND;  being static,  they are computed on the first call only.  Every
+ * sum below goes through HPL_dlamc3  to ensure that relevant values are
+ * stored and not held in registers, or are not affected by optimizers.
+ *
+ * Compute  a = 2.0**m  with the  smallest  positive integer m such that
+ * fl( a + 1.0 ) == a.
+ */
+      a = HPL_rone; c = HPL_rone;
+      do
+      { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
+      while( c == HPL_rone );
+/*
+ * Now compute b = 2.0**m with the smallest positive integer m such that
+ * fl( a + b ) > a.
+ */
+      b = HPL_rone; c = HPL_dlamc3( a, b );
+      while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
+/*
+ * Now compute the base.  a and c  are  neighbouring floating point num-
+ * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
+ * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
+ * BETA and not (BETA-1).
+ */
+      qtr = one / 4.0; savec = c;
+      c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
+/*
+ * Now  determine  whether  rounding or chopping occurs, by adding a bit
+ * less than BETA/2 and a bit more than BETA/2 to a.
+ */
+      b = (double)(lbeta);
+      f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( c == a ) { lrnd = 1; } else { lrnd = 0; }
+      f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
+      if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
+/*
+ * Try  and decide whether rounding is done in the  IEEE  round to nea-
+ * rest style.  b/2 is half a unit in the last place of the two numbers
+ * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
+ * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
+ * to savec should change savec.
+ */
+      t1 = HPL_dlamc3( b / HPL_rtwo, a );
+      t2 = HPL_dlamc3( b / HPL_rtwo, savec );
+      if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
+      else                                                  lieee1 = 0;
+/*
+ * Now find the mantissa, T. It should be the integer part of log to the
+ * base BETA of a, however it is safer to determine T by powering. So we
+ * find T as the smallest positive integer for which the computed value
+ * of ( beta**t + 1.0 ) - beta**t  is no longer equal to 1.0.
+ */
+      lt = 0; a = HPL_rone; c = HPL_rone;
+
+      do
+      {
+         lt++; a *= (double)(lbeta);
+         c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
+      } while( c == HPL_rone );
+   }
+
+   *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc2
+(
+   int                        * BETA, 
+   int                        * T,
+   int                        * RND,
+   double                     * EPS,
+   int                        * EMIN,
+   double                     * RMIN,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * BETA, * EMAX, * EMIN, * RND, * T;
+   double                     * EPS, * RMAX, * RMIN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc2  determines the machine  parameters specified in its argu-
+ * ment list.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
+ * based on a function PARANOIA  by  W. Kahan of the University of Cali-
+ * fornia at Berkeley for the computation of the  relative machine epsi-
+ * lon eps.
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local output)              int *
+ *         The base of the machine.
+ *
+ * T       (local output)              int *
+ *         The number of ( BETA ) digits in the mantissa.
+ *
+ * RND     (local output)              int *
+ *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
+ *         occurs in addition. This may not be a reliable  guide to  the
+ *         way in which the machine performs its arithmetic.
+ *
+ * EPS     (local output)              double *
+ *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
+ *         where fl denotes the computed value.
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before (gradual) underflow occurs.
+ *
+ * RMIN    (local output)              double *
+ *         The smallest  normalized  number  for  the  machine, given by
+ *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
+ *         of BETA.
+ *
+ * EMAX    (local output)              int *
+ *         The maximum exponent before overflow occurs.
+ *
+ * RMAX    (local output)              double *
+ *         The  largest  positive  number  for  the  machine,  given  by
+ *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
+ *         value of BETA.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables .. (lrnd is not static: set on the first call only)
+ */
+   static double              leps, lrmax, lrmin;
+   double                     a, b, c, half, one, rbase, sixth, small,
+                              third, two, zero;
+   static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
+                              lt=0;
+   int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
+                              lrnd=0, ngnmin=0, ngpmin=0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( first != 0 )
+   {
+      first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
+/*
+ * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
+ * T, RND, EPS, EMIN and RMIN.
+ *
+ * Throughout this routine we use the function HPL_dlamc3 to ensure that
+ * relevant values are stored and not held in registers,  or are not af-
+ * fected by optimizers.
+ *
+ * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
+ */
+      HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
+/*
+ * Start to find eps.
+ */
+      b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
+/*
+ * Try some tricks to see whether or not this is the correct  EPS.
+ */
+      b     = two / 3.0; 
+      half  = one / HPL_rtwo;
+      sixth = HPL_dlamc3( b, -half );
+      third = HPL_dlamc3( sixth, sixth );
+      b     = HPL_dlamc3( third, -half );
+      b     = HPL_dlamc3( b, sixth );
+      b     = Mabs( b ); if( b < leps ) b = leps;
+
+      leps = HPL_rone;
+
+      while( ( leps > b ) && ( b > zero ) )
+      {
+         leps = b;
+         c = HPL_dlamc3( half * leps,
+                         HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
+         c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
+         c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
+      }
+      if( a < leps ) leps = a;
+/*
+ * Computation of EPS complete.
+ *
+ * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
+ * dividing a by BETA until (gradual) underflow occurs. This is detected
+ * when we cannot recover the previous a.
+ */
+      rbase = one / (double)(lbeta); small = one;
+      for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
+      a = HPL_dlamc3( one, small );
+      HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
+      HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
+
+      ieee = 0;
+
+      if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
+      {
+         if( ngpmin == gpmin )
+         {
+/*
+ * ( Non twos-complement machines, no gradual underflow; e.g.,  VAX )
+ */
+            lemin = ngpmin;
+         }
+         else if( ( gpmin-ngpmin ) == 3 )
+         {
+/*
+ * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
+ * dard followers
+ */
+            lemin = ngpmin - 1 + lt; ieee = 1;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, gpmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
+      {
+         if( Mabs( ngpmin-ngnmin ) == 1 )
+         {
+/*
+ * Twos-complement machines, no gradual underflow; e.g., CYBER 205
+ */
+            lemin = Mmax( ngpmin, ngnmin );
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
+      {
+         if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
+         {
+/*
+ * Twos-complement machines with gradual underflow; no known machine
+ */
+            lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
+         }
+         else
+         {
+/*
+ * A guess; no known machine
+ */
+            lemin = Mmin( ngpmin, ngnmin );
+            iwarn = 1;
+         }
+      }
+      else
+      {
+/*
+ * A guess; no known machine
+ */
+         lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
+         lemin = Mmin( lemin, gnmin ); iwarn = 1;
+      }
+/*
+ * Comment out this if block if EMIN is ok (it re-arms first and warns)
+ */
+      if( iwarn != 0 )
+      {
+         first = 1;
+         HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
+"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
+"If, after inspection, the value EMIN looks acceptable, please comment ",
+"out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
+"otherwise supply EMIN explicitly." );
+      }
+/*
+ * Assume IEEE arithmetic if we found denormalised  numbers above, or if
+ * arithmetic seems to round in the  IEEE style,  determined  in routine
+ * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
+ * ever, faulty machines may have one or the other.
+ */
+      if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
+      else                                   ieee = 0;
+/*
+ * Compute  RMIN by successive division by  BETA. We could compute  RMIN
+ * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
+ * tation.
+ */
+      lrmin = HPL_rone;
+      for( i = 0; i < 1 - lemin; i++ )
+         lrmin = HPL_dlamc3( lrmin*rbase, zero );
+/*
+ * Finally, call HPL_dlamc5 to compute emax and rmax.
+ */
+      HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
+   }
+   *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
+   *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dlamc3( const double A, const double B )
+#else
+static double HPL_dlamc3( A, B )
+/*
+ * .. Scalar Arguments ..
+ */
+   const double               A, B;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc3  returns  A + B.  Operands and sum go through volatile ob-
+ * jects so that every value is rounded to double and stored, even where
+ * an optimizer would inline the call or keep a value in a register.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc3.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * A, B    (local input)               double
+ *         The values a and b.
+ *
+ * ---------------------------------------------------------------------
+ */
+   volatile double            a = A, b = B, sum;
+
+   sum = a + b;
+   return( sum );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc4
+(
+   int                        * EMIN,
+   const double               START,
+   const int                  BASE
+)
+#else
+static void HPL_dlamc4( EMIN, START, BASE )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        * EMIN;
+   const int                  BASE;
+   const double               START;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc4 is a service function for HPL_dlamc2; it computes EMIN.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc4.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * EMIN    (local output)              int *
+ *         The minimum exponent before  (gradual) underflow, computed by
+ *         setting A = START and dividing  by  BASE until the previous A
+ *         can not be recovered.
+ *
+ * START   (local input)               double
+ *         The starting point for determining EMIN.
+ *
+ * BASE    (local input)               int
+ *         The base of the machine.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   a     = START; one = HPL_rone; rbase = one / (double)(BASE);
+   zero  = HPL_rzero;
+   *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
+
+   do
+   {
+      (*EMIN)--; a = b1;
+      b1 = HPL_dlamc3( a /  BASE,  zero );
+      c1 = HPL_dlamc3( b1 *  BASE, zero );
+      d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;
+      b2 = HPL_dlamc3( a * rbase,  zero );
+      c2 = HPL_dlamc3( b2 / rbase, zero );
+      d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
+   } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );
+} 
+
+#ifdef STDC_HEADERS
+static void HPL_dlamc5
+(
+   const int                  BETA,
+   const int                  P, 
+   const int                  EMIN,
+   const int                  IEEE,
+   int                        * EMAX,
+   double                     * RMAX
+)
+#else
+static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  BETA, EMIN, IEEE, P; 
+   int                        * EMAX;
+   double                     * RMAX;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
+ * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+ * approximately to a power of 2.  It will fail  on machines where  this
+ * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
+ * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
+ * too large (i.e. too close to zero), probably with overflow.
+ *
+ * Notes
+ * =====
+ *
+ * This function has been manually translated from the Fortran 77 LAPACK
+ * auxiliary function dlamc5.f (version 2.0 -- 1992).
+ *
+ * Arguments
+ * =========
+ *
+ * BETA    (local input)               int
+ *         The base of floating-point arithmetic.
+ *
+ * P       (local input)               int
+ *         The number of base BETA digits in the mantissa of a floating-
+ *         point value.
+ *
+ * EMIN    (local input)               int
+ *         The minimum exponent before (gradual) underflow.
+ *
+ * IEEE    (local input)               int
+ *         A logical flag specifying whether or not  the arithmetic sys-
+ *         tem is thought to comply with the IEEE standard.
+ *
+ * EMAX    (local output)              int *
+ *         The largest exponent before overflow.
+ *
+ * RMAX    (local output)              double *
+ *         The largest machine floating-point number.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     oldy=HPL_rzero, recbas, y, z;
+   int                        exbits=1, expsum, i, lexp=1, nbits, try,
+                              uexp;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
+ * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
+ * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
+ * required number RMAX).
+ */
+l_10:
+   try = (int)( (unsigned int)(lexp) << 1 );
+   if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
+
+   if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
+/*
+ * Now, for EMIN <= -1, lexp is less than or equal to -EMIN and uexp  is
+ * greater than or equal to -EMIN.  exbits  is the number of bits needed
+ * to store the exponent.
+ */
+   if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
+   { expsum = (int)( (unsigned int)(lexp) << 1 ); }
+   else
+   { expsum = (int)( (unsigned int)(uexp) << 1 ); }
+/*
+ * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
+ */
+   *EMAX = expsum + EMIN - 1;
+/*
+ * nbits  is  the total number of bits needed to store a  floating-point
+ * number.
+ */
+   nbits = 1 + exbits + P;
+
+   if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
+   {
+/*
+ * Either there are an odd number of bits used to store a floating-point
+ * number, which is unlikely, or some bits are not used in the represen-
+ * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
+ * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
+ * which is perhaps the most likely. We have to assume the last alterna-
+ * tive.  If this is true,  then we need to reduce  EMAX  by one because
+ * there must be some way of representing zero  in an  implicit-bit sys-
+ * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
+ */
+      (*EMAX)--;
+   }
+
+   if( IEEE != 0 )
+   {
+/*
+ * Assume we are on an IEEE  machine which reserves one exponent for in-
+ * finity and NaN.
+ */
+      (*EMAX)--;
+   }
+/*
+ * Now create RMAX, the largest machine number, which should be equal to
+ * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
+ * careful that the result is less than 1.0.
+ */
+   recbas = HPL_rone / (double)(BETA);
+   z      = (double)(BETA) - HPL_rone;
+   y      = HPL_rzero;
+
+   for( i = 0; i < P; i++ )
+   { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
+
+   if( y >= HPL_rone ) y = oldy;
+/*
+ * Now multiply by BETA**EMAX to get RMAX.
+ */
+   for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
+
+   *RMAX = y;
+/*
+ * End of HPL_dlamc5
+ */
+} 
+
+#ifdef STDC_HEADERS
+static double HPL_dipow
+(
+   const double               X,
+   const int                  N
+)
+#else
+static double HPL_dipow( X, N )
+/*
+ * .. Scalar Arguments ..
+ */
+   const int                  N;
+   const double               X;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dipow computes the integer n-th power of a real scalar x by  |n|
+ * successive multiplications by x (or by 1/x when n is negative).  As a
+ * special case, x = 0 yields 0 for every n, including n <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * X       (local input)               const double
+ *         The real scalar x.
+ *
+ * N       (local input)               const int
+ *         The integer power to raise x to.
+ *
+ * ---------------------------------------------------------------------
+ */
+   double                     r, y=HPL_rone;
+   unsigned int               k, n;
+/*
+ * |N| is formed in unsigned arithmetic, since -N overflows for INT_MIN.
+ */
+   if( X == HPL_rzero ) return( HPL_rzero );
+   if( N < 0 ) { n = 0U - (unsigned int)(N); r = HPL_rone / X; }
+   else        { n = (unsigned int)(N);      r = X;            }
+   for( k = 0; k < n; k++ ) y *= r;
+
+   return( y );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c
new file mode 100644
index 000000000..82f118b6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_dlange
+(
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_dlange
+( NORM, M, N, A, LDA )
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlange measures the size of the M-by-N matrix A, which is stored
+ * column after column with leading dimension LDA.  It returns
+ *
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,
+ *    norm1(A),        when NORM = HPL_NORM_1,
+ *    normI(A),        when NORM = HPL_NORM_I,
+ *
+ * norm1 being the largest column sum and normI the largest row sum of
+ * the absolute values of the entries  (max(abs(A(i,j))) is not a matrix
+ * norm).  Zero is returned if M or N is not positive or if NORM is none
+ * of the above;  a failed workspace allocation goes through HPL_abort.
+ *
+ * Arguments
+ * =========
+ *
+ * NORM    (local input)                 const HPL_T_NORM
+ *         Selects which of the three quantities described above is
+ *         returned by this function.
+ *
+ * M       (local input)                 const int
+ *         Number of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of the matrix A.  N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         Points to an array of dimension (LDA,N) that contains the
+ *         matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A.  LDA must be at least
+ *         max(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     acc, nrm = HPL_rzero, * sums = NULL;
+   int                        row, col;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M < 1 ) || ( N < 1 ) ) return( HPL_rzero );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest magnitude among all the entries of A; A moves on by columns.
+ */
+      for( col = 0; col < N; col++, A += LDA )
+      {
+         for( row = 0; row < M; row++ )
+         { nrm = Mmax( nrm, Mabs( A[row] ) ); }
+      }
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: sums[col] receives the absolute sum of column col.
+ */
+      sums = (double *)malloc( sizeof( double ) * (size_t)(N) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( col = 0; col < N; col++, A += LDA )
+         {
+            acc = HPL_rzero;
+            for( row = 0; row < M; row++ ) { acc += Mabs( A[row] ); }
+            sums[col] = acc;
+         }
+/*
+ * The norm is the column sum selected by HPL_idamax.
+ */
+         nrm = sums[HPL_idamax( N, sums, 1 )];
+         nrm = Mabs( nrm ); free( sums );
+      }
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: sums[row] receives the absolute sum of row row.
+ */
+      sums = (double *)malloc( sizeof( double ) * (size_t)(M) );
+      if( sums == NULL )
+      { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); }
+      else
+      {
+         for( row = 0; row < M; row++ ) { sums[row] = HPL_rzero; }
+
+         for( col = 0; col < N; col++, A += LDA )
+         {
+            for( row = 0; row < M; row++ )
+            { sums[row] += Mabs( A[row] ); }
+         }
+/*
+ * The norm is the row sum selected by HPL_idamax.
+ */
+         nrm = sums[HPL_idamax( M, sums, 1 )];
+         nrm = Mabs( nrm ); free( sums );
+      }
+   }
+
+   return( nrm );
+/*
+ * End of HPL_dlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c
new file mode 100644
index 000000000..6e9c368c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c
@@ -0,0 +1,176 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt writes the M-by-N matrix A to standard error, one entry
+ * per line and column after column.  Each line has the form
+ * CMATNM(IA+i,JA+j)=value for the entry in row i and column j of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         Number of rows of A to print.  M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         Number of columns of A to print.  N must be at least zero.
+ *
+ * A       (local input)                 double *
+ *         Points to an array of dimension (LDA,N) holding the entries.
+ *
+ * IA      (local input)                 const int
+ *         Row index printed for the first row of A;  row i is labelled
+ *         IA+i.
+ *
+ * JA      (local input)                 const int
+ *         Column index printed for the first column of A;  column j is
+ *         labelled JA+j.
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A.  LDA must be at least
+ *         max(1,M).
+ *
+ * CMATNM  (local input)                 const char *
+ *         Name of the matrix, printed in front of each pair of indices.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        row, col;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( col = 0; col < N; col++ )
+      for( row = 0; row < M; row++ )
+      {
+         const double entry = *(Mptr( A, row, col, LDA ));
+
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM,
+                      IA + row, JA + col, entry );
+      }
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling factors, used unless they are already defined.
+ * HPL_LATCPY_M_DEPTH is the step of the unrolled loop over M (the code
+ * below handles 1, 2, 4, 8, 16 or 32) and HPL_LATCPY_N_DEPTH the number
+ * of columns of B written per pass over N (1, 2 or 4).  Each LOG2
+ * macro must be the base-2 logarithm of its depth, since it is used as
+ * a shift count when rounding M and N down to a multiple of the depth.
+ *
+ * A deeper alternative, kept here for reference but not enabled:
+ *    HPL_LATCPY_M_DEPTH = 32 (LOG2 5), HPL_LATCPY_N_DEPTH = 4 (LOG2 2)
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH       4
+#define    HPL_LATCPY_LOG2_M_DEPTH  2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH       2
+#define    HPL_LATCPY_LOG2_N_DEPTH  1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B, i.e.
+ * B(i,j) = A(j,i) for 0 <= i < M, 0 <= j < N;  it returns at once when
+ * M or N is not positive.  Each column of B is filled by one HPL_dcopy
+ * call if HPL_LATCPY_USE_COPY is defined, else by unrolled copy loops.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the number of  rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the number of  rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         Leading dimension of the array A, at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         Leading dimension of the array B, at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* With USE_COPY: column j of B is row j of A (stride LDA in A). */
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B += LDB ) HPL_dcopy( M, A + j, LDA, B, 1 );
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ * HPL_fprintf formats its arguments as fprintf would into a local 256
+ * byte buffer, writes that buffer to STREAM and flushes STREAM. Output
+ * longer than 255 characters is truncated instead of overrunning it.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[256];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+/* Bounded formatting: vsnprintf never writes past the end of cline. */
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+   (void) fprintf( STREAM, "%s", cline );
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ * HPL_warn formats the caller's message into a local 128 byte buffer,
+ * truncating longer text,  and prints it through HPL_fprintf prefixed
+ * with the routine name and, when LINE is positive, the line number.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+/*
+ * Display an error message (cline was bounded by vsnprintf above)
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile
new file mode 100644
index 000000000..e92d18b80
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile
@@ -0,0 +1,100 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+## Header dependencies #################################################
+# INCdir is not set in this file; presumably Make.inc provides it.
+INCdep           = $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h \
+                   $(INCdir)/hpl_auxil.h
+#
+## Object files ########################################################
+# HPL_au0obj rules compile with CCFLAGS, the HPL_au1obj one with CCNOOPT.
+HPL_au0obj       = HPL_dlacpy.o  HPL_dlatcpy.o HPL_fprintf.o \
+                   HPL_warn.o    HPL_abort.o   HPL_dlaprnt.o \
+                   HPL_dlange.o
+HPL_au1obj       = HPL_dlamch.o
+#
+# Every object that the lib.grd rule puts into the archive.
+#
+HPL_auxobj       = $(HPL_au0obj) $(HPL_au1obj)
+## Targets #############################################################
+# all and lib only name work to do; declaring them phony keeps a stray
+# file called "all" or "lib" from making them look up to date.
+.PHONY  : all lib
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp: it is touched once the objects are archived.
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+## Compile rules #######################################################
+# Sources sit one directory up; only HPL_dlamch.o is built with CCNOOPT.
+HPL_dlacpy.o  : ../HPL_dlacpy.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_fprintf.o : ../HPL_fprintf.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_warn.o    : ../HPL_warn.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_abort.o   : ../HPL_abort.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlange.o  : ../HPL_dlange.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlamch.o  : ../HPL_dlamch.c  $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT)  $<
+#
+# ######################################################################
+# clean names an action, not a file, so it is declared phony.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+/* Compiled only when HPL_daxpy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ * HPL_daxpy scales the vector x by alpha and adds it to y,  that is,
+ * y := y + alpha * x.  The body holds three variants, each guarded by
+ * one of HPL_CALL_CBLAS, HPL_CALL_VSIPL and HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+/* Y is left untouched when the vector is empty or alpha is zero. */
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* nu is N rounded down to a multiple of 4; the loop stops at StX. */
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+/* All eight loads above are issued before the first store below. */
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+ 
+            X  += incX4;
+            Y  += incY4;
+ 
+         } while( X != StX );
+      }
+/* Clean-up loop: the N - nu elements left over, one per pass. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         y0  = (*Y);
+ 
+         *Y  = y0 + alpha * x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA;
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+/* Compiled only when HPL_dcopy is not already defined as a macro. */
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ * HPL_dcopy copies the vector x into the vector y.  The body holds
+ * three variants,  each guarded by one of  HPL_CALL_CBLAS,
+ * HPL_CALL_VSIPL and HPL_CALL_FBLAS.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+/* Y is left untouched when N is not positive. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* nu is N rounded down to a multiple of 8; the loop stops at StX. */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+/* All eight loads above are issued before the first store below. */
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+ 
+            X  += incX8;
+            Y  += incY8;
+ 
+         } while( X != StX );
+      }
+/* Clean-up loop: the N - nu elements left over, one per pass. */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0  = (*X);
+         *Y  = x0;
+ 
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dcopy
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   /* Column-major kernel, A and B both read untransposed (B[l + j*LDB]). */
+   int                        col, inner, row;
+   for( col = 0; col < N; col++ )
+   {
+      const int               cOff = col * LDC, bOff = col * LDB;
+      HPL_dscal( M, BETA, C + cOff, 1 );   /* apply BETA to column col of C */
+      for( inner = 0; inner < K; inner++ )
+      {
+         const int            aOff  = inner * LDA;
+         const double         scale = ALPHA * B[bOff + inner];
+         for( row = 0; row < M; row++ ) C[cOff + row] += A[aOff + row] * scale;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmNT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   /* Column-major kernel, A untransposed, B read as B[j + l*LDB]. */
+   int                        col, inner, row;
+   for( col = 0; col < N; col++ )
+   {
+      const int               cOff = col * LDC;
+      HPL_dscal( M, BETA, C + cOff, 1 );   /* apply BETA to column col of C */
+      for( inner = 0; inner < K; inner++ )
+      {
+         const int            aOff  = inner * LDA;
+         const double         scale = ALPHA * B[col + inner * LDB];
+         for( row = 0; row < M; row++ ) C[cOff + row] += A[aOff + row] * scale;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTN
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   /* C(i,j) = BETA*C(i,j) + ALPHA * sum_l A[l + i*LDA] * B[l + j*LDB]. */
+   int                        col, inner, row;
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         double               * cij = &C[row + col * LDC];
+         double               dot   = HPL_rzero;
+         for( inner = 0; inner < K; inner++ )
+         { dot += A[inner + row * LDA] * B[inner + col * LDB]; }
+         /* BETA == 0 overwrites C so its prior contents are never read. */
+         *cij  = ( BETA == HPL_rzero ? HPL_rzero : (*cij) * BETA );
+         *cij += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemmTT
+(
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+   /* C(i,j) = BETA*C(i,j) + ALPHA * sum_l A[l + i*LDA] * B[j + l*LDB]. */
+   int                        col, inner, row;
+   for( col = 0; col < N; col++ )
+   {
+      for( row = 0; row < M; row++ )
+      {
+         double               * cij = &C[row + col * LDC];
+         double               dot   = HPL_rzero;
+         for( inner = 0; inner < K; inner++ )
+         { dot += A[inner + row * LDA] * B[col + inner * LDB]; }
+         /* BETA == 0 overwrites C so its prior contents are never read. */
+         *cij  = ( BETA == HPL_rzero ? HPL_rzero : (*cij) * BETA );
+         *cij += ALPHA * dot;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dgemm0
+(
+   const enum HPL_TRANS       TRANSA,
+   const enum HPL_TRANS       TRANSB,
+   const int                  M,
+   const int                  N,
+   const int                  K,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * B,
+   const int                  LDB,
+   const double               BETA,
+   double                     * C,
+   const int                  LDC
+)
+#else
+static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+                        BETA, C, LDC )
+   const enum HPL_TRANS       TRANSA, TRANSB;
+   const int                  K, LDA, LDB, LDC, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * B;
+   double                     * C;
+#endif
+{
+/* Column-major C := ALPHA*op(A)*op(B) + BETA*C: handles the quick-return
+ * and ALPHA == 0 cases here, then dispatches to the NN/TN/NT/TT kernels. */
+   int                        i, j;
+/* Quick return: empty C, or the update reduces to C := 1 * C. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
+         ( BETA == HPL_rone ) ) ) return;
+/* ALPHA == 0: C := BETA * C, A and B are not referenced. */
+   if( ALPHA == HPL_rzero )
+   {
+      for( j = 0; j < N; j++ )
+         for( i = 0; i < M; i++ )
+         {  /* BETA == 0 overwrites, so stale C values are never read */
+            if( BETA == HPL_rzero ) *(C+i+j*LDC)  = HPL_rzero;
+            else                    *(C+i+j*LDC) *= BETA;
+         }
+      return;
+   }
+/* General case: pick the kernel matching the transposition of A and B. */
+   if(      ( TRANSA == HplNoTrans ) && ( TRANSB == HplNoTrans ) )
+      HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+   else if( TRANSB == HplNoTrans )
+      HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+   else if( TRANSA == HplNoTrans )
+      HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+   else
+      HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANSA,
+   const enum HPL_TRANS             TRANSB,
+   const int                        M,
+   const int                        N,
+   const int                        K,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   B,
+   const int                        LDB,
+   const double                     BETA,
+   double *                         C,
+   const int                        LDC
+)
+#else
+void HPL_dgemm
+( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANSA;
+   const enum HPL_TRANS             TRANSB;
+   const int                        M;
+   const int                        N;
+   const int                        K;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   B;
+   const int                        LDB;
+   const double                     BETA;
+   double *                         C;
+   const int                        LDC;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemm performs one of the matrix-matrix operations
+ *  
+ *     C := alpha * op( A ) * op( B ) + beta * C
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+ * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANSA  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSA  specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSA==HplNoTrans    : op( A ) = A,                     
+ *            TRANSA==HplTrans      : op( A ) = A^T,                   
+ *            TRANSA==HplConjTrans  : op( A ) = A^T.                   
+ *
+ * TRANSB  (local input)                 const enum HPL_TRANS
+ *         On entry, TRANSB  specifies the form of  op(B)  to be used in
+ *         the matrix-matrix operation as follows:
+ *            TRANSB==HplNoTrans    : op( B ) = B,                     
+ *            TRANSB==HplTrans      : op( B ) = B^T,                   
+ *            TRANSB==HplConjTrans  : op( B ) = B^T.                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the  number  of rows  of the  matrix
+ *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the number  of columns of the matrix
+ *         op(B)  and  the number of columns of the matrix  C. N must be
+ *         at least zero.
+ *
+ * K       (local input)                 const int
+ *         On entry,  K  specifies  the  number of columns of the matrix
+ *         op(A) and the number of rows of the matrix op(B).  K  must
+ *         be at least  zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero  then the elements of the matrices A and B
+ *         need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
+ *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+ *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+ *         the array  A must contain the matrix A, otherwise the leading
+ *         k  by  m  part of the array  A  must  contain the  matrix  A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA  specifies the first dimension of A as declared
+ *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+ *         LDA must be at least max(1,m), otherwise LDA must be at least
+ *         max(1,k).
+ *
+ * B       (local input)                 const double *
+ *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
+ *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+ *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+ *         the array  B must contain the matrix B, otherwise the leading
+ *         n  by  k  part of the array  B  must  contain  the matrix  B.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB  specifies the first dimension of B as declared
+ *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+ *         LDB must be at least max(1,k), otherwise LDB must be at least
+ *         max(1,n).
+ *
+ * BETA    (local input)                 const double
+ *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+ *         supplied  as  zero  then  the  elements of the matrix C  need
+ *         not be set on input.
+ *
+ * C       (local input/output)          double *
+ *         On entry,  C  is an array of dimension (LDC,n). Before entry,
+ *         the  leading m by n part  of  the  array  C  must contain the
+ *         matrix C,  except when beta is zero, in which case C need not
+ *         be set on entry. On exit, the array  C  is overwritten by the
+ *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+ *
+ * LDC     (local input)                 const int
+ *         On entry, LDC  specifies the first dimension of C as declared
+ *         in  the   calling  (sub)  program.   LDC  must  be  at  least
+ *         max(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   /* Straight pass-through: every argument is forwarded unchanged. */
+   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
+                  C, LDC );
+   }
+   else   /* row-major: swap the roles of A/B and M/N for the col-major kernel */
+   {
+      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
+                  C, LDC );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftransa;
+   F77_CHAR                  ftransb;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
+                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77K                 K
+#define F77lda               LDA
+#define F77ldb               LDB
+#define F77ldc               LDC
+#endif
+   char                      ctransa, ctransb;
+
+   if(      TRANSA == HplNoTrans ) ctransa = 'N';
+   else if( TRANSA == HplTrans   ) ctransa = 'T';
+   else                            ctransa = 'C';
+ 
+   if(      TRANSB == HplNoTrans ) ctransb = 'N';
+   else if( TRANSB == HplTrans   ) ctransb = 'T';
+   else                            ctransb = 'C';
+
+   if( ORDER == HplColumnMajor )
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
+                B, &F77ldb, &beta, C, &F77ldc );
+#endif
+   }
+   else   /* row-major: swap the roles of A/B and M/N for the Fortran routine */
+   {
+#ifdef StringSunStyle
+      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructVal
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+#ifdef StringStructPtr
+      ftransa.len = 1; ftransa.cp = &ctransa;
+      ftransb.len = 1; ftransb.cp = &ctransb;
+      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
+                A, &F77lda, &beta, C, &F77ldc );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dgemm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c
new file mode 100644
index 000000000..6366c5a48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c
@@ -0,0 +1,326 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * X,
+   const int                  INCX,
+   const double               BETA,
+   double                     * Y,
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{
+/*
+ * y := ALPHA*op(A)*x + BETA*y on a column-major M-by-N array A; y holds
+ * M entries when TRANS == HplNoTrans and N entries otherwise.
+ */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/* .. Executable Statements .. */
+/* Quick return: an empty dimension, or the update reduces to y := 1 * y. */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
+/* ALPHA == 0: only scale y, over the length that matches TRANS. */
+   if( ALPHA == HPL_rzero )
+   { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; }
+   if( TRANS == HplNoTrans )
+   {
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else
+   {
+      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
+         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   const double *                   X;
+   const int                        INCX;
+   const double                     BETA;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dgemv performs one of the matrix-vector operations
+ *  
+ *     y := alpha * op( A ) * x + beta * y,
+ *  
+ *  where op( X ) is one of
+ *  
+ *     op( X ) = X   or   op( X ) = X^T.
+ *  
+ * where alpha and beta are scalars, x and y are vectors and  A  is an m
+ * by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies the  operation to be performed as
+ *         follows:   
+ *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+ *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  A and X  need not be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array  holding the vector  x:
+ *         n entries if TRANS = HplNoTrans, m otherwise (stride |INCX|).
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * BETA    (local input)                 const double
+ *         On entry, BETA  specifies the scalar beta.    When  BETA   is
+ *         supplied as zero then  Y  need not be set on input.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array  holding the vector  y:
+ *         m entries if TRANS = HplNoTrans, n otherwise (stride |INCY|).
+ *         Before entry with BETA non-zero, the incremented array Y must
+ *         contain the vector  y.  On exit,  Y  is  overwritten  by  the
+ *         updated vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+   else   /* row-major: flip TRANS and swap M/N for the column-major kernel */
+   {
+      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double                    alpha = ALPHA, beta = BETA;
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  ftran;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  ftran;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   char                      ctran;
+
+   if( ORDER == HplColumnMajor )
+   {
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+   else   /* row-major: flip the transpose flag and swap M/N */
+   {
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+#ifdef StringSunStyle
+      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy, IONE );
+#endif
+#ifdef StringCrayStyle
+      ftran = HPL_C2F_CHAR( ctran );
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructVal
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+#ifdef StringStructPtr
+      ftran.len = 1; ftran.cp = &ctran;
+      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
+                &beta, Y, &F77incy );
+#endif
+   }
+
+#endif
+/*
+ * End of HPL_dgemv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c
new file mode 100644
index 000000000..5ea702778
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dger
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+void HPL_dger
+(
+   const enum HPL_ORDER             ORDER,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY,
+   double *                         A,
+   const int                        LDA
+)
+#else /* old-style (K&R) definition with the same parameters */
+void HPL_dger
+( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+   const enum HPL_ORDER             ORDER;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+   double *                         A;
+   const int                        LDA;
+#endif /* STDC_HEADERS */
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dger performs the rank 1 operation
+ *  
+ *     A := alpha * x * y^T + A,
+ *  
+ * where alpha is a scalar,  x is an m-element vector, y is an n-element
+ * vector and A is an m by n matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of  the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero then  X and Y  need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input)                 double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n.  Before  entry, the leading m by n part  of the
+ *         array  A  must contain the matrix coefficients. On exit, A is
+ *         overwritten by the updated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m).
+ *         With ORDER = HplRowMajor the plain C loops step rows by LDA.
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* forward the call unchanged to cblas_dger */
+   cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL /* plain C loops, no BLAS call */
+   register double           t0;
+   int                       i, iaij, ix, iy, j, jaj, jx, jy;
+/* Nothing to update when A is empty or alpha is zero. */
+   if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return;
+/* NOTE(review): ix/iy/jx/jy start at 0; negative INCX/INCY look unsupported. */
+   if( ORDER == HplColumnMajor )
+   { /* for each column j: A(:,j) += x * ( alpha * y[j] ) */
+      for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = ALPHA * Y[jy];
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { A[iaij] += X[ix] * t0; }
+      }
+   }
+   else
+   { /* row-major, for each row j: A(j,:) += y * ( alpha * x[j] ) */
+      for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY )
+         { A[iaij] += Y[iy] * t0; }
+      }
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS: arguments are passed by address */
+   double                    alpha = ALPHA; /* addressable copy of ALPHA */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M    = M,   F77N    = N,
+                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
+#else
+#define F77M                 M
+#define F77N                 N
+#define F77lda               LDA
+#define F77incx              INCX
+#define F77incy              INCY
+#endif /* HPL_USE_F77_INTEGER_DEF */
+/* Row-major A is passed as its transpose: M/N and X/Y are swapped. */
+   if( ORDER == HplColumnMajor )
+   {  F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); }
+   else
+   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dger
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c
new file mode 100644
index 000000000..7e041991f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dscal
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+void HPL_dscal
+(
+   const int                        N,
+   const double                     ALPHA,
+   double *                         X,
+   const int                        INCX
+)
+#else /* old-style (K&R) definition with the same parameters */
+void HPL_dscal
+( N, ALPHA, X, INCX )
+   const int                        N;
+   const double                     ALPHA;
+   double *                         X;
+   const int                        INCX;
+#endif /* STDC_HEADERS */
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dscal scales the vector x by alpha.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are  scaled
+ *         by the scalar alpha.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* forward the call unchanged to cblas_dscal */
+   cblas_dscal( N, ALPHA, X, INCX );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL /* plain C loops, unrolled by 8, no BLAS call */
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   register const double     alpha = ALPHA;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+/* N <= 0 or alpha == 1: X is left untouched. */
+   if( ( N > 0 ) && ( alpha != HPL_rone ) )
+   {
+      if( alpha == HPL_rzero )
+      { /* alpha == 0: store zeros without reading X */
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu: N rounded down to 8k */
+         {
+            StX = (double *)X + nu * INCX;
+            /* StX: value X reaches once nu elements have been handled */
+            do
+            {
+               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
+               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
+               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
+               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8;
+
+            } while( X != StX );
+         }
+         /* leftover N - nu (fewer than 8) elements, one per iteration */
+         for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; }
+      }
+      else
+      { /* general alpha: load, scale and store 8 elements per pass */
+         if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu: N rounded down to 8k */
+         {
+            StX = X + nu * INCX;
+            /* StX: value X reaches once nu elements have been handled */
+            do
+            {
+               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
+               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;
+ 
+               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
+               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;
+ 
+               X  += incX8;
+ 
+            } while( X != StX );
+         }
+         /* leftover N - nu (fewer than 8) elements, one per iteration */
+         for( i = N - nu; i != 0; i-- )
+         { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; }
+      }
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS: arguments are passed by address */
+   double                    alpha = ALPHA; /* addressable copy of ALPHA */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif /* HPL_USE_F77_INTEGER_DEF */
+
+   F77dscal( &F77N, &alpha, X, &F77incx );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dscal
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c
new file mode 100644
index 000000000..eb1b8e08d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c
@@ -0,0 +1,157 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dswap
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+void HPL_dswap
+(
+   const int                        N,
+   double *                         X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else /* old-style (K&R) definition with the same parameters */
+void HPL_dswap
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   double *                         X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif /* STDC_HEADERS */
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dswap swaps the vectors x and y.
+ * 
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         On exit, the entries of the incremented array  X  are updated
+ *         with the entries of the incremented array Y.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS /* forward the call unchanged to cblas_dswap */
+   cblas_dswap( N, X, INCX, Y, INCY );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL /* plain C loops, unrolled by 4, no BLAS call */
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   double                    * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+/* N <= 0: nothing is swapped. */
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 ) /* nu: N rounded down to 4k */
+      {
+         StX = X + nu * INCX;
+         /* StX: value X reaches once nu pairs are swapped; Y is not tested */
+         do
+         {
+            x0 = (*X);      y0 = (*Y);      x1 = X[INCX ];  y1 = Y[INCY ];
+            x2 = X[incX2];  y2 = Y[incY2];  x3 = X[incX3];  y3 = Y[incY3];
+            *Y        = x0; *X        = y0; Y[INCY ]  = x1; X[INCX ]  = y1;
+            Y[incY2]  = x2; X[incX2]  = y2; Y[incY3]  = x3; X[incX3]  = y3;
+            X += incX4; Y += incY4;
+ 
+         } while( X != StX );
+      }
+      /* leftover N - nu (fewer than 4) pairs, one per iteration */
+      for( i = N - nu; i != 0; i-- )
+      { x0  = (*X); y0  = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; }
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS /* Fortran BLAS: arguments are passed by address */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif /* HPL_USE_F77_INTEGER_DEF */
+   F77dswap( &F77N, X, &F77incx, Y, &F77incy );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dswap
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c
new file mode 100644
index 000000000..a336a7d29
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c
@@ -0,0 +1,977 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsm
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLLNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A * X = ALPHA * B. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+/* Reads A(i,k) = A[i+k*LDA] for i >= k only; forward substitution. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
+      {
+         B[ibkj] /= A[k+jak]; /* divide by the diagonal entry A(k,k) */
+         for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
+              i < M; i++, iaik +=1,        ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* eliminate from rows below k */
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLLNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A * X = ALPHA * B. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+/* Reads A(i,k) = A[i+k*LDA] for i > k only; the diagonal is never read. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = 0, jak  = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 )
+      { /* no division here: the diagonal of A is taken to be 1 */
+         for( i = k+1,    iaik  = k+1+jak, ibij  = k+1+jbj;
+              i < M; i++, iaik +=1,        ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* eliminate from rows below k */
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLLTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A^T * X = ALPHA * B. */
+   register double            t0;
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
+/* Reads A(k,i) = A[k+i*LDA] for k >= i only; rows are solved last to first. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
+           i >= 0; i--, jai -= LDA,       ibij -= 1 )
+      {
+         t0 = ALPHA * B[ibij];
+         for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
+              k < M; k++, iaki += 1,       ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* subtract already-solved rows k > i */
+         t0 /= A[i+jai]; /* divide by the diagonal entry A(i,i) */
+         B[ibij] = t0;
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLLTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A^T * X = ALPHA * B. */
+   register double            t0;
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;
+/* Reads A(k,i) = A[k+i*LDA] for k > i only; the diagonal is never read. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = M-1,     jai  = (M-1)*LDA, ibij  = M-1+jbj;
+           i >= 0; i--, jai -= LDA,       ibij -= 1 )
+      {
+         t0 = ALPHA * B[ibij];
+         for( k = i+1,    iaki  = i+1+jai, ibkj  = i+1+jbj;
+              k < M; k++, iaki += 1,       ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; } /* subtract already-solved rows k > i */
+         B[ibij] = t0; /* no division: the diagonal of A is taken to be 1 */
+      }
+   }
+}
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLUNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A * X = ALPHA * B. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+/* Reads A(i,k) = A[i+k*LDA] for i <= k only; rows are solved last to first. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
+           k >= 0; k--, jak -= LDA,       ibkj -= 1 )
+      {
+         B[ibkj] /= A[k+jak]; /* divide by the diagonal entry A(k,k) */
+         for( i = 0,      iaik  = jak, ibij  = jbj;
+              i < k; i++, iaik += 1,   ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* eliminate from rows above k */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+static void HPL_dtrsmLUNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else /* old-style (K&R) definition */
+static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif /* STDC_HEADERS */
+{ /* Overwrites the M x N array B with X, where A * X = ALPHA * B. */
+   int                        i, iaik, ibij, ibkj, j, jak, jbj, k;
+/* Reads A(i,k) = A[i+k*LDA] for i < k only; the diagonal is never read. */
+   for( j = 0, jbj = 0; j < N; j++, jbj += LDB )
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }
+      for( k = M-1,     jak  = (M-1)*LDA, ibkj  = M-1+jbj;
+           k >= 0; k--, jak -= LDA,       ibkj -= 1 )
+      { /* no division here: the diagonal of A is taken to be 1 */
+         for( i = 0,      iaik  = jak, ibij  = jbj;
+              i < k; i++, iaik += 1,   ibij += 1 )
+         { B[ibij] -= B[ibkj] * A[iaik]; } /* eliminate from rows above k */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Left / Upper / Trans / Non-unit kernel: overwrites the M x N array B with X, A^T * X = ALPHA * B */
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+   register double            t0;  /* accumulates the value of B(i,j) being solved */
+
+   for( j = 0, jbj  = 0; j < N; j++, jbj += LDB )  /* every column of B is solved on its own */
+   {
+      for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 )  /* forward substitution, first row downwards */
+      {
+         t0 = ALPHA * B[ibij];  /* right-hand side entry, scaled by ALPHA */
+         for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; }  /* subtract A(k,i) * B(k,j) for the already solved rows k < i */
+         t0 /= A[i+jai];  /* divide by the diagonal entry A(i,i) */
+         B[ibij] = t0;
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmLUTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Left / Upper / Trans / Unit kernel: overwrites the M x N array B with X, A^T * X = ALPHA * B */
+   register double            t0;  /* accumulates the value of B(i,j) being solved */
+   int                        i, iaki, ibij, ibkj, j, jai, jbj, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( j = 0, jbj  = 0; j < N; j++, jbj += LDB )  /* every column of B is solved on its own */
+   {
+      for( i = 0, jai  = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 )  /* forward substitution, first row downwards */
+      {
+         t0 = ALPHA * B[ibij];  /* right-hand side entry, scaled by ALPHA */
+         for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 )
+         { t0 -= A[iaki] * B[ibkj]; }  /* subtract A(k,i) * B(k,j) for the already solved rows k < i */
+         B[ibij] = t0;  /* no division: the diagonal of A is never read, it is treated as 1 */
+      }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Lower / NoTrans / Non-unit kernel: overwrites the M x N array B with X, X * A = ALPHA * B */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;  /* columns of B are solved from the last to the first */
+        j >= 0;  j--, jaj -= LDA,       jbj -= LDB )
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }  /* scale column j by ALPHA first */
+      for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;  /* columns k > j already hold their solution */
+           k < N; k++, iakj += 1,       jbk += LDB )
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; }  /* B(i,j) -= A(k,j) * B(i,k) */
+      }
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; }  /* divide column j by the diagonal A(j,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Lower / NoTrans / Unit kernel: overwrites the M x N array B with X, X * A = ALPHA * B */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( j = N-1,      jaj  = (N-1)*LDA, jbj  = (N-1)*LDB;  /* columns of B are solved from the last to the first */
+        j >= 0;  j--, jaj -= LDA,       jbj -= LDB )
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }  /* scale column j by ALPHA first */
+      for( k = j+1,    iakj  = j+1+jaj, jbk  = (j+1)*LDB;  /* columns k > j already hold their solution */
+           k < N; k++, iakj += 1,       jbk += LDB )
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; }  /* B(i,j) -= A(k,j) * B(i,k) */
+      }  /* no division afterwards: the diagonal of A is never read, it is treated as 1 */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Lower / Trans / Non-unit kernel: overwrites the M x N array B with X, X * A^T = ALPHA * B */
+   register double            t0;  /* holds A(j,k) while column j of B is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB )  /* columns of B are solved from the first to the last */
+   {
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; }  /* divide column k by the diagonal A(k,k) */
+      for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;  /* propagate column k into the later columns j > k */
+           j < N; j++, iajk += 1,         jbj += LDB )
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }  /* B(i,j) -= A(j,k) * B(i,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }  /* ALPHA is applied last, after column k was used unscaled above */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRLTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Lower / Trans / Unit kernel: overwrites the M x N array B with X, X * A^T = ALPHA * B */
+   register double            t0;  /* holds A(j,k) while column j of B is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB )  /* columns of B are solved from the first to the last */
+   {  /* no division of column k: the diagonal of A is never read, it is treated as 1 */
+      for( j = k+1,    iajk  = (k+1)+jak, jbj  = (k+1)*LDB;  /* propagate column k into the later columns j > k */
+           j < N; j++, iajk += 1,         jbj += LDB )
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }  /* B(i,j) -= A(j,k) * B(i,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }  /* ALPHA is applied last, after column k was used unscaled above */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / NoTrans / Non-unit kernel: overwrites the M x N array B with X, X * A = ALPHA * B */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB )  /* columns of B are solved from the first to the last */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }  /* scale column j by ALPHA first */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB )  /* columns k < j already hold their solution */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; }  /* B(i,j) -= A(k,j) * B(i,k) */
+      }
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; }  /* divide column j by the diagonal A(j,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUNU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / NoTrans / Unit kernel: overwrites the M x N array B with X, X * A = ALPHA * B */
+   int                        i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB )  /* columns of B are solved from the first to the last */
+   {
+      for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; }  /* scale column j by ALPHA first */
+      for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB )  /* columns k < j already hold their solution */
+      {
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= A[iakj] * B[ibik]; }  /* B(i,j) -= A(k,j) * B(i,k) */
+      }  /* no division afterwards: the diagonal of A is never read, it is treated as 1 */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTN
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / Trans / Non-unit kernel: overwrites the M x N array B with X, X * A^T = ALPHA * B */
+   register double            t0;  /* holds A(j,k) while column j of B is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;  /* columns of B are solved from the last to the first */
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; }  /* divide column k by the diagonal A(k,k) */
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )  /* propagate column k into the earlier columns j < k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }  /* B(i,j) -= A(j,k) * B(i,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }  /* ALPHA is applied last, after column k was used unscaled above */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsmRUTU
+(
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB )
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{  /* Right / Upper / Trans / Unit kernel: overwrites the M x N array B with X, X * A^T = ALPHA * B */
+   register double            t0;  /* holds A(j,k) while column j of B is updated */
+   int                        i, iajk, ibij, ibik, j, jak, jbj, jbk, k;  /* A(r,c) is A[r+c*LDA], B(r,c) is B[r+c*LDB] */
+
+   for( k = N-1,     jak  = (N-1)*LDA, jbk  = (N-1)*LDB;  /* columns of B are solved from the last to the first */
+        k >= 0; k--, jak -= LDA,       jbk -= LDB )
+   {  /* no division of column k: the diagonal of A is never read, it is treated as 1 */
+      for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB )  /* propagate column k into the earlier columns j < k */
+      {
+         t0 = A[iajk];
+         for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 )
+         { B[ibij] -= t0 * B[ibik]; }  /* B(i,j) -= A(j,k) * B(i,k) */
+      }
+      for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; }  /* ALPHA is applied last, after column k was used unscaled above */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsm0
+(
+   const enum HPL_SIDE        SIDE,
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   double                     * B,
+   const int                  LDB
+)
+#else
+static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_SIDE        SIDE;
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  LDA, LDB, M, N;
+   const double               ALPHA;
+   const double               * A;
+   double                     * B;
+#endif
+{ /* Dispatcher: handles the trivial cases, then calls the one kernel selected by SIDE, UPLO, TRANS and DIAG */
+   int                        i, j;
+
+   if( ( M == 0 ) || ( N == 0 ) ) return;  /* empty B: nothing to solve */
+ 
+   if( ALPHA == HPL_rzero )  /* zero ALPHA: B is filled with HPL_rzero and A is never read */
+   {
+      for( j = 0; j < N; j++ )
+      {  for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; }  /* entry (i,j) of B lives at B[i+j*LDB] */
+      return;
+   }
+
+   if( SIDE == HplLeft )  /* kernel suffix letters follow the order SIDE, UPLO, TRANS, DIAG */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else  /* every TRANS value other than HplNoTrans selects the transposed kernels */
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else  /* every UPLO value other than HplUpper selects the lower kernels */
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+   else  /* every SIDE value other than HplLeft selects the right-side kernels */
+   {
+      if( UPLO == HplUpper )
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+      else
+      {
+         if( TRANS == HplNoTrans )
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+         else
+         {
+            if( DIAG == HplNonUnit )
+            {      HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); }
+            else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
+         }
+      }
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsm
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_SIDE              SIDE,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dtrsm
+( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_SIDE              SIDE;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        M;
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsm solves one of the matrix equations
+ *  
+ *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ *  
+ * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+ * non-unit, upper or lower triangular matrix and op(A) is one of
+ *  
+ *    op( A ) = A   or   op( A ) = A^T.
+ *  
+ * The matrix X is overwritten on B.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * SIDE    (local input)                 const enum HPL_SIDE
+ *         On entry, SIDE  specifies  whether  op(A) appears on the left
+ *         or right of X as follows:
+ *            SIDE==HplLeft    op( A ) * X = alpha * B,
+ *            SIDE==HplRight   X * op( A ) = alpha * B.
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry, TRANS   specifies the form of  op(A)  to be used in
+ *         the matrix-matrix operation as follows:                      
+ *            TRANS==HplNoTrans     : op( A ) = A,                     
+ *            TRANS==HplTrans       : op( A ) = A^T,                   
+ *            TRANS==HplConjTrans   : op( A ) = A^T.                   
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the  matrix B.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of columns of the matrix B.
+ *         N must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied  as  zero then the elements of the matrix B need not
+ *         be set on input.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+ *         otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+ *         k by k upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower on  entry,
+ *         the  leading k by k lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+ *         are not referenced  either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+ *
+ * B       (local input/output)          double *
+ *         On entry,  B  points  to an array of size equal to or greater
+ *         than LDB * n.  Before entry, the leading  m by n  part of the
+ *         array B must contain the matrix B, except when alpha is zero,
+ *         in which case B need not be set on entry.  On exit, the array
+ *         B is overwritten by the m by n solution matrix.
+ *
+ * LDB     (local input)                 const int
+ *         On entry,  LDB  specifies  the  leading  dimension  of  B  as
+ *         declared  in  the  calling  (sub) program.  LDB  must  be  at
+ *         least MAX(1,m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS  /* back end 1: forward every argument unchanged to cblas_dtrsm */
+   cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+#endif
+#ifdef HPL_CALL_VSIPL  /* back end 2: the C implementation reached through HPL_dtrsm0 */
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB );
+   }
+   else  /* any other ORDER: SIDE and UPLO are flipped and M, N are exchanged */
+   {
+      HPL_dtrsm0( ( SIDE == HplRight ? HplLeft  : HplRight ),
+                  ( UPLO == HplLower ? HplUpper : HplLower ),
+                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+   }
+#endif
+#ifdef HPL_CALL_FBLAS  /* back end 3: F77dtrsm (declared outside this block; presumably the Fortran BLAS routine) */
+   double                    alpha = ALPHA;  /* addressable copy: F77dtrsm is given &alpha */
+#ifdef StringSunStyle
+#if defined( HPL_USE_F77_INTEGER_DEF )
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif
+#endif
+#ifdef StringStructVal
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fside;
+   F77_CHAR                  fuplo;
+   F77_CHAR                  ftran;
+   F77_CHAR                  fdiag;
+#endif
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77M   = M,   F77N   = N,
+                             F77lda = LDA, F77ldb = LDB;
+#else  /* no F77_INTEGER copies: the F77 names become macros for the arguments (not #undef'd in this function) */
+#define  F77M                M
+#define  F77N                N
+#define  F77lda              LDA
+#define  F77ldb              LDB
+#endif
+   char                      cside, cuplo, ctran, cdiag;  /* one-letter option codes handed to F77dtrsm */
+
+   if(      TRANS == HplNoTrans ) ctran = 'N';
+   else if( TRANS == HplTrans   ) ctran = 'T';
+   else                           ctran = 'C';  /* any remaining TRANS value */
+   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
+
+   if( ORDER == HplColumnMajor )
+   {
+      cside = ( SIDE == HplRight ? 'R' : 'L' );
+      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
+#ifdef StringSunStyle  /* the four trailing IONE arguments are presumably the hidden character lengths -- confirm */
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle  /* characters converted with HPL_C2F_CHAR and passed by value */
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal  /* length/pointer descriptors passed by value */
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr  /* length/pointer descriptors passed by address */
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+   else  /* any other ORDER: the SIDE and UPLO letters are flipped and N, M are passed in exchanged order */
+   {
+      cside = ( SIDE == HplRight ? 'L' : 'R' );
+      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
+#ifdef StringSunStyle
+      F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+      fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo );
+      ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructVal
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( fside,  fuplo,  ftran,  fdiag,  &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+#ifdef StringStructPtr
+      fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo;
+      ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag;
+      F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha,
+                A, &F77lda, B, &F77ldb );
+#endif
+   }
+#endif
+/*
+ * End of HPL_dtrsm
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c
new file mode 100644
index 000000000..99e84f073
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c
@@ -0,0 +1,520 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dtrsv
+
+#ifdef HPL_CALL_VSIPL
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Forward substitution with the lower triangle of A (diagonal included): X becomes the solution x of A * x = X */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;  /* ldap1 steps from A(j,j) to A(j+1,j+1); A(r,c) is A[r+c*LDA] */
+   register double            t0;  /* the freshly solved entry x(j) */
+
+   for( j = 0, jaj = 0, jx  = 0; j < N; j++, jaj += ldap1, jx += INCX )  /* entry j of the vector is X[j*INCX] */
+   {
+      X[jx] /= A[jaj]; t0 = X[jx];  /* x(j) is final once divided by the diagonal A(j,j) */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; }  /* later entries: x(i) -= x(j) * A(i,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Forward substitution with the strictly lower triangle of A, diagonal taken as 1: X becomes x with A * x = X */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;  /* ldap1 steps along the diagonal; A(r,c) is A[r+c*LDA] */
+   register double            t0;  /* the already final entry x(j) */
+
+   for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX )  /* entry j of the vector is X[j*INCX] */
+   {
+      t0 = X[jx];  /* no division: A(j,j) is never read */
+      for( i = j+1,    iaij  = jaj+1, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { X[ix] -= t0 * A[iaij]; }  /* later entries: x(i) -= x(j) * A(i,j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Back substitution with the lower triangle of A (diagonal included): X becomes the solution x of A^T * x = X */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;  /* ldap1 steps along the diagonal; A(r,c) is A[r+c*LDA] */
+   register double            t0;  /* accumulates the value of x(j) being solved */
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;  /* last entry first; jaj indexes A(j,j) */
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; }  /* subtract A(i,j) * x(i) for the solved entries i > j */
+      t0 /= A[jaj]; X[jx] = t0;  /* divide by the diagonal A(j,j) and store x(j) */
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvLTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvLTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Back substitution with the strictly lower triangle of A, diagonal taken as 1: X becomes x with A^T * x = X */
+   int                        i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1;  /* ldap1 steps along the diagonal; A(r,c) is A[r+c*LDA] */
+   register double            t0;  /* accumulates the value of x(j) being solved */
+
+   for( j = N-1,     jaj  = (N-1)*(ldap1), jx  = (N-1)*INCX;  /* last entry first; jaj is the index of A(j,j), used only as a base */
+        j >= 0; j--, jaj -= ldap1,         jx -= INCX )
+   {
+      t0 = X[jx];
+      for( i = j+1,    iaij  = 1+jaj, ix  = jx + INCX;
+           i < N; i++, iaij += 1,     ix += INCX ) { t0 -= A[iaij] * X[ix]; }  /* subtract A(i,j) * x(i) for the solved entries i > j */
+      X[jx] = t0;  /* no division: A(j,j) is never read */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Back substitution with the upper triangle of A (diagonal included): X becomes the solution x of A * x = X */
+   int                        i, iaij, ix, j, jaj, jx;  /* A(r,c) is A[r+c*LDA]; entry j of the vector is X[j*INCX] */
+   register double            t0;  /* the freshly solved entry x(j) */
+
+   for( j = N-1,     jaj  = (N-1)*LDA, jx  = (N-1)*INCX;  /* last entry first; jaj is the start of column j of A */
+        j >= 0; j--, jaj -= LDA,       jx -= INCX )
+   {
+      X[jx] /= A[j+jaj]; t0 = X[jx];  /* x(j) is final once divided by the diagonal A(j,j) */
+      for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX )
+      { X[ix] -= t0 * A[iaij]; }  /* earlier entries: x(i) -= x(j) * A(i,j) */
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUNU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUNU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+/* Upper, Not transposed, Unit: column-wise update, diagonal never read. */
+   int                        col, row;
+   double                     xcol;
+
+   for( col = N - 1; col >= 0; col-- )
+   {
+      xcol = X[col * INCX];
+      for( row = 0; row < col; row++ )
+      { X[row * INCX] -= xcol * A[row + col * LDA]; }
+   }
+}
+
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTN
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTN( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper, Transposed, Non-unit: forward-substitute  A^T * x = b. */
+   int                        col, row;
+   double                     acc;
+
+   for( col = 0; col < N; col++ )
+   {
+      acc = X[col * INCX];
+      for( row = 0; row < col; row++ )
+      { acc -= A[row + col * LDA] * X[row * INCX]; }
+      X[col * INCX] = acc / A[col + col * LDA];
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsvUTU
+(
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsvUTU( N, A, LDA, X, INCX )
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{  /* Upper, Transposed, Unit: forward substitution, diagonal never read. */
+   int                        col, row;
+   double                     acc;
+
+   for( col = 0; col < N; col++ )
+   {
+      acc = X[col * INCX];
+      for( row = 0; row < col; row++ )
+      { acc -= A[row + col * LDA] * X[row * INCX]; }
+      X[col * INCX] = acc;
+   }
+}
+
+#ifdef STDC_HEADERS
+static void HPL_dtrsv0
+(
+   const enum HPL_UPLO        UPLO,
+   const enum HPL_TRANS       TRANS,
+   const enum HPL_DIAG        DIAG,
+   const int                  N,
+   const double               * A,
+   const int                  LDA,
+   double                     * X,
+   const int                  INCX
+)
+#else
+static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_UPLO        UPLO;
+   const enum HPL_TRANS       TRANS;
+   const enum HPL_DIAG        DIAG;
+   const int                  INCX, LDA, N;
+   const double               * A;
+   double                     * X;
+#endif
+{
+/*
+ * Dispatch on (UPLO, TRANS, DIAG) to the matching triangular kernel.
+ */
+   const int                  isUpper   = ( UPLO  == HplUpper   );
+   const int                  isNoTrans = ( TRANS == HplNoTrans );
+   const int                  isNonUnit = ( DIAG  == HplNonUnit );
+
+   if( N == 0 ) return;
+
+   if( isUpper && isNoTrans )
+   {
+      if( isNonUnit ) HPL_dtrsvUNN( N, A, LDA, X, INCX );
+      else            HPL_dtrsvUNU( N, A, LDA, X, INCX );
+   }
+   else if( isUpper )
+   {
+      if( isNonUnit ) HPL_dtrsvUTN( N, A, LDA, X, INCX );
+      else            HPL_dtrsvUTU( N, A, LDA, X, INCX );
+   }
+   else if( isNoTrans )
+   {
+      if( isNonUnit ) HPL_dtrsvLNN( N, A, LDA, X, INCX );
+      else            HPL_dtrsvLNU( N, A, LDA, X, INCX );
+   }
+   else
+   {
+      if( isNonUnit ) HPL_dtrsvLTN( N, A, LDA, X, INCX );
+      else            HPL_dtrsvLTU( N, A, LDA, X, INCX );
+   }
+}
+
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dtrsv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_UPLO              UPLO,
+   const enum HPL_TRANS             TRANS,
+   const enum HPL_DIAG              DIAG,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         X,
+   const int                        INCX
+)
+#else
+void HPL_dtrsv
+( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_UPLO              UPLO;
+   const enum HPL_TRANS             TRANS;
+   const enum HPL_DIAG              DIAG;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         X;
+   const int                        INCX;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dtrsv solves one of the systems of equations
+ *  
+ *     A * x = b,   or   A^T * x = b,
+ *  
+ * where b and x are n-element vectors and  A  is an n by n non-unit, or
+ * unit, upper or lower triangular matrix.
+ *  
+ * No test for  singularity  or  near-singularity  is included  in  this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Arguments
+ * =========
+ *
+ * ORDER   (local input)                 const enum HPL_ORDER
+ *         On entry, ORDER  specifies the storage format of the operands
+ *         as follows:                                                  
+ *            ORDER = HplRowMajor,                                      
+ *            ORDER = HplColumnMajor.                                   
+ *
+ * UPLO    (local input)                 const enum HPL_UPLO
+ *         On  entry,   UPLO   specifies  whether  the  upper  or  lower
+ *         triangular  part  of the array  A  is to be referenced.  When
+ *         UPLO==HplUpper, only  the upper triangular part of A is to be
+ *         referenced, otherwise only the lower triangular part of A is 
+ *         to be referenced. 
+ *
+ * TRANS   (local input)                 const enum HPL_TRANS
+ *         On entry,  TRANS  specifies  the equations  to  be  solved as
+ *         follows:
+ *            TRANS==HplNoTrans     A   * x = b,
+ *            TRANS==HplTrans       A^T * x = b.
+ *
+ * DIAG    (local input)                 const enum HPL_DIAG
+ *         On entry,  DIAG  specifies  whether  A  is unit triangular or
+ *         not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+ *         and otherwise, A is not assumed to be unit triangular.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the order of the matrix A. N must be at
+ *         least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points  to an array of size equal to or greater
+ *         than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+ *         n by n upper triangular  part of the array A must contain the
+ *         upper triangular  matrix and the  strictly  lower  triangular
+ *         part of A is not referenced.  When  UPLO==HplLower  on entry,
+ *         the  leading n by n lower triangular part of the array A must
+ *         contain the lower triangular matrix  and  the  strictly upper
+ *         triangular part of A is not referenced.
+ *          
+ *         Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+ *         are not referenced either,  but are assumed to be unity.
+ *
+ * LDA     (local input)                 const int
+ *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
+ *         declared  in  the  calling  (sub) program.  LDA  must  be  at
+ *         least MAX(1,n).
+ *
+ * X       (local input/output)          double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *         Before entry,  the  incremented array  X  must contain  the n
+ *         element right-hand side vector b. On exit,  X  is overwritten
+ *         with the solution vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+#endif /* HPL_CALL_CBLAS: every argument is forwarded unchanged */
+#ifdef HPL_CALL_VSIPL
+   if( ORDER == HplColumnMajor )
+   {
+      HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX );
+   }
+   else /* other ORDER: call the same kernels with UPLO and TRANS flipped */
+   {
+      HPL_dtrsv0( ( UPLO  == HplUpper   ? HplLower : HplUpper   ),
+                  ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
+                  DIAG, N, A, LDA, X, INCX );
+   }
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS
+#ifdef StringSunStyle
+#ifdef HPL_USE_F77_INTEGER_DEF
+   F77_INTEGER               IONE = 1;
+#else
+   int                       IONE = 1;
+#endif /* HPL_USE_F77_INTEGER_DEF */
+#endif /* StringSunStyle */
+#ifdef StringStructVal
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringStructPtr
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+#ifdef StringCrayStyle
+   F77_CHAR                  fuplo, ftran, fdiag;
+#endif
+ 
+#ifdef HPL_USE_F77_INTEGER_DEF 
+   const F77_INTEGER         F77N = N, F77lda = LDA, F77incx = INCX;
+#else
+#define F77N              N
+#define F77lda            LDA
+#define F77incx           INCX
+#endif /* HPL_USE_F77_INTEGER_DEF */
+   char                      cuplo, ctran, cdiag;
+/* Encode the flags as characters; non-column-major swaps U/L and N/T. */
+   if( ORDER == HplColumnMajor )
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
+      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+   }
+   else
+   {
+      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
+      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+   }
+   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
+/* The String* macros select how the three flag characters are passed. */
+#ifdef StringSunStyle
+   F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx,
+             IONE, IONE, IONE );
+#endif
+#ifdef StringCrayStyle
+   ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag );
+   fuplo = HPL_C2F_CHAR( cuplo );
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructVal
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( fuplo,  ftran,  fdiag,  &F77N, A, &F77lda, X, &F77incx );
+#endif
+#ifdef StringStructPtr
+   fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran;
+   fdiag.len = 1; fdiag.cp = &cdiag;
+   F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx );
+#endif
+
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_dtrsv
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c
new file mode 100644
index 000000000..5ceabdf25
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_idamax
+
+#ifdef STDC_HEADERS
+int HPL_idamax
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX
+)
+#else
+int HPL_idamax
+( N, X, INCX )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_idamax returns  the index in an n-vector  x  of the first element
+ * having maximum absolute value.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vector x. N  must  be
+ *         at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_CALL_CBLAS
+   return( (int)(cblas_idamax( N, X, INCX )) );
+#endif /* HPL_CALL_CBLAS */
+#ifdef HPL_CALL_VSIPL
+   register double           absxi, smax = HPL_rzero, x0, x1, x2, x3,
+                             x4, x5, x6, x7;
+   const double              * StX;
+   register int              imax = 0, i = 0, j;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
+                             incX4 = 4 * INCX, incX5 = 5 * INCX,
+                             incX6 = 6 * INCX, incX7 = 7 * INCX,
+                             incX8 = 8 * INCX;
+
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu: N rounded down to a multiple of 8 */
+      {
+         StX = X + nu * INCX;
+/* Eight elements per pass; the strict '>' keeps the first maximum seen. */
+         do
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+ 
+            absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+            absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; }
+            i    += 1;
+ 
+            X    += incX8;
+ 
+         } while( X != StX );
+      }
+/* Remainder: the last N - nu elements, one at a time. */
+      for( j = N - nu; j != 0; j-- )
+      {
+         x0    = (*X);
+         absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; }
+         i    += 1;
+         X    += INCX;
+      }
+   }
+   return( imax ); /* zero-based position; stays 0 when N <= 0 */
+#endif /* HPL_CALL_VSIPL */
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX;
+#else
+#define F77N                 N
+#define F77incx              INCX
+#endif
+   int                       imax = 0;
+/* The -1 presumably maps a 1-based Fortran result to a 0-based index. */
+   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
+   return( imax );
+#endif /* HPL_CALL_FBLAS */
+/*
+ * End of HPL_idamax
+ */
+}
+ 
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile
new file mode 100644
index 000000000..ed9f3d0e2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile
@@ -0,0 +1,98 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h
+#
+## Object files ########################################################
+# One object per ../HPL_*.c wrapper; lib.grd archives them into $(HPLlib).
+HPL_blaobj       = \
+   HPL_dcopy.o            HPL_daxpy.o            HPL_dscal.o            \
+   HPL_idamax.o           HPL_dgemv.o            HPL_dtrsv.o            \
+   HPL_dger.o             HPL_dgemm.o            HPL_dtrsm.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files: never treat them as built.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp file, touched after the objects are archived.
+lib.grd : $(HPL_blaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One explicit rule per object: rebuilt when its ../HPL_*.c source or any header in $(INCdep) changes.
+HPL_dcopy.o            : ../HPL_dcopy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dcopy.c
+HPL_daxpy.o            : ../HPL_daxpy.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_daxpy.c
+HPL_dscal.o            : ../HPL_dscal.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dscal.c
+HPL_idamax.o           : ../HPL_idamax.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_idamax.c
+HPL_dgemv.o            : ../HPL_dgemv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemv.c
+HPL_dtrsv.o            : ../HPL_dtrsv.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsv.c
+HPL_dger.o             : ../HPL_dger.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dger.c
+HPL_dgemm.o            : ../HPL_dgemm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dgemm.c
+HPL_dtrsm.o            : ../HPL_dtrsm.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dtrsm.c
+#
+# ######################################################################
+# Delete the object files and the *.grd stamp files in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c
new file mode 100644
index 000000000..dd03b79b1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+ 
+#ifdef STDC_HEADERS
+int HPL_binit_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Status returned by HPL_packL
+ */
+   int                        packStatus;
+#endif
+/*
+ * Nothing to set up without a panel or when npcol <= 1
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+#ifdef HPL_USE_MPI_DATATYPE
+#ifdef HPL_COPY_L
+/*
+ * Stage the panel in a contiguous buffer first
+ */
+   HPL_copyL( PANEL );
+#endif
+/*
+ * Build the MPI user-defined data type through HPL_packL
+ */
+   packStatus = HPL_packL( PANEL, 0, PANEL->len, 0 );
+
+   if( packStatus != MPI_SUCCESS ) return( HPL_FAILURE );
+   return( HPL_SUCCESS );
+#else
+/*
+ * No MPI data type in use: the panel is always copied
+ */
+   HPL_copyL( PANEL );
+   return( HPL_SUCCESS );
+#endif
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by entry 0 of the panel's buffers/counts/dtypes. */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len values of type MPI_DOUBLE starting at PANEL->L2. */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  the root sends the panel to its next neighbor and,  when
+ * size > 2, to the neighbor after that.  Any other process  probes  its
+ * partner (the root for the two processes following it, otherwise prev);
+ * if a message is pending it is received and forwarded to next,  unless
+ * prev or next is the root.  If none is pending, return HPL_KEEP_TESTING.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );
+ 
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next,
+                          size ), msgid, comm );
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( size > 2 ) && 
+          ( MModSub1( prev, size ) == root ) ) partner = root;
+      else                                     partner = prev;
+/* go is set non-zero by MPI_Iprobe when a matching message is pending. */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( prev != root ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * If every MPI call made above succeeded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Status returned by MPI_Type_free
+ */
+   int                        freeStatus;
+#endif
+/*
+ * Nothing to release without a panel or when npcol <= 1
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+      return( HPL_SUCCESS );
+/*
+ * Free the MPI data type held in PANEL->dtypes[0], when one is in use
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   freeStatus = MPI_Type_free( &PANEL->dtypes[0] );
+   return( ( freeStatus != MPI_SUCCESS ) ? HPL_FAILURE : HPL_SUCCESS );
+#else
+   return( HPL_SUCCESS );
+#endif
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c
new file mode 100644
index 000000000..dd5eb2d12
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c
@@ -0,0 +1,216 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer.  The copy is unconditional
+ * when MPI data types are not used, and only made with HPL_COPY_L when
+ * they are.
+ */
+   HPL_copyL( PANEL );
+#endif
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type.  Any code returned by HPL_packL
+ * other than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/*
+ * Every step enabled at compile time has completed.
+ */
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by slot 0 of the panel's buffers/counts/dtypes. */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+
+#else
+/* Message is PANEL->len items of MPI_DOUBLE starting at PANEL->L2.  */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* All three expand in terms of a variable named PANEL at the use site. */
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_1ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_1ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..   (ierr: last MPI code, go: MPI_Iprobe flag)
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, prev, rank, root,
+                              size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * One step of the ring broadcast. The root sends the panel to the next
+ * process.  Any other process probes prev for the message:  if it is
+ * there, it is received and, unless next is the root, forwarded to next;
+ * otherwise the caller is told (HPL_KEEP_TESTING) that the panel has not
+ * been received yet.
+ */
+   rank = PANEL->grid->mycol; comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;        msgid = PANEL->msgid;
+/* The root only sends;  every other process polls prev for the message. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank,
+                       size ), msgid, comm );
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      /* MPI_Iprobe does not block: go is nonzero once a message waits */
+      ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] );
+      /* A failed probe falls through to the status report as a failure */
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
+                             comm, &PANEL->status[0] );
+            next = MModAdd1( rank, size );
+            if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
+                                msgid, comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * Root after sending, others after receiving (and forwarding): report
+ * HPL_SUCCESS, or HPL_FAILURE if the last MPI call returned an error.
+ */  
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_1ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_1ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the MPI data type stored in dtypes[0].  Any MPI error code other
+ * than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/* Either no data type was in use, or it has been freed successfully. */
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c
new file mode 100644
index 000000000..56581ea0d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c
@@ -0,0 +1,236 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer.  The copy is unconditional
+ * when MPI data types are not used, and only made with HPL_COPY_L when
+ * they are.
+ */
+   HPL_copyL( PANEL );
+#endif
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type.  Any code returned by HPL_packL
+ * other than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/*
+ * Every step enabled at compile time has completed.
+ */
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by slot 0 of the panel's buffers/counts/dtypes. */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of MPI_DOUBLE starting at PANEL->L2.  */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* All three expand in terms of a variable named PANEL at the use site. */
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2rinM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2rinM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..   (ierr: last MPI code, go: MPI_Iprobe flag)
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, prev,
+                              rank, roo2, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: root process send to its two right neighbors and mid-pro-
+ * cess. If I am not the root process, probe for message. If the message
+ * is there, then receive it. If I am not the last process of both rings
+ * then forward it to the next.  Otherwise,  inform  the caller that the
+ * panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2: root advanced by ( size + 1 ) >> 1 (MModAdd), the mid-process. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      /* Only with size > 2: second neighbor (unless it is roo2), roo2 */
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         if( MModAdd1( next, size ) != roo2 )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE,
+                             MModAdd1( next, size ), msgid, comm );
+         }
+         /* roo2 is only served if no earlier send reported an error */
+         if( ierr == MPI_SUCCESS )
+         {
+            ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                             comm );
+         }
+      }
+   }
+   else
+   {
+      prev = MModSub1( rank, size );
+      if( ( prev == root ) || ( rank == roo2 ) ||
+          ( MModSub1( prev,  size )  == root ) ) partner = root;
+      else                                       partner = prev;
+      /* Processes served directly by the root listen to it, others to prev */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+      /* go != 0 means the message from partner can be received now */
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) && ( prev != root ) &&
+                ( next != roo2        ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * Root after sending, others after receiving (and forwarding): report
+ * HPL_SUCCESS, or HPL_FAILURE if the last MPI call returned an error.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+} 
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2rinM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2rinM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the MPI data type stored in dtypes[0].  Any MPI error code other
+ * than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/* Either no data type was in use, or it has been freed successfully. */
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c
new file mode 100644
index 000000000..f0e6e2647
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c
@@ -0,0 +1,224 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
+#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
+#endif
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_binit_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+/*
+ * Copy the panel into a contiguous buffer.  The copy is unconditional
+ * when MPI data types are not used, and only made with HPL_COPY_L when
+ * they are.
+ */
+   HPL_copyL( PANEL );
+#endif
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Create the MPI user-defined data type.  Any code returned by HPL_packL
+ * other than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( HPL_packL( PANEL, 0, PANEL->len, 0 ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/*
+ * Every step enabled at compile time has completed.
+ */
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* Message described by slot 0 of the panel's buffers/counts/dtypes. */
+#define   _M_BUFF     PANEL->buffers[0]
+#define   _M_COUNT    PANEL->counts[0]
+#define   _M_TYPE     PANEL->dtypes[0]
+ 
+#else
+/* Message is PANEL->len items of MPI_DOUBLE starting at PANEL->L2.  */
+#define   _M_BUFF     (void *)(PANEL->L2)
+#define   _M_COUNT    PANEL->len
+#define   _M_TYPE     MPI_DOUBLE
+/* All three expand in terms of a variable named PANEL at the use site. */
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_2ring
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_2ring( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{
+/*
+ * .. Local Variables ..   (ierr: last MPI code, go: MPI_Iprobe flag)
+ */
+   MPI_Comm                   comm;
+   int                        ierr, go, next, msgid, partner, rank,
+                              roo2, root, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase: root process  send to its right neighbor and mid-process.
+ * If I am not the root process,  probe for message.   If the message is
+ * there,  then receive it,  and  if I am not the last process  of  both
+ * rings, then forward it to the next. Otherwise, inform the caller that
+ * the panel has still not been received.
+ */
+   rank = PANEL->grid->mycol;           comm  = PANEL->grid->row_comm;
+   root = PANEL->pcol;                  msgid = PANEL->msgid;
+   next = MModAdd1( rank, size );       roo2  = ( ( size + 1 ) >> 1 );
+   roo2 = MModAdd(  root, roo2, size );
+/* roo2: root advanced by ( size + 1 ) >> 1 (MModAdd), the mid-process. */
+   if( rank == root )
+   {
+      ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm );
+      if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) )
+      {
+         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid,
+                          comm );
+      }
+   }
+   else
+   {
+      partner = MModSub1( rank, size );
+      if( ( partner == root ) || ( rank == roo2 ) ) partner = root;
+      /* roo2 listens to the root;  everyone else to MModSub1( rank ) */
+      ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+      /* go != 0 means the message from partner can be received now */
+      if( ierr == MPI_SUCCESS )
+      {
+         if( go != 0 )
+         {
+            ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid,
+                             comm, &PANEL->status[0] );
+            if( ( ierr == MPI_SUCCESS ) &&
+                ( next != roo2 ) && ( next != root ) )
+            {
+               ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid,
+                                comm );
+            }
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); }
+      }
+   }
+/*
+ * Root after sending, others after receiving (and forwarding): report
+ * HPL_SUCCESS, or HPL_FAILURE if the last MPI call returned an error.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_2ring
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_2ring( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Without a panel,  or when grid->npcol is at most 1,  nothing is done:
+ * report success immediately.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {
+      return( HPL_SUCCESS );
+   }
+
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Free the MPI data type stored in dtypes[0].  Any MPI error code other
+ * than MPI_SUCCESS is reported to the caller as HPL_FAILURE.
+ */
+   if( MPI_Type_free( &PANEL->dtypes[0] ) != MPI_SUCCESS )
+   {
+      return( HPL_FAILURE );
+   }
+#endif
+/* Either no data type was in use, or it has been freed successfully. */
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c
new file mode 100644
index 000000000..100161152
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bcast
+(
+   HPL_T_panel *                    PANEL,
+   int *                            IFLAG
+)
+#else
+int HPL_bcast
+( PANEL, IFLAG )
+   HPL_T_panel *                    PANEL;
+   int *                            IFLAG;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bcast broadcasts  the  current  panel.  Successful  completion is
+ * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+ * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+ * not completed, in which case this function should be called again.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * IFLAG   (output)                      int *
+ *         On exit,  IFLAG  indicates  whether  or not the broadcast has
+ *         occurred (HPL_SUCCESS when there is nothing to broadcast).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( PANEL->grid->npcol <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+/* Unknown topology: nothing is sent, IFLAG still gets a defined value */
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break;
+      case HPL_1RING   : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break;
+      case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break;
+      case HPL_2RING   : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break;
+      case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break;
+      case HPL_BLONG   : ierr = HPL_bcast_blong( PANEL, IFLAG ); break;
+      default          : *IFLAG = ierr = HPL_SUCCESS;
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_bcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c
new file mode 100644
index 000000000..3daf72b7d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_binit
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_binit
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_binit initializes  a  row  broadcast.  Successful  completion  is
+ * indicated by the returned error code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.  When PANEL is NULL,  or the grid has at most
+ *         one process column,  HPL_SUCCESS is returned immediately.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+   HPL_T_TOP                  top;
+/* ..
+ * .. Executable Statements ..  (a NULL panel is a no-op, as in HPL_bcast)
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) ) return( HPL_SUCCESS );
+/*
+ * Retrieve the selected virtual broadcast topology
+ */
+   top = PANEL->algo->btopo;
+
+   switch( top )
+   {
+      case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break;
+      case HPL_1RING   : ierr = HPL_binit_1ring( PANEL ); break;
+      case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break;
+      case HPL_2RING   : ierr = HPL_binit_2ring( PANEL ); break;
+      case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break;
+      case HPL_BLONG   : ierr = HPL_binit_blong( PANEL ); break;
+      default          : ierr = HPL_SUCCESS; /* unknown topology: no-op */
+   }
+ 
+   return( ierr );
+/*
+ * End of HPL_binit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c
new file mode 100644
index 000000000..5fa221937
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c
@@ -0,0 +1,445 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that:  force the copy */
+#endif
+#endif
+/* Slots of PANEL->buffers[] / counts[] / dtypes[] used by the macros below */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blonM is the  initialization step of the  blonM broadcast.
+ * Nothing is done when PANEL is NULL  or when the grid has at most one
+ * process column. Otherwise HPL_copyL copies the panel into a contiguous
+ * buffer,  except when  HPL_USE_MPI_DATATYPE is defined and HPL_COPY_L
+ * is not.  HPL_SUCCESS is returned in every case.
+ * ---------------------------------------------------------------------
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+      HPL_copyL( PANEL );
+#endif
+   }
+
+   return( HPL_SUCCESS );
+}
+ 
+#ifdef HPL_USE_MPI_DATATYPE
+/* MPI datatype build: every message goes through a PANEL send/recv slot */
+#define   _M_BUFF_S1        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S1       PANEL->counts[I_SEND]
+#define   _M_TYPE_S1        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_S2        PANEL->buffers[I_SEND]
+#define   _M_COUNT_S2       PANEL->counts[I_SEND]
+#define   _M_TYPE_S2        PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R1        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R1       PANEL->counts[I_RECV]
+#define   _M_TYPE_R1        PANEL->dtypes[I_RECV]
+
+#define   _M_BUFF_R2        PANEL->buffers[I_RECV]
+#define   _M_COUNT_R2       PANEL->counts[I_RECV]
+#define   _M_TYPE_R2        PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+
+#else
+/* Otherwise: messages are MPI_DOUBLE ranges of PANEL->L2.  _S1 / _R1: all PANEL->len entries */
+#define   _M_BUFF_S1        (void *)(PANEL->L2)
+#define   _M_COUNT_S1       PANEL->len
+#define   _M_TYPE_S1        MPI_DOUBLE
+/* _S2 / _R2: lbuf entries at offset ibuf (locals of HPL_bcast_blonM) */
+#define   _M_BUFF_S2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S2       lbuf
+#define   _M_TYPE_S2        MPI_DOUBLE
+ 
+#define   _M_BUFF_R1        (void *)(PANEL->L2)
+#define   _M_COUNT_R1       PANEL->len
+#define   _M_TYPE_R1        MPI_DOUBLE
+ 
+#define   _M_BUFF_R2        (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R2       lbuf
+#define   _M_TYPE_R2        MPI_DOUBLE
+/* _ROLL_*: lbufS / lbufR entries at offsets ibufS / ibufR (rolling loop) */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blonM
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blonM( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * .. Local Variables ..  (go stays 1: the MPI_Iprobe calls are disabled)
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf,
+                              ibufR, ibufS, dummy=0, indx, ip2=1, k, l,
+                              lbuf, lbufR, lbufS, mask=1, msgid, mydist,
+                              mydist2, next, npm1, npm2, partner, prev,
+                              rank, root, size;
+/* ..
+ * .. Executable Statements ..  On return, *IFLAG equals the value returned.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  root process  sends to its right neighbor,  then spread
+ * the panel on the other npcol - 2 processes.  If  I  am  not the root
+ * process, probe for message received.  If the message is there,  then
+ * receive it. If I am just after the root process, return.  Otherwise,
+ * keep spreading on those npcol - 2 processes.  Otherwise,  inform the
+ * caller that the panel has still not been received.
+ */
+   comm = PANEL->grid->row_comm; rank  = PANEL->grid->mycol;
+   root = PANEL->pcol;           msgid = PANEL->msgid;
+   prev = MModSub1( rank, size );
+ 
+   if( rank == root )
+   {
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   HPL_packL( PANEL, 0, PANEL->len, I_SEND );
+#endif
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1,
+                             MModAdd1( rank, size ), msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+   else if( prev == root )
+   {
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired,  so  go  keeps its initial value 1  and the receive below
+ * is always posted.
+ *    ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] );
+ */
+      if( ierr == MPI_SUCCESS )
+      {                                  /* if panel is here, proceed */
+         if( go != 0 )
+         {
+#ifdef HPL_USE_MPI_DATATYPE
+            ierr =      HPL_packL( PANEL, 0, PANEL->len, I_RECV );
+#endif
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1,
+                                  root, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+         }
+         else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+      }
+   }
+/*
+ * if I am just after the root, exit now. The message receive  completed
+ * successfully, this guy is done. If there are only 2 processes in each
+ * row of processes, we are done as well.
+ */
+   if( ( prev == root ) || ( size == 2 ) )
+   {
+      *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+      return( *IFLAG );
+   }
+/*
+ * Otherwise, proceed with broadcast -  Spread  the panel across process
+ * columns
+ */
+   npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len;
+/* ip2: largest power of 2 <= npm2;  mask: 2 * ip2 - 1 (all lower bits set) */
+   k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   if( rank == root ) mydist2 = ( mydist = 0 );
+   else   mydist2 = ( mydist  = MModSub( rank, root, size ) - 1 );
+/* piece length: COUNT / npm1, then Mmax( ., 1 ) (presumably a floor of 1) */
+   indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 );
+ 
+   do
+   {
+      mask ^= ip2;
+
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+
+         partner = mydist ^ ip2;
+
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+  
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                   msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < npm1 )
+         {
+            partner = MModAdd( root, partner, size );
+            if( partner != root ) partner = MModAdd1( partner, size );  
+
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces;  prev / next skip the process that follows the root
+ */
+   prev = MModSub1( rank, size );
+   if( MModSub1( prev, size ) == root ) prev = root;
+   next = MModAdd1( rank, size );
+   if( rank == root ) next = MModAdd1( next, size );
+
+   for( k = 0; k < npm2; k++ )
+   {
+      l = ( k >> 1 );
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   npm1 ) ) * count;
+         lbufS = ( indx == npm2 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count;
+         lbufR = ( indx == npm2 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages (the MPI_Wait below is skipped once ierr is set)
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+ 
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+ 
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blonM
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blonM( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Wait step of the  blonM  broadcast:  no work is performed here and
+ * every path returns HPL_SUCCESS.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {  return( HPL_SUCCESS ); }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c
new file mode 100644
index 000000000..e57f11bcc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c
@@ -0,0 +1,363 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef HPL_NO_MPI_DATATYPE  /* The user insists on not using MPI types */
+#ifndef HPL_COPY_L       /* and also wants to avoid the copy of L ... */
+#define HPL_COPY_L   /* well, sorry, cannot do that:  force the copy */
+#endif
+#endif
+/* Slots of PANEL->buffers[] / counts[] / dtypes[] used by the macros below */
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+int HPL_binit_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_binit_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_binit_blong is the  initialization step of the  blong broadcast.
+ * Nothing is done when PANEL is NULL  or when the grid has at most one
+ * process column. Otherwise HPL_copyL copies the panel into a contiguous
+ * buffer,  except when  HPL_USE_MPI_DATATYPE is defined and HPL_COPY_L
+ * is not.  HPL_SUCCESS is returned in every case.
+ * ---------------------------------------------------------------------
+ */
+   if( ( PANEL != NULL ) && ( PANEL->grid->npcol > 1 ) )
+   {
+#if !defined( HPL_USE_MPI_DATATYPE ) || defined( HPL_COPY_L )
+      HPL_copyL( PANEL );
+#endif
+   }
+
+   return( HPL_SUCCESS );
+}
+
+#ifdef HPL_USE_MPI_DATATYPE
+/* MPI datatype build: every message goes through a PANEL send/recv slot */
+#define   _M_BUFF_S         PANEL->buffers[I_SEND]
+#define   _M_COUNT_S        PANEL->counts[I_SEND]
+#define   _M_TYPE_S         PANEL->dtypes[I_SEND]
+ 
+#define   _M_BUFF_R         PANEL->buffers[I_RECV]
+#define   _M_COUNT_R        PANEL->counts[I_RECV]
+#define   _M_TYPE_R         PANEL->dtypes[I_RECV]
+ 
+#define   _M_ROLL_BUFF_S    PANEL->buffers[I_SEND]
+#define   _M_ROLL_COUNT_S   PANEL->counts[I_SEND]
+#define   _M_ROLL_TYPE_S    PANEL->dtypes[I_SEND]
+ 
+#define   _M_ROLL_BUFF_R    PANEL->buffers[I_RECV]
+#define   _M_ROLL_COUNT_R   PANEL->counts[I_RECV]
+#define   _M_ROLL_TYPE_R    PANEL->dtypes[I_RECV]
+ 
+#else
+/* Otherwise: MPI_DOUBLE ranges of PANEL->L2; _S / _R: lbuf entries at ibuf */
+#define   _M_BUFF_S         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_S        lbuf
+#define   _M_TYPE_S         MPI_DOUBLE
+ 
+#define   _M_BUFF_R         (void *)(PANEL->L2 + ibuf)
+#define   _M_COUNT_R        lbuf
+#define   _M_TYPE_R         MPI_DOUBLE
+/* _ROLL_*: lbufS / lbufR entries at offsets ibufS / ibufR (rolling loop) */
+#define   _M_ROLL_BUFF_S    (void *)(PANEL->L2 + ibufS)
+#define   _M_ROLL_COUNT_S   lbufS
+#define   _M_ROLL_TYPE_S    MPI_DOUBLE
+ 
+#define   _M_ROLL_BUFF_R    (void *)(PANEL->L2 + ibufR)
+#define   _M_ROLL_COUNT_R   lbufR
+#define   _M_ROLL_TYPE_R    MPI_DOUBLE
+ 
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_bcast_blong
+(
+   HPL_T_panel                * PANEL,
+   int                        * IFLAG
+)
+#else
+int HPL_bcast_blong( PANEL, IFLAG )
+   HPL_T_panel                * PANEL;
+   int                        * IFLAG;
+#endif
+{ 
+/*
+ * .. Local Variables ..  (mask and ip2 are read from the process grid)
+ */
+   MPI_Comm                   comm;
+   int                        COUNT, count, dummy=0, ierr=MPI_SUCCESS,
+                              ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf,
+                              lbufR, lbufS, mask, msgid, mydist, mydist2,
+                              next, npm1, partner, prev, rank, root, size;
+/* ..
+ * .. Executable Statements ..  On return, *IFLAG equals the value returned.
+ */
+   if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+   if( ( size = PANEL->grid->npcol ) <= 1 )
+   {                     *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); }
+/*
+ * Cast phase:  If I am the root process, start spreading the panel.  If
+ * I am not the root process,  receive my piece from my partner and keep
+ * spreading  in a blocking fashion.  The probing  that used to  report
+ * HPL_KEEP_TESTING  is disabled below,  so  this routine  only returns
+ * HPL_SUCCESS or HPL_FAILURE.
+ */
+   comm    = PANEL->grid->row_comm;  rank  = PANEL->grid->mycol;
+   mask    = PANEL->grid->col_mask;  ip2   = PANEL->grid->col_ip2m1;
+   root    = PANEL->pcol;            msgid = PANEL->msgid;
+   COUNT   = PANEL->len;             npm1  = size - 1;
+   mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2;
+   count   = COUNT / size; count = Mmax( count, 1 );
+/*
+ * Spread the panel across process columns
+ */
+   do
+   {
+      mask ^= ip2;
+ 
+      if( ( mydist & mask ) == 0 )
+      {
+         lbuf = COUNT - ( ibuf = indx * count );
+         if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); }
+ 
+         partner = mydist ^ ip2;
+ 
+         if( ( mydist & ip2 ) != 0 )
+         {
+            partner = MModAdd( root, partner, size );
+/*
+ * This probing mechanism causes problems when lookahead is on. Too many
+ * messages are exchanged  in this virtual topology  causing  a hang on
+ * some machines. It is currently disabled until a better understanding
+ * is acquired.  (go is not declared in this routine any more.)
+ */
+#if 0
+            ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] );
+            if( ierr == MPI_SUCCESS )
+            {        /* if panel is not here, return and keep testing */
+               if( go == 0 )
+               { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); }
+            }
+#endif
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_RECV );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R,
+                                     partner, msgid, comm, &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+            }
+            else       /* Recv message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                     msgid, comm, &PANEL->status[0] );
+            }
+         }
+         else if( partner < size )
+         {
+            partner = MModAdd( root, partner, size );
+ 
+            if( lbuf > 0 )
+            {
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   HPL_packL( PANEL, ibuf, lbuf, I_SEND );
+#endif
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S,
+                                      partner, msgid, comm );
+#ifdef HPL_USE_MPI_DATATYPE
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+            }
+            else       /* Send message of length zero to enable probe */
+            {
+               if( ierr == MPI_SUCCESS )
+                  ierr =   MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE,
+                                      partner, msgid, comm );
+            }
+         }
+      }
+ 
+      if( mydist2 < ip2 ) {  ip2 >>= 1; indx -= ip2; }
+      else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; }
+ 
+   } while( ip2 > 0 );
+/*
+ * Roll the pieces between ring neighbors  (npm1 exchange steps)
+ */
+   prev = MModSub1( rank, size ); next = MModAdd1( rank, size );
+
+   for( k = 0; k < npm1; k++ )
+   {
+      l = ( k >> 1 ); 
+/*
+ * Who is sending to whom and how much
+ */
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         ibufS = ( indx = MModAdd( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = prev;
+      }
+      else
+      {
+         ibufS = ( indx = MModSub( mydist, l,   size ) ) * count;
+         lbufS = ( indx == npm1 ? COUNT : ibufS + count );
+         lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS );
+
+         ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count;
+         lbufR = ( indx == npm1 ? COUNT : ibufR + count );
+         lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR );
+
+         partner = next;
+      }
+/*
+ * Exchange the messages (the MPI_Wait below is skipped once ierr is set)
+ */
+      if( lbufS > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufS, lbufS, I_SEND );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S,
+                                 _M_ROLL_TYPE_S, partner, msgid, comm,
+                                 &PANEL->request[0] );
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner,
+                                 msgid, comm, &PANEL->request[0] );
+      }
+
+      if(  lbufR > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   HPL_packL( PANEL, ibufR, lbufR, I_RECV );
+#endif
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R,
+                               _M_ROLL_TYPE_R, partner, msgid, comm,
+                               &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &PANEL->dtypes[I_RECV] );
+#endif
+      }
+      else
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner,
+                               msgid, comm, &PANEL->status[0] );
+      }
+
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Wait ( &PANEL->request[0], &PANEL->status[0] );
+#ifdef HPL_USE_MPI_DATATYPE
+      if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) )
+         ierr =   MPI_Type_free( &PANEL->dtypes[I_SEND] );
+#endif
+   }
+/*
+ * If the message was received and being forwarded,  return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE.
+ */
+   *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE );
+
+   return( *IFLAG );
+}
+
+#ifdef STDC_HEADERS
+int HPL_bwait_blong
+(
+   HPL_T_panel *              PANEL
+)
+#else
+int HPL_bwait_blong( PANEL )
+   HPL_T_panel *              PANEL;
+#endif
+{
+/*
+ * Wait step of the  blong  broadcast:  no work is performed here and
+ * every path returns HPL_SUCCESS.
+ */
+   if( ( PANEL == NULL ) || ( PANEL->grid->npcol <= 1 ) )
+   {  return( HPL_SUCCESS ); }
+   return( HPL_SUCCESS );
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c
new file mode 100644
index 000000000..a2e0f4df8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_bwait
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_bwait
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_bwait  waits  for  the  row  broadcast  of the  current  panel to
+ * terminate,  using the wait routine of the panel broadcast  topology.
+ * Successful completion is indicated by the returned code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_TOP                  topology;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); }
+/*
+ * Dispatch on the virtual broadcast topology selected for this panel
+ */
+   topology = PANEL->algo->btopo;
+   switch( topology )
+   {
+      case HPL_1RING_M : return( HPL_bwait_1rinM( PANEL ) );
+      case HPL_1RING   : return( HPL_bwait_1ring( PANEL ) );
+      case HPL_2RING_M : return( HPL_bwait_2rinM( PANEL ) );
+      case HPL_2RING   : return( HPL_bwait_2ring( PANEL ) );
+      case HPL_BLONG_M : return( HPL_bwait_blonM( PANEL ) );
+      case HPL_BLONG   : return( HPL_bwait_blong( PANEL ) );
+      default          : break;
+   }
+/*
+ * Topology not handled above: nothing to wait for
+ */
+   return( HPL_SUCCESS );
+/*
+ * End of HPL_bwait
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c
new file mode 100644
index 000000000..04f765a6b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c
@@ -0,0 +1,108 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_copyL
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_copyL
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_copyL copies the panel of columns from  PANEL->A  into the work-
+ * space  PANEL->L2  for later broadcast.  Only processes in the column
+ * PANEL->pcol perform the copy; all others return immediately.
+ *  
+ * The copy of this panel  into  a contiguous buffer  can be enforced by
+ * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        jb, lda, skip;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( PANEL->grid->mycol != PANEL->pcol ) return;
+/*
+ * In process row PANEL->prow the first jb rows are left out of the copy
+ */
+   jb   = PANEL->jb;
+   lda  = PANEL->lda;
+   skip = ( PANEL->grid->myrow == PANEL->prow ? jb : 0 );
+/*
+ * Source block starts jb columns before PANEL->A,  skip rows down;  it
+ * is copied into PANEL->L2 (leading dimension PANEL->ldl2)
+ */
+   HPL_dlacpy( PANEL->mp - skip, jb,
+               Mptr( PANEL->A, skip, -jb, lda ), lda,
+               PANEL->L2, PANEL->ldl2 );
+
+/*
+ * End of HPL_copyL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c
new file mode 100644
index 000000000..8a70ef83d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c
@@ -0,0 +1,245 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_packL
+(
+   HPL_T_panel *                    PANEL,
+   const int                        INDEX,
+   const int                        LEN,
+   const int                        IBUF
+)
+#else
+int HPL_packL
+( PANEL, INDEX, LEN, IBUF )
+   HPL_T_panel *                    PANEL;
+   const int                        INDEX;
+   const int                        LEN;
+   const int                        IBUF;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_packL forms and commits the MPI data type PANEL->dtypes[IBUF] for
+ * the panel to be broadcast and sets PANEL->buffers/counts[IBUF];  it is
+ * a no-op unless HPL_USE_MPI_DATATYPE is set. Returns MPI_SUCCESS if ok.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (input/output)                HPL_T_panel *
+ *         On entry,  PANEL  points to the  current panel data structure
+ *         being broadcast.
+ *
+ * INDEX   (input)                       const int
+ *         On entry,  INDEX  is the offset (in entries) of the first entry
+ *         of the packed buffer being broadcast.
+ *
+ * LEN     (input)                       const int
+ *         On entry, LEN is the length of the packed buffer in entries.
+ *
+ * IBUF    (input)                       const int
+ *         On entry, IBUF  specifies the panel buffer/count/type entries
+ *         that should be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * .. Local Variables ..
+ */
+#ifndef HPL_COPY_L
+   MPI_Datatype               * type = NULL;
+   void                       * * * bufs = NULL;
+   double                     * A;
+   int                        * blen = NULL;
+   MPI_Aint                   * disp = NULL;
+   int                        curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1,
+                              jb, jbm, jbp1, lda, len, m, m1, nbufs;
+#else
+   int                        ierr;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_COPY_L
+/*
+ * Panel + L1 + DPIV  have been copied into a contiguous buffer - Create
+ * and commit a contiguous data type
+ */
+   PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX);
+   PANEL->counts [IBUF] = 1;
+
+   ierr =      MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+   return( ierr );
+#else
+/*
+ * Panel is not contiguous (because of LDA and also L1 + DPIV) -  Create
+ * and commit a struct data type
+ */
+   jbp1 = ( jb = PANEL->jb ) + 1;
+/*
+ * Temporaries to create the type struct.
+ */
+   bufs = (void     * * *)malloc( jbp1 * sizeof( void * *     ) );
+   blen = (int          *)malloc( jbp1 * sizeof( int          ) );
+   disp = (MPI_Aint     *)malloc( jbp1 * sizeof( MPI_Aint     ) );
+   type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) );
+ 
+   if( ( bufs != NULL ) && ( blen != NULL ) &&
+       ( disp != NULL ) && ( type != NULL ) )
+   {
+      m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow );
+      if( curr != 0 ) m -= jb;
+ 
+      len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m;
+ 
+      if( ( m > 0 ) && ( ibuf < jbm ) )
+      {
+/*
+ * Retrieve proper pointers depending on process row and column
+ */
+         if( PANEL->grid->mycol == PANEL->pcol )
+         {
+            lda = PANEL->lda;
+            if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); }
+            else            { A = Mptr( PANEL->A,  0, -jb, lda ); }
+         }
+         else { lda = PANEL->ldl2; A = PANEL->L2; }
+/*
+ * Pack the first (partial) column of L
+ */
+         m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m );
+         m1 = Mmin( len, m1 );
+ 
+         bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda ));
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = m1;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+         nbufs++; len -= m1; j1++; ibuf += m1;
+/*
+ * Pack the remaining columns of L
+ */
+         while( ( len > 0 ) && ( j1 < jb ) )
+         {
+            m1 = Mmin( len, m );
+ 
+            bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda ));
+            type[nbufs] = MPI_DOUBLE;
+            blen[nbufs] = m1;
+            if( ierr == MPI_SUCCESS )
+               ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+ 
+            nbufs++; len -= m1; j1++; ibuf += m1;
+         }
+      }
+/*
+ * Pack L1, DPIV, DINFO
+ */
+      if( len > 0 )
+      {                                            /* L1, DPIV, DINFO */
+         bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm);
+         type[nbufs] = MPI_DOUBLE;
+         blen[nbufs] = len;
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Get_address( bufs[nbufs], &disp[nbufs] );
+         nbufs++;
+      }
+/* make disp[] relative to disp[0]; "disp[0] = 0" runs once, after the loop */
+      for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0;
+ 
+      PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1;
+/*
+ * construct the struct type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_create_struct( nbufs, blen, disp, type,
+                                   &PANEL->dtypes[IBUF] );
+/*
+ * release temporaries
+ */
+      if( bufs ) free( bufs );
+      if( blen ) free( blen );
+      if( disp ) free( disp );
+      if( type ) free( type );
+/*
+ * commit the type 
+ */
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &PANEL->dtypes[IBUF] );
+
+      return( ierr );
+   }
+   else
+   {
+/*
+ * Memory allocation failed -> abort
+ */
+      HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" );
+      return( MPI_SUCCESS );    /* never executed (hopefully ...) */
+   }
+#endif
+#else
+          /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug
+             somewhere, so, just in case  and until I find it ... */
+   return( MPI_SUCCESS );   
+#endif
+/*
+ * End of HPL_packL
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c
new file mode 100644
index 000000000..ff426891c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c
@@ -0,0 +1,142 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_recv
+(
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              SRC,
+   int                              RTAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_recv
+( RBUF, RCOUNT, SRC, RTAG, COMM )
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              SRC;
+   int                              RTAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_recv  performs  a blocking receive of  RCOUNT  double  precision
+ * entries from process SRC by calling MPI_Recv.  It returns HPL_SUCCESS
+ * when MPI_Recv succeeds  and  HPL_FAILURE otherwise.  When  RCOUNT  is
+ * less than or equal to zero,  nothing is received and  HPL_SUCCESS  is
+ * returned immediately.
+ *
+ * Arguments
+ * =========
+ *
+ * RBUF    (local output)                double *
+ *         On entry,  RBUF  is the starting address  of the buffer that
+ *         receives the message.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  is the number of double precision entries
+ *         to receive into RBUF.  Nothing is done when RCOUNT <= 0.
+ *
+ * SRC     (local input)                 int
+ *         On entry,  SRC  is the rank of the sending process  in  the
+ *         communication space defined by COMM.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  is the message tag  to be used  for  this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 recv_status;
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               dtype;
+#endif
+   int                        mpierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !( RCOUNT > 0 ) ) { return( HPL_SUCCESS ); }
+
+#ifdef HPL_USE_MPI_DATATYPE
+   mpierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &dtype );
+   if( mpierr == MPI_SUCCESS ) { mpierr = MPI_Type_commit( &dtype ); }
+   if( mpierr == MPI_SUCCESS )
+   {
+      mpierr = MPI_Recv( (void *)(RBUF), 1, dtype, SRC, RTAG, COMM,
+                         &recv_status );
+   }
+   if( mpierr == MPI_SUCCESS ) { mpierr = MPI_Type_free( &dtype ); }
+#else
+   mpierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG,
+                      COMM, &recv_status );
+#endif
+   return( ( mpierr != MPI_SUCCESS ) ? HPL_FAILURE : HPL_SUCCESS );
+/*
+ * End of HPL_recv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c
new file mode 100644
index 000000000..0b2363563
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c
@@ -0,0 +1,239 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_sdrv
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              STAG,
+   double *                         RBUF,
+   int                              RCOUNT,
+   int                              RTAG,
+   int                              PARTNER,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_sdrv
+( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              STAG;
+   double *                         RBUF;
+   int                              RCOUNT;
+   int                              RTAG;
+   int                              PARTNER;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sdrv exchanges messages with PARTNER the way MPI_Sendrecv would,
+ * using MPI_Irecv,  a blocking MPI_Send and MPI_Wait,  so as to allow for
+ * some experimentation and tuning.  Messages of  length  less than  or
+ * equal to zero are not sent nor received.  Successful completion  is
+ * indicated by the returned error code HPL_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number  of double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG  specifies the message tag to be used for the
+ *         sending communication operation.
+ *
+ * RBUF    (local output)                double *
+ *         On entry, RBUF specifies the starting address of buffer to be
+ *         received.
+ *
+ * RCOUNT  (local input)                 int
+ *         On entry,  RCOUNT  specifies  the number  of double precision
+ *         entries in RBUF. RCOUNT must be at least zero.
+ *
+ * RTAG    (local input)                 int
+ *         On entry,  RTAG  specifies the message tag to be used for the
+ *         receiving communication operation.
+ *
+ * PARTNER (local input)                 int
+ *         On entry,  PARTNER  specifies  the rank of the  collaborative
+ *         process in the communication space defined by COMM.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type[2];
+#endif
+   MPI_Request                request;
+   MPI_Status                 status;
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( RCOUNT > 0 )
+   {
+      if( SCOUNT > 0 )
+      {
+#ifdef HPL_USE_MPI_DATATYPE
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER,
+                                RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[1] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER,
+                               STAG, COMM );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[1] );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+/*
+ * Post asynchronous receive
+ */
+         ierr =      MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                                PARTNER, RTAG, COMM, &request );
+/*
+ * Blocking send
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE,
+                               PARTNER, STAG, COMM );
+/*
+ * Wait for the receive to complete
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#endif
+      }
+      else
+      {
+/*
+ * Blocking receive
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+         ierr =      MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[0] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG,
+                               COMM, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[0] );
+#else
+         ierr =      MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE,
+                               PARTNER, RTAG, COMM, &status );
+#endif
+      }
+   }
+   else if( SCOUNT > 0 )
+   {
+/*
+ * Blocking send
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+      ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_commit( &type[1] );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG,
+                          COMM );
+      if( ierr == MPI_SUCCESS )
+         ierr =   MPI_Type_free( &type[1] );
+#else
+      ierr =      MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER,
+                            STAG, COMM );
+#endif
+   }
+   else { ierr = MPI_SUCCESS; }
+
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) );
+/*
+ * End of HPL_sdrv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c
new file mode 100644
index 000000000..9e9868594
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Do not use  MPI user-defined data types no matter what.  This routine
+ * is used for small contiguous messages.
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+#undef HPL_USE_MPI_DATATYPE
+#endif
+
+#ifdef STDC_HEADERS
+int HPL_send
+(
+   double *                         SBUF,
+   int                              SCOUNT,
+   int                              DEST,
+   int                              STAG,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_send
+( SBUF, SCOUNT, DEST, STAG, COMM )
+   double *                         SBUF;
+   int                              SCOUNT;
+   int                              DEST;
+   int                              STAG;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_send is a simple wrapper around  MPI_Send.  Its  main  purpose is
+ * to  allow for some  experimentation / tuning  of this simple routine.
+ * The function returns  HPL_SUCCESS  when  the  send  completed without
+ * error and  HPL_FAILURE  otherwise.  In the case of messages of length
+ * less than or equal to zero,  it returns  HPL_SUCCESS  without sending.
+ *
+ * Arguments
+ * =========
+ *
+ * SBUF    (local input)                 double *
+ *         On entry, SBUF specifies the starting address of buffer to be
+ *         sent.
+ *
+ * SCOUNT  (local input)                 int
+ *         On entry,  SCOUNT  specifies  the number of  double precision
+ *         entries in SBUF. SCOUNT must be at least zero.
+ *
+ * DEST    (local input)                 int
+ *         On entry, DEST specifies the rank of the receiving process in
+ *         the communication space defined by COMM.
+ *
+ * STAG    (local input)                 int
+ *         On entry,  STAG specifies the message tag to be used for this
+ *         communication operation.
+ *
+ * COMM    (local input)                 MPI_Comm
+ *         The MPI communicator identifying the communication space.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_USE_MPI_DATATYPE
+   MPI_Datatype               type;
+#endif
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( SCOUNT <= 0 ) return( HPL_SUCCESS );
+#ifdef HPL_USE_MPI_DATATYPE
+   ierr =      MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type );
+   if( ierr != MPI_SUCCESS ) return( HPL_FAILURE );
+   ierr =      MPI_Type_commit( &type );
+   if( ierr == MPI_SUCCESS )
+      ierr =   MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM );
+/* Free the type even when commit or send failed; keep the first error. */
+   if( MPI_Type_free( &type ) != MPI_SUCCESS && ierr == MPI_SUCCESS )
+      ierr =   MPI_ERR_OTHER;
+#else
+   ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM );
+#endif
+   return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); 
+/*
+ * End of HPL_send
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile
new file mode 100644
index 000000000..529fe9aea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile
@@ -0,0 +1,111 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Nothing below sets INCdir, CC, CCFLAGS or HPLlib; presumably Make.inc does.
+# ######################################################################
+# INCdep lists the headers that every object rule below names as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_pmisc.h   $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# HPL_comobj lists the objects that the lib.grd rule archives into $(HPLlib).
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files: never treat them as up to date.
+.PHONY  : all lib clean
+all     : lib 
+lib     : lib.grd
+# lib.grd is touched after the objects have been archived into $(HPLlib), so
+# the archive step is only repeated when one of the objects is newer than it.
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+# ######################################################################
+# One rule per object; $< is the first prerequisite, i.e. the ../HPL_*.c source.
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+# clean removes the compiled objects and the lib.grd stamp from this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile
new file mode 100644
index 000000000..d3c61cb93
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile
@@ -0,0 +1,119 @@
+#    /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    Modifications Copyright (C) 2023 Intel Corporation
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */ 
+
+# /* 
+#  * -- High Performance Computing Linpack Benchmark (HPL)                
+#  *    HPL - 2.3 - December 2, 2018                          
+#  *    Antoine P. Petitet                                                
+#  *    University of Tennessee, Knoxville                                
+#  *    Innovative Computing Laboratory                                 
+#  *    (C) Copyright 2000-2008 All Rights Reserved                       
+#  *                                                                      
+#  * -- Copyright notice and Licensing terms:                             
+#  *                                                                      
+#  * Redistribution  and  use in  source and binary forms, with or without
+#  * modification, are  permitted provided  that the following  conditions
+#  * are met:                                                             
+#  *                                                                      
+#  * 1. Redistributions  of  source  code  must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.        
+#  *                                                                      
+#  * 2. Redistributions in binary form must reproduce  the above copyright
+#  * notice, this list of conditions,  and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution. 
+#  *                                                                      
+#  * 3. All  advertising  materials  mentioning  features  or  use of this
+#  * software must display the following acknowledgement:                 
+#  * This  product  includes  software  developed  at  the  University  of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.             
+#  *                                                                      
+#  * 4. The name of the  University,  the name of the  Laboratory,  or the
+#  * names  of  its  contributors  may  not  be used to endorse or promote
+#  * products  derived   from   this  software  without  specific  written
+#  * permission.                                                          
+#  *                                                                      
+#  * -- Disclaimer:                                                       
+#  *                                                                      
+#  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */ 
+
+
+# Builds the shared library libdgemm.so.1.0.1 and its version symlinks.
+.PHONY: all clean
+all: libdgemm.so.1.0.1
+
+OBJS = cuda_dgemm.o
+.PRECIOUS: $(OBJS)
+
+# NOTE(review): fermi_dgemm.o is not part of OBJS; looks stale -- confirm.
+fermi_dgemm.o : fermi_dgemm.c fermi_dgemm.h
+
+# Tool and ROCm locations; override on the command line for other installs.
+MPICC       ?= mpicc
+HIP_INC     ?= /opt/rocm-5.1.3/hip/include
+HIPBLAS_INC ?= /opt/rocm/hipblas/include
+HIPBLAS_LIB ?= /opt/rocm-5.1.3/hipblas/lib/
+
+DEFINES = -DMPI -g
+
+%.o: %.cpp
+	$(MPICC) -O0 -c -fPIC $(DEFINES) $< -o $@ -I$(HIPBLAS_INC) -I$(HIP_INC) -D__HIP_PLATFORM_AMD__
+
+libdgemm.so.1.0.1: $(OBJS)
+	$(MPICC) -O3 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -L$(HIPBLAS_LIB) -lhipblas
+	ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0
+	ln -sf libdgemm.so.1.0 libdgemm.so.1
+	ln -sf libdgemm.so.1 libdgemm.so
+clean:
+	$(RM) $(OBJS) libdgemm.so.1.0.1 libdgemm.so.1.0 libdgemm.so.1 libdgemm.so
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp
new file mode 100644
index 000000000..c4ac764c2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp
@@ -0,0 +1,277 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+
+
+#define NUMBER_OF_STREAMS 4
+#define CHUNK_SIZE 512
+#define NN 64
+#define NM 128
+#define ERRCODE(e) (-(__LINE__ * 1000 + (e)))
+//#define DEVICE_DEBUG
+//#ifdef MPI
+//#include <mpi.h>
+//#endif
+
+#ifndef _GNU_SOURCE  /* may already be predefined by the C++ driver */
+#define _GNU_SOURCE
+#endif
+//#define CUDA_ERROR_CHECK
+//#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
+//#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <ctype.h>
+#include <math.h>
+#include <array>
+
+#include <time.h>
+
+#include <iostream>
+#include <chrono> 
+
+/*
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cublas.h>
+*/
+#include "hip/hip_runtime.h" 
+
+#include "mkl.h"
+#include "hipblas.h"
+
+/* Both wrappers are declared with C linkage, so their symbols are not mangled. */
+extern "C" {
+
+    /* General matrix-matrix multiply wrapper. */
+    void dpcpp_dgemm(
+        const int     ORDER,
+        const int     TRANSA,    const int  TRANSB,
+        const int     M,         const int  N,       const int K,
+        const double  ALPHA,
+        const double *A,         const int  LDA,
+        const double *B,         const int  LDB,
+        const double  BETA,
+        double       *C,         const int  LDC);
+
+    /* Triangular solve wrapper. */
+    void dpcpp_dtrsm(
+        int           HPL_ORDER,
+        int           HPL_SIDE,  int        HPL_UPLO,
+        int           HPL_TRANS, int        HPL_DIAG,
+        const int     M,         const int  N,
+        const double  ALPHA,
+        const double *A,         const int  LDA,
+        double       *B,         const int  LDB);
+}
+
+
+/* dpcpp_dgemm computes C := ALPHA*A*B + BETA*C for column-major, untransposed
+ * operands.  NOTE(review): ORDER, TRANSA and TRANSB are never read -- confirm
+ * that every caller really wants column-major / no-transpose.
+ * Problems with N < NN, M < NM or K < 128 run on the host through cblas_dgemm,
+ * larger ones through hipblasDgemm.  If the device path fails before the host
+ * copy of C is touched, the host path is used so that C still gets a result. */
+void dpcpp_dgemm 
+(   const int ORDER,   const int TRANSA,    const int TRANSB,       
+    const int M,       const int N,         const int K,       
+    const double ALPHA,const double *A,     const int LDA,
+    const double *B,   const int LDB,       
+    const double BETA, double *C,         const int LDC)
+{
+    if ((M==0)||(K==0)||(N==0)){
+        return;
+    }
+
+    /* Widen before multiplying: N * LDC overflows int for large matrices. */
+    const size_t bytesA = (size_t)K * (size_t)LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * (size_t)LDB * sizeof(double);
+    const size_t bytesC = (size_t)N * (size_t)LDC * sizeof(double);
+    double *devPtrA = NULL, *devPtrB = NULL, *devPtrC = NULL;
+    hipblasHandle_t handle = NULL;
+    const bool wantGPU = !( (N) < NN || (M) < NM || (K) < 128 );
+
+    /* && short-circuits, so the first failing step skips all later ones. */
+    const bool doneOnGPU = wantGPU
+        && hipblasCreate(&handle) == HIPBLAS_STATUS_SUCCESS
+        && hipMalloc((void **)&devPtrA, bytesA) == hipSuccess
+        && hipMalloc((void **)&devPtrB, bytesB) == hipSuccess
+        && hipMalloc((void **)&devPtrC, bytesC) == hipSuccess
+        && hipMemcpy(devPtrA, A, bytesA, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrB, B, bytesB, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrC, C, bytesC, hipMemcpyHostToDevice) == hipSuccess
+        && hipDeviceSynchronize() == hipSuccess
+        && hipblasDgemm(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, M, N, K, &ALPHA,
+               devPtrA, LDA, devPtrB, LDB, &BETA, devPtrC, LDC) == HIPBLAS_STATUS_SUCCESS
+        && hipDeviceSynchronize() == hipSuccess;
+    if (doneOnGPU) {
+        #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on GPU" << std::endl; 
+        #endif
+        if (hipMemcpy(C, devPtrC, bytesC, hipMemcpyDeviceToHost) != hipSuccess)
+            fprintf(stderr, "dpcpp_dgemm: copying C back from the device failed\n");
+    } else {
+        #ifdef DEVICE_DEBUG
+            std::cout << "dgemm-Running on CPU" << std::endl; 
+        #endif
+        if (wantGPU) fprintf(stderr, "dpcpp_dgemm: device path failed, using host BLAS\n");
+        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,  M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+    }
+    /* handle is set only once the device path was entered; hipFree ignores NULL. */
+    if (handle != NULL) {
+        hipFree(devPtrA); hipFree(devPtrB); hipFree(devPtrC);
+        hipblasDestroy(handle);
+    }
+}
+  
+/* dpcpp_dtrsm overwrites the M x N matrix B with the solution X of
+ * A * X = ALPHA * B, treating A as a column-major, lower-triangular,
+ * unit-diagonal, untransposed M x M matrix applied from the left.
+ * NOTE(review): ORDER, SIDE, UPLO, TRANS and DIAG are never read -- both code
+ * paths hard-code Left/Lower/NoTrans/Unit; confirm every caller wants that.
+ * Problems with M < 512 or N < 2*M run on the host through cblas_dtrsm, larger
+ * ones through hipblasDtrsm.  If the device path fails before the host copy
+ * of B is touched, the host path is used so that B still gets a result. */
+void dpcpp_dtrsm
+(  const int ORDER,           const int SIDE,
+   const int UPLO,            const int TRANS,
+   const int DIAG,            const int M,       const int N,
+   const double ALPHA,    const double* A,  const int LDA,       double* B,
+   const int LDB)
+{
+    if ((M==0)||(N==0)){
+        return;
+    }
+
+    /* Widen before multiplying: N * LDB overflows int for large matrices. */
+    const size_t bytesA = (size_t)M * (size_t)LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * (size_t)LDB * sizeof(double);
+    double *devPtrA = NULL, *devPtrB = NULL;
+    hipblasHandle_t handle = NULL;
+    const bool wantGPU = !( (M) < 512 || (N) < 2*(M) );
+
+    /* && short-circuits, so the first failing step skips all later ones. */
+    const bool doneOnGPU = wantGPU
+        && hipblasCreate(&handle) == HIPBLAS_STATUS_SUCCESS
+        && hipMalloc((void **)&devPtrA, bytesA) == hipSuccess
+        && hipMalloc((void **)&devPtrB, bytesB) == hipSuccess
+        && hipMemcpy(devPtrA, A, bytesA, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrB, B, bytesB, hipMemcpyHostToDevice) == hipSuccess
+        && hipDeviceSynchronize() == hipSuccess
+        && hipblasDtrsm(handle, HIPBLAS_SIDE_LEFT, HIPBLAS_FILL_MODE_LOWER, HIPBLAS_OP_N,
+               HIPBLAS_DIAG_UNIT, M, N, &ALPHA, devPtrA, LDA, devPtrB, LDB) == HIPBLAS_STATUS_SUCCESS
+        && hipDeviceSynchronize() == hipSuccess;
+
+    if (doneOnGPU) {
+        #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on GPU" << std::endl; 
+        #endif
+        if (hipMemcpy(B, devPtrB, bytesB, hipMemcpyDeviceToHost) != hipSuccess)
+            fprintf(stderr, "dpcpp_dtrsm: copying B back from the device failed\n");
+    } else {
+        #ifdef DEVICE_DEBUG
+            std::cout << "dtrsm-Running on CPU" << std::endl; 
+        #endif
+        if (wantGPU) fprintf(stderr, "dpcpp_dtrsm: device path failed, using host BLAS\n");
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+    }
+    /* handle is set only once the device path was entered; hipFree ignores NULL. */
+    if (handle != NULL) {
+        hipFree(devPtrA); hipFree(devPtrB);
+        hipblasDestroy(handle);
+    }
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h
new file mode 100644
index 000000000..8b9052fba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h
@@ -0,0 +1,149 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+
+
+#define NUMBER_OF_STREAMS 2
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+#include <array>
+
+class DeviceManager;
+/* Lazily filled by getInstance(mpi_id); mpi_id is not range-checked. */
+static DeviceManager *instance[2];
+
+/* Owns the SYCL device picked by the default selector together with
+ * NUMBER_OF_STREAMS queues on it. The constructor is private; instances
+ * are obtained through getInstance().                                  */
+class DeviceManager{
+    cl::sycl::device *m_pDevice;
+    cl::sycl::queue queues[NUMBER_OF_STREAMS];
+
+    DeviceManager() : m_pDevice(nullptr) {
+        try{
+            m_pDevice = new cl::sycl::device(cl::sycl::default_selector());
+        }catch(...){
+            std::cout << "ERROR: failed to create sycl device.\n";
+            throw;   /* no device: the queues below cannot be built */
+        }
+
+        /* Prints asynchronous SYCL errors reported on the queues. */
+        auto exception_handler = [] (cl::sycl::exception_list exceptions) {
+            for (std::exception_ptr const& pending : exceptions) {
+                try {
+                    std::rethrow_exception(pending);
+                } catch(cl::sycl::exception const& e) {
+                    std::cout << "Caught asynchronous SYCL exception during GEMM:\n"
+                        << e.what() << std::endl;
+                }
+            }
+        };
+
+        /* One queue per stream, all bound to the same device and handler. */
+        for (int s = 0; s < NUMBER_OF_STREAMS; ++s)
+            queues[s] = cl::sycl::queue(*m_pDevice, exception_handler);
+    }
+    public:
+    /* Returns the manager stored in slot mpi_id, creating it on first use. */
+    static DeviceManager* getInstance(int mpi_id){
+        if(!instance[mpi_id]){
+            std::cout << "Creating device for " << mpi_id << "\n";
+            instance[mpi_id] = new DeviceManager();
+        }
+        return instance[mpi_id];
+    }
+
+    cl::sycl::device &getDevice(){ return *m_pDevice;}
+    cl::sycl::queue *getQueues(){ return queues;}
+
+    static void display_device_properties(cl::sycl::device const &dev);
+};
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1
new file mode 120000
index 000000000..ab21c8005
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1
@@ -0,0 +1 @@
+libdgemm.so.1.0
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0
new file mode 120000
index 000000000..d08629732
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0
@@ -0,0 +1 @@
+libdgemm.so.1.0.1
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1
new file mode 100755
index 000000000..6a9f7501f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1 differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce combines BUFFER across all processes of COMM and leaves
+ * the combined data on every one of them.  It is a HPL_reduce with root 0
+ * followed by a HPL_broadcast with root 0;  when the reduce step fails its
+ * error code is returned and the broadcast is skipped.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry, the local data to be combined.  On exit, the
+ *         combined data, identical on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         Number of entries in BUFFER.  COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         Type of the entries in BUFFER.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         The local combine function handed to HPL_reduce.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const int                  reduce_err = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM );
+/* ..
+ * .. Executable Statements ..
+ */
+   if( reduce_err == MPI_SUCCESS )
+      return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) );
+   return( reduce_err );
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_barrier is meant to block the caller until all members of the
+ * group have called it.  It is implemented as a HPL_broadcast of one
+ * dummy integer with root 0, whose error code is returned.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dummy = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_broadcast( (void *) &dummy, 1, HPL_INT, 0, COMM ) );
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_broadcast
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_broadcast
+( BUFFER, COUNT, DTYPE, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_broadcast broadcasts  a message from the process with rank ROOT to
+ * all processes in the group.  The data travels along a binomial tree of
+ * point-to-point MPI_Send / MPI_Recv calls.  Nothing is sent when COUNT
+ * is not positive or when the group has at most one process.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be broadcast. On
+ *         exit, it holds the broadcast data on all processes.
+ *
+ * COUNT   (global input)                const int
+ *         Number of entries in BUFFER.  COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the coordinate of the source process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, 
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+   MPI_Status                 status;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+/* ip2 := largest power of two <= size-1,  mask := 2*ip2-1 (ip2's bit and below) */
+   kk = size - 1;
+   while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   mydist = MModSub( rank, ROOT, size );   /* presumably (rank-ROOT) mod size - confirm */
+/* one round per bit of ip2, from the highest bit down to 1 */
+   do
+   {
+      mask ^= ip2;                        /* mask now covers only the bits below ip2 */
+      if( ( mydist & mask ) == 0 )        /* lower bits clear: active in this round  */
+      {
+         partner = mydist ^ ip2;
+
+         if( mydist & ip2 )               /* ip2 bit set: receive from mydist - ip2  */
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Recv(  BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                 partner, tag, COMM, &status );
+         }
+         else if( partner < size )        /* ip2 bit clear: send, if partner exists  */
+         {
+            partner = MModAdd( ROOT, partner, size );
+            mpierr  = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                partner, tag, COMM );
+         }
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;   /* keep the latest failure */
+      }
+      ip2 >>= 1;
+   } while( ip2 );
+
+   return( hplerr );
+/*
+ * End of HPL_broadcast
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c
new file mode 100644
index 000000000..f0d00b065
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c
@@ -0,0 +1,109 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_exit
+(
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_exit
+( GRID )
+   HPL_T_grid *                     GRID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_grid_exit frees the row, column and global communicators of GRID
+ * (skipped when all_comm is MPI_COMM_NULL) and resets its other fields.
+ * It returns MPI_SUCCESS, or the code of the last MPI_Comm_free failure.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid to be released.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   * comms[3];
+   int                        k, rc, last_err = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( GRID->all_comm != MPI_COMM_NULL )
+   {
+      comms[0] = &(GRID->row_comm);
+      comms[1] = &(GRID->col_comm);
+      comms[2] = &(GRID->all_comm);
+      for( k = 0; k < 3; k++ )
+      {
+         rc = MPI_Comm_free( comms[k] );
+         if( rc != MPI_SUCCESS ) last_err = rc;
+      }
+   }
+
+   GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1;
+   GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1;
+   GRID->nprow   = GRID->npcol    = GRID->nprocs    = -1;
+   GRID->iam     = GRID->myrow    = GRID->mycol     = -1;
+   GRID->order   = HPL_COLUMN_MAJOR;
+   return( last_err );
+/*
+ * End of HPL_grid_exit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c
new file mode 100644
index 000000000..95c5a7315
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_info
+(
+   const HPL_T_grid *               GRID,
+   int *                            NPROW,
+   int *                            NPCOL,
+   int *                            MYROW,
+   int *                            MYCOL
+)
+#else
+int HPL_grid_info
+( GRID, NPROW, NPCOL, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   int *                            NPROW;
+   int *                            NPCOL;
+   int *                            MYROW;
+   int *                            MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_info copies the shape of a process grid and the coordinates
+ * stored in it into the caller-supplied output locations.  The routine
+ * performs no  MPI call and no argument checking;  it  always  returns
+ * MPI_SUCCESS.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (input)                       const HPL_T_grid *
+ *         On entry,  GRID  points to the process grid to be queried. It
+ *         is only read.
+ *
+ * NPROW   (output)                      int *
+ *         On exit, *NPROW holds GRID->nprow, the number of process rows
+ *         recorded in the grid.
+ *
+ * NPCOL   (output)                      int *
+ *         On exit,  *NPCOL  holds  GRID->npcol,  the number of  process
+ *         columns recorded in the grid.
+ *
+ * MYROW   (output)                      int *
+ *         On exit,  *MYROW  holds  GRID->myrow,  the  row  coordinate
+ *         recorded in the grid.
+ *
+ * MYCOL   (output)                      int *
+ *         On exit,  *MYCOL  holds  GRID->mycol,  the column coordinate
+ *         recorded in the grid.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Executable Statements ..
+ */
+   *NPROW = GRID->nprow;
+   *NPCOL = GRID->npcol;
+   *MYROW = GRID->myrow;
+   *MYCOL = GRID->mycol;
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_grid_info
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c
new file mode 100644
index 000000000..52111ac52
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c
@@ -0,0 +1,184 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_grid_init
+(
+   MPI_Comm                         COMM,
+   const HPL_T_ORDER                ORDER,
+   const int                        NPROW,
+   const int                        NPCOL,
+   HPL_T_grid *                     GRID
+)
+#else
+int HPL_grid_init
+( COMM, ORDER, NPROW, NPCOL, GRID )
+   MPI_Comm                         COMM;
+   const HPL_T_ORDER                ORDER;
+   const int                        NPROW;
+   const int                        NPCOL;
+   HPL_T_grid *                     GRID;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_grid_init creates a NPROW x NPCOL  process  grid using column- or
+ * row-major ordering from an initial collection of processes identified
+ * by an  MPI  communicator.  Successful  completion is indicated by the
+ * returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+ * implementation. The coordinates of processes that are not part of the
+ * grid are set to values outside of [0..NPROW) x [0..NPCOL).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         On entry,  COMM  is  the  MPI  communicator  identifying  the
+ *         initial  collection  of  processes out of which  the  grid is
+ *         formed.
+ *
+ * ORDER   (global input)                const HPL_T_ORDER
+ *         On entry, ORDER specifies how the processes should be ordered
+ *         in the grid as follows:
+ *            ORDER = HPL_ROW_MAJOR    row-major    ordering;
+ *            ORDER = HPL_COLUMN_MAJOR column-major ordering;
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid to be created. NPROW must be at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid to be created. NPCOL must be at least one.
+ *
+ * GRID    (local input/output)          HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information to be initialized.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        hdim, hplerr=MPI_SUCCESS, ierr, ip2, k,
+                              mask, mycol, myrow, nprocs, rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size );
+/*
+ * Abort if illegal process grid
+ */
+   nprocs = NPROW * NPCOL;
+   if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) )
+   { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); }
+/*
+ * Row- or column-major ordering of the processes
+ */
+   if( ORDER == HPL_ROW_MAJOR )
+   {
+      GRID->order = HPL_ROW_MAJOR;
+      myrow = rank / NPCOL; mycol = rank - myrow * NPCOL;
+   }
+   else
+   {
+      GRID->order = HPL_COLUMN_MAJOR;
+      mycol = rank / NPROW; myrow = rank - mycol * NPROW;
+   }
+   GRID->iam   = rank;  GRID->myrow = myrow; GRID->mycol  = mycol;
+   GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs;
+/*
+ * row_ip2   : largest power of two <= nprow;
+ * row_hdim  : log2( row_ip2 ), the row_ip2 procs hypercube dim;
+ * row_ip2m1 : largest power of two <= nprow-1 (1 when nprow is 1);
+ * row_mask  : 2 * row_ip2m1 - 1, the row_ip2m1 procs hypercube mask;
+ */
+   hdim = 0; ip2 = 1; k = NPROW;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->row_ip2 = ip2; GRID->row_hdim = hdim;
+
+   mask = ip2 = 1;    k = NPROW - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->row_ip2m1 = ip2; GRID->row_mask = mask;
+/*
+ * col_ip2   : largest power of two <= npcol;
+ * col_hdim  : log2( col_ip2 ), the col_ip2 procs hypercube dim;
+ * col_ip2m1 : largest power of two <= npcol-1 (1 when npcol is 1);
+ * col_mask  : 2 * col_ip2m1 - 1, the col_ip2m1 procs hypercube mask;
+ */
+   hdim = 0; ip2 = 1; k = NPCOL;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; }
+   GRID->col_ip2 = ip2; GRID->col_hdim = hdim;
+
+   mask = ip2 = 1;    k = NPCOL - 1;
+   while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
+   GRID->col_ip2m1 = ip2; GRID->col_mask = mask;
+/*
+ * All communicator, leave if I am not part of this grid; then create the
+ * row and column communicators. Any failing split is reported on return.
+ */
+   ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ),
+                          rank, &(GRID->all_comm) );
+   if( GRID->all_comm == MPI_COMM_NULL ) return( ierr );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+   ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) );
+   if( ierr != MPI_SUCCESS ) hplerr = ierr;
+
+   return( hplerr );
+/*
+ * End of HPL_grid_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c
new file mode 100644
index 000000000..002aabe01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_max
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_max
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_max combines two buffers of length N entry by entry:  INOUT[i] is
+ * overwritten with Mmax( IN[i], INOUT[i] ) for every i in [0..N).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N is the number of entries processed in each buffer.
+ *         Nothing is done when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer that is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer. On exit,
+ *         its first N entries contain the combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry,  DTYPE selects how the buffers are interpreted:  as
+ *         int when DTYPE is HPL_INT, as double for any other value.
+ *
+ * ---------------------------------------------------------------------
+ */
+   int                        k;
+/*
+ * HPL_INT selects the int interpretation; every other DTYPE value falls
+ * through to the double precision branch.
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      k = 0;
+      while( k < N ) { dst[k] = Mmax( src[k], dst[k] ); k++; }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      k = 0;
+      while( k < N ) { dst[k] = Mmax( src[k], dst[k] ); k++; }
+   }
+/*
+ * End of HPL_max
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c
new file mode 100644
index 000000000..a99e5e58a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_min
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_min
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_min combines two buffers of length N entry by entry:  INOUT[i] is
+ * overwritten with Mmin( IN[i], INOUT[i] ) for every i in [0..N).
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N is the number of entries processed in each buffer.
+ *         Nothing is done when N is not positive.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer that is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT points to the second operand buffer. On exit,
+ *         its first N entries contain the combined results.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry,  DTYPE selects how the buffers are interpreted:  as
+ *         int when DTYPE is HPL_INT, as double for any other value.
+ *
+ * ---------------------------------------------------------------------
+ */
+   int                        k;
+/*
+ * HPL_INT selects the int interpretation; every other DTYPE value falls
+ * through to the double precision branch.
+ */
+   if( DTYPE != HPL_INT )
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      k = 0;
+      while( k < N ) { dst[k] = Mmin( src[k], dst[k] ); k++; }
+   }
+   else
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      k = 0;
+      while( k < N ) { dst[k] = Mmin( src[k], dst[k] ); k++; }
+   }
+/*
+ * End of HPL_min
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c
new file mode 100644
index 000000000..c80885b9a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pnum
+(
+   const HPL_T_grid *               GRID,
+   const int                        MYROW,
+   const int                        MYCOL
+)
+#else
+int HPL_pnum
+( GRID, MYROW, MYCOL )
+   const HPL_T_grid *               GRID;
+   const int                        MYROW;
+   const int                        MYCOL;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pnum maps  grid coordinates  to a process rank,  following  the
+ * ordering stored in GRID->order. The coordinates are not range-checked.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points to the process grid;  its order, nprow
+ *         and npcol fields are read.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW is the row coordinate of the process.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL is the column coordinate of the process.
+ *
+ * Returns  MYROW * npcol + MYCOL  when the order is  HPL_ROW_MAJOR, and
+ * MYCOL * nprow + MYROW  for any other order.
+ *
+ * ---------------------------------------------------------------------
+ */
+   int                        pnum;
+
+   if( GRID->order != HPL_ROW_MAJOR )
+      pnum = MYCOL * GRID->nprow + MYROW;
+   else
+      pnum = MYROW * GRID->npcol + MYCOL;
+
+   return( pnum );
+/*
+ * End of HPL_pnum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c
new file mode 100644
index 000000000..417c21163
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c
@@ -0,0 +1,179 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   const int                        ROOT,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_reduce
+( BUFFER, COUNT, DTYPE, OP, ROOT, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   const int                        ROOT;
+   MPI_Comm                         COMM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_reduce performs a global reduce operation across all processes of
+ * a group.  Note that the input buffer is also used as a work array, so
+ * on processes other than ROOT its original data may be overwritten.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/output)          void *
+ *         On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+ *         exit,  and  in process of rank  ROOT  this array contains the
+ *         reduced data.  This  buffer  is also used as workspace during
+ *         the operation in the other processes of the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         When COUNT is zero or negative, MPI_SUCCESS is returned at once.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * OP      (global input)                const HPL_T_OP 
+ *         On entry, OP is the combine function: OP(COUNT,in,BUFFER,DTYPE).
+ *
+ * ROOT    (global input)                const int
+ *         On entry, ROOT is the rank in COMM of the accumulating process.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Status                 status;
+   void                       * buffer = NULL;
+   int                        hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0,
+                              mpierr, mydist, partner, rank, size, 
+                              tag = MSGID_BEGIN_COLL;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( COUNT <= 0 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_size( COMM, &size );
+   if( size  == 1 ) return( MPI_SUCCESS );
+   mpierr = MPI_Comm_rank( COMM, &rank );
+   i = size - 1; while( i > 1 ) { i >>= 1; d++; }
+/* d = bit length of (size-1) = number of rounds; buffer = receive scratch */
+   if( DTYPE == HPL_INT )
+      buffer = (void *)( (int *)   malloc( (size_t)(COUNT) * 
+                                           sizeof( int    ) ) );
+   else
+      buffer = (void *)( (double *)malloc( (size_t)(COUNT) *
+                                           sizeof( double ) ) );
+
+   if( !( buffer ) )
+   { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); }
+/* Root (mydist == 0): one receive-and-combine per round, ip2 = 1, 2, 4, .. */
+   if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 )
+   {
+      do
+      {
+         mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                            MModAdd( ROOT, ip2, size ), tag, COMM,
+                            &status );
+         if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         OP( COUNT, buffer, BUFFER, DTYPE );
+         ip2 <<= 1; d--;
+      } while( d );
+   }
+   else
+   {
+      do
+      {
+         if( ( mydist & mask ) == 0 )
+         {
+            partner = mydist ^ ip2;
+/* bit ip2 of mydist set: send the partial result; else receive and combine */
+            if( mydist & ip2 )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                  partner, tag, COMM );
+            }
+            else if( partner < size )
+            {
+               partner = MModAdd( ROOT, partner, size );
+               mpierr  = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ),
+                                   partner, tag, COMM, &status );
+               OP( COUNT, buffer, BUFFER, DTYPE );
+            }
+            if( mpierr != MPI_SUCCESS ) hplerr = mpierr;
+         }
+         mask ^= ip2; ip2 <<= 1; d--;
+      } while( d );
+   }
+   if( buffer ) free( buffer );
+/* hplerr is the last non-success code seen at the checks above, if any */
+   return( hplerr );
+/*
+ * End of HPL_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c
new file mode 100644
index 000000000..34cf87210
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c
@@ -0,0 +1,118 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_sum
+(
+   const int                        N,
+   const void *                     IN,
+   void *                           INOUT,
+   const HPL_T_TYPE                 DTYPE
+)
+#else
+void HPL_sum
+( N, IN, INOUT, DTYPE )
+   const int                        N;
+   const void *                     IN;
+   void *                           INOUT;
+   const HPL_T_TYPE                 DTYPE;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_sum adds the buffer IN, entry by entry, into the buffer INOUT.
+ * Entries are treated as int when DTYPE is HPL_INT, as double otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  is the number of entries to add.  Nothing is done
+ *         when N is zero or negative.
+ *
+ * IN      (input)                       const void *
+ *         On entry, IN points to the buffer that is only read.
+ *
+ * INOUT   (input/output)                void *
+ *         On entry, INOUT  points  to  the  buffer  that is accumulated
+ *         into.  On exit, entry k of this array holds its initial value
+ *         plus entry k of IN, for k = 0 .. N-1.
+ *
+ * DTYPE   (input)                       const HPL_T_TYPE
+ *         On entry,  DTYPE  selects the operand type (int or double).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        left = N;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( DTYPE == HPL_INT )
+   {
+      const int       * src = (const int *)(IN);
+      int             * dst = (int *)(INOUT);
+      for( ; left > 0; left--, src++, dst++ ) *dst += *src;
+   }
+   else
+   {
+      const double    * src = (const double *)(IN);
+      double          * dst = (double *)(INOUT);
+      for( ; left > 0; left--, src++, dst++ ) *dst += *src;
+   }
+/*
+ * End of HPL_sum
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kate/hip/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile
new file mode 100644
index 000000000..51549d817
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile
@@ -0,0 +1,103 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# CC, CCFLAGS, INCdir, ARCHIVER, ARFLAGS, RANLIB, TOUCH and HPLlib are not
+# set in this file; presumably they come from Make.inc.
+# Headers listed as a prerequisite of every grid object below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h
+#
+## Object files ########################################################
+#
+HPL_griobj       = \
+   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
+   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
+   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
+   HPL_max.o              HPL_sum.o
+#
+## Targets #############################################################
+# lib.grd is a stamp file touched once the objects have been archived.
+all     : lib
+lib     : lib.grd
+# all, lib and clean name actions rather than files.
+.PHONY  : all lib clean
+lib.grd : $(HPL_griobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object; the sources live in the parent directory.
+HPL_grid_init.o        : ../HPL_grid_init.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_init.c
+HPL_pnum.o             : ../HPL_pnum.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pnum.c
+HPL_grid_info.o        : ../HPL_grid_info.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_info.c
+HPL_grid_exit.o        : ../HPL_grid_exit.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_grid_exit.c
+HPL_broadcast.o        : ../HPL_broadcast.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_broadcast.c
+HPL_reduce.o           : ../HPL_reduce.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_reduce.c
+HPL_all_reduce.o       : ../HPL_all_reduce.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_all_reduce.c
+HPL_barrier.o          : ../HPL_barrier.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_barrier.c
+HPL_min.o              : ../HPL_min.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_min.c
+HPL_max.o              : ../HPL_max.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_max.c
+HPL_sum.o              : ../HPL_sum.c              $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_sum.c
+#
+# ######################################################################
+# Remove the objects and the *.grd stamp files of this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c
new file mode 100644
index 000000000..757dad242
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c
@@ -0,0 +1,97 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_disp
+(
+   HPL_T_panel * *                  PANEL
+)
+#else
+int HPL_pdpanel_disp
+( PANEL )
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_disp releases the panel resources with HPL_pdpanel_free,
+ * returns its status, frees the panel structure and nulls *PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On entry, *PANEL is the panel to deallocate, or NULL (no-op).
+ *         On exit, *PANEL is NULL.  PANEL itself must not be NULL.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        mpierr = MPI_SUCCESS;
+/* ..
+ * .. Executable Statements ..
+ *
+ * HPL_pdpanel_free dereferences its argument, so it is called only for
+ * a non-NULL panel; for a NULL panel MPI_SUCCESS is returned unchanged.
+ */
+   if( *PANEL )
+   { mpierr = HPL_pdpanel_free( *PANEL ); free( *PANEL ); }
+   *PANEL = NULL;
+
+   return( mpierr );
+/*
+ * End of HPL_pdpanel_disp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c
new file mode 100644
index 000000000..38b5b0d97
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c
@@ -0,0 +1,104 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_pdpanel_free
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+int HPL_pdpanel_free
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_free releases the work arrays owned by a panel and copies
+ * the panel status (DINFO) into the matrix descriptor if none is set.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the panel whose  WORK  and  IWORK
+ *         arrays are freed.  The structure itself is not freed.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( !( PANEL->pmat->info ) )
+      PANEL->pmat->info = PANEL->DINFO[0];
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the VSIPL blocks (the U block only when grid->nprow > 1)
+ */
+   (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE );
+   (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE );
+   if( PANEL->grid->nprow > 1 )
+      (void) vsip_blockrelease_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Then destroy the same blocks, in the same order
+ */
+   vsip_blockdestroy_d( PANEL->L1block );
+   vsip_blockdestroy_d( PANEL->L2block );
+   if( PANEL->grid->nprow > 1 )
+      vsip_blockdestroy_d( PANEL->Ublock );
+#endif
+   if( PANEL->WORK  != NULL ) free( PANEL->WORK  );
+   if( PANEL->IWORK != NULL ) free( PANEL->IWORK );
+/* No failure is detected in this routine: it always reports success. */
+   return( MPI_SUCCESS );
+/*
+ * End of HPL_pdpanel_free
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c
new file mode 100644
index 000000000..9e35c7fb4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c
@@ -0,0 +1,348 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* Without MPI datatypes the copy of L cannot be avoided:  force it on */
+/* whenever HPL_NO_MPI_DATATYPE is requested and HPL_COPY_L is not set. */
+#if defined( HPL_NO_MPI_DATATYPE ) && !defined( HPL_COPY_L )
+#define HPL_COPY_L
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_init
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdpanel_init
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_init initializes a panel data structure  and  allocates
+ * its WORK and IWORK buffers, which HPL_pdpanel_free releases.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the global number of rows of the panel.
+ *         M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  global number of columns of the
+ *         panel and trailing submatrix. N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         On entry,  JB  specifies the number of columns of the  panel.
+ *         JB must be at least zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * IA      (global input)                const int
+ *         On entry,  IA  is  the global row index identifying the panel
+ *         and trailing submatrix. IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         On entry, JA is the global column index identifying the panel
+ *         and trailing submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         On entry, TAG is the row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   size_t                     dalign;
+   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
+                              ml2, mp, mycol, myrow, nb, npcol, nprow,
+                              nq, nu;
+/* ..
+ * .. Executable Statements ..
+ */
+   PANEL->grid    = GRID;                  /* ptr to the process grid */
+   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
+   PANEL->pmat    = A;                 /* ptr to the local array info */
+
+   myrow = GRID->myrow; mycol = GRID->mycol;
+   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
+
+   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
+                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
+   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
+   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
+                                         /* ptr to trailing part of A */
+   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
+/*
+ * Workspace pointers are initialized to NULL.
+ */
+   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
+   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
+   PANEL->IWORK   = NULL;
+/*
+ * Local lengths, indexes process coordinates
+ */
+   PANEL->nb      = nb;               /* distribution blocking factor */
+   PANEL->jb      = JB;                                /* panel width */
+   PANEL->m       = M;      /* global # of rows of trailing part of A */
+   PANEL->n       = N;      /* global # of cols of trailing part of A */
+   PANEL->ia      = IA;     /* global row index of trailing part of A */
+   PANEL->ja      = JA;     /* global col index of trailing part of A */
+   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
+   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
+   PANEL->ii      = ii;      /* local row index of trailing part of A */
+   PANEL->jj      = jj;      /* local col index of trailing part of A */
+   PANEL->lda     = A->ld;            /* local leading dim of array A */
+   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
+   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
+   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
+/*
+ * Initialize  ldl2 and len  to temporary dummy values;  both  are  set
+ * to their actual values once the workspace layout is known (below).
+ */
+   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
+   PANEL->len     = 0;           /* length of the buffer to broadcast */
+/*
+ * Figure out the exact amount of workspace  needed by the factorization
+ * and the update - Allocate that space - Finish the panel data structu-
+ * re initialization.
+ *
+ * L1:    JB x JB in all processes
+ * DPIV:  JB      in all processes
+ * DINFO: 1       in all processes
+ *
+ * We make sure that those three arrays are contiguous in memory for the
+ * later panel broadcast.  We  also  choose  to put this amount of space 
+ * right  after  L2 (when it exists) so that one can receive a contiguous
+ * buffer.
+ */
+   dalign = ALGO->align * sizeof( double );     /* alignment in bytes */
+
+   if( npcol == 1 )                             /* P x 1 process grid */
+   {                                     /* space for L1, DPIV, DINFO */
+      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
+      if( nprow > 1 )                                 /* space for U */
+      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * 
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure  -  Always re-use A in
+ * the only process column (HPL_PTR presumably aligns WORK - confirm)
+ */
+      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
+      PANEL->ldl2  = A->ld;
+      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->DPIV  = PANEL->L1    + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
+   }
+   else
+   {                                        /* space for L2, L1, DPIV */
+      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
+      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
+#ifdef HPL_COPY_L
+      lwork = ALGO->align + PANEL->len;
+#else
+      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
+#endif
+      if( nprow > 1 )                                 /* space for U */
+      { 
+         nu = ( mycol == icurcol ? nq - JB : nq );
+         lwork += JB * Mmax( 0, nu );
+      }
+
+      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
+                                           sizeof( double ) ) ) )
+      {
+         HPL_pabort( __LINE__, "HPL_pdpanel_init",
+                     "Memory allocation failed" );
+      }
+/*
+ * Initialize the pointers of the panel structure - Re-use A in the cur-
+ * rent process column when HPL_COPY_L is not defined.
+ */
+#ifdef HPL_COPY_L
+      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
+      PANEL->ldl2  = Mmax( 1, ml2 );
+      PANEL->L1    = PANEL->L2 + ml2 * JB;
+#else
+      if( mycol == icurcol )
+      {
+         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
+         PANEL->ldl2 = A->ld;
+         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
+      }
+      else
+      {
+         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
+         PANEL->ldl2 = Mmax( 1, ml2 );
+         PANEL->L1   = PANEL->L2 + ml2 * JB;
+      } 
+#endif
+      PANEL->DPIV  = PANEL->L1   + JB * JB;
+      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
+      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
+   }
+#ifdef HPL_CALL_VSIPL
+   PANEL->Ablock  = A->block;
+/*
+ * Create blocks and bind them to the data pointers
+ */
+   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
+                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
+   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
+                                      (vsip_length)(PANEL->ldl2*JB),
+                                      VSIP_MEM_NONE );
+   if( nprow > 1 )
+   { 
+      nu = ( mycol == icurcol ? nq - JB : nq );
+      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
+                                        (vsip_length)(JB * Mmax( 0, nu )),
+                                        VSIP_MEM_NONE );
+   }
+   else { PANEL->Ublock = A->block; }
+#endif
+/*
+ * If nprow is 1, we just allocate an array of JB integers for the swap.
+ * When nprow > 1, we allocate the space for the index arrays immediate-
+ * ly. The exact size of this array depends on the swapping routine that
+ * will be used, so we allocate the maximum:
+ *
+ *    IWORK[0] is of size at most 1      +
+ *    IPL      is of size at most 1      +
+ *    IPID     is of size at most 4 * JB +
+ *
+ *    For HPL_pdlaswp00:
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       llen     is of size at most NPROW  +
+ *       llen_sv  is of size at most NPROW.
+ *
+ *    For HPL_pdlaswp01:
+ *       ipA      is of size at most 1      +
+ *       lindxA   is of size at most 2 * JB +
+ *       lindxAU  is of size at most 2 * JB +
+ *       iplen    is of size at most NPROW  + 1 +
+ *       ipmap    is of size at most NPROW  +
+ *       ipmapm1  is of size at most NPROW  +
+ *       permU    is of size at most JB     +
+ *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
+ *
+ * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
+ *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ *
+ * We use the first entry of this work array  to  indicate  whether the
+ * local  index arrays  have already been computed,  and  if  yes,  by
+ * which function:
+ *    IWORK[0] = -1: no index arrays have been computed so far;
+ *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
+ *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
+ * This avoids redundant recomputation of those index arrays.
+ */
+   if( nprow == 1 ) { lwork = JB; }
+   else             
+   {
+      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
+      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
+   }
+
+   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+
+   if( PANEL->IWORK == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
+                       /* Initialize the first entry of the workarray */
+   *(PANEL->IWORK) = -1;
+/*
+ * End of HPL_pdpanel_init
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c
new file mode 100644
index 000000000..1dbd8a18f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c
@@ -0,0 +1,152 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanel_new
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        M,
+   const int                        N,
+   const int                        JB,
+   HPL_T_pmat *                     A,
+   const int                        IA,
+   const int                        JA,
+   const int                        TAG,
+   HPL_T_panel * *                  PANEL
+)
+#else
+void HPL_pdpanel_new
+( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        M;
+   const int                        N;
+   const int                        JB;
+   HPL_T_pmat *                     A;
+   const int                        IA;
+   const int                        JA;
+   const int                        TAG;
+   HPL_T_panel * *                  PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanel_new allocates a panel data structure,  initializes it by
+ * calling HPL_pdpanel_init and returns its address through PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         Process grid information,   forwarded  to  HPL_pdpanel_init
+ *         unchanged.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         Algorithmic  parameters,    forwarded  to  HPL_pdpanel_init
+ *         unchanged.
+ *
+ * M       (local input)                 const int
+ *         Global number of rows of the panel.   M  must  be  at  least
+ *         zero.
+ *
+ * N       (local input)                 const int
+ *         Global number of columns of the panel and trailing submatrix.
+ *         N must be at least zero.
+ *
+ * JB      (global input)                const int
+ *         Number of columns of the panel.   JB  must  be  at  least
+ *         zero.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         Local  array  information,  forwarded  to  HPL_pdpanel_init
+ *         unchanged.
+ *
+ * IA      (global input)                const int
+ *         Global row index identifying the panel and trailing submatrix.
+ *         IA must be at least zero.
+ *
+ * JA      (global input)                const int
+ *         Global column index  identifying  the  panel  and  trailing
+ *         submatrix. JA must be at least zero.
+ *
+ * TAG     (global input)                const int
+ *         Row broadcast message id.
+ *
+ * PANEL   (local input/output)          HPL_T_panel * *
+ *         On exit,  *PANEL  holds  the  address  of the newly created
+ *         and initialized panel data structure.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * panel;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Obtain storage for the structure - Report a failure via HPL_pabort
+ */
+   panel = (HPL_T_panel *)malloc( sizeof( *panel ) );
+   if( panel == NULL ) {
+      HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" );
+   }
+/* Initialize the new structure, then hand it back through PANEL */
+   HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, panel );
+   *PANEL = panel;
+/*
+ * End of HPL_pdpanel_new
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile
new file mode 100644
index 000000000..804749cc2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile
@@ -0,0 +1,90 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   \
+                   $(INCdir)/hpl_auxil.h  $(INCdir)/hpl_pmisc.h  \
+                   $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h   \
+                   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  \
+                   $(INCdir)/hpl_pfact.h  $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#
+HPL_panobj       = HPL_pdpanel_new.o  HPL_pdpanel_init.o \
+                   HPL_pdpanel_disp.o HPL_pdpanel_free.o
+#
+## Targets #############################################################
+# all, lib and clean name actions rather than files to be built.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+# lib.grd is a stamp, touched once the objects are archived in $(HPLlib).
+lib.grd : $(HPL_panobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# Each object is compiled from the same-named source one level up ($<).
+HPL_pdpanel_new.o      : ../HPL_pdpanel_new.c      $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanel_init.o     : ../HPL_pdpanel_init.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanel_disp.o     : ../HPL_pdpanel_disp.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanel_free.o     : ../HPL_pdpanel_free.c     $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+#
+# ######################################################################
+# Remove the objects and every *.grd stamp; $(HPLlib) is left in place.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
new file mode 100644
index 000000000..7ad5a1a99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth (1,2,4,8,16 or 32) and its log2; keep in sync
+ */
+#ifndef HPL_LASWP00N_DEPTH
+#define    HPL_LASWP00N_DEPTH       32
+#define    HPL_LASWP00N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp00N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp00N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp00N performs a series of local row interchanges on a matrix
+ * A: for i = 0 .. M-1 in turn, rows i and IPIV[i] of A are swapped.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the number of rows of the array A to be
+ *         interchanged. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies  the number of columns of the array A.
+ *         N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an array of dimension (LDA,N) to which
+ *         the row interchanges will be  applied.  On exit, the permuted
+ *         matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         On entry,  IPIV  is  an  array of size  M  that  contains the
+ *         pivoting information.  For k in [0..M),  IPIV[k] = l  implies
+ *         that local rows k and l are to be interchanged.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   register double            r;           /* swap temporary            */
+   double                     * a0, * a1;  /* cursors in rows i and ip  */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP00N_LOG2_DEPTH );
+   int                        ip, nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to interchange */
+   /* nu = N rounded down to a multiple of the depth;  nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
+                          << HPL_LASWP00N_LOG2_DEPTH ) );
+   /* unrolled part: each pass swaps rows i and ip across DEPTH columns */
+   for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+            /* one swap per column; both cursors then step by LDA */
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#if ( HPL_LASWP00N_DEPTH >  1 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  2 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  4 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH >  8 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+#if ( HPL_LASWP00N_DEPTH > 16 )
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
+#endif
+         }
+      }
+   }
+   /* remainder: A now points at column nu; swap the last nr columns */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         if( i != ( ip = IPIV[i] ) )
+         {
+            a0 = A + i; a1 = A + ip;
+            for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA )
+            { r = *a0; *a0 = *a1; *a1 = r; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
new file mode 100644
index 000000000..786d1eff4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c
@@ -0,0 +1,209 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth (1,2,4,8,16 or 32) and its log2; keep in sync
+ */
+#ifndef HPL_LASWP01N_DEPTH
+#define    HPL_LASWP01N_DEPTH      32
+#define    HPL_LASWP01N_LOG2_DEPTH  5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01N copies  scattered rows  of  A  into itself  and into an
+ * array  U.  The row offsets in  A  of the source rows are specified by
+ * LINDXA.  The destinations of those rows are specified by LINDXAU.   A
+ * non-negative value of LINDXAU indicates that the destination array is
+ * U, and A otherwise.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N). The rows
+ *         of A specified by LINDXA are to be copied within this array U
+ *         at the positions given by non-negative values of LINDXAU.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A value of
+ *         LINDXAU[i] >= 0  indicates  that  the  row  LINDXA[i]  of  A
+ *         should be copied into U at the position LINDXAU[i]; otherwise
+ *         the row  LINDXA[i]  of  A  should be moved  to  the  position
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;  /* source / destination cursors */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP01N_LOG2_DEPTH );
+   int                        lda1, nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to copy */
+   /* nu = N rounded down to a multiple of the depth;  nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) <<
+                            HPL_LASWP01N_LOG2_DEPTH ) );
+   /* unrolled part: each pass copies DEPTH consecutive columns per row */
+   for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; }
+         /* NOTE(review): if LINDXAU[i] < 0 the (size_t) cast wraps - confirm */
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#if ( HPL_LASWP01N_DEPTH >  1 )
+         *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  2 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  4 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH >  8 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+#if ( HPL_LASWP01N_DEPTH > 16 )
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+         *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA;
+#endif
+      }
+   }
+   /* remainder: A and U now point at column nu; copy the last nr columns */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; }
+         else                  { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; }
+         for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp01N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
new file mode 100644
index 000000000..429cfb6f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c
@@ -0,0 +1,252 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Default unrolling depth (1,2,4,8,16 or 32) and its log2; keep in sync
+ */
+#ifndef HPL_LASWP01T_DEPTH
+#define    HPL_LASWP01T_DEPTH       32
+#define    HPL_LASWP01T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp01T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp01T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp01T copies  scattered rows  of  A  into itself  and into an
+ * array U.  The row offsets in  A  of the source rows  are specified by
+ * LINDXA.  The destinations of those rows are specified by LINDXAU.   A
+ * non-negative value of LINDXAU indicates that the destination array is
+ * U, and A otherwise. Rows of A are stored as columns in U.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         moved within A or copied into U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         moved within A or copied into U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be moved within A or
+ *         copied into U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M). The rows
+ *         of A specified by  LINDXA  are copied within this array  U at
+ *         the positions given by non-negative values of LINDXAU.   The
+ *         rows of A are stored as columns in U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local  row indexes  of  A  that should be moved within  A  or
+ *         copied into U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local  row indexes of  U  where the rows of  A  should be
+ *         copied at. This array also contains the  local row offsets in
+ *         A where some of the rows of A should be moved to.  A value of
+ *         LINDXAU[i] >= 0  indicates  that  the  row  LINDXA[i]  of  A
+ *         should be copied into U at the position LINDXAU[i]; otherwise
+ *         the row  LINDXA[i]  of  A  should be moved  to  the  position
+ *         -LINDXAU[i] within A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * a0, * a1;  /* source / destination cursors */
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP01T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH );
+   int                        nu, nr;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;   /* nothing to copy */
+   /* nu = N rounded down to a multiple of the depth;  nr = N - nu */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) <<
+                            HPL_LASWP01T_LOG2_DEPTH ) );
+   /* unrolled part: per pass A moves DEPTH columns, U moves DEPTH entries */
+   for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            /* row of A -> consecutive entries of column LINDXAU[i] of U */
+            a1[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            a1[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA;
+            a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA;
+            a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA;
+            a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA;
+            a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA;
+            a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA;
+            a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA;
+            a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA;
+            a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA;
+            a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA;
+            a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA;
+            a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA;
+#endif
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            /* NOTE(review): LINDXAU[i] < 0, so the (size_t) cast wraps */
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#if ( HPL_LASWP01T_DEPTH >  1 )
+            *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  2 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  4 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH >  8 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+#if ( HPL_LASWP01T_DEPTH > 16 )
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+            *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA;
+#endif
+         }
+      }
+   }
+   /* remainder: A and U now sit nu columns/entries in; copy the last nr */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+
+         if( LINDXAU[i] >= 0 )
+         {
+            a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+            for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; }
+         }
+         else
+         {
+            a1 = A - (size_t)(LINDXAU[i]);
+            for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; }
+         }
+      }
+   }
+/*
+ * End of HPL_dlaswp01T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
new file mode 100644
index 000000000..45c2f5f1f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c
@@ -0,0 +1,205 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Define default value for unrolling factor.  The unrolled loop below
+ * only matches DEPTH = 2^LOG2_DEPTH in { 1, 2, 4, 8, 16, 32 };  when
+ * DEPTH is predefined, LOG2_DEPTH must be predefined consistently too. */
+#ifndef HPL_LASWP02N_DEPTH
+#define    HPL_LASWP02N_DEPTH       32
+#define    HPL_LASWP02N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp02N
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         W0,
+   double *                         W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp02N
+( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         W0;
+   double *                         W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp02N packs the rows LINDXA[i] of an array  A  into workspace
+ * W, and stores LINDXAU[i], converted to double, into W0[i*LDW].
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         copied into W. M must be at least zero (no-op if M <= 0).
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the length of rows of A that should be
+ *         copied into W. N must be at least zero (no-op if N <= 0).
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,N). The rows
+ *         of this array specified by LINDXA should be copied into W.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * W0      (local input/output)          double *
+ *         On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied: W0[i*LDW] = (double)(LINDXAU[i]) for i in [0..M).
+ *
+ * W       (local output)                double *
+ *         On entry, W  is an array of size (LDW,M). On exit, W contains
+ *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+ *         in W(:,i), i.e. W[j+i*LDW] = A[LINDXA[i]+j*LDA], j in [0..N).
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied into W.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M  that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the rows of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables ..
+ * A0 points to the current strip of DEPTH columns of A and advances by
+ * incA = LDA << LOG2_DEPTH;  nu = N rounded down to DEPTH, nr = N - nu. */
+   const double               * A0 = A, * a0;
+   double                     * w0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP02N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* .. Executable Statements ..
+ * W0 is filled first;  the first nu columns are then packed DEPTH at a
+ * time by the unrolled loop, and the last nr ones by the final loop.   */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   for( i = 0; i < M; i++ ) 
+      *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]);
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) <<
+                          HPL_LASWP02N_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu;
+        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+
+         w0[ 0] = *a0; a0 += LDA;
+#if ( HPL_LASWP02N_DEPTH >  1 )
+         w0[ 1] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  2 )
+         w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  4 )
+         w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA;
+         w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH >  8 )
+         w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA;
+         w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA;
+         w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA;
+         w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA;
+#endif
+#if ( HPL_LASWP02N_DEPTH > 16 )
+         w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA;
+         w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA;
+         w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA;
+         w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA;
+         w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA;
+         w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA;
+         w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA;
+         w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA;
+#endif
+      }
+   }
+
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp02N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
new file mode 100644
index 000000000..760732a8d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c
@@ -0,0 +1,194 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Define default value for unrolling factor.  The unrolled loop below
+ * only matches DEPTH = 2^LOG2_DEPTH in { 1, 2, 4, 8, 16, 32 };  when
+ * DEPTH is predefined, LOG2_DEPTH must be predefined consistently too. */
+#ifndef HPL_LASWP03N_DEPTH
+#define    HPL_LASWP03N_DEPTH       32
+#define    HPL_LASWP03N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03N
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03N
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03N copies columns of  W  into  rows  of an  array  U.  The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero (nothing is done if M <= 0).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero (nothing is done if N <= 0).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,N).  Columns
+ *         of W are copied as rows within this array U at  the positions
+ *         specified in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied: W0[i*LDW], cast to size_t, is the row of U for W(:,i).
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M) and j in [0..N),  W(j,i)
+ *         is copied into row W0(i*LDW) of U, i.e. U[W0(i*LDW)+j*LDU].
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables ..
+ * incU = LDU << LOG2_DEPTH is the step in U from one strip of DEPTH
+ * columns to the next;  nu = N rounded down to DEPTH,  nr = N - nu.     */
+   const double               * w = W, * w0;
+   double                     * u0;
+   const int                  incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP03N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* .. Executable Statements ..
+ * The first nu entries of every column of W are scattered DEPTH at a
+ * time by the unrolled loop, the remaining nr ones by the final loop.  */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) <<
+                          HPL_LASWP03N_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu;
+        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP03N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP03N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); 
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
new file mode 100644
index 000000000..fece692ce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/* Define default value for unrolling factor.  The unrolled loop below
+ * only matches DEPTH = 2^LOG2_DEPTH in { 1, 2, 4, 8, 16, 32 };  when
+ * DEPTH is predefined, LOG2_DEPTH must be predefined consistently too. */
+#ifndef HPL_LASWP03T_DEPTH
+#define    HPL_LASWP03T_DEPTH       32
+#define    HPL_LASWP03T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp03T
+(
+   const int                        M,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW
+)
+#else
+void HPL_dlaswp03T
+( M, N, U, LDU, W0, W, LDW )
+   const int                        M;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp03T copies  columns of W  into columns of an array U.   The
+ * destination in U of these columns contained in W is stored within W0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies  the  number  of columns of  W  stored
+ *         contiguously that should be copied into U. M must be at least
+ *         zero (nothing is done if M <= 0).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies  the  length of columns of  W  stored
+ *         contiguously that should be copied into U. N must be at least
+ *         zero (nothing is done if N <= 0).
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,M).  Columns
+ *         of W are copied within the array U at the positions specified
+ *         in W0.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * W0      (local input)                 const double *
+ *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+ *         the destination offset  in U where the columns of W should be
+ *         copied: W0[i*LDW], cast to size_t, is the column for W(:,i).
+ *
+ * W       (local input)                 const double *
+ *         On entry, W  is an array of size (LDW,M),  that contains data
+ *         to be copied into U. For i in [0..M) and j in [0..N),  W(j,i)
+ *         is copied into column W0(i*LDW) of U, i.e. U[j+W0(i*LDW)*LDU].
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* .. Local Variables ..
+ * incU = 1 << LOG2_DEPTH is the number of entries of a column handled
+ * per strip;  nu = N rounded down to DEPTH,  nr = N - nu.              */
+   const double               * w = W, * w0; 
+   double                     * u0;
+   const int                  incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* .. Executable Statements ..
+ * The first nu entries of every column of W are copied DEPTH at a time
+ * by the unrolled loop, the remaining nr entries by the final loop.    */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
+                          HPL_LASWP03T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu;
+        j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP03T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP03T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP03T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp03T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
new file mode 100644
index 000000000..4f9c490a5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c
@@ -0,0 +1,285 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04N_DEPTH
+#define    HPL_LASWP04N_DEPTH       32   /* unroll factor of the j loop */
+#define    HPL_LASWP04N_LOG2_DEPTH   5   /* must be log2 of the DEPTH   */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04N
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04N
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U
+ * with columns of W. In addition M1 - M0 columns of  W  are copied into
+ * rows of U. Nothing is done when N <= 0 or when both M0 and M1 are <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of rows of U that should be
+ *         copied into  A  and replaced by columns of  W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies the number of columns of W that should
+ *         be copied into rows of U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points to  an array of dimension (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M1).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry, W0 is an array of size at least (M1-1)*LDW+1. Entry
+ *         W0[i*LDW],  for i in [M0..M1),  holds the destination row in U
+ *         into which column i of W is copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W is an array of leading dimension LDW.  For i in
+ *         [0..M0),  W(0:N-1,i) replaces row LINDXAU(i) of U;  for i in
+ *         [M0..M1), W(0:N-1,i) is copied into the row W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which rows of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the local  row indexes of  U that should be copied into A and
+ *         replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA, incU: pointer step for DEPTH columns)
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) << 
+                                            HPL_LASWP04N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP04N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of the depth, nr = N - nu leftovers */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
+                          HPL_LASWP04N_LOG2_DEPTH ) );
+/* Full panels: each pass of the j loop handles DEPTH columns of A and U */
+   for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04N_DEPTH )
+   {
+      for( i =  0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         /* per column: save U(row) into A, then overwrite U(row) from W */
+         *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[22]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU;
+         *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU;
+#endif
+      }
+      /* columns M0..M1-1 of W only go into U, at the row stored in W0 */
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+
+         *u0 = w0[ 0]; u0 += LDU;
+#if ( HPL_LASWP04N_DEPTH >  1 )
+         *u0 = w0[ 1]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  2 )
+         *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  4 )
+         *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU;
+         *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH >  8 )
+         *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU;
+         *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU;
+         *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU;
+         *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU;
+#endif
+#if ( HPL_LASWP04N_DEPTH > 16 )
+         *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU;
+         *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU;
+         *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU;
+         *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU;
+         *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU;
+         *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU;
+         *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU;
+         *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU;
+#endif
+      }
+   }
+/* Leftover nr (< DEPTH) columns: same two copies, without unrolling */
+   if( nr )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]);
+         u0 = U + (size_t)(LINDXAU[i]);
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { *a0 = *u0; *u0 = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW)));
+         w0 = w + (size_t)(i) * (size_t)(LDW);
+         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04N
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
new file mode 100644
index 000000000..9cbb4c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP04T_DEPTH
+#define    HPL_LASWP04T_DEPTH       32   /* unroll factor of the j loop */
+#define    HPL_LASWP04T_LOG2_DEPTH   5   /* must be log2 of the DEPTH   */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp04T
+(
+   const int                        M0,
+   const int                        M1,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   double *                         A,
+   const int                        LDA,
+   const double *                   W0,
+   const double *                   W,
+   const int                        LDW,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp04T
+( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
+   const int                        M0;
+   const int                        M1;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   double *                         A;
+   const int                        LDA;
+   const double *                   W0;
+   const double *                   W;
+   const int                        LDW;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
+ * columns of U with columns of W. In addition M1 - M0 columns of W  are
+ * copied into U. Nothing is done when N <= 0 or both M0, M1 are <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M0      (local input)                 const int
+ *         On entry, M0 specifies the number of columns of U that should
+ *         be copied into A and replaced by columns of W.  M0 must be at
+ *         least zero.
+ *
+ * M1      (local input)                 const int
+ *         On entry, M1 specifies  the number of columns of W that  will
+ *         be copied into U. M1 must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the length of the columns of  U  that
+ *         will be copied into rows of A. N must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry, U points to an array of dimension (LDU,*) holding
+ *         the columns that are to be copied into rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M0).
+ *
+ * W0      (local input)                 const double *
+ *         On entry, W0 is an array of size at least (M1-1)*LDW+1. Entry
+ *         W0[i*LDW], for i in [M0..M1), holds the destination column in
+ *         U into which column i of W is copied.
+ *
+ * W       (local input)                 const double *
+ *         On entry, W is an array of leading dimension LDW.  For i in
+ *         [0..M0), W(0:N-1,i) replaces column LINDXAU(i) of U; for i in
+ *         [M0..M1), W(0:N-1,i) is copied into column W0(i*LDW) of U.
+ *
+ * LDW     (local input)                 const int
+ *         On entry, LDW specifies the leading dimension of the array W.
+ *         LDW must be at least MAX(1,N+1).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA  is an array of dimension  M0 containing the
+ *         local row indexes A into which columns of U are copied.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension M0 that  contains
+ *         the  local column indexes of  U  that should be copied into A
+ *         and replaced by the columns of W.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (ldu, ldw: strides widened to size_t so that
+ *    index * stride cannot overflow int on large local panels)
+ */
+   const double               * w = W, * w0;
+   double                     * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP04T_LOG2_DEPTH ),
+                              incU = (   1 << HPL_LASWP04T_LOG2_DEPTH );
+   const size_t               ldu = (size_t)(LDU), ldw = (size_t)(LDW);
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of the depth, nr = N - nu leftovers */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
+                          HPL_LASWP04T_LOG2_DEPTH ) );
+/* Full panels: DEPTH entries of every selected U column per j pass */
+   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
+        w += HPL_LASWP04T_DEPTH )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * ldu; w0 = w + i * ldw;
+         /* per entry: save U into the A row, then overwrite U from W */
+         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
+         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
+         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
+         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
+         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
+         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
+         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
+         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
+         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
+         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
+         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
+         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
+         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
+         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
+         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
+         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
+         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
+         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
+         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
+         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
+         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
+         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
+         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
+         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
+         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
+         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
+#endif
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (int)(*(W0+i*ldw)) * ldu; w0 = w + i * ldw;
+
+         u0[ 0] = w0[ 0];
+#if ( HPL_LASWP04T_DEPTH >  1 )
+         u0[ 1] = w0[ 1];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  2 )
+         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  4 )
+         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
+#endif
+#if ( HPL_LASWP04T_DEPTH >  8 )
+         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
+         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
+#endif
+#if ( HPL_LASWP04T_DEPTH > 16 )
+         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
+         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
+         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
+         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
+#endif
+      }
+   }
+/* Leftover nr (< DEPTH) entries: same two copies, without unrolling */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M0; i++ )
+      {
+         a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * ldu; w0 = w + i * ldw;
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
+      }
+      for( i = M0; i < M1; i++ )
+      {
+         u0 = U + (int)(*(W0+i*ldw)) * ldu; w0 = w + i * ldw;
+         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp04T
+ */
+} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
new file mode 100644
index 000000000..3edcf91a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c
@@ -0,0 +1,195 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP05N_DEPTH
+#define    HPL_LASWP05N_DEPTH       32   /* unroll factor of the j loop */
+#define    HPL_LASWP05N_LOG2_DEPTH   5   /* must be log2 of the DEPTH   */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05N
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05N copies the rows of U selected by  LINDXAU  into rows of
+ * A at positions indicated by LINDXA. Nothing is done if M or N is <= 0.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of U that should be
+ *         copied into A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of U that should
+ *         be copied into A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points to an array of dimension  (LDU,N).  This
+ *         array contains the rows that are to be copied into A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local row indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (incA, incU: pointer step for DEPTH columns)
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP05N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu = N rounded down to a multiple of the depth, nr = N - nu leftovers */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
+                            HPL_LASWP05N_LOG2_DEPTH ) );
+/* Full panels: each pass of the j loop handles DEPTH columns of A and U */
+   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         /* one statement group per column: copy entry, step both rows */
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP05N_DEPTH >  1 )
+         *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  2 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  4 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH >  8 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP05N_DEPTH > 16 )
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+         *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* Leftover nr (< DEPTH) columns: same copy, without unrolling */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
new file mode 100644
index 000000000..0adaa102d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
@@ -0,0 +1,196 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor (DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP05T_DEPTH
+#define    HPL_LASWP05T_DEPTH       32
+#define    HPL_LASWP05T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp05T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const double *                   U,
+   const int                        LDU,
+   const int *                      LINDXA,
+   const int *                      LINDXAU
+)
+#else
+void HPL_dlaswp05T
+( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const double *                   U;
+   const int                        LDU;
+   const int *                      LINDXA;
+   const int *                      LINDXAU;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp05T copies columns of U at the indexes given by LINDXAU into
+ * rows of A at positions indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the columns of U that will
+ *         be copied into rows of A. N must be at least zero.
+ *
+ * A       (local output)                double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U indicated by LINDXAU.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input)                 const double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns that are to be copied into rows of
+ *         A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be copied from U.
+ *
+ * LINDXAU (local input)                 const int *
+ *         On entry, LINDXAU  is an array of dimension  M that  contains
+ *         the local column indexes of U that should be copied in A.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * U0 = U, * u0;
+   double                     * a0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP05T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
+                            HPL_LASWP05T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+/* Unrolled copy: u0[k] is stored at a0 + k*LDA for k = 0 .. DEPTH-1 */
+         *a0 = u0[ 0]; a0 += LDA;
+#if ( HPL_LASWP05T_DEPTH >  1 )
+         *a0 = u0[ 1]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  2 )
+         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  4 )
+         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
+         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH >  8 )
+         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
+         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
+         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
+         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
+#endif
+#if ( HPL_LASWP05T_DEPTH > 16 )
+         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
+         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
+         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
+         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
+         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
+         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
+         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
+         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
+#endif
+      }
+   }
+/* A and U0 now point past the nu unrolled entries; finish the last nr */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[ i]);
+         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
+      }
+   }
+/*
+ * End of HPL_dlaswp05T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
new file mode 100644
index 000000000..a74bae75c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c
@@ -0,0 +1,206 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor (DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP06N_DEPTH
+#define    HPL_LASWP06N_DEPTH       32
+#define    HPL_LASWP06N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06N
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06N swaps rows of  U  with rows of A at positions
+ * indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with rows of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with rows of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         rows of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,N).  This
+ *         array contains the rows of U that are to be swapped with rows
+ *         of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,M).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06N_LOG2_DEPTH ),
+                              incU = (int)( (unsigned int)(LDU) <<
+                                            HPL_LASWP06N_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) <<
+                            HPL_LASWP06N_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+/* Unrolled swap of a0[k*LDA] and u0[k*LDU] for k = 0 .. DEPTH-1 */
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#if ( HPL_LASWP06N_DEPTH >  1 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  2 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  4 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH >  8 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+#if ( HPL_LASWP06N_DEPTH > 16 )
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+         r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU;
+#endif
+      }
+   }
+/* A and U0 now point past the nu unrolled entries; finish the last nr */
+   if( nr )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i);
+         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
+         { r = *a0; *a0 = *u0; *u0 = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
new file mode 100644
index 000000000..fb53c2a31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c
@@ -0,0 +1,207 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor (DEPTH == 2^LOG2_DEPTH)
+ */
+#ifndef HPL_LASWP06T_DEPTH
+#define    HPL_LASWP06T_DEPTH       32
+#define    HPL_LASWP06T_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp06T
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   double *                         U,
+   const int                        LDU,
+   const int *                      LINDXA
+)
+#else
+void HPL_dlaswp06T
+( M, N, A, LDA, U, LDU, LINDXA )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   double *                         U;
+   const int                        LDU;
+   const int *                      LINDXA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp06T swaps  columns  of  U  with  rows  of  A  at  positions
+ * indicated by LINDXA.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M  specifies the number of rows of A that should be
+ *         swapped with columns of U. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the rows of A that should
+ *         be swapped with columns of U. N must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         the  rows of this array specified by  LINDXA  are replaced by
+ *         columns of U.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  points  to an array of dimension (LDU,*).  This
+ *         array contains the columns of  U  that are to be swapped with
+ *         rows of A.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the leading dimension of the array U.
+ *         LDU must be at least MAX(1,N).
+ *
+ * LINDXA  (local input)                 const int *
+ *         On entry, LINDXA is an array of dimension M that contains the
+ *         local row indexes of A that should be swapped with U.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * U0 = U, * a0, * u0;
+   const int                  incA = (int)( (unsigned int)(LDA) <<
+                                            HPL_LASWP06T_LOG2_DEPTH ),
+                              incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH );
+   int                        nr, nu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* nu: N rounded down to a multiple of 2^LOG2_DEPTH; nr: N - nu leftover */
+   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) <<
+                            HPL_LASWP06T_LOG2_DEPTH ) );
+
+   for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+/* Unrolled swap of a0[k*LDA] and u0[k] for k = 0 .. DEPTH-1 */
+         r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA;
+#if ( HPL_LASWP06T_DEPTH >  1 )
+         r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  2 )
+         r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  4 )
+         r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH >  8 )
+         r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA;
+         r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA;
+         r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA;
+         r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA;
+         r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA;
+         r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA;
+         r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA;
+         r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA;
+#endif
+#if ( HPL_LASWP06T_DEPTH > 16 )
+         r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA;
+         r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA;
+         r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA;
+         r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA;
+         r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA;
+         r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA;
+         r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA;
+         r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA;
+         r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA;
+         r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA;
+         r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA;
+         r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA;
+         r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA;
+         r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA;
+         r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA;
+         r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA;
+#endif
+      }
+   }
+/* A and U0 now point past the nu unrolled entries; finish the last nr */
+   if( nr > 0 )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         a0 = A  + (size_t)(LINDXA[i]);
+         u0 = U0 + (size_t)(i) * (size_t)(LDU);
+         for( j = 0; j < nr; j++, a0 += LDA )
+         { r = *a0; *a0 = u0[j]; u0[j] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp06T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
new file mode 100644
index 000000000..7dbf934f2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c
@@ -0,0 +1,186 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LASWP10N_DEPTH
+#define    HPL_LASWP10N_DEPTH       32
+#define    HPL_LASWP10N_LOG2_DEPTH   5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlaswp10N
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int *                      IPIV
+)
+#else
+void HPL_dlaswp10N
+( M, N, A, LDA, IPIV )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int *                      IPIV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlaswp10N performs a sequence  of  local column interchanges on a
+ * matrix A.  One column interchange is initiated  for columns 0 through
+ * N-1 of A.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry,  M  specifies  the number of rows of the array A. M
+ *         must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of the array A. N
+ *         must be at least zero.
+ *
+ * A       (local input/output)          double *
+ *         On entry, A  points to an  array of  dimension (LDA,N).  This
+ *         array contains the columns onto which the interchanges should
+ *         be applied. On exit, A contains the permuted matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,M).
+ *
+ * IPIV    (local input)                 const int *
+ *         For 0 <= j < N, column j is swapped with column IPIV[j].
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     r;
+   double                     * a0, * a1;
+   const int                  incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH );
+   int                        jp, mr, mu;
+   register int               i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+/* mu = M rounded down to a multiple of 2^LOG2_DEPTH, mr = M - mu rows left */
+   mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH )
+                            << HPL_LASWP10N_LOG2_DEPTH ) );
+/* Exchange column j with column IPIV[j] whenever the two indices differ */
+   for( j = 0; j < N; j++ )
+   {
+      if( j != ( jp = IPIV[j] ) )
+      {
+         a0 = A + j * LDA; a1 = A + jp * LDA;
+/* NOTE(review): j*LDA and jp*LDA are int products; overflow risk - confirm */
+         for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA )
+         {
+            r = *a0;    *a0    = *a1;    *a1    = r;
+#if ( HPL_LASWP10N_DEPTH >  1 )
+            r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  2 )
+            r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r;
+            r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  4 )
+            r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r;
+            r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r;
+            r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r;
+            r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH >  8 )
+            r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r;
+            r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r;
+            r = a0[10]; a0[10] = a1[10]; a1[10] = r;
+            r = a0[11]; a0[11] = a1[11]; a1[11] = r;
+            r = a0[12]; a0[12] = a1[12]; a1[12] = r;
+            r = a0[13]; a0[13] = a1[13]; a1[13] = r;
+            r = a0[14]; a0[14] = a1[14]; a1[14] = r;
+            r = a0[15]; a0[15] = a1[15]; a1[15] = r;
+#endif
+#if ( HPL_LASWP10N_DEPTH > 16 )
+            r = a0[16]; a0[16] = a1[16]; a1[16] = r;
+            r = a0[17]; a0[17] = a1[17]; a1[17] = r;
+            r = a0[18]; a0[18] = a1[18]; a1[18] = r;
+            r = a0[19]; a0[19] = a1[19]; a1[19] = r;
+            r = a0[20]; a0[20] = a1[20]; a1[20] = r;
+            r = a0[21]; a0[21] = a1[21]; a1[21] = r;
+            r = a0[22]; a0[22] = a1[22]; a1[22] = r;
+            r = a0[23]; a0[23] = a1[23]; a1[23] = r;
+            r = a0[24]; a0[24] = a1[24]; a1[24] = r;
+            r = a0[25]; a0[25] = a1[25]; a1[25] = r;
+            r = a0[26]; a0[26] = a1[26]; a1[26] = r;
+            r = a0[27]; a0[27] = a1[27]; a1[27] = r;
+            r = a0[28]; a0[28] = a1[28]; a1[28] = r;
+            r = a0[29]; a0[29] = a1[29]; a1[29] = r;
+            r = a0[30]; a0[30] = a1[30]; a1[30] = r;
+            r = a0[31]; a0[31] = a1[31]; a1[31] = r;
+#endif
+         }
+/* a0 and a1 now point at row mu: swap the mr leftover rows one by one */
+         for( i = 0; i < mr; i++ )
+         { r = a0[i]; a0[i] = a1[i]; a1[i] = r; }
+      }
+   }
+/*
+ * End of HPL_dlaswp10N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c
new file mode 100644
index 000000000..e1b5bbfac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c
@@ -0,0 +1,151 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2l
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2l
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2l returns  the local index of a matrix entry pointed to by
+ * the  global index IG.  The  returned  local index is the same in all
+ * processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated,  in  which  case  this  routine returns IG in all
+ *         processes. Otherwise, the value of SRCPROC is ignored.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( IG );
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on 
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1, 
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+   j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+   return( NB * (j - i) + 
+           ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) );
+/*
+ * End of HPL_indxg2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c
new file mode 100644
index 000000000..74662f9d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c
@@ -0,0 +1,176 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_indxg2lp
+(
+   int *                            IL,
+   int *                            PROC,
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+void HPL_indxg2lp
+( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
+   int *                            IL;
+   int *                            PROC;
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2lp computes the local index of a matrix entry pointed to by
+ * the global index IG as well as the process coordinate which possesses
+ * this entry. The local returned index is the same in all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (output)                      int *
+ *         On exit, IL specifies the local index corresponding to IG. IL
+ *         is at least zero.
+ *
+ * PROC    (output)                      int *
+ *         On exit,  PROC  is the  coordinate of the process  owning the
+ *         entry specified by the global index IG. PROC is at least zero
+ *         and less than NPROCS.
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry, if SRCPROC = -1, the data  is not  distributed  but
+ *         replicated, and IL = IG is returned.  Otherwise,  SRCPROC  is
+ *         the coordinate of the process owning the first block.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+   {
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      *IL   = IG;
+      *PROC = SRCPROC;
+   }
+   else
+   {
+/*
+ * IG  =  INB - NB + ( l * NPROCS + MYROC ) * NB + X  with  0 <= X < NB,
+ * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC
+ * with  0 <= MYROC < NPROCS.  The local index to be returned depends on
+ * whether  IG  resides in the process owning the first partial block of
+ * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB,
+ * so that if NPROCS divides i+1, i.e. MYROC=0,  we have i+1 = l*NPROCS.
+ * If we set  j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is
+ * equal to (j+1) * NPROCS.  Conversely, if NPROCS does not divide  i+1,
+ * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that
+ * j=l and thus (j+1)*NPROCS > i+1.
+ */
+      j = ( i = ( IG - INB ) / NB ) / NPROCS;
+/*
+ * IG  is in block  1 + ( IG - INB ) / NB.  Add this to SRCPROC and take
+ * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */
+      *PROC = SRCPROC + 1 + i;
+      *PROC = MPosMod( *PROC, NPROCS );
+/*
+ * When IG resides in the process owning the first partial block of size
+ * INB (MYROC = 0), then the result IL can be written as:
+ * IL = INB - NB + l * NB + X  = IG + ( l - (l * NPROCS + MYROC) ) * NB.
+ * Using the above notation,  we have i+1 = l*NPROCS + MYROC = l*NPROCS,
+ * i.e. l = ( i+1 ) / NPROCS = j+1, since  NPROCS divides i+1, therefore
+ * IL = IG + ( j + 1 - ( i + 1 ) ) * NB.
+ *
+ * Otherwise when MYROC >= 1, the result IL can be written as:
+ * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB.
+ * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1,
+ * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e.
+ * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB.
+ */
+      *IL = NB * (j - i) + 
+            ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG );
+   }
+/*
+ * End of HPL_indxg2lp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c
new file mode 100644
index 000000000..d0e75f516
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c
@@ -0,0 +1,128 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxg2p
+(
+   const int                        IG,
+   const int                        INB,
+   const int                        NB,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxg2p
+( IG, INB, NB, SRCPROC, NPROCS )
+   const int                        IG;
+   const int                        INB;
+   const int                        NB;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_indxg2p computes the process coordinate which possesses the entry
+ * of a matrix specified by a global index IG.
+ *
+ * Arguments
+ * =========
+ *
+ * IG      (input)                       const int
+ *         On entry, IG specifies the global index of the matrix  entry.
+ *         IG must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix,  with 0
+ *         <= SRCPROC < NPROCS.  SRCPROC = -1 is returned unchanged.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        proc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * IG  belongs  to the first block,  or the data is not distributed,  or
+ * there is just one process in this dimension of the grid.
+ */
+      return( SRCPROC );
+/*
+ * Otherwise,  IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC
+ * and take the NPROCS  modulo (definition of the block-cyclic data dis-
+ * tribution).
+ */
+   proc = SRCPROC + 1 + ( IG - INB ) / NB;
+   return( MPosMod( proc, NPROCS ) );
+/*
+ * End of HPL_indxg2p
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c
new file mode 100644
index 000000000..7f139425a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c
@@ -0,0 +1,164 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_indxl2g
+(
+   const int                        IL,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_indxl2g
+( IL, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        IL;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_indxl2g  maps the local index  IL  held by the process  PROC to
+ * the matching global index  along one  block-cyclically  distributed
+ * dimension of the matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * IL      (input)                       const int
+ *         Local index of the matrix entry within process PROC.  IL
+ *         must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         Size of the first block of the global matrix.  INB must be
+ *         at least one.
+ *
+ * NB      (input)                       const int
+ *         Blocking factor used to partition and distribute the matrix.
+ *         NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         Coordinate of the process whose local index is  converted.
+ *         PROC must be at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         Coordinate of the process owning the first row or column of
+ *         the matrix:  at least zero  and strictly less than  NPROCS.
+ *         The value -1 is handled as "data not distributed".
+ *
+ * NPROCS  (input)                       const int
+ *         Total number of process rows or columns  over which  the
+ *         matrix is distributed.  NPROCS must be at least one.
+ *
+ * Return value
+ * ============
+ *
+ * The global index matching IL.  It is IL itself when SRCPROC is -1 or
+ * when NPROCS is one.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        nprev;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Nothing to translate  when the data is not distributed,  or when this
+ * dimension of the grid holds a single process.
+ */
+   if( ( NPROCS == 1 ) || ( SRCPROC == -1 ) )
+   {
+      return( IL );
+   }
+
+   if( PROC == SRCPROC )
+   {
+/*
+ * The source process  starts with a block of size INB:  an index lying
+ * in that first block is the same locally and globally.  Past it,  IL
+ * comes after ( IL - INB ) / NB + 1 complete local blocks, and each of
+ * the NPROCS-1 other processes holds NB entries per such block that are
+ * globally ahead of IL.
+ */
+      if( IL < INB ) return( IL );
+      return( IL + ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) );
+   }
+
+/*
+ * PROC is not the source process.  nprev  counts the processes  lying
+ * strictly between SRCPROC and PROC when walking the process line from
+ * SRCPROC towards increasing coordinates, wrapping around after the last
+ * one.
+ */
+   nprev = PROC - SRCPROC - 1;
+   if( PROC < SRCPROC )
+      nprev += NPROCS;
+/*
+ * Ahead of IL there are:  the first block (INB entries),  IL / NB blocks
+ * of NB entries on each of the NPROCS-1 other processes,  and  one more
+ * block of NB entries on each of the nprev processes counted above.
+ */
+   return( NB * ( ( NPROCS - 1 ) * ( IL / NB ) + nprev ) + IL + INB );
+/*
+ * End of HPL_indxl2g
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c
new file mode 100644
index 000000000..2580f2ad4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c
@@ -0,0 +1,382 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_infog2l
+(
+   int                              I,
+   int                              J,
+   const int                        IMB,
+   const int                        MB,
+   const int                        INB,
+   const int                        NB,
+   const int                        RSRC,
+   const int                        CSRC,
+   const int                        MYROW,
+   const int                        MYCOL,
+   const int                        NPROW,
+   const int                        NPCOL,
+   int *                            II,
+   int *                            JJ,
+   int *                            PROW,
+   int *                            PCOL
+)
+#else
+void HPL_infog2l
+( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
+   int                              I;
+   int                              J;
+   const int                        IMB;
+   const int                        MB;
+   const int                        INB;
+   const int                        NB;
+   const int                        RSRC;
+   const int                        CSRC;
+   const int                        MYROW;
+   const int                        MYCOL;
+   const int                        NPROW;
+   const int                        NPCOL;
+   int *                            II;
+   int *                            JJ;
+   int *                            PROW;
+   int *                            PCOL;
+#endif 
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_infog2l computes the starting local index II, JJ corresponding to
+ * the submatrix starting globally at the entry pointed by  I,  J.  This
+ * routine returns the coordinates in the grid of the process owning the
+ * matrix entry of global indexes I, J, namely PROW and PCOL.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                int
+ *         Global row index of the matrix entry,  at least zero.   The
+ *         by-value copy is modified internally  (caller unaffected).
+ *
+ * J       (global input)                int
+ *         Global column index of the matrix entry, at least zero. The
+ *         by-value copy is modified internally  (caller unaffected).
+ *
+ * IMB     (global input)                const int
+ *         On entry,  IMB  specifies  the size of the first row block of
+ *         the global matrix. IMB must be at least one.
+ *
+ * MB      (global input)                const int
+ *         On entry,  MB specifies the blocking factor used to partition
+ *         and  distribute the rows of the matrix A.  MB  must be larger
+ *         than one.
+ *
+ * INB     (global input)                const int
+ *         On entry, INB specifies the size of the first column block of
+ *         the global matrix. INB must be at least one.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the columns of the matrix A. NB must be larger
+ *         than one.
+ *
+ * RSRC    (global input)                const int
+ *         On entry,  RSRC  specifies  the row coordinate of the process
+ *         that possesses the  first row  of the matrix,  or -1 when the
+ *         rows are not distributed.  Otherwise 0 <= RSRC < NPROW.
+ *
+ * CSRC    (global input)                const int
+ *         On entry, CSRC specifies the column coordinate of the process
+ *         that possesses the first column of the matrix, or -1 when the
+ *         columns are not distributed.  Otherwise 0 <= CSRC < NPCOL.
+ *
+ * MYROW   (local input)                 const int
+ *         On entry, MYROW  specifies my  row process  coordinate in the
+ *         grid. MYROW is greater than or equal  to zero  and  less than
+ *         NPROW.
+ *
+ * MYCOL   (local input)                 const int
+ *         On entry, MYCOL specifies my column process coordinate in the
+ *         grid. MYCOL is greater than or equal  to zero  and  less than
+ *         NPCOL.
+ *
+ * NPROW   (global input)                const int
+ *         On entry,  NPROW  specifies the number of process rows in the
+ *         grid. NPROW is at least one.
+ *
+ * NPCOL   (global input)                const int
+ *         On entry,  NPCOL  specifies  the number of process columns in
+ *         the grid. NPCOL is at least one.
+ *
+ * II      (local output)                int *
+ *         On exit, II  specifies the  local  starting  row index of the
+ *         submatrix. On exit, II is at least 0.
+ *
+ * JJ      (local output)                int *
+ *         On exit, JJ  specifies the local starting column index of the
+ *         submatrix. On exit, JJ is at least 0.
+ *
+ * PROW    (global output)               int *
+ *         On exit, PROW is the row coordinate of the process owning the
+ *         entry specified by the global index I:  0 <= PROW < NPROW, or
+ *         PROW = RSRC unchanged (possibly -1) if rows are undistributed.
+ *
+ * PCOL    (global output)               int *
+ *         On exit, PCOL  is the column coordinate of the process owning
+ *         the entry specified by the global index J: 0 <= PCOL < NPCOL,
+ *         or PCOL = CSRC unchanged (possibly -1) if cols undistributed.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int            ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
+/* ..
+ * .. Executable Statements ..
+ */
+   imb   = IMB;
+   *PROW = RSRC;
+
+   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
+   {
+/*
+ * The data is not distributed,  or there is just one process row in the
+ * grid: local and global row indexes coincide, *PROW stays RSRC.
+ */
+     *II = I;
+   }
+   else if( I < imb )
+   {
+/*
+ * I refers to an entry in the first block of rows, owned by RSRC
+ */
+     *II = ( MYROW == *PROW ? I : 0 );
+   }
+   else
+   {
+      mb   = MB;
+      rsrc = *PROW;
+/*
+ * The discussion goes as follows:  compute  my distance from the source
+ * process so that  within  this process coordinate system,  the  source
+ * process   is  the  process  such  that  mydist = 0,  or  equivalently
+ * MYROW == rsrc.
+ *
+ * Find  out  the global coordinate of the block I belongs to (nblocks),
+ * as well as the minimum local number of blocks that every process has.
+ *
+ * when mydist < nblocks-ilocblk*NPROW,   I own ilocblk + 1 full blocks,
+ * when mydist > nblocks-ilocblk*NPROW,   I own ilocblk     full blocks,
+ * when mydist = nblocks-ilocblk*NPROW,   I own ilocblk     full blocks
+ * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */
+      if( MYROW == rsrc )
+      {
+/*
+ * I refers  to an entry  that is not in the first block, find out which
+ * process has it.
+ */
+         nblocks = ( I - imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;   /* i.e. *PROW %= NPROW */
+/*
+ * Since  mydist = 0  and nblocks - ilocblk * NPROW >= 0, there are only
+ * three possible cases:
+ *
+ *   1) When  mydist = nblocks - ilocblk * NPROW = 0  and  I do  not own
+ *      I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this
+ *      case  cannot  happen  when  ilocblk is zero, since nblocks is at
+ *      least one.
+ *
+ *   2) When  mydist = nblocks - ilocblk * NPROW = 0  and  I own  I,  in
+ *      which  case  I  and  II  can  respectively  be  written as IMB +
+ *      (nblocks-1)*MB + IL  and  IMB + (ilocblk-1) * MB + IL.  That  is
+ *      II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
+ *      when ilocblk is zero, since nblocks is at least one.
+ *
+ *   3) mydist = 0 < nblocks - ilocblk * NPROW,  the source process owns
+ *      ilocblk+1 full blocks,  and  therefore  II = IMB + ilocblk * MB.
+ *      Note that when ilocblk is zero, II is just IMB.
+ */
+         if( nblocks < NPROW )
+         {
+            *II = imb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            if( ilocblk * NPROW >= nblocks )
+            {
+               *II = ( ( MYROW == *PROW ) ?
+                       I   + ( ilocblk - nblocks ) * mb :
+                       imb + ( ilocblk - 1       ) * mb );
+            }
+            else
+            {
+               *II =  imb + ilocblk * mb;
+            }
+         }
+      }
+      else
+      {
+/*
+ * I is past the first block:  make I relative to the end of that block
+ * ( I -= imb, local copy only ), then find out which process owns it.
+ */
+         nblocks = ( I -= imb ) / mb + 1;
+         *PROW  += nblocks;
+         *PROW  -= ( *PROW / NPROW ) * NPROW;   /* i.e. *PROW %= NPROW */
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the  source process is the process such that
+ * mydist=0.
+ */
+         if( ( mydist  = MYROW - rsrc ) < 0 ) mydist += NPROW;
+/*
+ * When mydist <  nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
+ * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I,  I own ilocblk
+ * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
+ * blocks and I, in which case the global index is IMB + (nblocks-1)*MB
+ * + IL and II = ilocblk*MB + IL = (ilocblk-nblocks+1)*MB + I (I -= imb).
+ */
+         if( nblocks < NPROW )
+         {
+            mydist -= nblocks;
+            *II     = ( ( mydist < 0 ) ? mb :
+                        ( ( MYROW == *PROW ) ?
+                          I + ( 1 - nblocks ) * mb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPROW;
+            mydist -= nblocks - ilocblk * NPROW;
+            *II     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
+                        ( ( MYROW == *PROW ) ?
+                          ( ilocblk - nblocks + 1 ) * mb + I :
+                          ilocblk * mb ) );
+         }
+      }
+   }
+/*
+ * Same computation for the columns: J, INB, NB, CSRC, MYCOL, NPCOL, JJ
+ */
+   inb   = INB;
+   *PCOL = CSRC;
+
+   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
+   {
+      *JJ = J;
+   }
+   else if( J < inb )
+   {
+      *JJ = ( MYCOL == *PCOL ? J : 0 );
+   }
+   else
+   {
+      nb   = NB;
+      csrc = *PCOL;
+
+      if( MYCOL == csrc )
+      {
+         nblocks = ( J - inb ) / nb + 1;
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;   /* i.e. *PCOL %= NPCOL */
+
+         if( nblocks < NPCOL )
+         {
+            *JJ = inb;
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            if( ilocblk * NPCOL >= nblocks )
+            {
+               *JJ = ( ( MYCOL == *PCOL ) ?
+                       J   + ( ilocblk - nblocks ) * nb :
+                       inb + ( ilocblk - 1       ) * nb );
+            }
+            else
+            {
+               *JJ = inb + ilocblk * nb;
+            }
+         }
+      }
+      else
+      {
+         nblocks = ( J -= inb ) / nb + 1;   /* J now relative to INB */
+         *PCOL  += nblocks;
+         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;   /* i.e. *PCOL %= NPCOL */
+
+         if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
+
+         if( nblocks < NPCOL )
+         {
+            mydist -= nblocks;
+            *JJ     = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
+                        J + ( 1 - nblocks )*nb : 0 ) );
+         }
+         else
+         {
+            ilocblk = nblocks / NPCOL;
+            mydist -= nblocks - ilocblk * NPCOL;
+            *JJ     = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
+                        ( ( MYCOL == *PCOL ) ?
+                          ( ilocblk - nblocks + 1 ) * nb + J :
+                          ilocblk * nb ) );
+         }
+      }
+   }
+/*
+ * End of HPL_infog2l
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c
new file mode 100644
index 000000000..39cd736d3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c
@@ -0,0 +1,120 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numroc
+(
+   const int                        N,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numroc
+( N, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_numroc returns the number of matrix rows or columns owned locally
+ * by process PROC  when N rows or columns are dealt out  starting from
+ * global index 0;  the work is delegated to HPL_numrocI.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         Number of rows/columns dealt out.  N must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         Size of the first block of the global matrix.  INB must be
+ *         at least one.
+ *
+ * NB      (input)                       const int
+ *         Blocking factor used to partition and distribute the matrix.
+ *         NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         Coordinate of the process whose local portion is determined.
+ *         PROC must be at least zero and strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         Coordinate of the process owning the first row or column of
+ *         the matrix:  at least zero  and strictly less than  NPROCS.
+ *
+ * NPROCS  (input)                       const int
+ *         Total number of process rows or columns  over which  the
+ *         matrix is distributed.  NPROCS must be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   const int nloc = HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS );
+/* ..
+ * .. Executable Statements ..
+ */
+   return( nloc );
+/*
+ * End of HPL_numroc
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c
new file mode 100644
index 000000000..70f3497de
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c
@@ -0,0 +1,243 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_numrocI
+(
+   const int                        N,
+   const int                        I,
+   const int                        INB,
+   const int                        NB,
+   const int                        PROC,
+   const int                        SRCPROC,
+   const int                        NPROCS
+)
+#else
+int HPL_numrocI
+( N, I, INB, NB, PROC, SRCPROC, NPROCS )
+   const int                        N;
+   const int                        I;
+   const int                        INB;
+   const int                        NB;
+   const int                        PROC;
+   const int                        SRCPROC;
+   const int                        NPROCS;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_numrocI returns  the  local number of matrix rows/columns process
+ * PROC  will  get  if  we give out  N rows/columns starting from global
+ * index I.  N is returned when SRCPROC is -1 or NPROCS is 1.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N  specifies the number of rows/columns being dealt
+ *         out. N must be at least zero.
+ *
+ * I       (input)                       const int
+ *         On entry, I  specifies the global index of the matrix entry.
+ *         I must be at least zero.
+ *
+ * INB     (input)                       const int
+ *         On entry,  INB  specifies  the size of the first block of the
+ *         global matrix. INB must be at least one.
+ *
+ * NB      (input)                       const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * PROC    (input)                       const int
+ *         On entry, PROC specifies  the coordinate of the process whose
+ *         local portion is determined.  PROC must be at least zero  and
+ *         strictly less than NPROCS.
+ *
+ * SRCPROC (input)                       const int
+ *         On entry,  SRCPROC  specifies  the coordinate of the  process
+ *         that possesses the first row or column of the matrix. SRCPROC
+ *         must be in [0,NPROCS), or -1 when the data is not distributed.
+ *
+ * NPROCS  (input)                       const int
+ *         On entry,  NPROCS  specifies the total number of process rows
+ *         or columns over which the matrix is distributed.  NPROCS must
+ *         be at least one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        ilocblk, inb, mydist, nblocks, srcproc;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
+/*
+ * The data is not distributed, or there is just one process in this di-
+ * mension of the grid.
+ */
+      return( N );
+/*
+ * Compute coordinate of process owning I and corresponding INB
+ */
+   srcproc = SRCPROC;
+
+   if( ( inb = INB - I ) <= 0 )
+   {
+/*
+ * I is not in the first block, find out which process has it and update
+ * the size of first block (inb becomes what is left of the block of I)
+ */
+      srcproc += ( nblocks = (-inb) / NB + 1 ); 
+      srcproc -= ( srcproc / NPROCS ) * NPROCS; /* srcproc mod NPROCS */
+      inb     += nblocks * NB;
+   }
+/*
+ * Now  everything  is  just like  N, I=0, INB, NB, srcproc, NPROCS. The
+ * discussion goes as follows:  compute my distance from the source pro-
+ * cess  so that within this process coordinate system,  the source pro-
+ * cess is the process such that mydist = 0, or PROC == srcproc.
+ *
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries. Then remark that
+ *
+ * when  mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks,
+ * when  mydist > nblocks - ilocblk*NPROCS, I own ilocblk   full blocks,
+ * when  mydist = nblocks - ilocblk*NPROCS, either the last block is not
+ * full and I own it,  or the last block is full and I am the first pro-
+ * cess owning only ilocblk full blocks.
+ */
+   if( PROC == srcproc )
+   {
+/*
+ * I am the source process, i.e. I own I (mydist=0).  When N <= inb (the
+ * possibly updated first block size), the answer is simply N.
+ */
+      if( N <= inb ) return( N );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries.
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Since  mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only
+ * two possible cases:
+ *
+ *   1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di-
+ *      vides the global number of full blocks,  then the source process
+ *      srcproc owns one more block than the other processes;  and N can
+ *      be rewritten as N = INB + (nblocks-1) * NB + LNB  with  LNB >= 0
+ *      size of the last block. Similarly, the local value Np correspon-
+ *      ding to N can be written as  Np = INB + (ilocblk-1) * NB + LNB =
+ *      N + ( ilocblk-1 - (nblocks-1) )*NB.  Note  that this case cannot
+ *      happen when ilocblk is zero, since nblocks is at least one.
+ *
+ *   2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only
+ *      owns full blocks,  and  therefore Np = INB + ilocblk * NB.  Note
+ *      that when ilocblk is zero, Np is just INB.
+ */
+      if( nblocks < NPROCS ) return( inb );
+ 
+      ilocblk = nblocks / NPROCS;
+      return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB :
+              N + ( ilocblk - nblocks ) * NB );
+   }
+   else
+   {
+/*
+ * I am not the source process. When N <= inb, the answer is simply 0.
+ */
+      if( N <= inb ) return( 0 );
+/*
+ * Find  out  how  many  full  blocks are globally (nblocks) and locally
+ * (ilocblk) in those N entries
+ */
+      nblocks = ( N - inb ) / NB + 1;
+/*
+ * Compute  my distance from the source process so that within this pro-
+ * cess coordinate system,  the source  process is the process such that
+ * mydist=0.
+ */
+      if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS;
+/*
+ * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk   full blocks
+ * of size NB since I am not the source process,
+ *
+ * when mydist = nblocks - ilocblk*NPROCS,
+ * either the last block is not full and I own it, in which case
+ *    N = INB + (nblocks - 1)*NB + LNB with  LNB  the  size  of the last
+ *    block such that NB > LNB > 0;  the local value Np corresponding to
+ *    N is given by  Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB;
+ * or the  last  block  is  full  and I am the first process owning only
+ *    ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and
+ *    Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB.
+ */
+      if( nblocks < NPROCS )
+         return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 :
+                 N - inb + NB * ( 1 - nblocks ) ) );
+ 
+      ilocblk = nblocks / NPROCS;
+      mydist -= nblocks - ilocblk * NPROCS;
+      return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
+              ( ( mydist > 0 ) ? ilocblk * NB :
+                N - inb + NB * ( ilocblk - nblocks + 1 ) ) );
+   }
+/*
+ * End of HPL_numrocI
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c
new file mode 100644
index 000000000..268975fc1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c
@@ -0,0 +1,137 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pabort
+(
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pabort( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pabort displays an error message on stderr and halts execution.
+ * The user message is truncated to fit the 128-byte internal buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   LINE   = va_arg( argptr, int      );
+   SRNAME = va_arg( argptr, char *   );
+   FORM   = va_arg( argptr, char *   );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded */
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( stderr,
+                   "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+
+   MPI_Abort( MPI_COMM_WORLD, -1 );
+   exit( -1 );
+/*
+ * End of HPL_pabort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c
new file mode 100644
index 000000000..73cf649da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c
@@ -0,0 +1,143 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlamch
+(
+   MPI_Comm                         COMM,
+   const HPL_T_MACH                 CMACH
+)
+#else
+double HPL_pdlamch
+( COMM, CMACH )
+   MPI_Comm                         COMM;
+   const HPL_T_MACH                 CMACH;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlamch determines a machine-specific arithmetic constant and com-
+ * bines it over COMM:  eps,  sfmin,  emin  and  rmin  are combined  with
+ * HPL_max,  emax and rmax with HPL_min,  other values are returned as is.
+ * The constants are the relative machine precision (eps), the safe minimum
+ * (sfmin) such that 1/sfmin does not overflow, the base of the machine, the
+ * precision (prec), the number of (base) digits in the mantissa (t), rnd =
+ * 1.0 when rounding occurs in addition and 0.0 otherwise, emin, the under-
+ * flow threshold rmin = base**(emin-1), emax, rmax = (base**emax)*(1-eps).
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * CMACH   (global input)                const HPL_T_MACH
+ *         Specifies the value to be returned by HPL_pdlamch
+ *            = HPL_MACH_EPS,   HPL_pdlamch := eps (default)
+ *            = HPL_MACH_SFMIN, HPL_pdlamch := sfmin
+ *            = HPL_MACH_BASE,  HPL_pdlamch := base
+ *            = HPL_MACH_PREC,  HPL_pdlamch := eps*base
+ *            = HPL_MACH_MLEN,  HPL_pdlamch := t
+ *            = HPL_MACH_RND,   HPL_pdlamch := rnd
+ *            = HPL_MACH_EMIN,  HPL_pdlamch := emin
+ *            = HPL_MACH_RMIN,  HPL_pdlamch := rmin
+ *            = HPL_MACH_EMAX,  HPL_pdlamch := emax
+ *            = HPL_MACH_RMAX,  HPL_pdlamch := rmax
+ *
+ *         where
+ *
+ *            eps   = relative machine precision,
+ *            sfmin = safe minimum,
+ *            base  = base of the machine,
+ *            prec  = eps*base,
+ *            t     = number of digits in the mantissa,
+ *            rnd   = 1.0 if rounding occurs in addition,
+ *            emin  = minimum exponent before underflow,
+ *            rmin  = underflow threshold,
+ *            emax  = largest exponent before overflow,
+ *            rmax  = overflow threshold.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     value = HPL_dlamch( CMACH );
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * eps, sfmin, emin, rmin: combine the local values over COMM with HPL_max
+ */
+   if( ( CMACH == HPL_MACH_EPS  ) || ( CMACH == HPL_MACH_SFMIN ) ||
+       ( CMACH == HPL_MACH_EMIN ) || ( CMACH == HPL_MACH_RMIN  ) )
+   {
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE,
+                             HPL_max, COMM );
+   }
+/*
+ * emax, rmax: combine the local values over COMM with HPL_min
+ */
+   else if( ( CMACH == HPL_MACH_EMAX ) || ( CMACH == HPL_MACH_RMAX ) )
+   {
+      (void) HPL_all_reduce( (void *)(&value), 1, HPL_DOUBLE,
+                             HPL_min, COMM );
+   }
+/*
+ * Any other selector: the locally computed value is returned unchanged
+ */
+   return( value );
+/*
+ * End of HPL_pdlamch
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c
new file mode 100644
index 000000000..40bdcc36b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c
@@ -0,0 +1,242 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid *               GRID,
+   const HPL_T_NORM                 NORM,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   const double *                   A,
+   const int                        LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid *               GRID;
+   const HPL_T_NORM                 NORM;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   const double *                   A;
+   const int                        LDA;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange returns  the value of the one norm,  or the infinity norm,
+ * or the element of largest absolute value of a distributed matrix A:
+ *
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,
+ *    norm1(A),        when NORM = HPL_NORM_1,
+ *    normI(A),        when NORM = HPL_NORM_I,
+ *
+ * where norm1 is the one norm (maximum column sum) and normI the infinity
+ * norm (maximum row sum);  max(abs(A(i,j))) is not a matrix norm.  The
+ * result is broadcast over the whole grid.  HPL_rzero is returned at once
+ * when min(M,N) is zero, and also for any other value of NORM.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         On entry,  NORM  specifies  the  value to be returned by this
+ *         function as described above.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+ *         that contains the local pieces of the distributed matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   const double               * col = A;
+   double                     acc, result = HPL_rzero, * sums = NULL;
+   int                        i, imax, j, mloc, mycol, myrow, nloc,
+                              npcol, nprow;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+/*
+ * mloc / nloc: local row / column counts of A as computed by Mnumroc
+ */
+   Mnumroc( mloc, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nloc, N, NB, NB, mycol, 0, npcol );
+
+   if( Mmin( M, N ) == 0 ) return( result );
+
+   if( NORM == HPL_NORM_A )
+   {
+/*
+ * Largest absolute value: local scan, then HPL_max over the whole grid
+ */
+      if( ( nloc > 0 ) && ( mloc > 0 ) )
+      {
+         for( j = 0; j < nloc; j++, col += LDA )
+         {
+            for( i = 0; i < mloc; i++ )
+               result = Mmax( result, Mabs( col[i] ) );
+         }
+      }
+      (void) HPL_reduce( (void *)(&result), 1, HPL_DOUBLE, HPL_max, 0,
+                         GRID->all_comm );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * One norm: start with the local column sums
+ */
+      if( nloc > 0 )
+      {
+         sums = (double*)malloc( (size_t)(nloc) * sizeof( double ) );
+         if( sums == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( j = 0; j < nloc; j++, col += LDA )
+         {
+            acc = HPL_rzero;
+            for( i = 0; i < mloc; i++ ) acc += Mabs( col[i] );
+            sums[j] = acc;
+         }
+/*
+ * Sum the column sums over the process column, store on process row 0
+ */
+         (void) HPL_reduce( (void *)(sums), nloc, HPL_DOUBLE, HPL_sum,
+                            0, GRID->col_comm );
+/*
+ * Process row 0 picks the largest column sum for the 1-norm
+ */
+         if( myrow == 0 )
+         { imax = HPL_idamax( nloc, sums, 1 ); result = Mabs( sums[imax] ); }
+         free( sums );
+      }
+/*
+ * Find max in row 0, store result in process (0,0)
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&result), 1, HPL_DOUBLE, HPL_max, 0,
+                            GRID->row_comm );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Infinity norm: start with the local row sums
+ */
+      if( mloc > 0 )
+      {
+         sums = (double*)malloc( (size_t)(mloc) * sizeof( double ) );
+         if( sums == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( i = 0; i < mloc; i++ ) sums[i] = HPL_rzero;
+
+         for( j = 0; j < nloc; j++, col += LDA )
+         {
+            for( i = 0; i < mloc; i++ )
+               sums[i] += Mabs( col[i] );
+         }
+/*
+ * Sum the row sums over the process row, store on process column 0
+ */
+         (void) HPL_reduce( (void *)(sums), mloc, HPL_DOUBLE, HPL_sum,
+                            0, GRID->row_comm );
+/*
+ * Process column 0 picks the largest row sum for the inf-norm
+ */
+         if( mycol == 0 )
+         { imax = HPL_idamax( mloc, sums, 1 ); result = Mabs( sums[imax] ); }
+         free( sums );
+      }
+/*
+ * Find max in column 0, store result in process (0,0)
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&result), 1, HPL_DOUBLE, HPL_max,
+                            0, GRID->col_comm );
+   }
+/*
+ * Broadcast answer to every process in the grid
+ */
+   (void) HPL_broadcast( (void *)(&result), 1, HPL_DOUBLE, 0,
+                         GRID->all_comm );
+
+   return( result );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
new file mode 100644
index 000000000..f32667cf3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
@@ -0,0 +1,191 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        IAROW,
+   const int                        IACOL,
+   const char *                     CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        IAROW;
+   const int                        IACOL;
+   const char *                     CMATNM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints to standard error a distributed matrix A.  Local
+ * pieces of A are sent to process (0,0) of the grid and printed there;
+ * the run aborts if that process cannot allocate its receive buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Acomm;
+   double                     * buf = NULL;
+   int                        h, i, ib, icurcol=IACOL, icurrow=IAROW,
+                              ii=0, j, jb, jj=0, mycol, myrow, npcol,
+                              nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm;
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf == NULL ) )
+   { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+               {
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               }
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i,
+                                   Acomm );
+                  HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;  /* next local row block */
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW;             /* rewind for next column */
+      }
+      if( mycol == icurcol ) jj += jb;        /* next local col block */
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...                              
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.
+ * The formatted text is truncated to fit the local 128-byte buffer.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   int                        rank;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int    );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr ); 
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kate/hip/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile
@@ -0,0 +1,137 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# NOTE(review): Make.inc presumably defines INCdir, CC, CCFLAGS, ARCHIVER, ARFLAGS, RANLIB, TOUCH and HPLlib used below -- confirm; the Make.inc committed beside this file is a symlink to an absolute /home/kate/... path.
+# ######################################################################
+# Headers that every object rule in this file lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_pauobj       = \
+   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
+   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
+   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
+   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
+   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
+   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
+   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
+   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean  # command targets; a same-named file must not mask them
+lib     : lib.grd
+# lib.grd is a stamp file: it is touched once the objects are archived.
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the like-named source one directory up and is rebuilt when that source or any $(INCdep) header changes.
+HPL_indxg2l.o          : ../HPL_indxg2l.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2l.c
+HPL_indxg2lp.o         : ../HPL_indxg2lp.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2lp.c
+HPL_indxg2p.o          : ../HPL_indxg2p.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxg2p.c
+HPL_indxl2g.o          : ../HPL_indxl2g.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_indxl2g.c
+HPL_infog2l.o          : ../HPL_infog2l.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_infog2l.c
+HPL_numroc.o           : ../HPL_numroc.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_numroc.c
+HPL_numrocI.o          : ../HPL_numrocI.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_numrocI.c
+HPL_dlaswp00N.o        : ../HPL_dlaswp00N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp00N.c
+HPL_dlaswp10N.o        : ../HPL_dlaswp10N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp10N.c
+HPL_dlaswp01N.o        : ../HPL_dlaswp01N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp01N.c
+HPL_dlaswp01T.o        : ../HPL_dlaswp01T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp01T.c
+HPL_dlaswp02N.o        : ../HPL_dlaswp02N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp02N.c
+HPL_dlaswp03N.o        : ../HPL_dlaswp03N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp03N.c
+HPL_dlaswp03T.o        : ../HPL_dlaswp03T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp03T.c
+HPL_dlaswp04N.o        : ../HPL_dlaswp04N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp04N.c
+HPL_dlaswp04T.o        : ../HPL_dlaswp04T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp04T.c
+HPL_dlaswp05N.o        : ../HPL_dlaswp05N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp05N.c
+HPL_dlaswp05T.o        : ../HPL_dlaswp05T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp05T.c
+HPL_dlaswp06N.o        : ../HPL_dlaswp06N.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp06N.c
+HPL_dlaswp06T.o        : ../HPL_dlaswp06T.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaswp06T.c
+HPL_pwarn.o            : ../HPL_pwarn.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pwarn.c
+HPL_pabort.o           : ../HPL_pabort.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pabort.c
+HPL_pdlaprnt.o         : ../HPL_pdlaprnt.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaprnt.c
+HPL_pdlamch.o          : ../HPL_pdlamch.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlamch.c
+HPL_pdlange.o          : ../HPL_pdlange.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlange.c
+#
+# ######################################################################
+# Remove the object files and the lib.grd stamp from this directory; $(HPLlib) itself is left in place.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c
new file mode 100644
index 000000000..644641412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c
@@ -0,0 +1,149 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax  searches,  by means of  HPL_idamax,  the  N  local rows
+ * of the current panel column  and packs the outcome  in  WORK[0:3]:
+ * WORK[0] is the selected entry of A,  WORK[1] its local row index,
+ * WORK[2] its  global row index,  and  WORK[3]  the row coordinate of
+ * this process.  When  N  is less than 1,  WORK[0:2]  are set to zero
+ * and  WORK[3]  is set to the total number of process rows,  so that
+ * this "ghost" entry is never used.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the  panel data structure  from
+ *         which A, lda, ii, nb and the grid information are read.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N  specifies the  number of local rows  of the
+ *         column of A to be searched.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset,  relative to the
+ *         panel, at which the column to be searched starts.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset,  relative to the
+ *         panel, at which the column to be searched starts.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a work array of size at least 4. On exit,
+ *         WORK[0] holds the entry selected by HPL_idamax,  WORK[1] its
+ *         local row index,  WORK[2] its global row index and  WORK[3]
+ *         the row coordinate of this process;  see Purpose for the
+ *         values stored when N is less than 1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * col;
+   int                        gidx, lidx, lrow, myrow, nprow;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N < 1 )
+   {
+/*
+ * No local row of A:  zero WORK[0:2]  and store in  WORK[3]  the number
+ * of process rows, so that this "ghost" entry is never used,  even when
+ * the current column of A only contains zeros.
+ */
+      WORK[0] = HPL_rzero; WORK[1] = HPL_rzero; WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+   }
+   else
+   {
+      col   = Mptr( PANEL->A, II, JJ, PANEL->lda );
+      myrow = PANEL->grid->myrow;
+      nprow = PANEL->grid->nprow;
+      lidx  = HPL_idamax( N, col, 1 );
+      lrow  = PANEL->ii + II + lidx;
+      Mindxl2g( gidx, lrow, PANEL->nb, PANEL->nb, myrow, 0, nprow );
+/*
+ * WORK[0] := entry selected by HPL_idamax,
+ * WORK[1] := its local  row index,
+ * WORK[2] := its global row index,
+ * WORK[3] := row coordinate of this process.
+ */
+      WORK[0] = col[lidx];         WORK[1] = (double)(lidx);
+      WORK[2] = (double)(gidx);    WORK[3] = (double)(myrow);
+   }
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32   /* unroll depth; the unrolled bodies below cover 1, 2, 4, 8, 16 or 32 */
+#define    HPL_LOCSWP_LOG2_DEPTH    5   /* must equal log2( HPL_LOCSWP_DEPTH ); override both macros together */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+/* Wmx := max row at WORK[4..], Wr0 := current row, stored right after it. */
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;   /* nu: entries done by the unrolled loops; nr: leftover */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1 (L steps by n0)
+ */
+   L  = Mptr( PANEL->L1, JJ, 0, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A,  II,     0, lda );
+               A2 = Mptr( A1,        ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A,  II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * Set INFO (only if it is still zero) to PANEL->ia + JJ + 1.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define    HPL_LOCSWP_DEPTH        32   /* unroll depth; the unrolled bodies below cover 1, 2, 4, 8, 16 or 32 */
+#define    HPL_LOCSWP_LOG2_DEPTH    5   /* must equal log2( HPL_LOCSWP_DEPTH ); override both macros together */
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_dlocswpT
+( PANEL, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpT performs  the local swapping operations  within a panel.
+ * The lower triangular  N0-by-N0  upper block of the panel is stored in
+ * transpose form.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         WORK[0] contains  the  local  maximum  absolute value scalar,
+ *         WORK[1] contains  the corresponding local row index,  WORK[2]
+ *         contains the corresponding global row index, and  WORK[3]  is
+ *         the coordinate of process owning this max.  The N0 length max
+ *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
+ *         JJth row  (or column) of L1. The remaining part of this array
+ *         is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax;
+   double                     * A1, * A2, * L, * Wr0, * Wmx;
+   int                        ilindx, lda, myrow, n0, nr, nu;
+   register int               i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+/* Wmx := max row at WORK[4..], Wr0 := current row, stored right after it. */
+   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) 
+                  << HPL_LOCSWP_LOG2_DEPTH );
+   nr    = n0 - nu;   /* nu: entries done by the unrolled loops; nr: leftover */
+/*
+ * Replicated swap and copy of the current (new) row of A into L1 (L is contiguous)
+ */
+   L  = Mptr( PANEL->L1, 0, JJ, n0  );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II,     0, lda );
+               A2 = Mptr( A1,       ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
+                    L   += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
+                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
+                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
+                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
+                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
+                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
+                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
+                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
+                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
+                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
+                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
+                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
+                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
+                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
+                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
+                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
+                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
+                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
+                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
+                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
+                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
+                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
+                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
+                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
+                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
+                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
+                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
+#endif
+               }
+
+               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
+               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of  A  is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+               {
+                  L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+                  L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
+                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
+                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
+                  L[10]=Wmx[10]; L[14]=Wmx[14];
+                  L[11]=Wmx[11]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  L[16]=Wmx[16]; L[20]=Wmx[20];
+                  L[17]=Wmx[17]; L[21]=Wmx[21];
+                  L[18]=Wmx[18]; L[22]=Wmx[22];
+                  L[19]=Wmx[19]; L[23]=Wmx[23];
+                  L[24]=Wmx[24]; L[28]=Wmx[28];
+                  L[25]=Wmx[25]; L[29]=Wmx[29];
+                  L[26]=Wmx[26]; L[30]=Wmx[30];
+                  L[27]=Wmx[27]; L[31]=Wmx[31];
+#endif
+               }
+               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+            {
+               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda;
+               L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda;
+               L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda;
+               L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda;
+               L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda;
+               L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda;
+               L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda;
+               L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda;
+               L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda;
+               L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda;
+               L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda;
+               L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } 
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row  Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+         {
+            L[ 0]=Wmx[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+            L[ 1]=Wmx[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11];
+            L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19];
+            L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23];
+            L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27];
+            L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31];
+#endif
+         }
+         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH >  1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero,  simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
+      {
+         L[ 0]=Wr0[ 0];
+#if ( HPL_LOCSWP_DEPTH >  1 )
+         L[ 1]=Wr0[ 1];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  2 )
+         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  4 )
+         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
+#endif
+#if ( HPL_LOCSWP_DEPTH >  8 )
+         L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
+         L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15];
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21];
+         L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23];
+         L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29];
+         L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31];
+#endif
+      }
+      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
+/*
+ * Set INFO (only if it is still zero) to PANEL->ia + JJ + 1.
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c
new file mode 100644
index 000000000..1d99c6e14
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c
@@ -0,0 +1,141 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdfact
+(
+   HPL_T_panel *                    PANEL
+)
+#else
+void HPL_pdfact
+( PANEL )
+   HPL_T_panel *                    PANEL;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdfact recursively factorizes a  1-dimensional  panel of columns.
+ * The  RPFACT  function pointer specifies the recursive algorithm to be
+ * used, either Crout, Left- or Right looking.  NBMIN allows to vary the
+ * recursive stopping criterion in terms of the number of columns in the
+ * panel, and  NDIV allows to specify the number of subpanels each panel
+ * should be divided into.  Usually a value of 2 will be chosen. Finally
+ * PFACT is a function pointer specifying the non-recursive algorithm to
+ * be  used  on  at most NBMIN columns. One can also choose here between
+ * Crout, Left- or Right looking.  Empirical tests seem to indicate that
+ * values of 4 or 8 for NBMIN give the best results.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   void                       * vptr = NULL;
+   int                        align, jb;
+/* ..
+ * .. Executable Statements ..
+ */
+   jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb;
+/* n, ja are advanced on every caller; the rest needs mycol==pcol, jb>0 */
+   if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+   align = PANEL->algo->align;
+   vptr  = (void *)malloc( ( (size_t)(align) + 
+              (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) *
+              sizeof(double) );   /* align + 2*(4+2*jb) doubles */
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
+/*
+ * Factor the panel - Update the panel pointers
+ */
+   PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr,
+                       ((size_t)(align) * sizeof(double) ) ) );
+   if( vptr ) free( vptr );
+
+   PANEL->A   = Mptr( PANEL->A, 0, jb, PANEL->lda );
+   PANEL->nq -= jb; PANEL->jj += jb;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_RPFACT );
+#endif
+/*
+ * End of HPL_pdfact
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c
new file mode 100644
index 000000000..b14452197
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c
@@ -0,0 +1,311 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmxswp
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        II,
+   const int                        JJ,
+   double *                         WORK
+)
+#else
+void HPL_pdmxswp
+( PANEL, M, II, JJ, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        II;
+   const int                        JJ;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmxswp swaps  and  broadcasts  the  absolute value max row using
+ * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by
+ *  
+ *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ *  
+ * where  lat and bdwth are the latency and bandwidth of the network for
+ * double precision real elements.  Communication  only  occurs  in  one
+ * process  column. Mono-directional links  will cause the communication
+ * cost to double.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of the matrix
+ *         column on which this function operates.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+ *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
+ *         routine to  initialize  the first four entries of this array.
+ *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+ *         Note that this is also the  JJth  row  (or column) of L1. The
+ *         remaining part is used as a temporary array.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     gmax, tmp1;
+   double                     * A0, * Wmx, * Wwork;
+   HPL_T_grid                 * grid;
+   MPI_Comm                   comm;
+   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
+   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
+                              mydis_, myrow, n0, nprow, partner, rcnt,
+                              root, scnt, size_;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
+/*
+ * ip2  : the largest  power of two less than or equal to nprow;
+ * hdim : dimension of the hypercube made of those ip2 processes;
+ * Np2  : logical flag indicating whether or not nprow is a power of 2;
+ */
+   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
+   hdim    = (unsigned int)(grid->row_hdim);     n0  = PANEL->jb;
+   icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 );
+   mydist  = MModSub( myrow, icurrow, nprow );
+/*
+ * Set up pointers in workspace:  WORK and Wwork  point to the beginning
+ * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
+ * owning the local (before combine) and global (after combine) absolute
+ * value max. A0 points to the copy of the current row of the matrix.
+ */
+   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
+   Wwork = WORK + cnt0;
+/*
+ * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
+ * with max in current column). If I am the current process row, pack in
+ * addition the current row of A in A0[0:N0-1].  If I do not own any row
+ * of A, then zero out Wmx[0:N0-1].
+ */
+   if( M > 0 )
+   {
+      lda = PANEL->lda;
+      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
+                 Wmx, 1 );
+      if( myrow == icurrow )
+      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
+   }
+   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
+/*
+ * Combine the results (bi-directional exchange):  the process coordina-
+ * tes are relative to icurrow,  this allows to reduce the communication
+ * volume when nprow is not a power of 2.
+ *
+ * When nprow is not a power of 2:  proc[i-ip2] receives local data from
+ * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
+ * sends to proc[ip2] the current row of A  for later broadcast in procs
+ * [ip2..nprow).
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         if( mydist == (int)(ip2) )
+            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         if( mydist == 0 )
+            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
+                             MSGID_BEGIN_PFACT, MModAdd( partner,
+                             icurrow, nprow ), comm );
+         else
+            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
+                             nprow ), MSGID_BEGIN_PFACT, comm );
+         /* Keep the larger |entry 0|; on a tie, the smaller entry 3 wins */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
+      }
+   }
+
+   if( mydist < (int)(ip2) )
+   {
+/*
+ * power of 2 part of the processes collection: processes  [0..ip2)  are
+ * combining (binary exchange); proc[0] has two rows to send, but one to
+ * receive.  At every step  k  in [0..hdim) of the algorithm,  a process 
+ * pair exchanging 2 rows is such that mydist >> k+1 is 0.  Among  those
+ * processes the ones  that are sending one more row than  what they are
+ * receiving are such that mydist >> k is equal to 0.
+ */
+      k = 0; ipow = 1;
+ 
+      while( k < hdim )
+      {
+         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
+         {
+            if( ( (unsigned int)(mydist) >> k ) == 0 )
+            { scnt = cnt0; rcnt = cnt_; }
+            else
+            { scnt = cnt_; rcnt = cnt0; }
+         }
+         else { scnt = rcnt = cnt_; }
+ 
+         partner = (int)( (unsigned int)(mydist) ^ ipow );
+         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
+                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
+                          nprow ), comm );
+         /* Same rule; a losing 2-row message still supplies A0 (Wwork+cnt_) */
+         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
+         if( ( tmp1 > gmax ) ||
+             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
+         {
+            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
+                       WORK, 1 );
+         }
+         else if( rcnt == cnt0 )
+         { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); }
+ 
+         ipow <<= 1; k++;
+      }
+   }
+   else if( size_ > 1 )
+   {
+/*
+ * proc[ip2] broadcast current row of A to procs [ip2+1..nprow).
+ */
+      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
+      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+ 
+      root   = MModAdd( icurrow, (int)(ip2), nprow );
+      mydis_ = MModSub( myrow,   root,       nprow );
+ 
+      do
+      {
+         mask ^= ip2_;
+         if( ( mydis_ & mask ) == 0 )
+         {
+            partner = (int)(mydis_ ^ ip2_);
+            if( ( mydis_ & ip2_ ) != 0 )
+            {
+               (void) HPL_recv( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+            else if( partner < size_ )
+            {
+               (void) HPL_send( A0, n0, MModAdd( root, partner,
+                                nprow ), MSGID_BEGIN_PFACT, comm );
+            }
+         }
+         ip2_ >>= 1;
+      } while( ip2_ > 0 );
+   }
+/*
+ * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
+ * sends the pivot row to proc[i]  along  with the first four entries of
+ * the WORK array.
+ */
+   if( ( Np2 != 0 ) &&
+       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
+   {
+      if( ( mydist & ip2 ) != 0 )
+      {
+         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+      else
+      {
+         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
+                          nprow ), MSGID_BEGIN_PFACT, comm );
+      }
+   }
+/*
+ * Save the global pivot index in pivot array
+ */
+   (PANEL->DPIV)[JJ] = WORK[2];
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_MXSWP );
+#endif
+/*
+ * End of HPL_pdmxswp
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c
new file mode 100644
index 000000000..4ea170b73
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c
@@ -0,0 +1,270 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in no-transpose form (i.e. just like the input
+ * matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop  is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows to bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr != 0 only on process row prow; there ii advances and m shrinks */
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj, jj+1, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1,  kk, Nm1 );
+         Xv1 = vsip_msubview_d( Xv0, jj,    ICOFF, 1,  kk  );
+         Yv1 = vsip_msubview_d( Xv0, jj,    jj+1,  1,  Nm1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 ); 
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
+                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
+                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  dia-
+ * gonal  and  subdiagonal  elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,          jj+1,            kk+1,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      vsip_mdestroy_d( Yv1 );
+      vsip_mdestroy_d( Xv1 );
+      vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
+                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ),
+                 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c
new file mode 100644
index 000000000..50ed300aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c
@@ -0,0 +1,267 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel  A using the Crout variant of the  usual
+ * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+ * of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed lets one almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This lets us bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Compute row (column) jj of L1
+ */
+      if( kk > 0 )
+      {
+         L1ptr = Mptr( L1, jj+1, jj, n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Xv0, jj+1,  ICOFF, Nm1, kk );
+         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj,    kk,   1 );
+         Yv1 = vsip_msubview_d( Xv0, jj+1,  jj,    Nm1,  1 );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
+                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
+                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
+#endif
+         if( curr != 0 )
+            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
+      }
+/*
+ * Scale current column by its absolute value max entry  -  Update  the
+ * diagonal and subdiagonal elements in column  A(iip1:iip1+Mm1-1, jj+1)
+ * and  find local  absolute value max in  that column  (Only  one  pass
+ * through cache for each current column).  This sequence of  operations
+ * could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,           ICOFF,           1,   kk+1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,    1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF,
+                 n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }
+
+      Nm1--; jj++; kk++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c
new file mode 100644
index 000000000..fa471198d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed lets one almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This lets us bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, 
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr,  1 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, ICOFF,        jj+1,              kk,   1 );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr,  1, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c
new file mode 100644
index 000000000..a6e1b67bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanllT factorizes  a panel of columns that is a sub-array of a
+ * larger one-dimensional panel A  using the Left-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The recursive algorithm indeed lets one almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration of  the  main loop is unrolled.  The  local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This lets us bring the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK is a work array of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
+                              m=M, n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow );
+
+   Nm1 = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column and initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 > 0 )
+   {
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+
+      L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF;
+      HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans,   HplUnit, kk,
+                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 );
+/*
+ * Scale  current column by its absolute value max entry  -  Update  and 
+ * find local  absolute value max  in next column (Only one pass through 
+ * cache for each next column).  This sequence of operations could bene-
+ * fit from a specialized  blocked implementation.
+ */ 
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
+      Xv1 = vsip_msubview_d( Xv0, jj+1,         ICOFF,             1,   kk );
+      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1,  Mm1,  1 );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
+                   HPL_rone, Yv1 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Yv1 );
+      (void) vsip_mdestroy_d( Xv1 );
+      (void) vsip_mdestroy_d( Av1 );
+#else
+      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk,  -HPL_rone,
+                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0,
+                 HPL_rone, Mptr( A, iip1, jj+1, lda ),  1 );
+#endif
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+      if( curr != 0 )
+      {
+         HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 );
+         ii = iip1; iip1++; m = Mm1; Mm1--;
+      }
+      Nm1--; jj++;
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Xv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c
new file mode 100644
index 000000000..0a3b9a542
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c
@@ -0,0 +1,250 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlN factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in no-transpose form (i.e. just like the
+ * input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop  is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow is PANEL->prow */
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpN( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+#ifdef HPL_CALL_VSIPL
+      if( Nm1 > 1 )
+      {
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 );
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+      }
+#else
+      if( Nm1 > 1 )
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda );
+#endif
+/*
+ * Same thing as above but with worse data access on y (A += x * y^T)
+ *
+ *    if( Nm1 > 1 )
+ *       HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+ *                 Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ),
+ *                 lda );
+ */  
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* one fewer local row */
+
+      Nm1--; jj++; /* move on to the next column */
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpN( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c
new file mode 100644
index 000000000..68c1afc02
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c
@@ -0,0 +1,244 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdpanrlT factorizes  a panel of columns  that is a sub-array of a
+ * larger one-dimensional panel A using the Right-looking variant of the
+ * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+ * block of the panel is stored in transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *  
+ * Note that  one  iteration  of  the  main loop  is unrolled. The local
+ * computation of the absolute value max of the next column is performed
+ * just after its update by the current column. This allows bringing the
+ * current column only  once through  cache at each  step.  The  current
+ * implementation  does not perform  any blocking  for  this sequence of
+ * BLAS operations, however the design allows for plugging in an optimal
+ * (machine-specific) specialized  BLAS-like kernel.  This idea has been
+ * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Acur, * Anxt, * L1;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Av1, * Xv1, * Yv0, * Yv1;
+#endif
+   int                        Mm1, Nm1, curr, ii, iip1, jj, lda, m=M,
+                              n0;
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+   A    = PANEL->A;   lda = PANEL->lda;
+   L1   = PANEL->L1;  n0  = PANEL->jb;
+   curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow is PANEL->prow */
+
+   Nm1  = N - 1; jj = ICOFF;
+   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
+   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
+   Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
+#endif
+/*
+ * Find local absolute value max in first column - initialize WORK[0:3]
+ */
+   HPL_dlocmax( PANEL, m, ii, jj, WORK );
+
+   while( Nm1 >= 1 )
+   {
+      Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda );
+/*
+ * Swap and broadcast the current row
+ */
+      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+      HPL_dlocswpT( PANEL,    ii, jj, WORK );
+/*
+ * Scale current column by its absolute value max entry  -  Update trai-
+ * ling sub-matrix and find local absolute value max in next column (On-
+ * ly one pass through cache for each current column).  This sequence of
+ * operations could benefit from a specialized blocked implementation.
+ */
+      if( WORK[0] != HPL_rzero )
+         HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 );
+      HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 );
+      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
+
+      if( Nm1 > 1 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2,
+                                Mm1, Nm1-1 );
+         Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj,
+                                Mm1, 1   );
+         Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); 
+
+         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Yv1 );
+         (void) vsip_mdestroy_d( Xv1 );
+         (void) vsip_mdestroy_d( Av1 );
+#else
+         HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1,
+                   Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ),
+                   lda );
+#endif
+      }
+      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* one fewer local row */
+
+      Nm1--; jj++; /* move on to the next column */
+   }
+/*
+ * Swap and broadcast last row - Scale last column by its absolute value
+ * max entry
+ */ 
+   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
+   HPL_dlocswpT( PANEL,    ii, jj, WORK );
+   if( WORK[0] != HPL_rzero )
+      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Release the blocks
+ */
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE );
+   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+   (void) vsip_mdestroy_d( Yv0 );
+   (void) vsip_mdestroy_d( Av0 );
+#endif
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PFACT );
+#endif
+/*
+ * End of HPL_pdpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c
new file mode 100644
index 000000000..348d7ebe6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrN recursively  factorizes  a panel of columns  using  the
+ * recursive  Crout  variant of the usual one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* recursion base case */
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N >= NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff myrow is PANEL->prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrN( PANEL, m, jb, ioff, WORK );
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0 );
+/*
+ * Create the matrix subviews
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff,  ICOFF,   jb, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff,  ioff+jb, jb,  n );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj,  n );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n,
+                    jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, 
+                    Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb ); /* last block may be narrower than nb */
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c
new file mode 100644
index 000000000..a1ecfac2c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c
@@ -0,0 +1,282 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpancrT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpancrT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpancrT recursively  factorizes  a panel  of columns using  the
+ * recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+ * The lower triangular N0-by-N0  upper block of the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  Base case: N <= NBMIN, defer to algo->pffun
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my row is prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Local update - Factor current panel - Replicated update and solve
+ */
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * trans( Lv1 )
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                   VSIP_MAT_TRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj,
+                 -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr,
+                 jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ),
+                 lda );
+#endif
+      HPL_pdrpancrT( PANEL, m, jb, ioff, WORK );
+
+      if( n > 0 )
+      {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+         (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+         Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 );
+/*
+ * Create the matrix subviews: Av2 (n x jb) -= Av1 (n x jj) * Lv1 (jj x jb)
+ */
+         Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj );
+         Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff,  n, jb );
+         Lv1 = vsip_msubview_d( Lv0, ICOFF,   ioff, jj, jb );
+
+         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1,
+                      VSIP_MAT_NTRANS, HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Lv1 );
+         (void) vsip_mdestroy_d( Av2 );
+         (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+         (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+         (void) vsip_mdestroy_d( Lv0 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb,
+                    jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0,
+                    Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                    Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj,
+                    n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      }
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpancrT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c
new file mode 100644
index 000000000..4dbc13b44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllN recursively  factorizes  a panel  of columns using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  Base case: N <= NBMIN, defer to algo->pffun
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my row is prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit,
+                 jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ),
+                 n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * Lv1 (jj x jb)
+ */
+      if( curr != 0 )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb ); /* jb columns, as in HPL_dgemm below */
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m,  jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m,  jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllN( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c
new file mode 100644
index 000000000..887caeb87
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanllT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanllT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanllT recursively  factorizes  a panel of columns  using  the
+ * recursive Left-looking variant of the one-dimensional algorithm.  The
+ * lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements ..  Base case: N <= NBMIN, defer to algo->pffun
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN.
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+ 
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 iff my row is prow */
+
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;   /* n: columns left after this block */
+/*
+ * Replicated solve - Local update - Factor current panel
+ */
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr,
+                 jj, 0, n0 ), n0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0, n0              );
+/*
+ * Create the matrix subviews: Av2 (m x jb) -= Av1 (m x jj) * trans( Lv1 )
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF,
+                                m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m, jb ); /* jb columns, as in HPL_dgemm below */
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,  m, jb );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb,  jj );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+      (void) vsip_mdestroy_d( Av1 );
+      (void) vsip_mdestroy_d( Av2 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb,
+                 jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda,
+                 Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj, lda ), lda );
+#endif
+      HPL_pdrpanllT( PANEL, m, jb, ioff, WORK );
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+         ii += jb; m -= jb;
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanllT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
new file mode 100644
index 000000000..22f105cf4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlN
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlN
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlN recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements .. (base case: N <= NBMIN -> algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (guaranteed by the test above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+/* nb: nominal block width; jb: width of the current block (<= nb). */
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr != 0 iff grid->myrow == PANEL->prow; only then is A offset by ICOFF rows. */
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor jb columns (recursive call) - Replicated solve - Local update
+ */
+      HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                 HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews (Av1 is m-by-jb, Av2 m-by-n, Lv1 jb-by-n)
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0,
+                     Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
new file mode 100644
index 000000000..a77301b9b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c
@@ -0,0 +1,240 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdrpanrlT
+(
+   HPL_T_panel *                    PANEL,
+   const int                        M,
+   const int                        N,
+   const int                        ICOFF,
+   double *                         WORK
+)
+#else
+void HPL_pdrpanrlT
+( PANEL, M, N, ICOFF, WORK )
+   HPL_T_panel *                    PANEL;
+   const int                        M;
+   const int                        N;
+   const int                        ICOFF;
+   double *                         WORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdrpanrlT recursively  factorizes  a panel of columns  using  the
+ * recursive Right-looking variant of the one-dimensional algorithm. The
+ * lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+ * transpose form.
+ *  
+ * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+ * operations  at once  for one column in the panel.  This  results in a
+ * lower number of slightly larger  messages than usual.  On P processes
+ * and assuming bi-directional links,  the running time of this function
+ * can be approximated by (when N is equal to N0):                      
+ *  
+ *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+ *    N0^2 * ( M - N0/3 ) * gam2-3
+ *  
+ * where M is the local number of rows of  the panel, lat and bdwth  are
+ * the latency and bandwidth of the network for  double  precision  real
+ * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+ * rate of execution. The  recursive  algorithm  allows indeed to almost
+ * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+ * large  number of modern machines,  this  operation is however latency
+ * bound,  meaning  that its cost can  be estimated  by only the latency
+ * portion N0 * log_2(P) * lat.  Mono-directional links will double this
+ * communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * M       (local input)                 const int
+ *         On entry,  M specifies the local number of rows of sub(A).
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of columns of sub(A).
+ *
+ * ICOFF   (global input)                const int
+ *         On entry, ICOFF specifies the row and column offset of sub(A)
+ *         in A.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                     * A, * Aptr, * L1, * L1ptr;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d               * Av0, * Lv0, * Av1, * Av2, * Lv1;
+#endif
+   int                        curr, ii, ioff, jb, jj, lda, m, n, n0, nb,
+                              nbdiv, nbmin;
+/* ..
+ * .. Executable Statements .. (base case: N <= NBMIN -> algo->pffun)
+ */
+   if( N <= ( nbmin = PANEL->algo->nbmin ) )
+   { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; }
+/*
+ * Find  new recursive blocking factor.  To avoid an infinite loop,  one
+ * must guarantee: 1 <= jb < N, knowing that  N  is greater than  NBMIN.
+ * First, we compute nblocks:  the number of blocks of size  NBMIN in N,
+ * including the last one that may be smaller.  nblocks  is thus  larger
+ * than or equal to one, since N > NBMIN (guaranteed by the test above).
+ * The ratio ( nblocks + NDIV - 1 ) / NDIV  is thus larger than or equal
+ * to one as  well.  For  NDIV >= 2,  we  are guaranteed  that the quan-
+ * tity ( ( nblocks + NDIV  - 1 ) / NDIV ) * NBMIN  is less  than N  and
+ * greater than or equal to NBMIN.
+ */
+   nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N;
+   nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv  - 1) / nbdiv ) * nbmin;
+/* nb: nominal block width; jb: width of the current block (<= nb). */
+   A     = PANEL->A;   lda = PANEL->lda;
+   L1    = PANEL->L1;  n0  = PANEL->jb;
+   L1ptr = Mptr( L1, ICOFF, ICOFF, n0 );
+   curr  = (int)( PANEL->grid->myrow == PANEL->prow );
+/* curr != 0 iff grid->myrow == PANEL->prow; only then is A offset by ICOFF rows. */
+   if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda );
+   else            Aptr = Mptr( A,     0, ICOFF, lda );
+/*
+ * The triangular solve is replicated in every  process row.  The  panel
+ * factorization is  such that  the first rows of  A  are accumulated in
+ * every process row during the (panel) swapping phase.  We  ensure this
+ * way a minimum amount  of communication during the entire panel facto-
+ * rization.
+ */
+   do
+   {
+      n -= jb; ioff = ICOFF + jj;
+/*
+ * Factor jb columns (recursive call) - Replicated solve - Local update
+ */
+      HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK );
+      HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                 HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ),
+                 n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 );
+      if( curr != 0 ) { ii += jb; m -= jb; }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda, lda, PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1,  n0,  n0,              n0 );
+/*
+ * Create the matrix subviews (Av1 is m-by-jb, Av2 m-by-n, Lv1 n-by-jb)
+ */
+      if( curr != 0  )
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff,
+                                m,  jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb,
+                                m, n );
+      }
+      else
+      {
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff,    m, jb );
+         Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m,  n );
+      }
+      Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb );
+
+      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS,
+                   HPL_rone, Av2 );
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 ); 
+      (void) vsip_mdestroy_d( Av2 );
+      (void) vsip_mdestroy_d( Av1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#else
+      HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n,
+                 jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda,
+                 Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone,
+                 Mptr( Aptr, ii, jj+jb, lda ), lda );
+#endif
+/*
+ * Copy back upper part of A in current process row - Go to the next block
+ */
+      if( curr != 0 )
+      {
+         HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0,
+                      Mptr( A, 0, ioff, lda ), lda );
+      }
+      jj += jb; jb = Mmin( n, nb );
+
+   } while( n > 0 );
+/*
+ * End of HPL_pdrpanrlT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile
new file mode 100644
index 000000000..bf4634d31
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile
@@ -0,0 +1,118 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc is expected to define CC, CCFLAGS, INCdir, HPLlib, etc.
+# ######################################################################
+# Headers listed as prerequisites of every object rule in this file.
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_pfaobj       = \
+   HPL_dlocmax.o          HPL_dlocswpN.o         HPL_dlocswpT.o         \
+   HPL_pdmxswp.o          HPL_pdpancrN.o         HPL_pdpancrT.o         \
+   HPL_pdpanllN.o         HPL_pdpanllT.o         HPL_pdpanrlN.o         \
+   HPL_pdpanrlT.o         HPL_pdrpanllN.o        HPL_pdrpanllT.o        \
+   HPL_pdrpancrN.o        HPL_pdrpancrT.o        HPL_pdrpanrlN.o        \
+   HPL_pdrpanrlT.o        HPL_pdfact.o
+#
+## Targets #############################################################
+#
+all              : lib
+.PHONY           : all lib
+lib              : lib.grd
+# lib.grd acts as a stamp: the last recipe line runs $(TOUCH) on it.
+lib.grd          : $(HPL_pfaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+# One explicit compile rule per object; $< is the ../*.c source file.
+HPL_dlocmax.o          : ../HPL_dlocmax.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlocswpN.o         : ../HPL_dlocswpN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_dlocswpT.o         : ../HPL_dlocswpT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdmxswp.o          : ../HPL_pdmxswp.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpancrN.o         : ../HPL_pdpancrN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpancrT.o         : ../HPL_pdpancrT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanllN.o         : ../HPL_pdpanllN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanllT.o         : ../HPL_pdpanllT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanrlN.o         : ../HPL_pdpanrlN.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdpanrlT.o         : ../HPL_pdpanrlT.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanllN.o        : ../HPL_pdrpanllN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanllT.o        : ../HPL_pdrpanllT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpancrN.o        : ../HPL_pdrpancrN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpancrT.o        : ../HPL_pdrpancrT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanrlN.o        : ../HPL_pdrpanrlN.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdrpanrlT.o        : ../HPL_pdrpanrlT.c        $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+HPL_pdfact.o           : ../HPL_pdfact.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  $<
+# ######################################################################
+# Remove every *.o and *.grd file from the current directory.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c
new file mode 100644
index 000000000..b917a6525
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_equil
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_TRANS             TRANS,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   int *                            IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1,
+   int *                            IWORK
+)
+#else
+void HPL_equil
+( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_TRANS             TRANS;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   int *                            IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_equil equilibrates  the  local  pieces  of U, so that on exit to
+ * this function, pieces of U contained in every process row are of the
+ * same size. This phase makes the rolling phase optimal.  In addition,
+ * this  function probes  for  the  column panel L and forwards it when
+ * possible.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be equilibrated) information.
+ *
+ * TRANS   (global input)                const enum HPL_TRANS
+ *         On entry, TRANS specifies whether  U  is stored in transposed
+ *         or non-transposed form.
+ *
+ * N       (local input)                 const int
+ *         On entry, N  specifies the number of rows or columns of  U. N
+ *         must be at least 0.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+ *         non-transposed form, and MAX(1,N) otherwise.
+ *
+ * IPLEN   (global input)                int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension NPROW+1.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, ip, ipU, ipcur, iprow, iptgt, lastrow,
+                              left, npm1, nprow, ll, llU, llcur, lltgt,
+                              right, slen, smax, smin;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return;
+/*
+ * If the current distribution of the pieces of U is already optimal for
+ * the rolling phase, then return imediately.  The  optimal distribution
+ * is such that ip processes have smax items and the remaining processes
+ * only have smin items. Another way to check this is to verify that all
+ * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */
+   smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow;
+   ip   = slen - nprow * ( smin = slen / nprow );
+
+   iprow = 0;
+   do
+   {
+      ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++;
+   } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) );
+
+   if( iprow == nprow ) return;
+/*
+ * Now,  we are sure  the distribution of the pieces of U is not optimal
+ * with respect to the rolling phase,  thus  perform  equilibration.  Go
+ * through the list of processes:  Processes  that have rows that do not
+ * belong to them  with respect to the optimal mapping spread them  in a
+ * logarithmic fashion. To simplify a little bit the implementation, and
+ * mainly the packing, a source process row spreads its data to its left
+ * first, and then to its right.
+ */
+   IWORK[nprow] = slen;
+
+   for( iprow = 0; iprow < nprow; iprow++ )
+   {
+      llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] );
+      if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax;      }
+      else             { lltgt = smin; iptgt = iprow * smin + ip; }
+
+      left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU );
+/*
+ * If I have something to spread to either the left or the right
+ */
+      if( ( llU > 0 ) && ( left || right ) )
+      {        /* Figure out how much every other process should have */
+
+         ipcur = ipU; llcur = llU;
+
+         for( i = 0; i < nprow; i++ )
+         {
+            if( i < ip ) { lltgt = smax; iptgt = i * smax;      }
+            else         { lltgt = smin; iptgt = i * smin + ip; }
+            lastrow = iptgt + lltgt - 1;
+
+            if( ( lastrow >= ipcur ) && ( llcur > 0 ) )
+            { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; }
+            else { ll = 0; }
+
+            IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur;
+         }
+/*
+ * Equilibration phase
+ */
+         if( TRANS == HplNoTrans )
+         {
+            if( left  )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+         else
+         {
+            if( left  )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft,  N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+
+            if( right )
+            {
+               HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU,
+                            iprow, IWORK, IPMAP, IPMAPM1 );
+            }
+         }
+      }
+   }
+/*
+ * Finally update  IPLEN  with the indexes corresponding to the new dis-
+ * tribution of U - IPLEN[nprow] remained unchanged.
+ */
+   for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip );
+/*
+ * End of HPL_equil
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c
new file mode 100644
index 000000000..0715159bd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c
@@ -0,0 +1,185 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_logsort
+(
+   const int                        NPROCS,
+   const int                        ICURROC,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_logsort
+( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
+   const int                        NPROCS;
+   const int                        ICURROC;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_logsort computes an array  IPMAP  and  its inverse  IPMAPM1  that
+ * contain the logarithmic sorted processes id with respect to the local
+ * number of rows of  U  that they own. This is necessary to ensure that
+ * the logarithmic spreading of U is optimal in terms of number of steps
+ * and communication volume as well.  In other words, the largest pieces
+ * of U will be sent a minimal number of times.
+ *
+ * Arguments
+ * =========
+ *
+ * NPROCS  (global input)                const int
+ *         On entry, NPROCS  specifies the number of process rows in the
+ *         process grid. NPROCS is at least one.
+ *
+ * ICURROC (global input)                const int
+ *         On entry, ICURROC is the source process row.
+ *
+ * IPLEN   (global input/output)         int *
+ *         On entry, IPLEN is an array of dimension NPROCS+1,  such that
+ *         IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+ *         that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+ *         rows of U  in the processes before process IPMAP[i] after the
+ *         sort,  with  the convention that  IPLEN[NPROCS] is  the total
+ *         number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+ *         IPLEN[i] is  the  number of rows of A that should be moved to
+ *         the process IPMAP[i].  IPLEN  is such that the number of rows
+ *         of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myroc] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROCS)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dist, i, ip, iplen_i, iplen_j, itmp, j, k;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the logarithmic distance of process j from process 0 (the
+ * number of set bits in j) and its maximum. IPMAPM1 is workarray here.
+ */
+   for( j = 0, dist = 0; j < NPROCS; j++ )
+   {
+      IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0;
+      do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); /* itmp = number of set bits in j */
+      IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp;
+   }
+/*
+ * Rotate IPLEN[1..NPROCS] left by ICURROC places, so that IPLEN[1] is
+ * what used to be IPLEN[ICURROC+1]  (IPMAP was set in the loop above,
+ * so that IPMAP[0] is ICURROC).
+ */
+   for( j = 0; j < ICURROC; j++ )
+   {
+      for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i];
+      IPLEN[NPROCS] = itmp;
+   }
+/*
+ * logarithmic sort
+ */
+   for( k = 1; k <= dist; k++ )
+   {
+      for( j = 1; j < NPROCS; j++ )
+      {
+         if( IPMAPM1[j] == k )                 /* slot j lies at distance k */
+         {
+            for( i = 2; i < NPROCS; i++ )
+            {
+               if( k < IPMAPM1[i] )            /* slot i lies farther than k */
+               {
+                  iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1];
+
+                  if( iplen_j < iplen_i )      /* larger piece goes to slot j */
+                  {
+                     IPLEN[j+1] = iplen_i;  IPLEN[i+1] = iplen_j;
+                     itmp       = IPMAP[j]; IPMAP[j]   = IPMAP[i];
+                     IPMAP[i]   = itmp;
+                  }
+               }
+            }
+         }
+      }
+   }
+/*
+ * Prefix-sum IPLEN into offsets and build IPMAPM1 (the inverse of IPMAP)
+ */
+   IPLEN[0] = 0;
+
+   for( i = 0; i < NPROCS; i++ )
+   {
+      IPMAPM1[ IPMAP[i] ] = i;
+      IPLEN[i+1]         += IPLEN[i];
+   }
+/*
+ * End of HPL_logsort
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c
new file mode 100644
index 000000000..ced74269e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c
@@ -0,0 +1,116 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting ("right looking" variant, with or without look-ahead)
+ * and, when A->info is 0 afterwards, solves the upper triangular system.
+ * The lower triangular factor is left unpivoted and the pivots are not
+ * returned. The right hand side is the N+1 column of the matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( A->n <= 0 ) return; /* empty problem: A->info is left untouched */
+
+   A->info = 0;
+
+   if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) /* depth 0 or one process column */
+   {
+      HPL_pdgesv0(  GRID, ALGO, A );
+   }
+   else
+   {
+      HPL_pdgesvK2( GRID, ALGO, A );
+   }
+/*
+ * Solve upper triangular system
+ */
+   if( A->info == 0 ) HPL_pdtrsv( GRID, A );
+/*
+ * End of HPL_pdgesv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c
new file mode 100644
index 000000000..d79b6fa55
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c
@@ -0,0 +1,167 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesv0
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesv0
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesv0 factors a N-by-N+1 matrix using LU factorization with row
+ * partial pivoting.  The main algorithm  is the "right looking" variant
+ * without look-ahead. The lower triangular factor is left unpivoted and
+ * the pivots are not returned. The right hand side is the N+1 column of
+ * the coefficient matrix.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters.
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   HPL_T_panel                * * panel = NULL;
+   HPL_T_UPD_FUN              HPL_pdupdate;
+   int                        N, j, jb, n, nb, tag=MSGID_BEGIN_FACT,
+                              test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( N = A->n ) <= 0 ) return; /* nothing to factor */
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+ 
+   HPL_pdupdate = ALGO->upfun; nb = A->nb;
+/*
+ * Allocate a panel list of length 1 - Allocate panel[0] resources
+ */
+   panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); }
+
+   HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag,
+                    &panel[0] );
+/*
+ * Loop over the columns of A
+ */
+   for( j = 0; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb ); /* n: order left to factor at column j */
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Release panel resources - re-initialize panel data structure
+ */
+      (void) HPL_pdpanel_free( panel[0] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] );
+/*
+ * Factor and broadcast current panel - update
+ */
+      HPL_pdfact(               panel[0] );
+      (void) HPL_binit(         panel[0] );
+      do
+      { (void) HPL_bcast(       panel[0], &test ); }
+      while( test != HPL_SUCCESS ); /* repeat until the broadcast reports success */
+      (void) HPL_bwait(         panel[0] );
+      HPL_pdupdate( NULL, NULL, panel[0], -1 );
+/*
+ * Update message id for next factorization
+ */
+      tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Release panel resources and panel list
+ */
+   (void) HPL_pdpanel_disp( &panel[0] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesv0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
new file mode 100644
index 000000000..ff1958cfc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c
@@ -0,0 +1,222 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+#ifdef STDC_HEADERS
+void HPL_pdgesvK1
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK1
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK1 factors a N-by-(N+1) matrix using LU factorization with
+ * row partial pivoting  ("right looking" variant with look-ahead).  The
+ * lower triangular factor is left unpivoted, the pivots are not return-
+ * ed, and the right hand side is column N+1 of the coefficient matrix.
+ * Each main-loop step creates a new panel and disposes the oldest one.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information (mycol, npcol; myrow for reporting).
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters (depth and upfun are read here).
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information (n and nb are read here).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (jj is updated but never read in this routine)
+ */
+   HPL_T_panel                * * panel = NULL;  /* depth+1 panel slots */
+   HPL_T_UPD_FUN              HPL_pdupdate;    /* set from ALGO->upfun */
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb; 
+/* Quick return if there is nothing to factor */
+   if( N <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1), abort if NULL
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+/* The process column equal to icurcol shrinks nq by jb for each panel */
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel - use long topology for those
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );     /* retry until HPL_SUCCESS */
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-1-k panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Allocate current panel resources - Finish latest update - Factor and
+ * broadcast current panel
+ */
+      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
+/* Only the process column icurcol factors here; the others set nn = 0 */
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(    panel[depth] );       /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Release oldest panel resources - circular shift of the panel pointers
+ * Advance icurcol to the next process column - update  the message  ids
+ * for broadcast
+ */
+      (void) HPL_pdpanel_disp( &panel[0] );
+      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+/* panel[depth] == panel[depth-1] now; clean-up only disposes 0..depth-1 */
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/* Clean-up: Finish updates - release panels and panel list.
+ * NOTE(review): nn looks like the local count of the single column at
+ * global index N (the right hand side) - confirm with HPL_numrocI. */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp( &panel[k] );
+   }
+ 
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
new file mode 100644
index 000000000..dec506ab9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c
@@ -0,0 +1,231 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdgesvK2
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   HPL_T_pmat *                     A
+)
+#else
+void HPL_pdgesvK2
+( GRID, ALGO, A )
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   HPL_T_pmat *                     A;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdgesvK2 factors a N-by-(N+1) matrix using LU factorization with
+ * row partial pivoting  ("right looking" variant with look-ahead).  The
+ * lower triangular factor is left unpivoted, the pivots are not return-
+ * ed, and the right hand side is column N+1 of the coefficient matrix.
+ * All depth+1 panels are created up front and recycled in the main loop.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information (mycol, npcol; myrow for reporting).
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters (depth and upfun are read here).
+ *
+ * A       (local input/output)          HPL_T_pmat *
+ *         On entry, A points to the data structure containing the local
+ *         array information (n and nb are read here).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..  (jj is updated but never read in this routine)
+ */
+   HPL_T_panel                * p, * * panel = NULL; /* p: rotation temp */
+   HPL_T_UPD_FUN              HPL_pdupdate;    /* set from ALGO->upfun */
+   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
+                              k, mycol, n, nb, nn, npcol, nq,
+                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
+#ifdef HPL_PROGRESS_REPORT
+   double start_time, time, gflops;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   mycol = GRID->mycol; npcol        = GRID->npcol;
+   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
+   N     = A->n;        nb           = A->nb;
+/* Quick return if there is nothing to factor */
+   if( N <= 0 ) return;
+
+#ifdef HPL_PROGRESS_REPORT
+   start_time = HPL_timer_walltime();
+#endif
+
+/*
+ * Allocate a panel list of length depth + 1 (depth >= 1), abort if NULL
+ */
+   panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) );
+   if( panel == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); }
+/*
+ * Create and initialize the first depth panels
+ */
+   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
+/* The process column equal to icurcol shrinks nq by jb for each panel */
+   for( k = 0; k < depth; k++ )
+   {
+      jb = Mmin( nn, nb );
+      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
+                       tag, &panel[k] );
+      nn -= jb; jstart += jb;
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/*
+ * Create the last panel, panel[depth];  the main loop re-initializes it
+ */
+   HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart,
+                    jstart, tag, &panel[depth] );
+   tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+/*
+ * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
+ */
+   for( k = 0, j = 0; k < depth; k++ )
+   {
+      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
+/*
+ * Factor and broadcast k-th panel
+ */
+      HPL_pdfact(         panel[k] );
+      (void) HPL_binit(   panel[k] );
+      do
+      { (void) HPL_bcast( panel[k], &test ); }
+      while( test != HPL_SUCCESS );     /* retry until HPL_SUCCESS */
+      (void) HPL_bwait(   panel[k] );
+/*
+ * Partial update of the depth-k-1 panels in front of me
+ */
+      if( k < depth - 1 )
+      {
+         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
+         HPL_pdupdate( NULL, NULL, panel[k], nn );
+      }
+   }
+/*
+ * Main loop over the remaining columns of A
+ */
+   for( j = jstart; j < N; j += nb )
+   {
+      n = N - j; jb = Mmin( n, nb );
+#ifdef HPL_PROGRESS_REPORT
+      /* if this is process 0,0 and not the first panel */
+      if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) 
+      {
+          time = HPL_timer_walltime() - start_time;
+          gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9;
+          HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops);
+      }
+#endif
+/*
+ * Initialize current panel - Finish latest update, Factor and broadcast
+ * current panel.  panel[depth] is recycled: HPL_pdpanel_free, then init
+ */
+      (void) HPL_pdpanel_free( panel[depth] );
+      HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] );
+/* Only the process column icurcol factors here; the others set nn = 0 */
+      if( mycol == icurcol )
+      {
+         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
+         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
+            (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+         HPL_pdfact(       panel[depth] );    /* factor current panel */
+      }
+      else { nn = 0; }
+          /* Finish the latest update and broadcast the current panel */
+      (void) HPL_binit( panel[depth] );
+      HPL_pdupdate( panel[depth], &test, panel[0], nq-nn );
+      (void) HPL_bwait( panel[depth] );
+/*
+ * Circular shift of the panel pointers (p holds the old panel[0]):
+ * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp;
+ *
+ * Advance icurcol to the next process column - update the message ids
+ */
+      p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
+      panel[depth] = p;
+
+      if( mycol == icurcol ) { jj += jb; nq -= jb; }
+      icurcol = MModAdd1( icurcol, npcol );
+      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
+   }
+/* Clean-up: Finish updates - release all depth+1 panels and the list.
+ * NOTE(review): nn looks like the local count of the single column at
+ * global index N (the right hand side) - confirm with HPL_numrocI. */
+   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
+   for( k = 0; k < depth; k++ )
+   {
+      (void) HPL_pdupdate( NULL, NULL, panel[k], nn );
+      (void) HPL_pdpanel_disp(  &panel[k] );
+   }
+   (void) HPL_pdpanel_disp( &panel[depth] );
+
+   if( panel ) free( panel );
+/*
+ * End of HPL_pdgesvK2
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
new file mode 100644
index 000000000..b4433e1be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c
@@ -0,0 +1,432 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcast a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;
+/*
+ * Allocate space for temporary W (ldW * jb)
+ */
+   vptr = (void*)malloc( 
+      ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counter parts lindxA, lindxAU -  llen is
+ * the number of rows/columns that I have in workspace and that I should
+ * send.  Compute  lindx_, ipA, llen if it has not already been done for
+ * this panel;
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of  the algorithm, it is necessary to update the list
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
new file mode 100644
index 000000000..7a9764c09
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c
@@ -0,0 +1,433 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp00T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp00T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * Bi-directional  exchange  is used to perform the  swap :: broadcast of
+ * the row  panel U at once, resulting in a lower number of messages than
+ * usual as well as a lower communication volume. With P process rows and
+ * assuming  bi-directional links,  the running time of this function can
+ * be approximated by:
+ *  
+ *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  Mono
+ * directional links will double this communication cost.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be broadcast and swapped) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                  comm;
+   HPL_T_grid                * grid;
+   double                    * A, * U, * W;
+   void                       * vptr = NULL;
+   int                       * ipID, * lindxA, * lindxAU, * llen,
+                             * llen_sv;
+   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
+                             mydist, mydis_;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
+                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
+                             iprow, jb, k, lda, ldW, myrow, n, nprow,
+                             partner, root, size_, usize;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
+   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
+   hdim  = grid->row_hdim; align   = PANEL->algo->align;
+   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
+   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
+   ldW   = n + 1;
+/*
+ * Allocate space for temporary W (ldW * jb doubles + align of padding)
+ */
+   vptr = (void*)malloc( ( (size_t)(align) + 
+                           ((size_t)(jb) * (size_t)(ldW))) * 
+                           sizeof(double) );
+   if( vptr == NULL )
+   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
+
+   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
+/*
+ * Construct ipID and its local counterparts lindxA, lindxAU, all carved
+ * out of PANEL->IWORK after the flag word - llen is the number of rows/
+ * columns that I have in workspace and that I should send.  Compute the
+ * lindx_ and llen_sv lists if not already done for this panel (*iflag);
+ */
+   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
+   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
+   llen    = lindxAU + k; llen_sv = llen + nprow;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
+   {
+      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
+      *iflag = 0;
+   }
+/*
+ * Copy the llen_sv into llen - Reset ipA to its correct value
+ */
+   ipA = llen_sv[myrow];
+   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
+/*
+ * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
+ * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
+ * we directly pack into U, otherwise we pack into workspace. The  first
+ * entry of each column packed in workspace is in fact the row or column
+ * offset in U where it should go to.
+ */
+   if( myrow == icurrow ) 
+   {
+      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+   else
+   {
+      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * Algorithm for bi-directional data exchange:
+ *
+ * As long as I have not talked to a process that  already  had the data
+ * from icurrow,  I will be sending the workspace,  otherwise  I will be
+ * sending U. Note that the columns in workspace contain the local index
+ * in U they should go to.
+ *
+ * If I am receiving from a process that  has the data from  icurrow,  I
+ * will be receiving in  U, copy the data of  U  that stays into  A, and
+ * then the columns I have in workspace into U; otherwise  I will be re-
+ * ceiving in the remaining workspace.  If I am one  of  those processes 
+ * that already has the data from icurrow, I will be immediately copying
+ * the data I have in my workspace into U.
+ *
+ * When I receive U, some of U should be copied in my piece of A  before
+ * I can copy the rows I have in my workspace into  U.  This information
+ * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
+ * the row  lindxA[i] of my piece of  A, just as in the reversed initial
+ * packing operation. Those rows are thus the first ones in the work ar-
+ * ray.  After  this  operation  has  been  performed,  I will not  need
+ * those lindx arrays,  and  I  will  always be sending a buffer of size
+ * jb x n, or n x jb, that is, U.
+ *
+ * At  every  step  of the algorithm, it is necessary to update the list 
+ * llen,  so that I can figure out how large the next messages I will be
+ * sending/receiving are.  It is  obvious when I am sending U. It is not
+ * otherwise.
+ *
+ * We  choose  icurrow  to be the source of the bi-directional exchange.
+ * This allows the processes in the non-power 2 part to receive U at the
+ * first exchange,  and  then  broadcast internally this U so that those 
+ * processes can grab their piece of A.
+ */
+   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
+   ipW    = ipA;
+   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
+   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
+/*
+ * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
+ * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
+ * is the source, these last process indexes are relative to icurrow.
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+
+      if( mydist == 0 )  /* I am the current row: I send U and recv W */
+      {
+         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
+                          Cmsgid, partner, comm );
+         if( llen[partner] > 0 )
+            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
+      }
+      else if( mydist == ip2 )
+      {                      /* I recv U for later Bcast, I send my W */
+         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                          Cmsgid, partner, comm );
+      }
+      else               /* None of us is icurrow, we exchange our Ws */
+      {
+         if( ( mydist & ip2 ) != 0 ) 
+         {
+            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
+         }
+         else
+         {
+            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
+                             partner, Cmsgid, comm );
+            if( llen[partner] > 0 ) ipW += llen[partner];
+         }
+      }
+   }
+/*
+ * Update llen
+ */
+   for( i = 1; i < size_; i++ )
+   {
+      iprow   = MModAdd( icurrow, i,          nprow );
+      partner = MModAdd( iprow,   (int)(ip2), nprow );
+      llen[ iprow ] += llen[ partner ];
+   }
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+/*
+ * power of 2 part of the processes collection:  only processes [0..ip2)
+ * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
+ * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
+ * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
+ * Among  those  processes,  the  ones  that are sending U are such that 
+ * mydist >> k == 0.
+ */
+   if( mydist < ip2 )
+   {
+      k = 0;
+
+      while( k < hdim )
+      {
+         partner = (int)(mydist ^ ipow);
+         partner = MModAdd( icurrow, partner, nprow );
+/*
+ * Exchange and combine the local results - If I receive U,  then I must
+ * copy from U the rows that belong to my piece of A, and then update  U
+ * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
+ * ceive W.  In this latter case, if I have U, I shall update my copy of
+ * U by copying in it the rows I have accumulated in  W.  If  I  did not
+ * have U before, I simply need to update my pointer in W for later use.
+ */
+         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
+         {
+            if( ( mydist >> (unsigned int)(k) ) == 0 )
+            {
+               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
+                                ldW ), llen[partner]*ldW, Cmsgid,
+                                partner, comm );
+               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
+                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
+               ipW += llen[partner];
+            }
+            else
+            {
+               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
+                                Cmsgid, partner, comm );
+               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
+                              W+1, ldW, lindxA, lindxAU );
+            }
+         }
+         else
+         {
+            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
+                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
+                             partner, comm );
+            ipW += llen[partner];
+         }
+/*
+ * Update llen - Go to next process pairs
+ */
+         iprow = icurrow; ipdist = 0;
+         do
+         {
+            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
+            {
+               partner = MModAdd( icurrow, partner, nprow );
+               llen[iprow]  += llen[partner];
+               llen[partner] = llen[iprow];
+            }
+            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
+
+         } while( ipdist < ip2 );
+
+         ipow <<= 1; k++;
+/*
+ * Probe for column panel - forward it when available 
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+      }
+   }
+   else
+   {
+/*
+ * non power of 2 part of the process collection:  proc[ip2] broadcast U
+ * to procs[ip2..nprow) (relatively to icurrow).
+ */
+      if( size_ > 1 )
+      {
+         k = size_ - 1;
+         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
+         root   = MModAdd( icurrow, (int)(ip2), nprow );
+         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
+
+         do
+         {
+            mask ^= ip2_;
+            if( ( mydis_ & mask ) == 0 )
+            {
+               partner = (int)(mydis_ ^ ip2_);
+               if( ( mydis_ & ip2_ ) != 0 )
+               {
+                  (void) HPL_recv( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+
+               }
+               else if( partner < size_ )
+               {
+                  (void) HPL_send( U, usize, MModAdd( root, partner,
+                                   nprow ), Cmsgid, comm );
+               }
+            }
+            ip2_ >>= 1;
+/*
+ * Probe for column panel - forward it when available 
+ */
+            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+         } while( ip2_ > 0 );
+      }
+/*
+ * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
+ * of A.
+ */
+      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
+   }
+/*
+ * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
+ * proc[i] for all i in [ip2..nprow);
+ */
+   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
+   {
+      partner = MModAdd( icurrow, partner, nprow );
+      if( ( mydist & ip2 ) != 0 )
+      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
+      else
+      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
+   }
+
+   if( vptr ) free( vptr );
+/*
+ * Probe for column panel - forward it when available 
+ */
+   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp00T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
new file mode 100644
index 000000000..31f219840
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01N
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01N
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Decide once (result cached in a static) whether to equilibrate or not
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel). lindxA and lindxAU
+ * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1
+ * are of size nprow,  permU is of length jb, and  this function needs a 
+ * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): 
+ * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
+ * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   { /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
+                     0, LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
+                 ipmap, ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp00N( jb, n, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01N
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
new file mode 100644
index 000000000..0c4de2669
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c
@@ -0,0 +1,217 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaswp01T
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdlaswp01T
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaswp01T applies the  NB  row interchanges to  NN columns of the
+ * trailing submatrix and broadcasts a column panel.
+ *  
+ * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+ * row panel U at once,  resulting in a minimal communication volume  and
+ * a "very good"  use of the connectivity if available.  With  P  process
+ * rows  and  assuming  bi-directional links,  the  running time  of this
+ * function can be approximated by:
+ *  
+ *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ *  
+ * where  NB  is the number of rows of the row panel U,  N is the global
+ * number of columns being updated,  lat and bdwth  are the latency  and
+ * bandwidth  of  the  network  for  double  precision real words.  K is
+ * a constant in (2,3] that depends on the achieved bandwidth  during  a
+ * simultaneous  message exchange  between two processes.  An  empirical
+ * optimistic value of K is typically 2.4.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to  be swapped and broadcast starting at
+ *         the current position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * A, * U;
+   int                       * ipID, * iplen, * ipmap, * ipmapm1,
+                             * iwork, * lindxA = NULL, * lindxAU,
+                             * permU;
+   static int                equil=-1;
+   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
+                             lda, myrow, n, nprow;
+#define LDU                  n
+/* ..
+ * .. Executable Statements ..
+ */
+   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
+/*
+ * Quick return if there is nothing to do
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * Cache the equilibration setting on first call (equil is static, -1 = unset)
+ */
+   if( equil == -1 ) equil = PANEL->algo->equil;
+/*
+ * Retrieve parameters from the PANEL data structure
+ */
+   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
+   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
+   lda   = PANEL->lda; icurrow = PANEL->prow;
+/*
+ * Compute ipID (if not already done for this panel).  PANEL->IWORK is
+ * carved up as: iflag(1), ipl(1), ipID(4*jb), ipA(1), lindxA(2*jb),
+ * lindxAU(2*jb), iplen(nprow+1), ipmap(nprow), ipmapm1(nprow), permU(jb)
+ * and finally iwork,  which  needs  max( 2*jb (plindx1), nprow+1 (equil) )
+ * entries, i.e. the total integer workspace is
+ * 4 + 9*jb + 3*nprow + max(2*jb, nprow+1).
+ */
+   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
+   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
+   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
+   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
+
+   if( *iflag == -1 )    /* no index arrays have been computed so far */
+   {
+      HPL_pipid(   PANEL,  ipl, ipID );
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */
+   {
+      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
+                   ipmap, ipmapm1, permU, iwork );
+      *iflag = 1;
+   }
+   else if( ( *iflag == 1 ) && ( equil != 0 ) )
+   {   /* HPL_pdlaswp01T was called before: only re-compute IPLEN, IPMAP */
+      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
+      *iflag = 1;
+   }
+/*
+ * Copy into U the rows to be spread (local to icurrow)
+ */
+   if( myrow == icurrow )
+   { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
+/*
+ * Spread U - optionally probe for column panel
+ */
+   HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
+                ipmap, ipmapm1 );
+/*
+ * Local exchange (everywhere but in process row icurrow)
+ */
+   if( myrow != icurrow )
+   {
+      k = ipmapm1[myrow];
+      HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0,
+                     iplen[k], LDU ), LDU, lindxA );
+   }
+/*
+ * Equilibration
+ */
+   if( equil != 0 )
+      HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap,
+                 ipmapm1, iwork );
+/*
+ * Rolling phase
+ */
+   HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
+/*
+ * Permute U in every process row
+ */
+   HPL_dlaswp10N( n, jb, U, LDU, permU );
+
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_LASWP );
+#endif
+/*
+ * End of HPL_pdlaswp01T
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c
new file mode 100644
index 000000000..d2135130a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c
@@ -0,0 +1,296 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtrsv
+(
+   HPL_T_grid *                     GRID,
+   HPL_T_pmat *                     AMAT
+)
+#else
+void HPL_pdtrsv
+( GRID, AMAT )
+   HPL_T_grid *                     GRID;
+   HPL_T_pmat *                     AMAT;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtrsv solves an upper triangular system of linear equations.
+ *  
+ * The rhs is the last column of the N by N+1 matrix A. The solve starts
+ * in the process  column owning the  Nth  column of A, so the rhs b may
+ * need to be moved one process column to the left at the beginning. The
+ * routine therefore needs  a column  vector in every process column but
+ * the one owning  b. The result is  replicated in all process rows, and
+ * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ *  
+ * The algorithm uses decreasing one-ring broadcast in process rows  and
+ * columns  implemented  in terms of  synchronous communication point to
+ * point primitives.  The  lookahead of depth 1 is used to minimize  the
+ * critical path. This entire operation is essentially ``latency'' bound
+ * and an estimate of its running time is given by:
+ *  
+ *    (move rhs) lat + N / ( P bdwth ) +            
+ *    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+ *               gam2 N^2 / ( P Q ),                
+ *  
+ * where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+ * There are  N / NB  diagonal blocks. One must exchange  2  messages of
+ * length NB to compute the next  NB  entries of the vector solution, as
+ * well as performing a total of N^2 floating point operations.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * AMAT    (local input/output)          HPL_T_pmat *
+ *         On entry,  AMAT  points  to the data structure containing the
+ *         local array information; the solution is stored in AMAT->X.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm                   Ccomm, Rcomm;
+   double                     * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL,
+                              * XR=NULL, * Xd=NULL, * Xdprev=NULL,
+                              * W=NULL;
+   int                        Alcol, Alrow, Anpprev, Anp, Anq, Bcol,
+                              Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid,
+                              Wfr=0, colprev, kb, kbprev, lda, mycol,
+                              myrow, n, n1, n1p, n1pprev=0, nb, npcol,
+                              nprow, rowprev, tmp1, tmp2;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( n = AMAT->n ) <= 0 ) return; /* before timer: keep start/stop paired */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+   nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X;
+
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV;
+   Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1;
+   GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 );
+/*
+ * Move the rhs in the process column owning the last column of A.
+ */
+   Mnumroc( Anp, n, nb, nb, myrow, 0, nprow );
+   Mnumroc( Anq, n, nb, nb, mycol, 0, npcol );
+
+   tmp1  = ( n - 1 ) / nb;
+   Alrow = tmp1 - ( tmp1 / nprow ) * nprow;
+   Alcol = tmp1 - ( tmp1 / npcol ) * npcol;
+   kb    = n    - tmp1 * nb;
+
+   Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda );
+   Mindxg2p( n, nb, nb, Bcol, 0, npcol );
+
+   if( ( Anp > 0 ) && ( Alcol != Bcol ) )
+   {
+      if( mycol == Bcol  )
+      { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); }
+      else if( mycol == Alcol )
+      { (void) HPL_recv( XC, Anp, Bcol,  Rmsgid, Rcomm ); }
+   }
+   Rmsgid = ( Rmsgid + 2 >
+              MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 );
+   if( mycol != Alcol )
+   { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; }
+/*
+ * Set up lookahead
+ */
+   n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb );
+   if( Anp > 0 )
+   {
+      W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) );
+      if( W == NULL )
+      { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); }
+      Wfr = 1;
+   }
+
+   Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda );
+   tmp1    = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+   if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); }
+   if( mycol == Alcol )
+   {
+      Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq );
+      if( myrow == Alrow )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, Xd, 1 );
+      }
+   }
+
+   rowprev = Alrow; Alrow = MModSub1( Alrow, nprow );
+   colprev = Alcol; Alcol = MModSub1( Alcol, npcol );
+   kbprev  = kb; n -= kb;
+   tmp1    = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+   MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+/*
+ * Start the operations
+ */
+   while( n > 0 )
+   {
+      if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; }
+      if( myrow == Alrow ) { Anp -= kb; }
+/*
+ * Broadcast  (decreasing-ring)  of  previous solution block in previous
+ * process column,  compute  partial update of current block and send it
+ * to current process column.
+ */
+      if( mycol == colprev )
+      {
+/*
+ * Send previous solution block in process row above
+ */
+         if( myrow == rowprev )
+         {
+            if( GridIsNot1xQ )
+               (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                                Cmsgid, Ccomm );
+         }
+         else
+         {
+            (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+         } 
+/*
+ * Compute partial update of previous solution block and send it to cur-
+ * rent column
+ */
+         if( n1pprev > 0 )
+         {
+            tmp1 = Anpprev - n1pprev;
+            HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev,
+                       -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone,
+                       XC+tmp1, 1 );
+            if( GridIsNotPx1 )
+               (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm );
+         }
+/*
+ * Finish  the (decreasing-ring) broadcast of the solution block in pre-
+ * vious process column
+ */
+         if( ( myrow != rowprev ) &&
+             ( myrow != MModAdd1( rowprev, nprow ) ) )
+            (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ),
+                             Cmsgid, Ccomm );
+      }
+      else if( mycol == Alcol )
+      {
+/*
+ * Current  column  receives  and accumulates partial update of previous
+ * solution block
+ */
+         if( n1pprev > 0 )
+         {
+            (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm );
+            HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 );
+         }
+      }
+/*
+ * Solve current diagonal block 
+ */
+      if( ( mycol == Alcol ) && ( myrow == Alrow ) )
+      {
+         HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit,
+                    kb, Aptr+Anp, lda, XC+Anp, 1 );
+         HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 );
+      }
+/*
+ * Finish previous update
+ */
+      if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
+         HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone,
+                    Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 );
+/*
+ * Save info of current step and update info for the next step
+ */
+      if( mycol == Alcol ) { Xdprev   = Xd; Aprev = Aptr; }
+      if( myrow == Alrow ) { Anpprev -= kb; }
+      rowprev = Alrow; colprev = Alcol;
+      n1pprev = n1p;   kbprev  = kb; n -= kb;
+      Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol );
+      tmp1  = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) );
+      MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow );
+
+      Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? 
+                 MSGID_BEGIN_PTRSV   : Rmsgid+2 );
+      Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ?
+                 MSGID_BEGIN_PTRSV+1 : Cmsgid+2 );
+   }
+/*
+ * Replicate last solution block
+ */
+   if( mycol == colprev )
+      (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev,
+                            Ccomm );
+
+   if( Wfr  ) free( W  );
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_PTRSV );
+#endif
+/*
+ * End of HPL_pdtrsv
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
new file mode 100644
index 000000000..7e31ddcd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c
@@ -0,0 +1,442 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
new file mode 100644
index 000000000..faa3ef207
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateNT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateNT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateNT forwards (broadcasts) the panel PBCST and, at the same
+ * time, applies the row interchanges and updates part of the  trailing
+ * submatrix using the panel PANEL.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  when  PBCST  is not NULL,  IFLAG holds  the outcome
+ *         of the broadcast probe of PBCST.  When PBCST is NULL on entry,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. If NN is negative, all PANEL->nq columns are used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;  /* cached PANEL->algo->fsthr */
+   static HPL_T_SWAP         fswap = HPL_NO_SWP; /* cached algo->fswap */
+#define LDU                  n           /* leading dimension of Uptr */
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */ 
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateNT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
new file mode 100644
index 000000000..a16aa26a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTN
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously
+ * applies the row interchanges and updates part of the trailing  (using
+ * the panel PANEL) submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  indicates  whether or not  the broadcast has
+ *         been completed when PBCST is not NULL on entry. In that case,
+ *         IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing  submatrix  to be updated  starting  at the  current
+ *         position. NN must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP;
+#define LDU                  jb
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, 0, nn, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
new file mode 100644
index 000000000..81e6cc4b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c
@@ -0,0 +1,443 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdupdateTT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        NN
+)
+#else
+void HPL_pdupdateTT
+( PBCST, IFLAG, PANEL, NN )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        NN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdupdateTT  broadcasts - forwards  the panel  PBCST  and  at the
+ * same time  applies the row interchanges  and updates  (using the panel
+ * PANEL) part of the trailing submatrix.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local output)                int *
+ *         On exit,  IFLAG  holds the outcome of the last broadcast probe
+ *         when  PBCST  is not NULL on entry.  When  PBCST  is NULL,  the
+ *         value pointed to by IFLAG is left unchanged.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be updated) information.
+ *
+ * NN      (local input)                 const int
+ *         On entry, NN specifies  the  local  number  of columns of the
+ *         trailing submatrix to be updated from the current position. It
+ *         is capped at PANEL->nq; if NN < 0, PANEL->nq columns are used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
+   int                       * ipiv;
+#ifdef HPL_CALL_VSIPL
+   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
+#endif
+   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
+                             nq0, nn, test;
+   static int                tswap = 0;
+   static HPL_T_SWAP         fswap = HPL_NO_SWP; /* set on first nprow>1 call */
+#define LDU                  n   /* leading dim. of U, used here as n x jb */
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
+   if( NN >= 0 ) n = Mmin( NN, n );   /* NN < 0: keep n = PANEL->nq */
+/*
+ * There is nothing to update, enforce the panel broadcast.
+ */
+   if( ( n <= 0 ) || ( jb <= 0 ) )
+   {
+      if( PBCST != NULL )
+      {
+         do { (void) HPL_bcast( PBCST, IFLAG ); }
+         while( *IFLAG != HPL_SUCCESS );
+      }
+#ifdef HPL_DETAILED_TIMING
+      HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+      return;
+   }
+/*
+ * Enable/disable the column panel probing mechanism
+ */
+   (void) HPL_bcast( PBCST, &test );
+/*
+ * 1 x Q case
+ */
+   if( PANEL->grid->nprow == 1 )
+   {
+      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
+      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
+      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
+/*
+ * So far we have not updated anything -  test availability of the panel
+ * to be forwarded - If detected forward it and finish the update in one
+ * step.
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+/*
+ * Update nb columns at a time
+ */
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+#ifdef HPL_DETAILED_TIMING
+         HPL_ptimer( HPL_TIMING_LASWP );
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+         HPL_ptimer( HPL_TIMING_LASWP );
+#else
+         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
+#endif
+         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
+                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
+         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
+                      HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+         (void) vsip_mdestroy_d( Av1 );
+         (void) vsip_mdestroy_d( Uv1 );
+#else
+         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
+                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
+                    Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+   else                        /* nprow > 1 ... */
+   {
+/*
+ * Selection of the swapping algorithm - swap:broadcast U.
+ */
+      if( fswap == HPL_NO_SWP )
+      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
+
+      if( (   fswap == HPL_SWAP01 ) ||
+          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
+      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
+      else
+      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
+/*
+ * Compute redundantly row block of U and update trailing submatrix
+ */
+      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
+      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
+      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
+      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
+#ifdef HPL_CALL_VSIPL
+/*
+ * Admit the blocks
+ */
+      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
+      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
+/*
+ * Create the matrix views
+ */
+      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
+      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
+      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,               jb );
+/*
+ * Create the matrix subviews
+ */
+      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
+#endif
+/*
+ * Broadcast has not occurred yet, splitting the computational part
+ */
+      while ( test == HPL_KEEP_TESTING )
+      {
+         nn = n - nq0; nn = Mmin( nb, nn );
+
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+         Uptr = Mptr( Uptr, nn, 0, LDU );
+         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
+
+         (void) HPL_bcast( PBCST, &test ); 
+      }
+/*
+ * The panel has been forwarded at that point, finish the update
+ */
+      if( ( nn = n - nq0 ) > 0 )
+      {
+         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
+                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
+
+         if( curr != 0 )
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Mptr( Aptr, jb, 0, lda ), lda );
+#endif
+            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
+         }
+         else
+         {
+#ifdef HPL_CALL_VSIPL
+/*
+ * Create the matrix subviews
+ */
+            Uv1 = vsip_msubview_d( Uv0, nq0,          0,             nn, jb );
+            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
+
+            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
+                         HPL_rone, Av1 );
+/*
+ * Destroy the matrix subviews
+ */
+            (void) vsip_mdestroy_d( Av1 );
+            (void) vsip_mdestroy_d( Uv1 );
+#else
+            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
+                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
+                       Aptr, lda );
+#endif
+         }
+      }
+#ifdef HPL_CALL_VSIPL
+/*
+ * Destroy the matrix subviews
+ */
+      (void) vsip_mdestroy_d( Lv1 );
+/*
+ * Release the blocks
+ */
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
+      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
+/*
+ * Destroy the matrix views
+ */
+      (void) vsip_mdestroy_d( Uv0 );
+      (void) vsip_mdestroy_d( Lv0 );
+      (void) vsip_mdestroy_d( Av0 );
+#endif
+   }
+
+   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
+/*
+ * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
+ * panel broadcast is enforced in that routine).
+ */
+   if( PBCST != NULL ) *IFLAG = test;
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer( HPL_TIMING_UPDATE );
+#endif
+/*
+ * End of HPL_pdupdateTT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c
new file mode 100644
index 000000000..bf7cc4503
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_perm combines  two  index arrays  and generates the corresponding
+ * permutation. First, this function computes the inverse of LINDXA, and
+ * then combines it with LINDXAU. Second, in order to be able to perform
+ * the permutation in place,  LINDXAU  is overwritten by the sequence of
+ * permutations producing  the  same result.  What we ultimately want to
+ * achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+ * call to this function,  this in place permutation can be performed by
+ * for i in [0..N) swap U[i] with U[LINDXAU[i]].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the length of the arrays  LINDXA  and
+ *         LINDXAU. N should be at least zero.
+ *
+ * LINDXA  (global input/output)         int *
+ *         On entry,  LINDXA  is an array of dimension N  containing the
+ *         source indexes. On exit,  LINDXA  contains the combined index
+ *         array.
+ *
+ * LINDXAU (global input/output)         int *
+ *         On entry,  LINDXAU is an array of dimension N  containing the
+ *         target indexes.  On exit,  LINDXAU  contains  the sequence of
+ *         permutations, that  should be applied  in increasing order to
+ *         permute the underlying array U in place.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension N.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j, k, fndd;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Invert LINDXA into IWORK, combine with LINDXAU, set IWORK to identity
+ */
+   for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; }
+   for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; }
+ 
+   for( i = 0; i < N; i++ )
+   {
+      /* find j with LINDXA[j] == i (unbounded scan: i must be present) */
+      j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--;
+      /* find k with IWORK[k]  == j (unbounded scan: j must be present) */
+      k = 0; do { fndd = ( IWORK[k]  == j ); k++; } while( !fndd ); k--;
+      /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */
+      j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j;
+      LINDXAU[i] = k;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array  IPID  that contains the source and final
+ * destination  of  matrix rows  resulting  from  the  application  of N
+ * interchanges  as computed by the  LU  factorization  with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long,  since  there are N rows to swap and
+ * broadcast. The length of this array  must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing  IPID  goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *  
+ * For every row src IA + i with i in [0..N) to be swapped with row  dst
+ * such that dst is given by DPIV[i]:
+ *  
+ * Is row  src  the destination  of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *     Yes:  update  this destination  with dst.  For  example,  if  the
+ * pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+ * was thought so far ...
+ *     No :  add  the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *  
+ * Is row  dst  different  from src the destination of a previous row of
+ * the current block, i.e., is there k odd such that IPID(k) is equal to
+ * dst ?
+ *     Yes:  update  IPID(k) with src.  For example,  if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+ * fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *     No : add  the  pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *  
+ * Note that when src is equal to dst, the pair (dst,src)  should not be
+ * added to  IPID  in  order  to avoid duplicated entries in this array.
+ * During  the construction of the array  IPID,  we  make  sure that the
+ * first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+ * For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in  IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N.  On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting  from  the  application of the  N  interchanges  as
+ *         specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst  = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;
+
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K;     off = 2; IPID[lst] = src; }
+         else        { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {
+         fndd = 0;
+         do
+         {
+            if     ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off  = 2; }
+         else        {                 IPID[fnds] = dst; off  = 0; }
+         if( !fndd ) { lst = *K+off;   IPID[lst ] = dst; off += 2; }
+         else        { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Enforce IPID(1,i) equal to src = ia + i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j  ]; IPID[j  ] = IPID[lst  ]; IPID[lst  ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.
+ *  
+ * On entry, the array  IPID  of length K is such that the row of global
+ * index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+ * Let  IA  be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index  IPID(2*k+1).  The question then, is to determine
+ * which rows should ultimately be part of U.
+ *  
+ * First, some rows of the process ICURROW  may be swapped locally.  One
+ * of this row belongs to U, the other one belongs to my local  piece of
+ * A.  The other  rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent  along,
+ * and  grabbed by the other processes  as we  progress in the  exchange
+ * phase.
+ *  
+ * So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N,  this row is locally swapped and should be copied into  U  at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *  
+ * If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to  A  remotely.  Since this  process will however send this array U,
+ * this row is  copied into  U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+ * IPID(2*k1+1)-IA.
+ *  
+ * It is thus  important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the begining of the array IPID. By
+ * doing so,  U  is formed, and the local copy  is performed in just one
+ * sweep.
+ *  
+ * Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU  contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *  
+ * Second, if I am not the current row process  ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed,  they  are swapped with one
+ * row  of  the  current  block  of rows,  and  the  main  factorization
+ * algorithm proceeds one row after each other.  The processes different
+ * from ICURROW,  should  exchange and accumulate  those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *  
+ * In processes different from  ICURROW,  the  initial packing algorithm
+ * proceeds as follows.  Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+ * and  this particular row of U should be first copied into my piece of
+ * A, at A(il,:),  where  il is the  local row  index  corresponding  to
+ * IPID(2*i). Now,initially, this row will be packed into workspace, say
+ * as the kth row of  that  work array.  The  following  algorithm  sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in  A  where  this
+ * row of U should be copied, i.e il.
+ *  
+ *   for all entries in IPID,
+ *      if IPID(2*i) is not in ICURROW,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *  
+ * Since we are at it, we also globally figure  out  how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to  figure it on  the fly  during the  bi-directional exchange phase.
+ * This information is kept in the array  LLEN  of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (local output)                int *
+ *         On exit, LINDXAU  is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+ *         contains how many rows every process has.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges.
+ */
+   myrow   = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow;        jb    = PANEL->jb;
+   nb      = PANEL->nb;          ia    = PANEL->ia;
+   iroff   = PANEL->ii;
+
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               LINDXAU[ip] = IPID[j-1] - ia;
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
+ * the  local  source and final destination position  resulting from the
+ * application of row interchanges.  In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit,  IPA  specifies  the number of rows that the current
+ *         process row has that either belong to U  or should be swapped
+ *         with remote rows of A.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On exit, LINDXAU  is an array of dimension 2*N. On exit, this
+ *         array contains  the local destination  information encoded as
+ *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+ *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in  the  processes  before  process  IPMAP[i]  after the sort
+ *         with the convention that IPLEN[nprow]  is the total number of
+ *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+ *         remaining  entries  of  this  array  are  sorted  so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROCS)
+ *
+ * PERMU   (global output)               int *
+ *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+ *         contains  a sequence of permutations,  that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
+ * source and final destination position resulting from  the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
+   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
+   iroff = PANEL->ii;          icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;
+ 
+   if( myrow == icurrow )
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+ 
+         if( srcrow == icurrow )
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+ 
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;
+ 
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+ 
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
+               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[i] is the local index of the row of A that belongs into U
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[i] is the local (current) position  index in U
+ * PERMU[i] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutation
+ * that need to be apply to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0; 
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx10
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1
+)
+#else
+void HPL_plindx10
+( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_plindx10 computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+ * contain the logarithmic mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID.  K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry,  IPID  is an array of length K  holding consecutive
+ *         (src, dst) pairs:  IPID[i] is a source row and  IPID[i+1] its
+ *         final destination after application of the interchanges.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+ *         this array is such that  IPLEN[i]  is the number of rows of A
+ *         in the processes before process IPMAP[i] after the sort, with
+ *         the convention that IPLEN[nprow] is the total number of rows.
+ *         In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+ *         rows of  A  that should be moved for each process.  IPLEN  is
+ *         such that the number of rows of the source process row can be
+ *         computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+ *         this  array are sorted  so  that  the quantities IPLEN[i+1] -
+ *         IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains  the logarithmic mapping of the processes.  In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+ *         this  array  contains  the inverse of the logarithmic mapping
+ *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+ *         [0.. NPROW).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, i, ia, icurrow, jb, nb,
+                              nprow, src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+   nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb;
+   ia    = PANEL->ia;          icurrow = PANEL->prow;
+/*
+ * Compute  redundantly  the local number of rows  that each process has
+ * and that belong to U in IPLEN[1 .. nprow] (entry dstrow+1 per dstrow)
+ */
+   for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0;
+ 
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+      if( srcrow == icurrow )
+      {
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+         if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++;
+      }
+   }
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ * (the inverse of IPMAP)
+ */
+   HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * End of HPL_plindx10
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c
new file mode 100644
index 000000000..e68590a01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c
@@ -0,0 +1,225 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollN
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollN rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of  U.  N must be
+ *         at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,IPLEN[NPROW]).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype               type[2];
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR,
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 steps, each with partner prev or next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il      = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); 
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE,
+                                      &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+      }
+ 
+      if( lengthS > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE,
+                                      &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_SEND] );
+      }
+
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free(   &type[I_RECV] );
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); }
+/*
+ * End of HPL_rollN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c
new file mode 100644
index 000000000..0160c9412
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c
@@ -0,0 +1,259 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#define   I_SEND    0
+#define   I_RECV    1
+
+#ifdef STDC_HEADERS
+void HPL_rollT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_rollT
+( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rollT rolls the local arrays containing the local pieces of U, so
+ * that on exit from this function U is replicated in every process row.
+ * In addition, this function probes for the presence of the column panel
+ * and forwards it when available.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be rolled) information.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the local number of rows of  U.  N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U in each process row.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least  MAX(1,N).
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+ *         in each process row.
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry, IPMAPM1  is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype               type[2];
+#endif
+   MPI_Status                 status;
+   MPI_Request                request;
+   MPI_Comm                   comm;
+   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
+                              ierr=MPI_SUCCESS, il, k, l, lengthR, 
+                              lengthS, mydist, myrow, next, npm1, nprow,
+                              partner, prev;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 ) return;
+
+   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
+   comm = PANEL->grid->col_comm;
+/*
+ * Rolling phase: nprow-1 steps, each with partner prev or next
+ */
+   mydist = IPMAPM1[myrow];
+   prev   = IPMAP[MModSub1( mydist, nprow )];
+   next   = IPMAP[MModAdd1( mydist, nprow )];
+ 
+   for( k = 0; k < npm1; k++ )
+   {
+      l = (int)( (unsigned int)(k) >> 1 );
+ 
+      if( ( ( mydist + k ) & 1 ) != 0 )
+      {
+         il      = MModAdd( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModSub( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
+      }
+      else
+      {
+         il    = MModSub( mydist, l,   nprow );
+         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
+         il    = MModAdd( mydist, l+1, nprow );
+         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
+      }
+ 
+      if( lengthR > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
+                                           &type[I_RECV] );
+            else
+               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
+                                       &type[I_RECV] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_RECV] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
+                                partner, Cmsgid, comm, &request );
+#else
+/*
+ * In our case, LDU is N - receive lengthR*LDU doubles, no MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
+                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
+#endif
+      }
+ 
+      if( lengthS > 0 )
+      {
+#if 0
+         if( ierr == MPI_SUCCESS )
+         {
+            if( LDU == N )
+               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
+                                             &type[I_SEND] );
+            else
+               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
+                                         &type[I_SEND] );
+         }
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_commit( &type[I_SEND] );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
+                               partner, Cmsgid, comm );
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_SEND] );
+#else
+/*
+ * In our case, LDU is N - send lengthS*LDU doubles, no MPI datatype.
+ */
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
+                               MPI_DOUBLE, partner, Cmsgid, comm );
+#endif
+      }
+
+      if( lengthR > 0 )
+      {
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Wait( &request, &status );
+#if 0
+         if( ierr == MPI_SUCCESS )
+            ierr =   MPI_Type_free( &type[I_RECV] );
+#endif
+      }
+/*
+ * Probe for column panel - forward it when available
+ */
+      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+   }
+
+   if( ierr != MPI_SUCCESS )
+   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
+/*
+ * End of HPL_rollT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c
new file mode 100644
index 000000000..202611e7f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c
@@ -0,0 +1,303 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadN
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadN
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadN spreads the local array containing local pieces of U, so
+ * that on exit from this function, a piece of  U  is contained in every
+ * process row. The array IPLEN contains the number of rows of U,  that
+ * should be spread on any given process row. This function also probes
+ * for the presence of the column panel PBCST. In case of success, this
+ * panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+ * mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies  the  local number of columns of U. N
+ *         must be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,IPLEN[nprow]).
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is used as a list of row offsets into U:  IPLEN[i] is the row
+ *         index at which the piece for process IPMAP[i] starts, with the
+ *         convention that IPLEN[nprow] is the total number of rows.  In
+ *         other words, IPLEN[i+1] - IPLEN[i] is the local number of rows
+ *         of U that should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   MPI_Datatype              type;
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left - positions IPMAPM1[.] in [0..SRCDIST] take part
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;   /* number of participating positions */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* not involved, or source is alone */
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = 2^floor(log2(npm1)), mask = 2*ip2-1 */
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[nprow];           /* not read again in this branch */
+
+      do
+      {
+         mask ^= ip2;                   /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 )   /* bits below ip2 are zero: take part in this step */
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); 
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;  /* peer distance differs only in bit ip2 */
+
+               if( mydist & ip2 )       /* bit ip2 set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type ); /* N blocks of lbuf doubles, stride LDU */
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow ) /* bit ip2 clear: send, provided the peer exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type ); /* same layout as on the receiving side */
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }  /* halve the step and move il */
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;  /* nprow: positions from SRCDIST to the end */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* not involved, or source is alone */
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = 2^floor(log2(npm1)), mask = 2*ip2-1 */
+      mydist2 = ( mydist -= SRCDIST ); il = ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[SRCDIST+nprow];   /* offset used when an index reaches nprow */
+/*
+ * Spread U to the right - IPLEN and IPMAP are indexed with a +SRCDIST offset
+ */
+      do
+      {
+         mask ^= ip2;                   /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 )   /* bits below ip2 are zero: take part in this step */
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;  /* peer distance differs only in bit ip2 */
+
+               if( mydist & ip2 )       /* bit ip2 set: this process receives */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type ); /* N blocks of lbuf doubles, stride LDU */
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+               else if( partner < nprow ) /* bit ip2 clear: send, provided the peer exists */
+               {
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
+                                               &type ); /* same layout as on the receiving side */
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )  
+                     ierr =   MPI_Type_free(   &type );
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }  /* halve the step and move il */
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )            /* every MPI call above is skipped after the first failure */
+   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
+/*
+ * End of HPL_spreadN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c
new file mode 100644
index 000000000..1adf93507
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c
@@ -0,0 +1,372 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_spreadT
+(
+   HPL_T_panel *                    PBCST,
+   int *                            IFLAG,
+   HPL_T_panel *                    PANEL,
+   const enum HPL_SIDE              SIDE,
+   const int                        N,
+   double *                         U,
+   const int                        LDU,
+   const int                        SRCDIST,
+   const int *                      IPLEN,
+   const int *                      IPMAP,
+   const int *                      IPMAPM1
+)
+#else
+void HPL_spreadT
+( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
+   HPL_T_panel *                    PBCST;
+   int *                            IFLAG;
+   HPL_T_panel *                    PANEL;
+   const enum HPL_SIDE              SIDE;
+   const int                        N;
+   double *                         U;
+   const int                        LDU;
+   const int                        SRCDIST;
+   const int *                      IPLEN;
+   const int *                      IPMAP;
+   const int *                      IPMAPM1;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_spreadT spreads  the local array containing local pieces of U, so
+ * that on exit from this function, a piece of  U  is contained in every
+ * process row.  The array  IPLEN  contains the number of columns of U,
+ * that should be spread on any given process row.  This function  also
+ * probes for the presence of  the column panel  PBCST.  If  available,
+ * this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+ * probing mechanism will be disabled.
+ *
+ * Arguments
+ * =========
+ *
+ * PBCST   (local input/output)          HPL_T_panel *
+ *         On entry,  PBCST  points to the data structure containing the
+ *         panel (to be broadcast) information.
+ *
+ * IFLAG   (local input/output)          int *
+ *         On entry, IFLAG  indicates  whether or not  the broadcast has
+ *         already been completed.  If not,  probing will occur, and the
+ *         outcome will be contained in IFLAG on exit.
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel (to be spread) information.
+ *
+ * SIDE    (global input)                const enum HPL_SIDE
+ *         On entry, SIDE specifies whether the local piece of U located
+ *         in process IPMAP[SRCDIST] should be spread to the right or to
+ *         the left. This feature is used by the equilibration process.
+ *
+ * N       (global input)                const int
+ *         On entry,  N  specifies the local number of rows of U. N must
+ *         be at least zero.
+ *
+ * U       (local input/output)          double *
+ *         On entry,  U  is an array of dimension (LDU,*) containing the
+ *         local pieces of U.
+ *
+ * LDU     (local input)                 const int
+ *         On entry, LDU specifies the local leading dimension of U. LDU
+ *         should be at least MAX(1,N); the messages assume LDU equals N.
+ *
+ * SRCDIST (local input)                 const int
+ *         On entry,  SRCDIST  specifies the source process that spreads
+ *         its piece of U.
+ *
+ * IPLEN   (global input)                const int *
+ *         On entry, IPLEN is an array of dimension NPROW+1.  This array
+ *         is used as a list of column offsets into U:  IPLEN[i]  is the
+ *         column at which the piece for process IPMAP[i] starts, with the
+ *         convention that IPLEN[nprow] is the total number of columns. In
+ *         other words, IPLEN[i+1] - IPLEN[i]  is  the  local  number  of
+ *         columns of U that should be moved to process IPMAP[i].
+ *
+ * IPMAP   (global input)                const int *
+ *         On entry, IPMAP is an array of dimension  NPROW.  This  array
+ *         contains  the  logarithmic mapping of the processes. In other
+ *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
+ *         process.
+ *
+ * IPMAPM1 (global input)                const int *
+ *         On entry,  IPMAPM1 is an array of dimension NPROW. This array
+ *         contains  the inverse of the logarithmic mapping contained in
+ *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#if 0
+   MPI_Datatype              type;
+#endif
+   MPI_Status                status;
+   MPI_Comm                  comm;
+   unsigned int              ip2=1, mask=1, mydist, mydist2;
+   int                       Cmsgid=MSGID_BEGIN_PFACT, ibuf,
+                             ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
+                             npm1, nprow, partner;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
+   comm  = PANEL->grid->col_comm;
+/*
+ * Spread U to the left - positions IPMAPM1[.] in [0..SRCDIST] take part
+ */
+   if( SIDE == HplLeft )
+   {
+      nprow = ( npm1 = SRCDIST ) + 1;   /* number of participating positions */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* not involved, or source is alone */
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = 2^floor(log2(npm1)), mask = 2*ip2-1 */
+      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; /* mydist: distance to the source */
+      lgth    = IPLEN[nprow];           /* not read again in this branch */
+
+      do
+      {
+         mask ^= ip2;                   /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 )   /* bits below ip2 are zero: take part in this step */
+         {
+            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;  /* peer distance differs only in bit ip2 */
+
+               if( mydist & ip2 )       /* bit ip2 set: this process receives */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm,
+                                        &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed to be N: lbuf columns are lbuf*N contiguous doubles
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* bit ip2 clear: send, provided the peer exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[npm1-partner], Cmsgid, comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed to be N: lbuf columns are lbuf*N contiguous doubles
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[npm1-partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }  /* halve the step and move il */
+         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+   else
+   {
+      npm1 = ( nprow -= SRCDIST ) - 1;  /* nprow: positions from SRCDIST to the end */
+      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
+              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; /* not involved, or source is alone */
+
+      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = 2^floor(log2(npm1)), mask = 2*ip2-1 */
+      mydist2 = ( mydist -= SRCDIST ); il = ip2; /* mydist: distance to the source */
+/*
+ * Spread to the right - IPLEN and IPMAP are indexed with a +SRCDIST offset
+ */
+      lgth = IPLEN[SRCDIST+nprow];      /* offset used when an index reaches nprow */
+/*
+ * Spread U
+ */
+      do
+      {
+         mask ^= ip2;                   /* clear bit ip2: mask keeps only the bits below ip2 */
+
+         if( ( mydist & mask ) == 0 )   /* bits below ip2 are zero: take part in this step */
+         {
+            k    = il      ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] );
+            k    = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;
+
+            if( lbuf > 0 )
+            {
+               partner = mydist ^ ip2;  /* peer distance differs only in bit ip2 */
+
+               if( mydist & ip2 )       /* bit ip2 set: this process receives */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm, &status );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed to be N: lbuf columns are lbuf*N contiguous doubles
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm, &status );
+#endif
+               }
+               else if( partner < nprow ) /* bit ip2 clear: send, provided the peer exists */
+               {
+#if 0
+                  if( ierr == MPI_SUCCESS )
+                  {
+                     if( LDU == N )
+                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
+                                                    &type );
+                     else
+                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
+                                                &type );
+                  }
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_commit( &type );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
+                                        IPMAP[SRCDIST+partner], Cmsgid,
+                                        comm );
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Type_free( &type );
+#else
+/*
+ * LDU is assumed to be N: lbuf columns are lbuf*N contiguous doubles
+ */
+                  if( ierr == MPI_SUCCESS )
+                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
+                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
+                                        Cmsgid, comm );
+#endif
+               }
+            }
+         }
+ 
+         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }  /* halve the step and move il */
+         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
+/*
+ * Probe for column panel - forward it when available
+ */
+         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
+ 
+      } while( ip2 > 0 );
+   }
+
+   if( ierr != MPI_SUCCESS )            /* every MPI call above is skipped after the first failure */
+   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
+/*
+ * End of HPL_spreadT
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kate/hip/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile
new file mode 100644
index 000000000..7898665f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile
@@ -0,0 +1,136 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Settings used below but never set in this file (INCdir, CC, CCFLAGS, ARCHIVER, ARFLAGS, RANLIB, TOUCH, HPLlib) have to come from Make.inc or the environment.
+# ######################################################################
+# INCdep: the HPL headers that every object rule in this file lists as a prerequisite.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h   $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h   $(INCdir)/hpl_comm.h  \
+   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pfact.h \
+   $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+# HPL_pgeobj: the objects built by the rules further down and archived into $(HPLlib) by the lib.grd rule.
+HPL_pgeobj       = \
+   HPL_pipid.o            HPL_plindx0.o          HPL_pdlaswp00N.o       \
+   HPL_pdlaswp00T.o       HPL_perm.o             HPL_logsort.o          \
+   HPL_plindx10.o         HPL_plindx1.o          HPL_spreadN.o          \
+   HPL_spreadT.o          HPL_rollN.o            HPL_rollT.o            \
+   HPL_equil.o            HPL_pdlaswp01N.o       HPL_pdlaswp01T.o       \
+   HPL_pdupdateNN.o       HPL_pdupdateNT.o       HPL_pdupdateTN.o       \
+   HPL_pdupdateTT.o       HPL_pdtrsv.o           HPL_pdgesv0.o          \
+   HPL_pdgesvK1.o         HPL_pdgesvK2.o         HPL_pdgesv.o
+#
+## Targets #############################################################
+# all and lib name actions, not files: keep them phony so a stray file called all or lib cannot hide the build.
+.PHONY  : all lib
+all     : lib
+lib     : lib.grd
+# lib.grd is the stamp file: archive the objects into $(HPLlib), index the archive, then touch the stamp.
+lib.grd : $(HPL_pgeobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# One explicit rule per object: compile ../<name>.c from the parent directory; a change to any $(INCdep) header rebuilds it.
+HPL_pipid.o            : ../HPL_pipid.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pipid.c
+HPL_plindx0.o          : ../HPL_plindx0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx0.c
+HPL_pdlaswp00N.o       : ../HPL_pdlaswp00N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00N.c
+HPL_pdlaswp00T.o       : ../HPL_pdlaswp00T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp00T.c
+HPL_perm.o             : ../HPL_perm.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_perm.c
+HPL_logsort.o          : ../HPL_logsort.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_logsort.c
+HPL_plindx10.o         : ../HPL_plindx10.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx10.c
+HPL_plindx1.o          : ../HPL_plindx1.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_plindx1.c
+HPL_spreadN.o          : ../HPL_spreadN.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadN.c
+HPL_spreadT.o          : ../HPL_spreadT.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_spreadT.c
+HPL_rollN.o            : ../HPL_rollN.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollN.c
+HPL_rollT.o            : ../HPL_rollT.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_rollT.c
+HPL_equil.o            : ../HPL_equil.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_equil.c
+HPL_pdlaswp01N.o       : ../HPL_pdlaswp01N.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01N.c
+HPL_pdlaswp01T.o       : ../HPL_pdlaswp01T.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdlaswp01T.c
+HPL_pdupdateNN.o       : ../HPL_pdupdateNN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNN.c
+HPL_pdupdateNT.o       : ../HPL_pdupdateNT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateNT.c
+HPL_pdupdateTN.o       : ../HPL_pdupdateTN.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTN.c
+HPL_pdupdateTT.o       : ../HPL_pdupdateTT.c       $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdupdateTT.c
+HPL_pdtrsv.o           : ../HPL_pdtrsv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtrsv.c
+HPL_pdgesv0.o          : ../HPL_pdgesv0.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv0.c
+HPL_pdgesvK1.o         : ../HPL_pdgesvK1.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK1.c
+HPL_pdgesvK2.o         : ../HPL_pdgesvK2.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesvK2.c
+HPL_pdgesv.o           : ../HPL_pdgesv.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdgesv.c
+#
+# ######################################################################
+# clean is a command, not a file; it removes the objects and the lib.grd stamp found in this directory.
+.PHONY           : clean
+clean            :
+	$(RM) *.o *.grd
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am
new file mode 100644
index 000000000..452ea5f06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am
@@ -0,0 +1,13 @@
+## Automake input for the xhpl program; headers are taken from $(top_srcdir)/include.
+AM_CPPFLAGS = -I$(top_srcdir)/include
+## xhpl is linked against the static HPL library expected at ../src/libhpl.a.
+xhpl_LDADD = ../src/libhpl.a
+## xhpl is the only program declared here; bin_PROGRAMS marks it for installation into bindir.
+bin_PROGRAMS = xhpl
+## Sources compiled into xhpl, taken from the matgen/, pmatgen/, ptest/ and ptimer/ subdirectories.
+xhpl_SOURCES =  \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in
new file mode 100644
index 000000000..034564545
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in
@@ -0,0 +1,698 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+# am__is_gnu_make tests MAKELEVEL, MAKE_HOST, MAKE_VERSION and CURDIR; am__make_running_with_option scans the make flags for the one-letter option held in the shell variable target_option (n and k are the two uses below).
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+bin_PROGRAMS = xhpl$(EXEEXT)
+subdir = testing
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/hplconfig.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \
+	matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \
+	matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \
+	matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \
+	ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \
+	ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \
+	ptimer/HPL_ptimer_cputime.$(OBJEXT) \
+	ptimer/HPL_ptimer_walltime.$(OBJEXT)
+xhpl_OBJECTS = $(am_xhpl_OBJECTS)
+xhpl_DEPENDENCIES = ../src/libhpl.a
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \
+	matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \
+	matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \
+	matgen/$(DEPDIR)/HPL_xjumpm.Po \
+	pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \
+	ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \
+	ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \
+	ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(xhpl_SOURCES)
+DIST_SOURCES = $(xhpl_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# am__uniquify_input: read newline-separated strings from standard input
+# and print each distinct string once.  Output order follows awk's
+# "for (i in items)" traversal, so input order is *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of tagged files is unique (the same source file might
+# be shared among _SOURCES variables for different programs/libraries).
+# Sets shell variable "unique"; names not found locally get $(srcdir)/ prefixed.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BLAS_LIBS = @BLAS_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(top_srcdir)/include
+xhpl_LDADD = ../src/libhpl.a
+xhpl_SOURCES = \
+matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \
+matgen/HPL_lmul.c matgen/HPL_ladd.c \
+pmatgen/HPL_pdmatgen.c \
+ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \
+ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c
+# all is the first rule in this file and simply delegates to all-am.
+all: all-am
+# Clear the inherited suffix list, then register only .c, .o and .obj for the suffix rules in this file.
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu testing/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+# config.status, configure and aclocal.m4 are all refreshed through the am--refresh target in $(top_builddir).
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	      echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	      $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+# Remove the installed programs from $(DESTDIR)$(bindir), applying the same $(transform) renaming as the install rule.
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+# Delete the built programs from the build directory; the leading "-" makes make ignore a failure.
+clean-binPROGRAMS:
+	-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+matgen/$(am__dirstamp):
+	@$(MKDIR_P) matgen
+	@: > matgen/$(am__dirstamp)
+matgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) matgen/$(DEPDIR)
+	@: > matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \
+	matgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen
+	@: > pmatgen/$(am__dirstamp)
+pmatgen/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) pmatgen/$(DEPDIR)
+	@: > pmatgen/$(DEPDIR)/$(am__dirstamp)
+pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \
+	pmatgen/$(DEPDIR)/$(am__dirstamp)
+ptest/$(am__dirstamp):
+	@$(MKDIR_P) ptest
+	@: > ptest/$(am__dirstamp)
+ptest/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptest/$(DEPDIR)
+	@: > ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \
+	ptest/$(DEPDIR)/$(am__dirstamp)
+ptimer/$(am__dirstamp):
+	@$(MKDIR_P) ptimer
+	@: > ptimer/$(am__dirstamp)
+ptimer/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) ptimer/$(DEPDIR)
+	@: > ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \
+	ptimer/$(DEPDIR)/$(am__dirstamp)
+# Link xhpl from its objects and $(xhpl_LDADD); any previous binary is removed first.
+xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) 
+	@rm -f xhpl$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f matgen/*.$(OBJEXT)
+	-rm -f pmatgen/*.$(OBJEXT)
+	-rm -f ptest/*.$(OBJEXT)
+	-rm -f ptimer/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+# Include the per-object .Po dependency fragments.
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker
+# Create a one-line placeholder for any dependency fragment that does not exist yet.
+$(am__depfiles_remade):
+	@$(MKDIR_P) $(@D)
+	@echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+# Suffix rule: compile a .c file into a .o object; the fast-dependency variant also writes a .Po file under $(DEPDIR).
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+# Same rule for the .obj object extension; here the source path is passed through $(CYGPATH_W).
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+# Run $(ETAGS) over the unique tagged files; nothing is run when there are no files and no ETAGS_ARGS.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+# GTAGS: run gtags from $(top_srcdir), passing the absolute path of the top build directory as its last argument.
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+# Append the path of every tagged file in this directory to $(top_builddir)/cscope.files.
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+# Remove the tag files (TAGS, ID, the GTAGS family and tags) from this directory.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+# Copy every entry of $(DISTFILES) into $(distdir), creating the needed subdirectories first.
+distdir-am: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+# install-am: build everything first, then run the exec and data install steps in a sub-make.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+# distclean-generic removes the configure-cleaned files and the directory stamp files created by this Makefile.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f matgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f matgen/$(am__dirstamp)
+	-rm -f pmatgen/$(DEPDIR)/$(am__dirstamp)
+	-rm -f pmatgen/$(am__dirstamp)
+	-rm -f ptest/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptest/$(am__dirstamp)
+	-rm -f ptimer/$(DEPDIR)/$(am__dirstamp)
+	-rm -f ptimer/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+# distclean: after distclean-am, also delete the per-object .Po dependency files and the generated Makefile.
+distclean: distclean-am
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+# The -am hooks below have neither prerequisites nor recipes in this directory, except install-exec-am, which installs the programs.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+# maintainer-clean: after maintainer-clean-am, delete the same .Po files and generated Makefile that distclean removes.
+maintainer-clean: maintainer-clean-am
+		-rm -f matgen/$(DEPDIR)/HPL_jumpit.Po
+	-rm -f matgen/$(DEPDIR)/HPL_ladd.Po
+	-rm -f matgen/$(DEPDIR)/HPL_lmul.Po
+	-rm -f matgen/$(DEPDIR)/HPL_rand.Po
+	-rm -f matgen/$(DEPDIR)/HPL_setran.Po
+	-rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po
+	-rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pddriver.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po
+	-rm -f ptest/$(DEPDIR)/HPL_pdtest.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po
+	-rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+# Every target listed under .PHONY is a command rather than a file to be built.
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-binPROGRAMS install-data install-data-am \
+	install-dvi install-dvi-am install-exec install-exec-am \
+	install-html install-html-am install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c
new file mode 100644
index 000000000..c14ef0fd1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c
@@ -0,0 +1,134 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dmatgen
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_dmatgen
+( M, N, A, LDA, ISEED )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_dmatgen generates (or regenerates) a random matrix A.
+ *  
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (input)                       const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (input)                       const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * A       (output)                      double *
+ *         On entry, A points to an array of dimension (LDA,N). On exit,
+ *         this  array  contains   the   coefficients  of  the  randomly
+ *         generated matrix.
+ *
+ * LDA     (input)                       const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,M).
+ *
+ * ISEED   (input)                       const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd[2], ia1[2], ic1[2], iran1[2],
+                              jseed[2], mult[2];
+   int                        i, incA = LDA - M, j; /* incA: entries skipped between columns */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return; /* empty matrix: A is left untouched */
+/*
+ * Initialize the random sequence
+ */
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0; /* seed goes in the first entry, second entry is zero */
+
+   HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 );
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Generate an M by N matrix, one column at a time (columns are LDA apart)
+ */
+   for( j = 0; j < N; A += incA, j++ )
+      for( i = 0; i < M; A++, i++ ) *A = HPL_rand();
+/*
+ * End of HPL_dmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c
new file mode 100644
index 000000000..4d4dc4db5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c
@@ -0,0 +1,114 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_jumpit
+(
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM
+)
+#else
+void HPL_jumpit
+( MULT, IADD, IRANN, IRANM )
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_jumpit jumps in the random sequence from the number  X(n) encoded
+ * in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+ * and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+ * and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+ * order to initialize them.
+ *
+ * Arguments
+ * =========
+ *
+ * MULT    (local input)                 int *
+ *         On entry, MULT is an array of dimension 2, that contains the
+ *         32-lower and 32-higher bits of the constant A.
+ *
+ * IADD    (local input)                 int *
+ *         On entry, IADD is an array of dimension 2, that contains the
+ *         32-lower and 32-higher bits of the constant C.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry,  IRANN  is an array of dimension 2,  that contains 
+ *         the 32-lower and 32-higher bits of the encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.  On exit, this
+ *         array contains respectively the 32-lower and  32-higher bits
+ *         of the encoding of X(m).
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                          j[2];       /* intermediate product  */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_lmul( IRANN, MULT, j );              /* j     = IRANN * MULT;  */
+   HPL_ladd( j, IADD, IRANM );              /* IRANM = j     + IADD;  */
+   HPL_setran( 0, IRANM );                  /* irand = IRANM          */
+/*
+ * End of HPL_jumpit
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c
new file mode 100644
index 000000000..0d4e4c08c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c
@@ -0,0 +1,126 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_ladd
+(
+   int *                            J,
+   int *                            K,
+   int *                            I
+)
+#else
+void HPL_ladd
+( J, K, I )
+   int *                            J;
+   int *                            K;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ladd adds two long positive integers K and J,  discards the carry
+ * out of bit 63, and puts the result into I. The long integers I, J, K
+ * are encoded on 64 bits using an array of 2 integers.  The 32-lower
+ * bits are stored in the first entry of each array, the 32-higher bits
+ * in the second entry.
+ *
+ * Arguments
+ * =========
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   unsigned int        itmp0, itmp1;
+   unsigned int        ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16;
+   unsigned int        ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16;
+   unsigned int        jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16;
+   unsigned int        jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16;
+/* K and J are split into four 16-bit pieces each, lowest first (tmp0 .. tmp3). */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ *    K[1] K[0] K  I[0]  = (K[0]+J[0]) % 2^32
+ *    XXXX XXXX    carry = (K[0]+J[0]) / 2^32
+ *
+ * +  J[1] J[0] J  I[1] = K[1] + J[1] + carry
+ *    XXXX XXXX    I[1] = I[1] % 2^32
+ *    -------------
+ *    I[1] I[0]
+ *    0XXX XXXX I
+ */
+   itmp0 = ktmp0 + jtmp0;                /* piece 0 sum, carry in bit 16 */
+   itmp1 = itmp0 >> 16;         I[0] = itmp0 - (itmp1 << 16 );
+   itmp1 += ktmp1 + jtmp1;      I[0] |= (itmp1 & 65535) << 16;
+   itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; /* carry from I[0] into I[1] */
+   I[1] = itmp0 - ((itmp0 >> 16 ) << 16);
+   itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3;
+   I[1] |= (itmp1 & 65535) << 16;        /* carry above bit 63 is dropped */
+/*
+ * End of HPL_ladd
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c
new file mode 100644
index 000000000..254b192f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c
@@ -0,0 +1,131 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_lmul
+(
+   int *                            K,
+   int *                            J,
+   int *                            I
+)
+#else
+void HPL_lmul
+( K, J, I )
+   int *                            K;
+   int *                            J;
+   int *                            I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_lmul multiplies two long positive integers K and J,  keeps only
+ * the 64 lower bits of the product and puts the result into I. The long
+ * integers I, J, K are encoded on 64 bits using an array of 2 integers.
+ * The 32-lower bits are stored in the first entry of each array, the
+ * 32-higher bits in the second entry of each array.  For efficiency
+ * purposes, the intrinsic modulo function is inlined.
+ *
+ * Arguments
+ * =========
+ *
+ * K       (local input)                 int *
+ *         On entry, K is an integer array of dimension 2 containing the
+ *         encoded long integer K.
+ *
+ * J       (local input)                 int *
+ *         On entry, J is an integer array of dimension 2 containing the
+ *         encoded long integer J.
+ *
+ * I       (local output)                int *
+ *         On entry, I is an integer array of dimension 2. On exit, this
+ *         array contains the encoded long integer result.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        r, c;
+   unsigned int               kk[4], jj[4], res[5]; /* 16-bit pieces; res[4] only absorbs overflow */
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Multiplication is done with 16 bits at a time. Multiplying two 16-bit
+ * integers yields a 32-bit result. The lower 16-bits of the result
+ * are kept in I, and the higher 16-bits are carried over to the
+ * next multiplication.
+ */
+   for (c = 0; c < 2; ++c) {
+     kk[2*c] = K[c] & 65535;
+     kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535;
+     jj[2*c] = J[c] & 65535;
+     jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535;
+   }
+
+   res[0] = 0;
+   for (c = 0; c < 4; ++c) {
+     res[c+1] = (res[c] >> 16) & 65535;   /* move pending carry up one piece */
+     res[c] &= 65535;
+     for (r = 0; r < c+1; ++r) {          /* all kk[r]*jj[c-r] landing in piece c */
+       res[c] = kk[r] * jj[c-r] + (res[c] & 65535);
+       res[c+1] += (res[c] >> 16) & 65535;
+     }
+   }
+
+   for (c = 0; c < 2; ++c)
+     I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535));
+/*
+ * End of HPL_lmul
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c
new file mode 100644
index 000000000..fe4e12f5e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c
@@ -0,0 +1,94 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_rand( void )
+#else
+double HPL_rand()
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_rand generates  the next number  in the  random  sequence.  This
+ * function  ensures  that this number lies in the interval (-0.5, 0.5].
+ *  
+ * The static array irand contains the information (2 integers) required
+ * to generate the  next number  in the sequence  X(n).  This  number is
+ * computed as X(n) = 0.5 - (2^32 * irand[1] + irand[0]) / d,  where the
+ * constant d is the largest 64 bit positive unsigned integer. The array
+ * irand is then  updated  for the generation of the next number  X(n+1)
+ * in  the  random   sequence  as   follows  X(n+1) = a * X(n) + c.  The
+ * constants a and c should have been preliminarily stored in the arrays
+ * ias and ics as 2 pairs of integers.  The initialization of  ias,  ics
+ * and  irand  is performed by the function HPL_setran.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        j[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_setran( 3, j ); /* fills j; presumably also advances irand - confirm in HPL_setran */
+/*
+ * return number between -0.5 and 0.5 (j[0] is scaled first, then folded into j[1])
+ */
+   return( HPL_HALF -
+           (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF +
+           (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF );
+/*
+ * End of HPL_rand
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c
new file mode 100644
index 000000000..1a3ca73aa
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c
@@ -0,0 +1,115 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int       ias[2], ics[2], irand[2];
+
+#ifdef STDC_HEADERS
+void HPL_setran
+(
+   const int                        OPTION,
+   int *                            IRAN
+)
+#else
+void HPL_setran
+( OPTION, IRAN )
+   const int                        OPTION;
+   int *                            IRAN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_setran reads or writes the state of the random generator,  which
+ * is kept  in the file-scope arrays  irand  (encoding  of the current
+ * number X(n)),  ias (multiplier a)  and  ics (increment c)  of  the
+ * recurrence X(n+1) = a*X(n) + c.  OPTION = 0, 1 or 2 copies IRAN into
+ * irand, ias or ics respectively.  OPTION = 3  copies irand into IRAN
+ * and then advances  irand  by one step of the recurrence.  Any other
+ * value of OPTION leaves both the state and IRAN untouched.
+ *
+ * Arguments
+ * =========
+ *
+ * OPTION  (local input)                 const int
+ *         On entry,  OPTION  selects  the operation  to perform on the
+ *         generator state, as described above.
+ *
+ * IRAN    (local input/output)          int *
+ *         On entry,  IRAN is an array of dimension 2.  It is read when
+ *         OPTION is 0, 1 or 2, and written when OPTION is 3.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* .. Local Variables .. */
+   int                        prod[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   switch( OPTION )
+   {
+      case 0 : irand[0] = IRAN[0]; irand[1] = IRAN[1]; break;
+      case 1 : ias  [0] = IRAN[0]; ias  [1] = IRAN[1]; break;
+      case 2 : ics  [0] = IRAN[0]; ics  [1] = IRAN[1]; break;
+      case 3 :                        /* hand out current value, then step */
+         IRAN[0] = irand[0]; IRAN[1] = irand[1];
+         HPL_lmul( irand, ias, prod );     /* prod  = irand * ias;   */
+         HPL_ladd( prod, ics, irand );     /* irand = prod  + ics;   */
+         break;
+   }
+/*
+ * End of HPL_setran
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c
new file mode 100644
index 000000000..ae70bbc16
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c
@@ -0,0 +1,158 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_xjumpm
+(
+   const int                        JUMPM,
+   int *                            MULT,
+   int *                            IADD,
+   int *                            IRANN,
+   int *                            IRANM,
+   int *                            IAM,
+   int *                            ICM
+)
+#else
+void HPL_xjumpm
+( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM )
+   const int                        JUMPM;
+   int *                            MULT;
+   int *                            IADD;
+   int *                            IRANN;
+   int *                            IRANM;
+   int *                            IAM;
+   int *                            ICM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_xjumpm composes the one-step recurrence X(n+1) = a*X(n) + c with
+ * itself to obtain constants A and C such that X(n+JUMPM) = A*X(n) + C,
+ * and applies that jump to IRANN.  The one-step constants  a and c  are
+ * passed in encoded form in MULT and IADD.
+ *
+ * Arguments
+ * =========
+ *
+ * JUMPM   (local input)                 const int
+ *         On entry,  JUMPM  specifies  the  number  of entries  in  the
+ *         sequence to jump over.  When  JUMPM  is less than or equal to
+ *         zero,  A and C are not computed  and  IRANM is set to  IRANN,
+ *         corresponding to a jump of size zero.
+ *
+ * MULT    (local input)                 int *
+ *         On entry,  MULT  is an array of dimension 2  that  holds  the
+ *         encoded constant  a  used to go from  X(n)  to  X(n+1) in the
+ *         random sequence.
+ *
+ * IADD    (local input)                 int *
+ *         On entry,  IADD  is an array of dimension 2  that  holds  the
+ *         encoded constant  c  used to go from  X(n)  to  X(n+1) in the
+ *         random sequence.
+ *
+ * IRANN   (local input)                 int *
+ *         On entry,  IRANN  is an array of dimension 2  that holds  the
+ *         encoding of X(n).
+ *
+ * IRANM   (local output)                int *
+ *         On entry,  IRANM  is an array of dimension 2.   On exit, this
+ *         array holds the encoding of  X(n+JUMPM),  or a copy of  IRANN
+ *         when JUMPM is less than or equal to zero.
+ *
+ * IAM     (local output)                int *
+ *         On entry, IAM is an array of dimension 2. On exit, when JUMPM
+ *         is  greater  than  zero,  this  array  contains  the  encoded
+ *         constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+ *         sequence;  it also serves  as the accumulator  while  A  is
+ *         being built up.  When  JUMPM  is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ICM     (local output)                int *
+ *         On entry, ICM is an array of dimension 2. On exit, when JUMPM
+ *         is  greater  than  zero,  this  array  contains  the  encoded
+ *         constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+ *         sequence;  it also serves  as the accumulator  while  C  is
+ *         being built up.  When  JUMPM  is less than or equal to zero,
+ *         this array is not referenced.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        step, tmp[2];
+/* ..
+ * .. Executable Statements ..
+ */
+   if( JUMPM <= 0 )
+   {                                        /* IRANM = IRANN          */
+      IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1];
+      return;
+   }
+/* one-step constants first, then JUMPM-1 further compositions */
+   IAM[0] = MULT[0]; IAM[1] = MULT[1];      /* IAM   = MULT;          */
+   ICM[0] = IADD[0]; ICM[1] = IADD[1];      /* ICM   = IADD;          */
+   for( step = JUMPM - 1; step > 0; step-- )
+   {
+      HPL_lmul( IAM, MULT, tmp );           /* tmp   = IAM   * MULT;  */
+      IAM[0] = tmp[0]; IAM[1] = tmp[1];     /* IAM   = tmp;           */
+      HPL_lmul( ICM, MULT, tmp );           /* tmp   = ICM   * MULT;  */
+      HPL_ladd( IADD, tmp, ICM );           /* ICM   = IADD  + tmp;   */
+   }
+/* apply the composed jump to IRANN */
+   HPL_lmul( IRANN, IAM, tmp );             /* tmp   = IRANN * IAM;   */
+   HPL_ladd( tmp, ICM, IRANM );             /* IRANM = tmp   + ICM;   */
+/*
+ * End of HPL_xjumpm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile
new file mode 100644
index 000000000..f027fbc06
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile
@@ -0,0 +1,95 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  \
+                   $(INCdir)/hpl_auxil.h  \
+                   $(INCdir)/hpl_matgen.h
+#
+## Object files ########################################################
+#
+HPL_matobj       = HPL_dmatgen.o  HPL_ladd.o    \
+                   HPL_lmul.o     HPL_xjumpm.o  \
+                   HPL_jumpit.o   HPL_rand.o    \
+                   HPL_setran.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib
+lib     : lib.grd
+# lib.grd is a stamp file, touched once the objects have been archived.
+lib.grd : $(HPL_matobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#
+HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c
+HPL_jumpit.o : ../HPL_jumpit.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c
+HPL_ladd.o : ../HPL_ladd.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c
+HPL_lmul.o : ../HPL_lmul.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c
+HPL_rand.o : ../HPL_rand.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c
+HPL_setran.o : ../HPL_setran.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c
+HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c
+#
+# ######################################################################
+.PHONY           : clean   # a command, not a file: always run the recipe
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
new file mode 100644
index 000000000..2d129c863
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c
@@ -0,0 +1,198 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdmatgen
+(
+   const HPL_T_grid *               GRID,
+   const int                        M,
+   const int                        N,
+   const int                        NB,
+   double *                         A,
+   const int                        LDA,
+   const int                        ISEED
+)
+#else
+void HPL_pdmatgen
+( GRID, M, N, NB, A, LDA, ISEED )
+   const HPL_T_grid *               GRID;
+   const int                        M;
+   const int                        N;
+   const int                        NB;
+   double *                         A;
+   const int                        LDA;
+   const int                        ISEED;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdmatgen generates (or regenerates) a parallel random matrix A.
+ *  
+ * The  pseudo-random  generator uses the linear congruential algorithm:
+ * X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+ * Programming, Knuth 1973, Vol. 2.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * A       (local output)                double *
+ *         On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+ *         On exit, this array contains the coefficients of the randomly
+ *         generated matrix.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ISEED   (global input)                const int
+ *         On entry, ISEED  specifies  the  seed  number to generate the
+ *         matrix A. ISEED must be at least zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        iadd [2], ia1  [2], ia2  [2], ia3  [2],
+                              ia4  [2], ia5  [2], ib1  [2], ib2  [2],
+                              ib3  [2], ic1  [2], ic2  [2], ic3  [2],
+                              ic4  [2], ic5  [2], iran1[2], iran2[2],
+                              iran3[2], iran4[2], itmp1[2], itmp2[2],
+                              itmp3[2], jseed[2], mult [2];
+   int                        ib, iblk, ik, jb, jblk, jk, jump1, jump2,
+                              jump3, jump4, jump5, jump6, jump7, lmb,
+                              lnb, mblks, mp, mycol, myrow, nblks,
+                              npcol, nprow, nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mult [0] = HPL_MULT0; mult [1] = HPL_MULT1;
+   iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1;
+   jseed[0] = ISEED;     jseed[1] = 0;
+/*
+ * Generate an M by N matrix starting in process (0,0).  mp and nq  are
+ * treated below as the local row / column counts of this process.    */
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( ( mp <= 0 ) || ( nq <= 0 ) ) return;   /* nothing local to fill */
+/*
+ * Local number of blocks and size of the last one
+ */
+   mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB;
+   nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB;
+/* Compute multiplier/adder pairs for the jumps in the random sequence:
+ * (ia2,ic2) nprow*NB numbers, (ia3,ic3) M numbers, (ia4,ic4) npcol*NB and
+ * (ia5,ic5) NB applications of the (ia3,ic3) jump; jump6/7 place iran1.  */
+   jump1 = 1;  jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB;
+   jump5 = NB; jump6 = mycol;      jump7 = myrow * NB;
+
+   HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1,   ic1   );
+   HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2,   ic2   );
+   HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3,   ic3   );
+   HPL_xjumpm( jump4, ia3,  ic3,  iran1, itmp1, ia4,   ic4   );
+   HPL_xjumpm( jump5, ia3,  ic3,  iran1, itmp1, ia5,   ic5   );
+   HPL_xjumpm( jump6, ia5,  ic5,  iran1, itmp3, itmp1, itmp2 );
+   HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 );
+   HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 );
+/*
+ * Save value of first number in sequence.  NOTE(review): HPL_jumpit is not
+ * visible here; it presumably applies (ia,ic) to ib* and reseeds irand.   */
+   ib1[0] = iran1[0]; ib1[1] = iran1[1];
+   ib2[0] = iran1[0]; ib2[1] = iran1[1];
+   ib3[0] = iran1[0]; ib3[1] = iran1[1];
+
+   for( jblk = 0; jblk < nblks; jblk++ )
+   {
+      jb = ( jblk == nblks - 1 ? lnb : NB );
+      for( jk = 0; jk < jb; jk++ )
+      {
+         for( iblk = 0; iblk < mblks; iblk++ )
+         {
+            ib = ( iblk == mblks - 1 ? lmb : NB );
+            for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand();
+            HPL_jumpit( ia2, ic2, ib1, iran2 );
+            ib1[0] = iran2[0]; ib1[1] = iran2[1];
+         }
+         A += LDA - mp;            /* total advance per column is LDA */
+         HPL_jumpit( ia3, ic3, ib2, iran3 );
+         ib1[0] = iran3[0]; ib1[1] = iran3[1];
+         ib2[0] = iran3[0]; ib2[1] = iran3[1];
+      }
+      HPL_jumpit( ia4, ic4, ib3, iran4 );
+      ib1[0] = iran4[0]; ib1[1] = iran4[1];
+      ib2[0] = iran4[0]; ib2[1] = iran4[1];
+      ib3[0] = iran4[0]; ib3[1] = iran4[1];
+   }
+/*
+ * End of HPL_pdmatgen
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile
new file mode 100644
index 000000000..bf33fcd7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile
@@ -0,0 +1,81 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# ######################################################################
+#
+# Headers listed as prerequisites of the object built in this directory.
+INCdep           = $(INCdir)/hpl_misc.h   $(INCdir)/hpl_matgen.h \
+                   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_pauxil.h \
+                   $(INCdir)/hpl_pmatgen.h
+#
+## Object files ########################################################
+# Objects that the lib.grd rule archives into $(HPLlib).
+HPL_pmaobj       = HPL_pdmatgen.o
+#
+## Targets #############################################################
+#
+all     : lib
+lib     : lib.grd
+.PHONY  : all lib clean   # command names, never files to be built
+# lib.grd is a stamp: archive the objects, index $(HPLlib), then touch it.
+lib.grd : $(HPL_pmaobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#
+HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep)   # $< is the .c source, $@ the object
+	$(CC) $(CCFLAGS) -c $< -o $@
+#
+# ######################################################################
+#
+clean            :   # remove the objects and the lib.grd stamp from this directory
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat
new file mode 100644
index 000000000..47aee883e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat
@@ -0,0 +1,31 @@
+HPLinpack benchmark input file
+Innovative Computing Laboratory, University of Tennessee
+HPL.out      output file name (if any)
+6            device out (6=stdout,7=stderr,file)
+4            # of problems sizes (N)
+29 30 34 35  Ns
+4            # of NBs
+1 2 3 4      NBs
+0            PMAP process mapping (0=Row-,1=Column-major)
+3            # of process grids (P x Q)
+2 1 4        Ps
+2 4 1        Qs
+16.0         threshold
+3            # of panel fact
+0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+2            # of recursive stopping criterium
+2 4          NBMINs (>= 1)
+1            # of panels in recursion
+2            NDIVs
+3            # of recursive panel fact.
+0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+1            # of broadcast
+0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+1            # of lookahead depth
+0            DEPTHs (>=0)
+2            SWAP (0=bin-exch,1=long,2=mix)
+64           swapping threshold
+0            L1 in (0=transposed,1=no-transposed) form
+0            U  in (0=transposed,1=no-transposed) form
+1            Equilibration (0=no,1=yes)
+8            memory alignment in double (> 0)
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c
new file mode 100644
index 000000000..5e4050f48
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c
@@ -0,0 +1,293 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int main
+(
+   int                        ARGC,
+   char                       * * ARGV
+)
+#else
+int main( ARGC, ARGV )
+/*
+ * .. Scalar Arguments ..
+ */
+   int                        ARGC;
+/*
+ * .. Array Arguments ..
+ */
+   char                       * * ARGV;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * main is the main driver program for testing the HPL routines.  It is
+ * driven by a short data file named "HPL.dat":  each combination of the
+ * parameter lists filled in by HPL_pdinfo is handed to HPL_pdtest in turn.
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   int                        nval  [HPL_MAX_PARAM],
+                              nbval [HPL_MAX_PARAM],
+                              pval  [HPL_MAX_PARAM],
+                              qval  [HPL_MAX_PARAM],
+                              nbmval[HPL_MAX_PARAM],
+                              ndvval[HPL_MAX_PARAM],
+                              ndhval[HPL_MAX_PARAM];
+
+   HPL_T_FACT                 pfaval[HPL_MAX_PARAM],
+                              rfaval[HPL_MAX_PARAM];
+
+   HPL_T_TOP                  topval[HPL_MAX_PARAM];
+
+   HPL_T_grid                 grid;
+   HPL_T_palg                 algo;
+   HPL_T_test                 test;
+   int                        L1notran, Unotran, align, equil, in, inb,
+                              inbm, indh, indv, ipfa, ipq, irfa, itop,
+                              mycol, myrow, ns, nbs, nbms, ndhs, ndvs,
+                              npcol, npfs, npqs, nprow, nrfs, ntps, 
+                              rank, size, tswap;
+   HPL_T_ORDER                pmapping;
+   HPL_T_FACT                 rpfa;
+   HPL_T_SWAP                 fswap;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Init( &ARGC, &ARGV );
+#ifdef HPL_CALL_VSIPL
+   vsip_init((void*)0);
+#endif
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Read and check validity of test parameters from input file
+ *
+ * HPL Version 1.0, Linpack benchmark input file
+ * Your message here
+ * HPL.out      output file name (if any)
+ * 6            device out (6=stdout,7=stderr,file)
+ * 4            # of problems sizes (N)
+ * 29 30 34 35  Ns
+ * 4            # of NBs
+ * 1 2 3 4      NBs
+ * 0            PMAP process mapping (0=Row-,1=Column-major)
+ * 3            # of process grids (P x Q)
+ * 2 1 4        Ps
+ * 2 4 1        Qs
+ * 16.0         threshold
+ * 3            # of panel fact
+ * 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
+ * 2            # of recursive stopping criterium
+ * 2 4          NBMINs (>= 1)
+ * 1            # of panels in recursion
+ * 2            NDIVs
+ * 3            # of recursive panel fact.
+ * 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
+ * 1            # of broadcast
+ * 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+ * 1            # of lookahead depth
+ * 0            DEPTHs (>=0)
+ * 2            SWAP (0=bin-exch,1=long,2=mix)
+ * 4            swapping threshold
+ * 0            L1 in (0=transposed,1=no-transposed) form
+ * 0            U  in (0=transposed,1=no-transposed) form
+ * 1            Equilibration (0=no,1=yes)
+ * 8            memory alignment in double (> 0)
+ */
+   HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval,
+               &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval,
+               &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran,
+               &Unotran, &equil, &align );
+/*
+ * Loop over different process grids - Define process grid. Go to bottom
+ * of process grid loop if this case does not use my process.
+ */
+   for( ipq = 0; ipq < npqs; ipq++ )
+   {
+      (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq],
+                            &grid );
+      (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol );
+
+      if( ( myrow < 0 ) || ( myrow >= nprow ) ||
+          ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs;
+
+      for( in = 0; in < ns; in++ )
+      {                            /* Loop over various problem sizes */
+       for( inb = 0; inb < nbs; inb++ )
+       {                        /* Loop over various blocking factors */
+        for( indh = 0; indh < ndhs; indh++ )
+        {                       /* Loop over various lookahead depths */
+         for( itop = 0; itop < ntps; itop++ )
+         {                  /* Loop over various broadcast topologies */
+          for( irfa = 0; irfa < nrfs; irfa++ )
+          {             /* Loop over various recursive factorizations */
+           for( ipfa = 0; ipfa < npfs; ipfa++ )
+           {                /* Loop over various panel factorizations */
+            for( inbm = 0; inbm < nbms; inbm++ )
+            {        /* Loop over various recursive stopping criteria */
+             for( indv = 0; indv < ndvs; indv++ )
+             {          /* Loop over various # of panels in recursion */
+/*
+ * Set up the algorithm parameters
+ */
+              algo.btopo = topval[itop]; algo.depth = ndhval[indh];
+              algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv];
+
+              algo.pfact = rpfa = pfaval[ipfa]; /* rpfa: scratch, reused for rfact below */
+
+              if( L1notran != 0 )   /* L1 in no-transposed form: ...N panel kernels */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrN;
+                 else                           algo.pffun = HPL_pdpanrlN;
+
+                 algo.rfact = rpfa = rfaval[irfa];
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrN;
+                 else                           algo.rffun = HPL_pdrpanrlN;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; /* suffix: L1 form, then U form */
+                 else               algo.upfun = HPL_pdupdateNT;
+              }
+              else                  /* L1 in transposed form: ...T panel kernels */
+              {
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.pffun = HPL_pdpancrT;
+                 else                           algo.pffun = HPL_pdpanrlT;
+
+                 algo.rfact = rpfa = rfaval[irfa];
+                 if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT;
+                 else if( rpfa == HPL_CROUT   ) algo.rffun = HPL_pdrpancrT;
+                 else                           algo.rffun = HPL_pdrpanrlT;
+
+                 if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN;
+                 else               algo.upfun = HPL_pdupdateTT;
+              }
+
+              algo.fswap = fswap; algo.fsthr = tswap;
+              algo.equil = equil; algo.align = align;
+
+              HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] );
+
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+      (void) HPL_grid_exit( &grid );
+label_end_of_npqs: ;   /* target of the goto above; the ';' gives the label a statement */
+   }
+/*
+ * Print ending messages, close output file, exit.
+ */
+   if( rank == 0 )
+   {
+      test.ktest = test.kpass + test.kfail + test.kskip;
+#ifndef HPL_DETAILED_TIMING
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+#else
+      if( test.thrsh > HPL_rzero )
+         HPL_fprintf( test.outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+#endif
+
+      HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest,
+                   "tests with the following results:" );
+      if( test.thrsh > HPL_rzero )
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed and passed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kfail,
+                      "tests completed and failed residual checks," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+      else
+      {
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kpass,
+                      "tests completed without checking," );
+         HPL_fprintf( test.outfp, "         %6d %s\n", test.kskip,
+                      "tests skipped because of illegal input values." );
+      }
+
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( test.outfp, "\nEnd of Tests.\n" );
+      HPL_fprintf( test.outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) /* leave the standard streams open */
+         (void) fclose( test.outfp );
+   }
+#ifdef HPL_CALL_VSIPL
+   vsip_finalize((void*)0);
+#endif
+   MPI_Finalize();
+   exit( 0 );   /* ends the process here; the return below is not reached */
+
+   return( 0 );
+/*
+ * End of main
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c
new file mode 100644
index 000000000..5db4e73d7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c
@@ -0,0 +1,1183 @@
+  /* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    Modifications Copyright (C) 2023 Intel Corporation
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ * 
+ * SPDX-License-Identifier: BSD-4-Clause
+ */ 
+
+
+
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdinfo
+(
+   HPL_T_test *                     TEST,
+   int *                            NS,
+   int *                            N,
+   int *                            NBS,
+   int *                            NB,
+   HPL_T_ORDER *                    PMAPPIN,
+   int *                            NPQS,
+   int *                            P,
+   int *                            Q,
+   int *                            NPFS,
+   HPL_T_FACT *                     PF,
+   int *                            NBMS,
+   int *                            NBM,
+   int *                            NDVS,
+   int *                            NDV,
+   int *                            NRFS,
+   HPL_T_FACT *                     RF,
+   int *                            NTPS,
+   HPL_T_TOP *                      TP,
+   int *                            NDHS,
+   int *                            DH,
+   HPL_T_SWAP *                     FSWAP,
+   int *                            TSWAP,
+   int *                            L1NOTRAN,
+   int *                            UNOTRAN,
+   int *                            EQUIL,
+   int *                            ALIGN
+)
+#else
+void HPL_pdinfo
+( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN )
+   HPL_T_test *                     TEST;
+   int *                            NS;
+   int *                            N;
+   int *                            NBS;
+   int *                            NB;
+   HPL_T_ORDER *                    PMAPPIN;
+   int *                            NPQS;
+   int *                            P;
+   int *                            Q;
+   int *                            NPFS;
+   HPL_T_FACT *                     PF;
+   int *                            NBMS;
+   int *                            NBM;
+   int *                            NDVS;
+   int *                            NDV;
+   int *                            NRFS;
+   HPL_T_FACT *                     RF;
+   int *                            NTPS;
+   HPL_T_TOP *                      TP;
+   int *                            NDHS;
+   int *                            DH;
+   HPL_T_SWAP *                     FSWAP;
+   int *                            TSWAP;
+   int *                            L1NOTRAN;
+   int *                            UNOTRAN;
+   int *                            EQUIL;
+   int *                            ALIGN;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdinfo reads  the  startup  information for the various tests and
+ * transmits it to all processes.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global output)               HPL_T_test *
+ *         On entry, TEST  points to a testing data structure.  On exit,
+ *         the fields of this data structure are initialized as follows:
+ *         TEST->outfp  specifies the output file where the results will
+ *         be printed.  It is only defined and used by  the process 0 of
+ *         the grid.  TEST->thrsh  specifies the threshold value for the
+ *         test ratio.  TEST->epsil is the relative machine precision of
+ *         the distributed computer.  Finally  the test counters, kfail,
+ *         kpass, kskip, ktest are initialized to zero.
+ *
+ * NS      (global output)               int *
+ *         On exit,  NS  specifies the number of different problem sizes
+ *         to be tested. NS is less than or equal to HPL_MAX_PARAM.
+ *
+ * N       (global output)               int *
+ *         On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+ *         the first NS entries of this array contain the  problem sizes
+ *         to run the code with.
+ *
+ * NBS     (global output)               int *
+ *         On exit,  NBS  specifies the number of different distribution
+ *         blocking factors to be tested. NBS must be less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NB      (global output)               int *
+ *         On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NBS entries of this array contain the values of the
+ *         various distribution blocking factors, to run the code with.
+ *
+ * PMAPPIN (global output)               HPL_T_ORDER *
+ *         On exit,  PMAPPIN  specifies the process mapping onto the no-
+ *         des of the  MPI machine configuration.  PMAPPIN  defaults  to
+ *         row-major ordering.
+ *
+ * NPQS    (global output)               int *
+ *         On exit, NPQS  specifies the  number of different values that
+ *         can be used for P and Q, i.e., the number of process grids to
+ *         run  the  code with.  NPQS must be  less  than  or  equal  to
+ *         HPL_MAX_PARAM.
+ *
+ * P       (global output)               int *
+ *         On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of P,
+ *         the number of process rows of the  NPQS grids to run the code
+ *         with.
+ *
+ * Q       (global output)               int *
+ *         On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first NPQS entries of this array contain the values of Q,
+ *         the number of process columns of the  NPQS  grids to  run the
+ *         code with.
+ *
+ * NPFS    (global output)               int *
+ *         On exit, NPFS  specifies the  number of different values that
+ *         can be used for PF : the panel factorization algorithm to run
+ *         the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * PF      (global output)               HPL_T_FACT *
+ *         On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NPFS  entries  of this array  contain  the various
+ *         panel factorization algorithms to run the code with.
+ *
+ * NBMS    (global output)               int *
+ *         On exit,  NBMS  specifies  the  number  of  various recursive
+ *         stopping criteria  to be tested.  NBMS  must be  less than or
+ *         equal to HPL_MAX_PARAM.
+ *
+ * NBM     (global output)               int *
+ *         On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NBMS entries of this array contain the values
+ *         of the various recursive stopping criteria to be tested.
+ *
+ * NDVS    (global output)               int *
+ *         On exit,  NDVS  specifies  the number  of various numbers  of
+ *         panels in recursion to be tested.  NDVS is less than or equal
+ *         to HPL_MAX_PARAM.
+ *
+ * NDV     (global output)               int *
+ *         On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDVS entries of this array contain the values
+ *         of the various numbers of panels in recursion to be tested.
+ *
+ * NRFS    (global output)               int *
+ *         On exit, NRFS  specifies the  number of different values that
+ *         can be used for RF : the recursive factorization algorithm to
+ *         be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+ *
+ * RF      (global output)               HPL_T_FACT *
+ *         On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the first  NRFS  entries  of  this array contain  the various
+ *         recursive factorization algorithms to run the code with.
+ *
+ * NTPS    (global output)               int *
+ *         On exit, NTPS  specifies the  number of different values that
+ *         can be used for the  broadcast topologies  to be tested. NTPS
+ *         is less than or equal to HPL_MAX_PARAM.
+ *
+ * TP      (global output)               HPL_T_TOP *
+ *         On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+ *         the  first NTPS  entries of this  array  contain  the various
+ *         broadcast (along rows) topologies to run the code with.
+ *
+ * NDHS    (global output)               int *
+ *         On exit, NDHS  specifies the  number of different values that
+ *         can be used for the  lookahead depths to be  tested.  NDHS is
+ *         less than or equal to HPL_MAX_PARAM.
+ *
+ * DH      (global output)               int *
+ *         On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+ *         exit, the first NDHS entries of this array contain the values
+ *         of lookahead depths to run the code with.  Each such value is
+ *         greater than or equal to zero (0 means no lookahead).
+ *
+ * FSWAP   (global output)               HPL_T_SWAP *
+ *         On exit, FSWAP specifies the swapping algorithm to be used in
+ *         all tests.
+ *
+ * TSWAP   (global output)               int *
+ *         On exit,  TSWAP  specifies the swapping threshold as a number
+ *         of columns when the mixed swapping algorithm was chosen.
+ *
+ * L1NOTRAN (global output)              int *
+ *         On exit, L1NOTRAN specifies whether the upper triangle of the
+ *         panels of columns  should  be stored  in  no-transposed  form
+ *         (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+ *
+ * UNOTRAN (global output)               int *
+ *         On exit, UNOTRAN  specifies whether the panels of rows should
+ *         be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+ *         form (UNOTRAN=0) during their broadcast.
+ *
+ * EQUIL   (global output)               int *
+ *         On exit,  EQUIL  specifies  whether  equilibration during the
+ *         swap-broadcast  of  the  panel of rows  should  be  performed
+ *         (EQUIL=1) or not (EQUIL=0).
+ *
+ * ALIGN   (global output)               int *
+ *         On exit,  ALIGN  specifies the alignment  of  the dynamically
+ *         allocated buffers in double precision words. ALIGN is greater
+ *         than zero.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+   char                       file[HPL_LINE_MAX], line[HPL_LINE_MAX],
+                              auth[HPL_LINE_MAX], num [HPL_LINE_MAX];
+   FILE                       * infp;
+   int                        * iwork = NULL;
+   char                       * lineptr;
+   int                        error=0, fid, i, j, lwork, maxp, nprocs,
+                              rank, size;
+/* ..
+ * .. Executable Statements ..
+ */
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+   MPI_Comm_size( MPI_COMM_WORLD, &size );
+/*
+ * Initialize the TEST data structure with default values
+ */
+   TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0;
+   TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
+/*
+ * Process 0 reads the input data, broadcasts to other processes and
+ * writes needed information to TEST->outfp.
+ */
+   if( rank == 0 )
+   {
+/*
+ * Open file and skip data file header
+ */
+      if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL )
+      { 
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "cannot open file HPL.dat" );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) fgets( auth, HPL_LINE_MAX - 2, infp );
+/*
+ * Read name and unit number for summary output file
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", file );
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num  );
+      fid = atoi( num );
+      if     ( fid == 6 ) TEST->outfp = stdout;
+      else if( fid == 7 ) TEST->outfp = stderr;
+      else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.",
+                    file );
+         error = 1; goto label_error;
+      }
+/*
+ * Read and check the parameter values for the tests.
+ *
+ * Problem size (>=0) (N)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); 
+      (void) sscanf( line, "%s", num ); *NS = atoi( num );
+      if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d",
+                    "Number of values of N is less than 1 or greater than",
+                    HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( N[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of N less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Block size (>=1) (NB)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBS = atoi( num );
+      if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NB is less than 1 or",
+                    "greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NB[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", 
+                       "Value of NB less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Process grids, mapping, (>=1) (P, Q)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num );
+      *PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPQS = atoi( num );
+      if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of grids is less",
+                    "than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( P[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of P less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPQS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( Q[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of Q less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Check for enough processes in machine configuration
+ */
+      maxp = 0;
+      for( i = 0; i < *NPQS; i++ )
+      { nprocs   = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); }
+      if( maxp > size )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Need at least %d processes for these tests", maxp );
+         error = 1; goto label_error;
+      }
+/*
+ * Checking threshold value (TEST->thrsh)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num );
+/*
+ * Panel factorization algorithm (PF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NPFS = atoi( num );
+      if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "number of values of PFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NPFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) PF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) PF[ i ] = HPL_CROUT;
+         else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING;
+         else              PF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Recursive stopping criterion (>=1) (NBM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NBMS = atoi( num );
+      if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NBMIN",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NBMS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NBM[ i ] = atoi( num ) ) < 1 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NBMIN less than 1" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Number of panels in recursion (>=2) (NDV)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDVS = atoi( num );
+      if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of NDIV",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDVS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         if( ( NDV[ i ] = atoi( num ) ) < 2 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of NDIV less than 2" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Recursive panel factorization (RF)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NRFS = atoi( num );
+      if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of RFACT",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NRFS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) RF[ i ] = HPL_LEFT_LOOKING;
+         else if( j == 1 ) RF[ i ] = HPL_CROUT;
+         else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING;
+         else              RF[ i ] = HPL_RIGHT_LOOKING;
+      }
+/*
+ * Broadcast topology (TP) (0=1rg, 1=1rgM, 2=2rg, 3=2rgM, 4=Lng, 5=LnM)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NTPS = atoi( num );
+      if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of BCAST",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NTPS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1;
+         j = atoi( num );
+         if(      j == 0 ) TP[ i ] = HPL_1RING;
+         else if( j == 1 ) TP[ i ] = HPL_1RING_M;
+         else if( j == 2 ) TP[ i ] = HPL_2RING;
+         else if( j == 3 ) TP[ i ] = HPL_2RING_M;
+         else if( j == 4 ) TP[ i ] = HPL_BLONG;
+         else if( j == 5 ) TP[ i ] = HPL_BLONG_M;
+         else              TP[ i ] = HPL_1RING_M;
+      }
+/*
+ * Lookahead depth (>=0) (NDH)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *NDHS = atoi( num );
+      if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) )
+      {
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d",
+                    "Number of values of DEPTH",
+                    "is less than 1 or greater than", HPL_MAX_PARAM );
+         error = 1; goto label_error;
+      }
+      (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line;
+      for( i = 0; i < *NDHS; i++ )
+      {
+         (void) sscanf( lineptr, "%s", num );
+         lineptr += strlen( num ) + 1;
+         if( ( DH[ i ] = atoi( num ) ) < 0 )
+         {
+            HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                       "Value of DEPTH less than 0" );
+            error = 1; goto label_error;
+         }
+      }
+/*
+ * Swapping algorithm (0,1 or 2) (FSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); j = atoi( num );
+      if(      j == 0 ) *FSWAP = HPL_SWAP00;
+      else if( j == 1 ) *FSWAP = HPL_SWAP01;
+      else if( j == 2 ) *FSWAP = HPL_SW_MIX;
+      else              *FSWAP = HPL_SWAP01;
+/*
+ * Swapping threshold (>=0) (TSWAP)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *TSWAP = atoi( num );
+      if( *TSWAP <= 0 ) *TSWAP = 0;
+/*
+ * L1 in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num );
+      if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; 
+/*
+ * U  in (no-)transposed form (0 or 1)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num );
+      if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0;
+/*
+ * Equilibration (0=no, 1=yes)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *EQUIL = atoi( num );
+      if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1;
+/*
+ * Memory alignment in double precision words (> 0) (ALIGN)
+ */
+      (void) fgets( line, HPL_LINE_MAX - 2, infp );
+      (void) sscanf( line, "%s", num ); *ALIGN = atoi( num );
+      if( *ALIGN <= 0 ) *ALIGN = 4;
+/*
+ * Close input file
+ */
+label_error:
+      if (infp != NULL)
+      	(void) fclose( infp );
+   }
+   else { TEST->outfp = NULL; }
+/*
+ * Check for error on reading input file
+ */
+   (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max,
+                          MPI_COMM_WORLD );
+   if( error )
+   {
+      if( rank == 0 )
+         HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
+                    "Illegal input in file HPL.dat. Exiting ..." );
+      MPI_Finalize();
+#ifdef HPL_CALL_VSIPL
+      (void) vsip_finalize( NULL );
+#endif
+      exit( 1 );
+   }
+/*
+ * Compute and broadcast machine epsilon
+ */
+   TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS );
+/*
+ * Broadcast the checking threshold (TEST->thrsh)
+ */
+   (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0,
+                         MPI_COMM_WORLD );
+/*
+ * Broadcast array sizes
+ */
+   iwork = (int *)malloc( (size_t)(15) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      iwork[ 0] = *NS;      iwork[ 1] = *NBS;
+      iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 );
+      iwork[ 3] = *NPQS;    iwork[ 4] = *NPFS;     iwork[ 5] = *NBMS;
+      iwork[ 6] = *NDVS;    iwork[ 7] = *NRFS;     iwork[ 8] = *NTPS;
+      iwork[ 9] = *NDHS;    iwork[10] = *TSWAP;    iwork[11] = *L1NOTRAN;
+      iwork[12] = *UNOTRAN; iwork[13] = *EQUIL;    iwork[14] = *ALIGN;
+   }
+   (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD );
+   if( rank != 0 )
+   {
+      *NS       = iwork[ 0]; *NBS   = iwork[ 1];
+      *PMAPPIN  = ( iwork[ 2] == 0 ?  HPL_ROW_MAJOR : HPL_COLUMN_MAJOR );
+      *NPQS     = iwork[ 3]; *NPFS  = iwork[ 4]; *NBMS     = iwork[ 5];
+      *NDVS     = iwork[ 6]; *NRFS  = iwork[ 7]; *NTPS     = iwork[ 8];
+      *NDHS     = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11];
+      *UNOTRAN  = iwork[12]; *EQUIL = iwork[13]; *ALIGN    = iwork[14];
+   }
+   if( iwork ) free( iwork );
+/*
+ * Pack information arrays and broadcast
+ */
+   lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + 
+           (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
+   
+   if (lwork < 0)
+	exit(EXIT_FAILURE); 
+
+
+   iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
+   if( rank == 0 )
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { iwork[j] = N [i]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { iwork[j] = NB[i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; }
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( PF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  ) iwork[j] = 0;
+         else if( RF[i] == HPL_CROUT         ) iwork[j] = 1;
+         else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      TP[i] == HPL_1RING   ) iwork[j] = 0;
+         else if( TP[i] == HPL_1RING_M ) iwork[j] = 1;
+         else if( TP[i] == HPL_2RING   ) iwork[j] = 2;
+         else if( TP[i] == HPL_2RING_M ) iwork[j] = 3;
+         else if( TP[i] == HPL_BLONG   ) iwork[j] = 4;
+         else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; }
+
+      if(      *FSWAP == HPL_SWAP00 ) iwork[j] = 0;
+      else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1;
+      else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2;
+      j++;
+   }
+   (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0,
+                         MPI_COMM_WORLD );
+   if ((rank != 0) && (iwork != NULL))
+   {
+      j = 0;
+      for( i = 0; i < *NS;   i++ ) { N [i] = iwork[j]; j++; }
+      for( i = 0; i < *NBS;  i++ ) { NB[i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; }
+      for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; }
+
+      for( i = 0; i < *NPFS; i++ )
+      {
+         if(      iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) PF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; }
+      for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; }
+      for( i = 0; i < *NRFS; i++ )
+      {
+         if(      iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING;
+         else if( iwork[j] == 1 ) RF[i] = HPL_CROUT;
+         else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING;
+         j++;
+      }
+      for( i = 0; i < *NTPS; i++ )
+      {
+         if(      iwork[j] == 0 ) TP[i] = HPL_1RING;
+         else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M;
+         else if( iwork[j] == 2 ) TP[i] = HPL_2RING;
+         else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M;
+         else if( iwork[j] == 4 ) TP[i] = HPL_BLONG;
+         else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M;
+         j++;
+      }
+      for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; }
+
+      if(      iwork[j] == 0 ) *FSWAP = HPL_SWAP00;
+      else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01;
+      else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX;
+      j++;
+   
+      if( iwork ) free( iwork );
+   }
+/*
+ * regurgitate input
+ */
+   if( rank == 0 )
+   {
+      
+      if (TEST->outfp != NULL){
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "HPLinpack 2.3  --  High-Performance Linpack benchmark  --  ",
+          " December 2, 2018" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Written by A. Petitet and R. Clint Whaley,  ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Piotr Luszczek, ",
+          "Innovative Computing Laboratory, UTK" );
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+          "Modified by Julien Langou, ",
+          "University of Colorado Denver");
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "========================================",
+                   "========================================" );
+
+      HPL_fprintf( TEST->outfp, "\n%s\n",
+          "An explanation of the input/output parameters follows:" );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "T/V    : Wall time / encoded variant." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "N      : The order of the coefficient matrix A." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "NB     : The partitioning blocking factor." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "P      : The number of process rows." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "Q      : The number of process columns." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+         "Time   : Time in seconds to solve the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n\n",
+         "Gflops : Rate of execution for solving the linear system." );
+      HPL_fprintf( TEST->outfp, "%s\n",
+          "The following parameter values will be used:" );
+/*
+ * Problem size
+ */
+      HPL_fprintf( TEST->outfp,       "\nN      :" );
+      for( i = 0; i < Mmin( 8, *NS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", N[i]  );
+      if( *NS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", N[i]  );
+         if( *NS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", N[i]  );
+         }
+      }
+/*
+ * Distribution blocking factor
+ */
+      HPL_fprintf( TEST->outfp,       "\nNB     :" );
+      for( i = 0; i < Mmin( 8, *NBS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NB[i] );
+      if( *NBS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NB[i] );
+         if( *NBS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
+         }
+      }
+/*
+ * Process mapping
+ */
+      HPL_fprintf( TEST->outfp,       "\nPMAP   :" );
+      if(      *PMAPPIN == HPL_ROW_MAJOR    )
+         HPL_fprintf( TEST->outfp, " Row-major process mapping" );
+      else if( *PMAPPIN == HPL_COLUMN_MAJOR )
+         HPL_fprintf( TEST->outfp, " Column-major process mapping" );
+/*
+ * Process grid
+ */
+      HPL_fprintf( TEST->outfp,       "\nP      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", P[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", P[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", P[i]  );
+         }
+      }
+      HPL_fprintf( TEST->outfp,       "\nQ      :" );
+      for( i = 0; i < Mmin( 8, *NPQS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", Q[i]  );
+      if( *NPQS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPQS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", Q[i]  );
+         if( *NPQS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPQS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", Q[i]  );
+         }
+      }
+/*
+ * Panel Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nPFACT  :" );
+      for( i = 0; i < Mmin( 8, *NPFS ); i++ )
+      {
+         if(      PF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( PF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( PF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NPFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NPFS ); i++ )
+         {
+            if(      PF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( PF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( PF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NPFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NPFS; i++ )
+            {
+               if(      PF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( PF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( PF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Recursive stopping criterion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNBMIN  :" );
+      for( i = 0; i < Mmin( 8, *NBMS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NBM[i]  );
+      if( *NBMS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NBMS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NBM[i]  );
+         if( *NBMS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NBMS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NBM[i]  );
+         }
+      }
+/*
+ * Number of panels in recursion
+ */
+      HPL_fprintf( TEST->outfp,       "\nNDIV   :" );
+      for( i = 0; i < Mmin( 8, *NDVS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", NDV[i]  );
+      if( *NDVS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDVS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", NDV[i]  );
+         if( *NDVS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDVS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", NDV[i]  );
+         }
+      }
+/*
+ * Recursive Factorization
+ */
+      HPL_fprintf( TEST->outfp,       "\nRFACT  :" );
+      for( i = 0; i < Mmin( 8, *NRFS ); i++ )
+      {
+         if(      RF[i] == HPL_LEFT_LOOKING  )
+            HPL_fprintf( TEST->outfp,       "    Left " );
+         else if( RF[i] == HPL_CROUT         )
+            HPL_fprintf( TEST->outfp,       "   Crout " );
+         else if( RF[i] == HPL_RIGHT_LOOKING )
+            HPL_fprintf( TEST->outfp,       "   Right " );
+      }
+      if( *NRFS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NRFS ); i++ )
+         {
+            if(      RF[i] == HPL_LEFT_LOOKING  )
+               HPL_fprintf( TEST->outfp,       "    Left " );
+            else if( RF[i] == HPL_CROUT         )
+               HPL_fprintf( TEST->outfp,       "   Crout " );
+            else if( RF[i] == HPL_RIGHT_LOOKING )
+               HPL_fprintf( TEST->outfp,       "   Right " );
+         }
+         if( *NRFS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NRFS; i++ )
+            {
+               if(      RF[i] == HPL_LEFT_LOOKING  )
+                  HPL_fprintf( TEST->outfp,       "    Left " );
+               else if( RF[i] == HPL_CROUT         )
+                  HPL_fprintf( TEST->outfp,       "   Crout " );
+               else if( RF[i] == HPL_RIGHT_LOOKING )
+                  HPL_fprintf( TEST->outfp,       "   Right " );
+            }
+         }
+      }
+/*
+ * Broadcast topology
+ */
+      HPL_fprintf( TEST->outfp,       "\nBCAST  :" );
+      for( i = 0; i < Mmin( 8, *NTPS ); i++ )
+      {
+         if(      TP[i] == HPL_1RING   )
+            HPL_fprintf( TEST->outfp,       "   1ring " );
+         else if( TP[i] == HPL_1RING_M )
+            HPL_fprintf( TEST->outfp,       "  1ringM " );
+         else if( TP[i] == HPL_2RING   )
+            HPL_fprintf( TEST->outfp,       "   2ring " );
+         else if( TP[i] == HPL_2RING_M )
+            HPL_fprintf( TEST->outfp,       "  2ringM " );
+         else if( TP[i] == HPL_BLONG   )
+            HPL_fprintf( TEST->outfp,       "   Blong " );
+         else if( TP[i] == HPL_BLONG_M )
+            HPL_fprintf( TEST->outfp,       "  BlongM " );
+      }
+      if( *NTPS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NTPS ); i++ )
+         {
+            if(      TP[i] == HPL_1RING   )
+               HPL_fprintf( TEST->outfp,       "   1ring " );
+            else if( TP[i] == HPL_1RING_M )
+               HPL_fprintf( TEST->outfp,       "  1ringM " );
+            else if( TP[i] == HPL_2RING   )
+               HPL_fprintf( TEST->outfp,       "   2ring " );
+            else if( TP[i] == HPL_2RING_M )
+               HPL_fprintf( TEST->outfp,       "  2ringM " );
+            else if( TP[i] == HPL_BLONG   )
+               HPL_fprintf( TEST->outfp,       "   Blong " );
+            else if( TP[i] == HPL_BLONG_M )
+               HPL_fprintf( TEST->outfp,       "  BlongM " );
+         }
+         if( *NTPS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NTPS; i++ )
+            {
+               if(      TP[i] == HPL_1RING   )
+                  HPL_fprintf( TEST->outfp,       "   1ring " );
+               else if( TP[i] == HPL_1RING_M )
+                  HPL_fprintf( TEST->outfp,       "  1ringM " );
+               else if( TP[i] == HPL_2RING   )
+                  HPL_fprintf( TEST->outfp,       "   2ring " );
+               else if( TP[i] == HPL_2RING_M )
+                  HPL_fprintf( TEST->outfp,       "  2ringM " );
+               else if( TP[i] == HPL_BLONG   )
+                  HPL_fprintf( TEST->outfp,       "   Blong " );
+               else if( TP[i] == HPL_BLONG_M )
+                  HPL_fprintf( TEST->outfp,       "  BlongM " );
+            }
+         }
+      }
+/*
+ * Lookahead depths
+ */
+      HPL_fprintf( TEST->outfp,       "\nDEPTH  :" );
+      for( i = 0; i < Mmin( 8, *NDHS ); i++ )
+         HPL_fprintf( TEST->outfp,       "%8d ", DH[i]  );
+      if( *NDHS > 8 )
+      {
+         HPL_fprintf( TEST->outfp,    "\n        " );
+         for( i = 8; i < Mmin( 16, *NDHS ); i++ )
+            HPL_fprintf( TEST->outfp,    "%8d ", DH[i]  );
+         if( *NDHS > 16 )
+         {
+            HPL_fprintf( TEST->outfp, "\n        " );
+            for( i = 16; i < *NDHS; i++ )
+               HPL_fprintf( TEST->outfp, "%8d ", DH[i]  );
+         }
+      }
+/*
+ * Swapping algorithm
+ */
+      HPL_fprintf( TEST->outfp,       "\nSWAP   :" );
+      if(      *FSWAP == HPL_SWAP00 )
+         HPL_fprintf( TEST->outfp, " Binary-exchange" );
+      else if( *FSWAP == HPL_SWAP01 )
+         HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
+      else if( *FSWAP == HPL_SW_MIX )
+         HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
+/*
+ * L1 storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nL1     :" );
+      if(      *L1NOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * U  storage form
+ */
+      HPL_fprintf( TEST->outfp,       "\nU      :" );
+      if(      *UNOTRAN != 0 )
+         HPL_fprintf( TEST->outfp, " no-transposed form" );
+      else
+         HPL_fprintf( TEST->outfp, " transposed form" );
+/*
+ * Equilibration
+ */
+      HPL_fprintf( TEST->outfp,       "\nEQUIL  :" );
+      if(      *EQUIL != 0 )
+         HPL_fprintf( TEST->outfp, " yes" );
+      else
+         HPL_fprintf( TEST->outfp, " no" );
+/*
+ * Alignment
+ */
+      HPL_fprintf( TEST->outfp,       "\nALIGN  : %d double precision words",
+                   *ALIGN );
+
+      HPL_fprintf( TEST->outfp, "\n\n" );
+/*
+ * For testing only
+ */
+      if( TEST->thrsh > HPL_rzero )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The matrix A is randomly generated for each test." );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "- The following scaled residual check will be computed:" );
+         HPL_fprintf( TEST->outfp, "%s\n",
+            "      ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
+         HPL_fprintf( TEST->outfp, "%s %21.6e\n",
+            "- The relative machine precision (eps) is taken to be     ",
+            TEST->epsil );
+         HPL_fprintf( TEST->outfp, "%s   %11.1f\n\n",
+            "- Computational tests pass if scaled residuals are less than      ",
+            TEST->thrsh );
+       }
+     }
+   }
+/*
+ * End of HPL_pdinfo
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c
new file mode 100644
index 000000000..73a62a7ff
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c
@@ -0,0 +1,438 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdtest
+(
+   HPL_T_test *                     TEST,
+   HPL_T_grid *                     GRID,
+   HPL_T_palg *                     ALGO,
+   const int                        N,
+   const int                        NB
+)
+#else
+void HPL_pdtest
+( TEST, GRID, ALGO, N, NB )
+   HPL_T_test *                     TEST;
+   HPL_T_grid *                     GRID;
+   HPL_T_palg *                     ALGO;
+   const int                        N;
+   const int                        NB;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_pdtest performs  one  test  given a set of parameters such as the
+ * process grid, the  problem size, the distribution blocking factor ...
+ * This function generates  the data, calls  and times the linear system
+ * solver,  checks  the  accuracy  of the  obtained vector solution  and
+ * writes this information to the file pointed to by TEST->outfp.
+ *
+ * Arguments
+ * =========
+ *
+ * TEST    (global input)                HPL_T_test *
+ *         On entry,  TEST  points  to a testing data structure:  outfp
+ *         specifies the output file where the results will be printed.
+ *         It is only defined and used by the process  0  of the  grid.
+ *         thrsh  specifies  the  threshold  value  for the test ratio.
+ *         Concretely, a test is declared "PASSED"  if and only if the
+ *         following inequality is satisfied:
+ *         ||Ax-b||_oo / ( epsil *
+ *                         ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+ *                          N )  < thrsh.
+ *         epsil  is the  relative machine precision of the distributed
+ *         computer. Finally the test counters, kfail, kpass, kskip and
+ *         ktest are updated as follows:  if the test passes,  kpass is
+ *         incremented by one;  if the test fails, kfail is incremented
+ *         by one; if the test is skipped, kskip is incremented by one.
+ *         ktest is left unchanged.
+ *
+ * GRID    (local input)                 HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * ALGO    (global input)                HPL_T_palg *
+ *         On entry,  ALGO  points to  the data structure containing the
+ *         algorithmic parameters to be used for this test.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the order of the coefficient matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix A. NB must be larger than one.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_DETAILED_TIMING
+   double                     HPL_w[HPL_TIMING_N];
+#endif
+   HPL_T_pmat                 mat;
+   double                     wtime[1];
+   int                        info[3];
+   double                     Anorm1, AnormI, Gflops, Xnorm1, XnormI,
+                              BnormI, resid0, resid1;
+   double                     * Bptr;
+   void                       * vptr = NULL;
+   static int                 first=1;
+   int                        ii, ip2, mycol, myrow, npcol, nprow, nq;
+   char                       ctop, cpfact, crfact;
+   time_t                     current_time_start, current_time_end;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+
+   mat.n  = N; mat.nb = NB; mat.info = 0;
+   mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow );
+   nq     = HPL_numroc( N, NB, NB, mycol, 0, npcol );
+   mat.nq = nq + 1;
+/*
+ * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is
+ * N by N+1.  One column is added in every process column for the solve.
+ * The  result  however  is stored in a 1 x N vector replicated in every
+ * process row. In every process, A is lda * (nq+1), x is 1 * nq and the
+ * workspace is mp. 
+ *
+ * Ensure that lda is a multiple of ALIGN and not a power of 2
+ */
+   mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align;
+   do
+   {
+      ii = ( mat.ld += ALGO->align ); ip2 = 1;
+      while( ii > 1 ) { ii >>= 1; ip2 <<= 1; }
+   }
+   while( mat.ld == ip2 );
+/*
+ * Allocate ( align + (ld+1) * nq ) doubles; freed via vptr on every exit
+ */
+   vptr = (void*)malloc( ( (size_t)(ALGO->align) + 
+                           (size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
+                         sizeof(double) );
+   info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
+   (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
+                          GRID->all_comm );
+   if( info[0] != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest",
+                    "[%d,%d] %s", info[1], info[2],
+                    "Memory allocation failed for A, x and b. Skip." );
+      (TEST->kskip)++;
+      /* some processes might have succeeded with allocation */
+      if (vptr) free(vptr);
+      return;
+   }
+/*
+ * Generate matrix and right-hand-side, [ A | b ] which is N by N+1.
+ */
+   mat.A  = (double *)HPL_PTR( vptr,
+                               ((size_t)(ALGO->align) * sizeof(double) ) );
+   mat.X  = Mptr( mat.A, 0, mat.nq, mat.ld );
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+#ifdef HPL_CALL_VSIPL
+   mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
+                                 (vsip_length)(mat.ld * mat.nq),
+                                 VSIP_MEM_NONE );
+#endif
+/*
+ * Solve linear system; HPL_pdgesv is bracketed by timer 0 and time() calls
+ */
+   HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm );
+   time( &current_time_start );
+   HPL_ptimer( 0 );
+   HPL_pdgesv( GRID, ALGO, &mat );
+   HPL_ptimer( 0 );
+   time( &current_time_end );
+#ifdef HPL_CALL_VSIPL
+   (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); 
+   vsip_blockdestroy_d( mat.block );
+#endif
+/*
+ * Combine timer 0 across GRID->all_comm into wtime[0] and print the results
+ */
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       1, 0, wtime );
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      if( first )
+      {
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "T/V                N    NB     P     Q",
+                      "               Time                 Gflops" );
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "----------------------------------------",
+                      "----------------------------------------" );
+         if( TEST->thrsh <= HPL_rzero ) first = 0;
+      }
+/*
+ * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve.
+ * That is 2/3 N^3 + 3/2 N^2 in total; 1.0e+9 scales the rate to Gflops.
+ */
+      Gflops = ( ( (double)(N) /   1.0e+9 ) * 
+                 ( (double)(N) / wtime[0] ) ) * 
+                 ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) );
+
+      cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ?
+                   (char)('C') : (char)('R') ) );
+      crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == 
+                   (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ?  (char)('L') :
+                 ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? 
+                   (char)('C') : (char)('R') ) );
+
+      if(      ALGO->btopo == HPL_1RING   ) ctop = '0';
+      else if( ALGO->btopo == HPL_1RING_M ) ctop = '1';
+      else if( ALGO->btopo == HPL_2RING   ) ctop = '2';
+      else if( ALGO->btopo == HPL_2RING_M ) ctop = '3';
+      else if( ALGO->btopo == HPL_BLONG   ) ctop = '4';
+      else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5';
+      /* Note: ctime() text already ends in '\n'; "%s\n" adds an empty line. */
+      if( wtime[0] > HPL_rzero ) {
+         HPL_fprintf( TEST->outfp,
+             "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f    %19.4e\n",
+             ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ),
+             ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin,
+             N, NB, nprow, npcol, wtime[0], Gflops );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) );
+         HPL_fprintf( TEST->outfp,
+             "HPL_pdgesv() end time   %s\n", ctime( &current_time_end ) );
+      }
+   }
+#ifdef HPL_DETAILED_TIMING
+   HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME,
+                       HPL_TIMING_N, HPL_TIMING_BEG, HPL_w );
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V",
+                   "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" );
+/*
+ * Recursive panel factorization
+ */
+      if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time rfact . . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization
+ */
+      if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time pfact . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] );
+/*
+ * Panel factorization (swap)
+ */
+      if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time mxswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] );
+/*
+ * Update
+ */
+      if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time update  . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] );
+/*
+ * Update (swap)
+ */
+      if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "+ Max aggregated wall time laswp . . : %18.2f\n",
+                      HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] );
+/*
+ * Upper triangular system solve
+ */
+      if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero )
+         HPL_fprintf( TEST->outfp,
+                      "Max aggregated wall time up tr sv  . : %18.2f\n",
+                      HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] );
+
+      if( TEST->thrsh <= HPL_rzero )
+         HPL_fprintf( TEST->outfp, "%s%s\n",
+                      "========================================",
+                      "========================================" );
+   }
+#endif
+/*
+ * Quick return, if I am not interested in checking the computations
+ */
+   if( TEST->thrsh <= HPL_rzero )
+   { (TEST->kpass)++; if( vptr ) free( vptr ); return; }
+/*
+ * Check info returned by solve
+ */
+   if( mat.info != 0 )
+   {
+      if( ( myrow == 0 ) && ( mycol == 0 ) )
+         HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", 
+                    "Error code returned by solve is", mat.info, "skip" );
+      (TEST->kskip)++;
+      if( vptr ) free( vptr ); return;
+   }
+/*
+ * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x,
+ * and norm inf of b - A x. Display residual checks.
+ */
+   HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );
+   Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
+   AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
+/*
+ * Because x is distributed in process rows, switch the norms
+ */
+   XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 );
+   Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 );
+/*
+ * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to
+ * find the max (in the col). Then (3) broadcast along the rows so that every
+ * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5]
+ * for the entries of B, it is very likely that BnormI (<=,~) 0.5.
+ */
+   Bptr = Mptr( mat.A, 0, nq, mat.ld );
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){
+      if( mat.mp > 0 )
+      {
+         BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI );
+      }
+      else
+      {
+         BnormI = HPL_rzero;
+      }
+      (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max,
+                             GRID->col_comm );
+   }
+   (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE,
+                          HPL_indxg2p( N, NB, NB, 0, npcol ),
+                          GRID->row_comm );
+/*
+ * If I own b, compute ( b - A x ) and ( - A x ) otherwise
+ */
+   if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );
+   }
+   else if( nq > 0 )
+   {
+      HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
+                 mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 );
+   }
+   else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; }
+/*
+ * Reduce the distributed residual in process column 0
+ */
+   if( mat.mp > 0 )
+      (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0,
+                         GRID->row_comm );
+/*
+ * Compute || b - A x ||_oo
+ */
+   resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld );
+/*
+ * Computes and displays norms, residuals ...
+ */
+   if( N <= 0 )
+   {
+      resid1 = HPL_rzero;
+   }
+   else
+   {
+      resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) );
+   }
+
+   if( resid1 < TEST->thrsh ) (TEST->kpass)++;
+   else                       (TEST->kfail)++;
+
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      HPL_fprintf( TEST->outfp, "%s%s\n",
+                   "----------------------------------------",
+                   "----------------------------------------" );
+      HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n",
+         "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1,
+         " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) );
+
+      if(resid1 >= TEST->thrsh ) 
+      {
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||Ax-b||_oo  . . . . . . . . . . . . . . . . . = ", resid0 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||A||_1  . . . . . . . . . . . . . . . . . . . = ", Anorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||x||_1  . . . . . . . . . . . . . . . . . . . = ", Xnorm1 );
+         HPL_fprintf( TEST->outfp, "%s%18.6f\n",
+         "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI );
+      }
+   }
+   if( vptr ) free( vptr );
+/*
+ * End of HPL_pdtest
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile
new file mode 100644
index 000000000..cfc96e667
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile
@@ -0,0 +1,94 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Variables not defined here (INCdir, BINdir, CC, ...) must come from Make.inc.
+# ######################################################################
+# Headers listed as prerequisites of every object file built below.
+INCdep           = \
+   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h   \
+   $(INCdir)/hpl_gesv.h   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h  \
+   $(INCdir)/hpl_panel.h  $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \
+   $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h
+#
+## Executable names ####################################################
+# 
+xhpl             = $(BINdir)/xhpl
+#
+## Object files ########################################################
+#
+HPL_pteobj       = \
+   HPL_pddriver.o         HPL_pdinfo.o           HPL_pdtest.o
+#
+## Targets #############################################################
+.PHONY  : all dexe clean
+all     : dexe
+# dexe is an alias for the dexe.grd stamp file touched after a good link.
+dexe    : dexe.grd
+# Copy ../HPL.dat into $(BINdir) whenever the local copy is out of date.
+$(BINdir)/HPL.dat : ../HPL.dat
+	( $(CP) ../HPL.dat $(BINdir) )
+# Link xhpl, refresh $(BINdir)/HPL.dat, then record success in the stamp.
+dexe.grd: $(HPL_pteobj) $(HPLlib)
+	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
+	$(MAKE) $(BINdir)/HPL.dat
+	$(TOUCH) dexe.grd
+#
+# ######################################################################
+# Source names are spelled out (no $<) to stay usable with non-GNU makes.
+HPL_pddriver.o         : ../HPL_pddriver.c         $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pddriver.c
+HPL_pdinfo.o           : ../HPL_pdinfo.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdinfo.c
+HPL_pdtest.o           : ../HPL_pdtest.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_pdtest.c
+#
+# ######################################################################
+# Remove the objects and the stamp file created in this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/dexe.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/dexe.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c
new file mode 100644
index 000000000..202416079
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c
@@ -0,0 +1,358 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int         HPL_ptimer_disabled; /* nonzero: HPL_ptimer returns at once */
+static double      HPL_ptimer_cpusec   [HPL_NPTIMER], /* accumulated cpu   */
+                   HPL_ptimer_cpustart [HPL_NPTIMER], /* cpu  start stamp  */
+                   HPL_ptimer_wallsec  [HPL_NPTIMER], /* accumulated wall  */
+                   HPL_ptimer_wallstart[HPL_NPTIMER]; /* wall start / idle */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * HPL_ptimer_boot clears the accumulated cpu and wall times of every
+ * timer, marks every timer as not started, and turns the timers on.
+ */
+   int                        timer = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   while( timer < HPL_NPTIMER )
+   {
+      HPL_ptimer_wallsec  [timer] = HPL_rzero;
+      HPL_ptimer_cpusec   [timer] = HPL_rzero;
+      HPL_ptimer_wallstart[timer] = HPL_PTIMER_STARTFLAG;
+      HPL_ptimer_cpustart [timer] = HPL_PTIMER_STARTFLAG;
+      timer++;
+   }
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int                  I;
+#endif
+{
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer is a start/stop "stopwatch" that accumulates  cpu and wall
+ * time, in seconds, for the timer of index I.  When timer I is idle the
+ * call records its start stamps;  when timer I is running the call adds
+ * the time elapsed since the start to the totals and marks it idle. The
+ * index I must lie in [0..HPL_NPTIMER).
+ *
+ * While the timers are disabled,  see  HPL_ptimer_disable(),  calls  to
+ * this routine return immediately;  this keeps  selected code  sections
+ * out of the timings  even when they call  routines that  contain timer
+ * calls.  HPL_ptimer_enable()  turns the timers back on.  The  value of
+ * a timer is retrieved with
+ *
+ * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME or HPL_CPU_PTIME, I )
+ *
+ * HPL_ptimer_boot() must have been called before any of these routines.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to start or stop.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_ptimer_disabled ) return;
+/*
+ * A running timer is stopped:  the elapsed cpu and wall intervals are
+ * added to the totals and the timer is flagged as idle again.
+ */
+   if( HPL_ptimer_wallstart[I] != HPL_PTIMER_STARTFLAG )
+   {
+      HPL_ptimer_cpusec   [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I];
+      HPL_ptimer_wallsec  [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I];
+      HPL_ptimer_wallstart[I]  = HPL_PTIMER_STARTFLAG;
+      return;
+   }
+
+/* Otherwise the timer is idle:  record the wall and cpu start stamps. */
+   HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+   HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+/*
+ * End of HPL_ptimer
+ */
+} 
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable turns the timers back on:  subsequent  calls  to
+ * HPL_ptimer start and stop timers again instead of being ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+/*
+ * End of HPL_ptimer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable turns the timers off:  until they  are  enabled
+ * again, calls to HPL_ptimer return without touching any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+/*
+ * End of HPL_ptimer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME          TMTYPE,
+   const int                  I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_PTIME          TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns the wall or cpu time, whichever  TMTYPE
+ * selects, accumulated so far in timer I.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE selects the kind of time returned:
+ *            = HPL_WALL_PTIME : accumulated wall clock time,
+ *            = any other value, e.g. HPL_CPU_PTIME : accumulated
+ *              CPU time (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double          elapsed;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * The clock matching TMTYPE is probed first.  When it is not available
+ * on this machine,  i.e. it reports  HPL_PTIMER_ERROR,  that  flag  is
+ * returned;  otherwise the time accumulated in timer I is returned.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+/*    wall clock time requested */
+      elapsed = ( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ?
+                  HPL_PTIMER_ERROR : HPL_ptimer_wallsec[I] );
+   }
+   else
+   {
+/*    cpu time requested (default) */
+      elapsed = ( HPL_ptimer_cputime()  == HPL_PTIMER_ERROR ?
+                  HPL_PTIMER_ERROR : HPL_ptimer_cpusec [I] );
+   }
+
+   return( elapsed );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm                   COMM,
+   const HPL_T_PTIME_OP       OPE,
+   const HPL_T_PTIME          TMTYPE,
+   const int                  N,
+   const int                  IBEG,
+   double                     * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int                  IBEG, N;
+   const HPL_T_PTIME_OP       OPE;
+   const HPL_T_PTIME          TMTYPE;
+   MPI_Comm                   COMM;
+   double                     * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine  combines the timing information stored on a scope
+ * of processes into the user TIMES array.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)        MPI_Comm
+ *         The MPI communicator  identifying  the process  collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)              const HPL_T_PTIME_OP
+ *         On entry, OPE specifies the combine operation:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME  get sum of times across processes.
+ *
+ * TMTYPE  (global input)              const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME  : CPU time is returned (default).
+ *
+ * N       (global input)              const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)              const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)             double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         this array contains the requested timing information,  or
+ *         HPL_PTIMER_ERROR in every entry if the clock is unavailable.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer has been disabled for combine operation -  copy timing informa-
+ * tion into user times array.  If wall- or  cpu-time are not available,
+ * fill TIMES with HPL_PTIMER_ERROR, restore HPL_ptimer_disabled, return.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+      {
+         for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+         HPL_ptimer_disabled = tmpdis; return;
+      }
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i];
+   }
+   else
+   {
+      if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+      {
+         for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR;
+         HPL_ptimer_disabled = tmpdis; return;
+      }
+      for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i];
+   }
+/*
+ * Combine all nodes information, restore HPL_ptimer_disabled, and return
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+   if(      OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME  )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;
+/* t0 is latched while it is still 0;  results are relative to it. */
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;
+   d = (double)(t1) / cps;
+   return( d );
+}
+ 
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+/* User-mode clock ticks from times() divided by the ticks per second. */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+ 
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct rusage              ruse;
+/* User cpu time of this process:  seconds plus microseconds. */
+   (void) getrusage( RUSAGE_SELF, &ruse );
+   return( (double)( ruse.ru_utime.tv_sec  ) +
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/ 
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+ 
+#if defined( HPL_USE_GETTIMEOFDAY )
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   static long                sec0 = 0, usec0;
+   struct timeval             now;
+/* Sample the clock;  while sec0 is still 0 the sample becomes the origin. */
+   (void) gettimeofday( &now, NULL );
+   if( sec0 == 0 )
+   {
+      sec0  = now.tv_sec;
+      usec0 = now.tv_usec;
+      return( HPL_rzero );
+   }
+
+/* Seconds elapsed since the origin, plus the microsecond difference. */
+   return( (double)( now.tv_sec - sec0 ) +
+           ( (double)( now.tv_usec-usec0 ) / 1000000.0 ) );
+}
+
+#else
+/* Default variant: return the value of MPI_Wtime(). */
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   return( MPI_Wtime() );
+}
+ 
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile
new file mode 100644
index 000000000..971500764
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+# Make.inc presumably supplies the variables not set here (INCdir, CC, ...)
+# ######################################################################
+# Headers on which every object of this directory depends
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_ptimer.h
+#
+## Object files ########################################################
+# Objects archived into $(HPLlib) by the lib.grd rule
+HPL_ptiobj       = \
+   HPL_ptimer.o           HPL_ptimer_cputime.o   HPL_ptimer_walltime.o
+#
+## Targets #############################################################
+#
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# all, lib and clean name actions, not files, hence .PHONY above
+lib.grd : $(HPL_ptiobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object depends on its source in .. and on every header in INCdep
+HPL_ptimer.o           : ../HPL_ptimer.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer.c
+HPL_ptimer_cputime.o   : ../HPL_ptimer_cputime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_cputime.c
+HPL_ptimer_walltime.o  : ../HPL_ptimer_walltime.c  $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_ptimer_walltime.c
+#
+# ######################################################################
+# Remove the object files and the .grd stamp files of this directory
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int                    HPL_timer_disabled;
+static double                 HPL_timer_cpusec   [HPL_NTIMER],
+                              HPL_timer_cpustart [HPL_NTIMER],
+                              HPL_timer_wallsec  [HPL_NTIMER],
+                              HPL_timer_wallstart[HPL_NTIMER];
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot zeroes the accumulated cpu/wall time of every timer,
+ * flags every timer as not started, and enables HPL_timer.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        k = HPL_NTIMER;
+/* ..
+ * .. Executable Statements ..
+ */
+   while( k-- > 0 )
+   {
+      HPL_timer_wallsec  [k] = HPL_timer_cpusec  [k] = HPL_rzero;
+      HPL_timer_wallstart[k] = HPL_timer_cpustart[k] = HPL_TIMER_STARTFLAG;
+   }
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer provides a  "stopwatch"  functionality  cpu/wall  timer  in
+ * seconds.  Up to HPL_NTIMER timers can be functioning at once.  The
+ * first call starts the timer,  and the second stops it.  This  routine
+ * can be  disabled  by calling  HPL_timer_disable(),  so that calls to
+ * the timer are ignored.  This feature can be used to make sure certain
+ * sections of code do not affect timings,  even  if  they call routines
+ * which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+ * the  timer  functionality.  One  can retrieve  the current value of a
+ * timer by calling
+ *
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *
+ * where  I  is the timer index in [0..HPL_NTIMER).  To initialize the
+ * timer functionality, one must have called  HPL_timer_boot()  prior to
+ * any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.  A value of I
+ *         outside [0..HPL_NTIMER) is ignored.
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_timer_disabled || ( I < 0 ) || ( I >= HPL_NTIMER ) ) return;
+/*
+ * If timer has not been started, start it.  Otherwise,  stop it and add
+ * interval to count
+ */
+   if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG )
+   {
+      HPL_timer_wallstart[I] = HPL_timer_walltime();
+      HPL_timer_cpustart [I] = HPL_timer_cputime ();
+   }
+   else
+   {
+      HPL_timer_cpusec   [I] += HPL_timer_cputime () - HPL_timer_cpustart [I];
+      HPL_timer_wallsec  [I] += HPL_timer_walltime() - HPL_timer_wallstart[I];
+      HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG;
+   }
+/*
+ * End of HPL_timer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable clears the disabled flag,  so that subsequent calls
+ * to HPL_timer start and stop timers again.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable raises the disabled flag,  so that subsequent calls
+ * to HPL_timer return without starting or stopping any timer.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire returns  wall- or cpu- time that has accumulated in
+ * timer I,  or HPL_TIMER_ERROR when that time cannot be provided.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)              const HPL_T_TIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_TIME : wall clock time is returned,
+ *            = HPL_CPU_TIME  : CPU time is returned (default).
+ *
+ * I       (global input)              const int
+ *         On entry, I specifies the timer to return.  When I is outside
+ *         [0..HPL_NTIMER), HPL_TIMER_ERROR is returned.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     elapsed = HPL_TIMER_ERROR;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * An invalid timer index,  or wall- or cpu-time not being available on
+ * this machine, yields HPL_TIMER_ERROR.  The timer functions are called
+ * here only to probe for that error value.
+ */
+   if( ( I < 0 ) || ( I >= HPL_NTIMER ) ) return( elapsed );
+
+   if( TMTYPE == HPL_WALL_TIME )
+   {
+      if( HPL_timer_walltime() != HPL_TIMER_ERROR )
+         elapsed = HPL_timer_wallsec[I];
+   }
+   else
+   {
+      if( HPL_timer_cputime()  != HPL_TIMER_ERROR )
+         elapsed = HPL_timer_cpusec [I];
+   }
+   return( elapsed );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_cputime returns the cpu time.  If HPL_USE_CLOCK is defined,
+ * the  clock() function is used to return an approximation of processor
+ * time used by the program.  The value returned is the CPU time used so
+ * far as a clock_t;  to get the number of seconds used,  the result  is
+ * divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+ * standard library.  If  HPL_USE_TIMES is defined, the times() function
+ * is used instead.  This  function  returns  the current process times.
+ * times() returns the number of clock ticks that have elapsed since the
+ * system has been up.  Otherwise and by default,  the  standard library
+ * function getrusage() is used.
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#if   defined( HPL_USE_CLOCK )
+ 
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/*
+ * Returns the cpu seconds elapsed since the first call,  using clock().
+ * A separate flag marks the origin as set, as clock() may return zero.
+ */
+   static clock_t             t0;
+   static int                 started = 0;
+   clock_t                    t1 = clock();
+   if( !started ) { t0 = t1; started = 1; }
+   return( (double)( t1 - t0 ) / (double)( CLOCKS_PER_SEC ) );
+}
+
+#elif defined( HPL_USE_TIMES )
+ 
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+/* User cpu time from times(), in seconds; the tick rate is read once. */
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero;
+ 
+   if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );
+}
+ 
+/* #elif defined( HPL_USE_GETRUSAGE )  */
+#else
+ 
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct rusage              usage;
+   (void) getrusage( RUSAGE_SELF, &usage );  /* only ru_utime is read */
+   return( ( (double)( usage.ru_utime.tv_usec ) / 1000000.0 ) +
+           (double)( usage.ru_utime.tv_sec ) );
+}
+
+/* 
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/* 
+ * -- High Performance Computing Linpack Benchmark (HPL)                
+ *    HPL - 2.3 - December 2, 2018                          
+ *    Antoine P. Petitet                                                
+ *    University of Tennessee, Knoxville                                
+ *    Innovative Computing Laboratory                                 
+ *    (C) Copyright 2000-2008 All Rights Reserved                       
+ *                                                                      
+ * -- Copyright notice and Licensing terms:                             
+ *                                                                      
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:                                                             
+ *                                                                      
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.        
+ *                                                                      
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution. 
+ *                                                                      
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:                 
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.             
+ *                                                                      
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.                                                          
+ *                                                                      
+ * -- Disclaimer:                                                       
+ *                                                                      
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+ * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */ 
+/*
+ * Include files
+ */
+#include "hpl.h"
+ 
+/* 
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * 
+ *
+ * ---------------------------------------------------------------------
+ */ 
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+/*
+ * First call fixes the time origin and returns zero; later calls return
+ * the seconds elapsed since then.
+ */
+   static long                origin_sec = 0, origin_usec;
+   struct timeval             now;
+
+   (void) gettimeofday( &now, NULL );
+   if( origin_sec != 0 )
+      return( (double)( now.tv_sec - origin_sec ) +
+              ( (double)( now.tv_usec-origin_usec ) / 1000000.0 ) );
+   origin_sec  = now.tv_sec;
+   origin_usec = now.tv_usec;
+   return( HPL_rzero );
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc
@@ -0,0 +1 @@
+../../../Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile
new file mode 100644
index 000000000..b8009e88a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile
@@ -0,0 +1,84 @@
+#  
+#  -- High Performance Computing Linpack Benchmark (HPL)                
+#     HPL - 2.3 - December 2, 2018                          
+#     Antoine P. Petitet                                                
+#     University of Tennessee, Knoxville                                
+#     Innovative Computing Laboratory                                 
+#     (C) Copyright 2000-2008 All Rights Reserved                       
+#                                                                       
+#  -- Copyright notice and Licensing terms:                             
+#                                                                       
+#  Redistribution  and  use in  source and binary forms, with or without
+#  modification, are  permitted provided  that the following  conditions
+#  are met:                                                             
+#                                                                       
+#  1. Redistributions  of  source  code  must retain the above copyright
+#  notice, this list of conditions and the following disclaimer.        
+#                                                                       
+#  2. Redistributions in binary form must reproduce  the above copyright
+#  notice, this list of conditions,  and the following disclaimer in the
+#  documentation and/or other materials provided with the distribution. 
+#                                                                       
+#  3. All  advertising  materials  mentioning  features  or  use of this
+#  software must display the following acknowledgement:                 
+#  This  product  includes  software  developed  at  the  University  of
+#  Tennessee, Knoxville, Innovative Computing Laboratory.             
+#                                                                       
+#  4. The name of the  University,  the name of the  Laboratory,  or the
+#  names  of  its  contributors  may  not  be used to endorse or promote
+#  products  derived   from   this  software  without  specific  written
+#  permission.                                                          
+#                                                                       
+#  -- Disclaimer:                                                       
+#                                                                       
+#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#  
+include Make.inc
+#
+# Variables used below but not assigned in this file  (INCdir, ARCHIVER,
+# ARFLAGS, HPLlib, RANLIB, TOUCH, CCFLAGS) are expected from Make.inc.
+INCdep           = \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h
+#
+## Object files ########################################################
+#
+HPL_timobj       = \
+   HPL_timer.o            HPL_timer_cputime.o    HPL_timer_walltime.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files, hence the .PHONY below.
+all     : lib
+.PHONY  : all lib clean
+lib     : lib.grd
+# lib.grd is a stamp file, touched once the objects are in $(HPLlib).
+lib.grd : $(HPL_timobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the source of the same name found in ..
+HPL_timer.o            : ../HPL_timer.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer.c
+HPL_timer_cputime.o    : ../HPL_timer_cputime.c    $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer_cputime.c
+HPL_timer_walltime.o   : ../HPL_timer_walltime.c   $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_timer_walltime.c
+#
+# ######################################################################
+# Remove the object files and the *.grd stamp files of this directory.
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg
new file mode 100755
index 000000000..9af78f844
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg
new file mode 100755
index 000000000..73e4391cf
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg
new file mode 100755
index 000000000..23795f8b9
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg
new file mode 100755
index 000000000..c294e0d07
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg
new file mode 100755
index 000000000..f37187f13
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html
new file mode 100755
index 000000000..49a4bd318
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_abort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_abort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_abort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_abort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html
new file mode 100755
index 000000000..591cdd596
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_all_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_all_reduce</B> All reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_all_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_all_reduce</B>
+performs   a   global   reduce  operation  across  all
+processes of a group leaving the results on all processes.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html
new file mode 100755
index 000000000..86ae426ad
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html
@@ -0,0 +1,41 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_barrier HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_barrier</B> Barrier operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_barrier(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_barrier</B>
+blocks the caller until all process members have called it.
+The  call  returns  at any process  only after all group members have
+entered the call.
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html
new file mode 100755
index 000000000..079325ed7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html
@@ -0,0 +1,46 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bcast</B> Perform the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bcast(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bcast</B>
+broadcasts  the  current  panel.  Successful  completion is
+indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
+HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
+not completed, in which case this function should be called again.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html
new file mode 100755
index 000000000..0f9a9e1ae
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html
@@ -0,0 +1,37 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_binit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_binit</B> Initialize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_binit(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_binit</B>
+initializes  a  row  broadcast.  Successful  completion  is
+indicated by the returned error code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html
new file mode 100755
index 000000000..6e24b2c2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_broadcast HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_broadcast</B> Broadcast operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_broadcast(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_broadcast</B>
+broadcasts  a message from the process with rank ROOT to
+all processes in the group.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html
new file mode 100755
index 000000000..f1dd51e7b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_bwait HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_bwait</B> Finalize the row broadcast.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_bwait(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_bwait</B>
+waits  for  the  row  broadcast  of  the current  panel  to
+terminate.  Successful completion is indicated by the returned  error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html
new file mode 100755
index 000000000..4b98963ac
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html
@@ -0,0 +1,42 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_copyL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_copyL</B> Copy the current panel into a contiguous workspace.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_copyL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_copyL</B>
+copies  the  panel of columns, the L1 replicated submatrix,
+the pivot array  and  the info scalar into a contiguous workspace for
+later broadcast.
+ 
+The copy of this panel  into  a contiguous buffer  can be enforced by
+specifying -DHPL_COPY_L in the architecture specific Makefile.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html
new file mode 100755
index 000000000..c34d0b2e8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_daxpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_daxpy</B> y := y + alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_daxpy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_daxpy</B>
+scales the vector x by alpha and adds it to y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html
new file mode 100755
index 000000000..2a4a485b5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dcopy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dcopy</B> y := x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dcopy(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dcopy</B>
+copies the vector x into the vector y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html
new file mode 100755
index 000000000..667c0ff01
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemm</B> C := alpha * op(A) * op(B) + beta * C.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSA</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANSB</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>C</CODE>,
+<CODE>const int</CODE>
+<CODE>LDC</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemm</B>
+performs one of the matrix-matrix operations
+ 
+    C := alpha * op( A ) * op( B ) + beta * C
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
+an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+</PRE>
+<PRE>
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+</PRE>
+<PRE>
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+</PRE>
+<PRE>
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+</PRE>
+<PRE>
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+</PRE>
+<PRE>
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dtrsm.html">HPL_dtrsm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html
new file mode 100755
index 000000000..d5921a9b2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html
@@ -0,0 +1,146 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dgemv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dgemv</B> y := beta * y + alpha * op(A) * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dgemv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>const double</CODE>
+<CODE>BETA</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dgemv</B>
+performs one of the matrix-vector operations
+ 
+    y := alpha * op( A ) * x + beta * y,
+ 
+ where op( X ) is one of
+ 
+    op( X ) = X   or   op( X ) = X^T.
+ 
+where alpha and beta are scalars, x and y are vectors and  A  is an m
+by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When  BETA  is
+        supplied as zero then  Y  need not be set on input.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        Before entry with BETA non-zero, the incremented array Y must
+        contain the vector  y.  On exit,  Y  is  overwritten  by  the
+        updated vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html
new file mode 100755
index 000000000..e4ea948ed
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html
@@ -0,0 +1,124 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dger HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dger</B> A := alpha * x * y^T + A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dger(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dger</B>
+performs the rank 1 operation
+ 
+    A := alpha * x * y^T + A,
+ 
+where alpha is a scalar,  x is an m-element vector, y is an n-element
+vector and A is an m by n matrix.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>,
+<A HREF="HPL_dtrsv.html">HPL_dtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html
new file mode 100755
index 000000000..b64d34e0c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlacpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlacpy</B> B := A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlacpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlacpy</B>
+copies an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlatcpy.html">HPL_dlatcpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html
new file mode 100755
index 000000000..cb87a90ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlamch(</CODE>
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlamch</B>
+determines  machine-specific  arithmetic constants such as
+the relative machine precision  (eps),  the safe minimum (sfmin) such
+that 1 / sfmin does not overflow, the base of the machine (base), the
+precision (prec), the  number of (base) digits  in the  mantissa (t),
+whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
+minimum exponent before  (gradual)  underflow (emin),  the  underflow
+threshold (rmin) base**(emin-1), the largest exponent before overflow
+(emax), the overflow threshold (rmax) (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>References</H1>
+This function has been manually translated from the Fortran 77 LAPACK
+auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
+based on the function ENVRON  by Malcolm and incorporated suggestions
+by Gentleman and Marovich. See                                       
+ 
+Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
+arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).                 
+ 
+Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
+properties of  floating point arithmetic units.,  Comms. of  the ACM,
+17, 276-277 (1974).
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html
new file mode 100755
index 000000000..ce276e257
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_dlange(</CODE>
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a matrix A:              
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaprnt.html">HPL_dlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html
new file mode 100755
index 000000000..f589ee2bb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaprnt</B> Print the matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaprnt(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaprnt</B>
+prints to standard error an M-by-N matrix A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+</PRE>
+<PRE>
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+</PRE>
+<PRE>
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html
new file mode 100755
index 000000000..8e36cf6c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp00N</B> performs a series of row interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp00N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp00N</B>
+performs a series of local row interchanges on a matrix
+A. One row interchange is initiated for rows 0 through M-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html
new file mode 100755
index 000000000..aa8861d10
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01N</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01N</B>
+copies  scattered rows  of  A  into itself  and into an
+array  U.  The row offsets in  A  of the source rows are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of  LINDXAU indicates that the array destination is U,
+and A otherwise.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html
new file mode 100755
index 000000000..9697471c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html
@@ -0,0 +1,110 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp01T</B> copies rows of A into itself and into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp01T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp01T</B>
+copies  scattered rows  of  A  into itself  and into an
+array U.  The row offsets in  A  of the source rows  are specified by
+LINDXA.  The destinations of those rows are specified by  LINDXAU.  A
+positive value of LINDXAU indicates that the array  destination is U,
+and A otherwise. Rows of A are stored as columns in U.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html
new file mode 100755
index 000000000..d4e1a0cf8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html
@@ -0,0 +1,107 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp02N</B> pack rows of A into columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp02N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>W0</CODE>,
+<CODE>double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp02N</B>
+packs scattered rows of an array  A  into workspace  W.
+The row offsets in A are specified by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html
new file mode 100755
index 000000000..f5c4127b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03N</B> copy rows of W into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03N</B>
+copies columns of  W  into  rows  of an  array  U.  The
+destination in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html
new file mode 100755
index 000000000..010175313
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp03T</B> copy columns of W into U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp03T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp03T</B>
+copies  columns of W into an array U.  The  destination
+in U of these columns contained in W is stored within W0.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html
new file mode 100755
index 000000000..bb6cab0a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html
@@ -0,0 +1,131 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04N</B> copy rows of U in A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04N(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04N</B>
+copies M0 rows of U into A and replaces those rows of U
+with columns of W. In addition M1 - M0 columns of  W  are copied into
+rows of U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which rows of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html
new file mode 100755
index 000000000..0209a3689
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html
@@ -0,0 +1,132 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp04T</B> copy columns of U in rows of A and replace them with columns of W.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp04T(</CODE>
+<CODE>const int</CODE>
+<CODE>M0</CODE>,
+<CODE>const int</CODE>
+<CODE>M1</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>W0</CODE>,
+<CODE>const double *</CODE>
+<CODE>W</CODE>,
+<CODE>const int</CODE>
+<CODE>LDW</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp04T</B>
+copies M0 columns of U into rows of A and replaces those
+columns of U with columns of W. In addition M1 - M0 columns of W  are
+copied into U.
+
+<H1>Arguments</H1>
+<PRE>
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+</PRE>
+<PRE>
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of  columns of W that will
+        be copied into U. M1 must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+</PRE>
+<PRE>
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+</PRE>
+<PRE>
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+</PRE>
+<PRE>
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which columns of U are copied.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html
new file mode 100755
index 000000000..f428b7354
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05N</B> copy rows of U into A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05N</B>
+copies rows of  U of global offset LINDXAU into rows of
+A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html
new file mode 100755
index 000000000..fffb9f320
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html
@@ -0,0 +1,98 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp05T</B> copy columns of U into rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp05T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXAU</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp05T</B>
+copies columns of  U of global offset LINDXAU into rows
+of A at positions indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+</PRE>
+<PRE>
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html
new file mode 100755
index 000000000..f28ab48c6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06N</B> swap rows of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06N</B>
+swaps rows of  U  with rows of A at positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html
new file mode 100755
index 000000000..86032a9f4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html
@@ -0,0 +1,92 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp06T</B> swap columns of U with rows of A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp06T(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>LINDXA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp06T</B>
+swaps  columns  of  U  with  rows  of  A  at  positions
+indicated by LINDXA.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+</PRE>
+<PRE>
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html
new file mode 100755
index 000000000..84403ca79
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html
@@ -0,0 +1,77 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlaswp10N</B> performs a series of column interchanges.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlaswp10N(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPIV</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlaswp10N</B>
+performs a sequence  of  local column interchanges on a
+matrix A.  One column interchange is initiated  for columns 0 through
+N-1 of A.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+</PRE>
+<PRE>
+IPIV    (local input)                 const int *
+        On entry, IPIV contains the pivoting information for columns 0..N-1.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html
new file mode 100755
index 000000000..fa1cca5d9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlatcpy</B> B := A^T
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlatcpy(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlatcpy</B>
+copies the transpose of an array A into an array B.
+
+<H1>Arguments</H1>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+</PRE>
+<PRE>
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlacpy.html">HPL_dlacpy</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html
new file mode 100755
index 000000000..c3361f32d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocmax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocmax</B> finds the maximum entry in matrix column.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocmax(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocmax</B>
+finds  the maximum entry in the current column  and packs
+the useful information in  WORK[0:3].  On exit,  WORK[0] contains the
+local maximum  absolute value  scalar,  WORK[1] is the  corresponding
+local row index,  WORK[2]  is the corresponding global row index, and
+WORK[3] is the coordinate of the process owning this max.  When N  is
+less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set
+to the total number of process rows.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html
new file mode 100755
index 000000000..b5c4b74a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpN</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpN</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+no-transpose form (i.e. just like the input matrix itself).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html
new file mode 100755
index 000000000..d31361543
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dlocswpT</B> locally swaps rows within panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dlocswpT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dlocswpT</B>
+performs  the local swapping operations  within a panel.
+The lower triangular  N0-by-N0  upper block of the panel is stored in
+transpose form.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html
new file mode 100755
index 000000000..7886da146
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dmatgen</B> random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dmatgen(</CODE>
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dmatgen</B>
+generates (or regenerates) a random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+</PRE>
+<PRE>
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html
new file mode 100755
index 000000000..c13427f44
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dscal HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dscal</B> x = alpha * x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dscal(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dscal</B>
+scales the vector x by alpha.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html
new file mode 100755
index 000000000..cae6980a6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html
@@ -0,0 +1,84 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dswap HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dswap</B> y &lt;-&gt; x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dswap(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>,
+<CODE>double *</CODE>
+<CODE>Y</CODE>,
+<CODE>const int</CODE>
+<CODE>INCY</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dswap</B>
+swaps the vectors x and y.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+<PRE>
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+</PRE>
+<PRE>
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html
new file mode 100755
index 000000000..3d60e597f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html
@@ -0,0 +1,168 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsm</B> B := A^{-1} * B  or  B := B * A^{-1}.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsm(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double</CODE>
+<CODE>ALPHA</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>B</CODE>,
+<CODE>const int</CODE>
+<CODE>LDB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsm</B>
+solves one of the matrix equations
+ 
+   op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
+ 
+where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+non-unit, upper or lower triangular matrix and op(A) is one of
+ 
+   op( A ) = A   or   op( A ) = A^T.
+ 
+The matrix X is overwritten on B.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies  the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANS==HplNoTrans    : op( A ) = A,                      
+           TRANS==HplTrans      : op( A ) = A^T,                    
+           TRANS==HplConjTrans  : op( A ) = A^T.                    
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+</PRE>
+<PRE>
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+</PRE>
+<PRE>
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+</PRE>
+<PRE>
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dgemm.html">HPL_dgemm</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html
new file mode 100755
index 000000000..3e4703529
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html
@@ -0,0 +1,136 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_dtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_dtrsv</B> x := A^{-1} x.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_dtrsv(</CODE>
+<CODE>const enum HPL_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const enum HPL_UPLO</CODE>
+<CODE>UPLO</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const enum HPL_DIAG</CODE>
+<CODE>DIAG</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_dtrsv</B>
+solves one of the systems of equations
+ 
+    A * x = b,   or   A^T * x = b,
+ 
+where b and x are n-element vectors and  A  is an n by n non-unit, or
+unit, upper or lower triangular matrix.
+ 
+No test for  singularity  or  near-singularity  is included  in  this
+routine. Such tests must be performed before calling this routine.
+
+<H1>Arguments</H1>
+<PRE>
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+</PRE>
+<PRE>
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+</PRE>
+<PRE>
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+</PRE>
+<PRE>
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced either,  but are assumed to be unity.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+</PRE>
+<PRE>
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dger.html">HPL_dger</A>,
+<A HREF="HPL_dgemv.html">HPL_dgemv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html
new file mode 100755
index 000000000..d64ecab99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html
@@ -0,0 +1,115 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_equil HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_equil</B> Equilibrate U and forward the column panel L.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_equil(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_TRANS</CODE>
+<CODE>TRANS</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_equil</B>
+equilibrates  the  local  pieces  of U, so that on exit from
+this function, pieces of U contained in every process row are of the
+same size. This phase makes the rolling phase optimal.  In addition,
+this  function probes  for  the  column panel L and forwards it when
+possible.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+</PRE>
+<PRE>
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+</PRE>
+<PRE>
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html
new file mode 100755
index 000000000..d62b2c871
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_fprintf HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_fprintf</B> fprintf + fflush wrapper.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_fprintf(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_fprintf</B>
+is a wrapper around fprintf flushing the output stream.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_warn.html">HPL_warn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html
new file mode 100755
index 000000000..b42f315c9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html
@@ -0,0 +1,39 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_exit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_exit</B> Exit process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_exit(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_exit</B>
+marks  the process  grid object for  deallocation.  The
+returned  error  code  MPI_SUCCESS  indicates  successful completion.
+Other error codes are (MPI) implementation dependent.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html
new file mode 100755
index 000000000..47f63672d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_info HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_info</B> Retrieve grid information.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_info(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>int *</CODE>
+<CODE>NPROW</CODE>,
+<CODE>int *</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>MYROW</CODE>,
+<CODE>int *</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_info</B>
+returns  the grid shape and the coordinates in the grid
+of the calling process.  Successful  completion  is  indicated by the
+returned error code  MPI_SUCCESS. Other error codes depend on the MPI
+implementation.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html
new file mode 100755
index 000000000..0bec56e6e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html
@@ -0,0 +1,73 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_grid_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_grid_init</B> Create a process grid.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_grid_init(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_ORDER</CODE>
+<CODE>ORDER</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_grid_init</B>
+creates a NPROW x NPCOL  process  grid using column- or
+row-major ordering from an initial collection of processes identified
+by an  MPI  communicator.  Successful  completion is indicated by the
+returned error code MPI_SUCCESS.  Other error codes depend on the MPI
+implementation. The coordinates of processes that are not part of the
+grid are set to values outside of [0..NPROW) x [0..NPCOL).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+</PRE>
+<PRE>
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+</PRE>
+<PRE>
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pnum.html">HPL_pnum</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html
new file mode 100755
index 000000000..f16b296f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html
@@ -0,0 +1,68 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_idamax HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_idamax</B> 1st k s.t. |x_k| = max_i(|x_i|).
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_idamax(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const double *</CODE>
+<CODE>X</CODE>,
+<CODE>const int</CODE>
+<CODE>INCX</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_idamax</B>
+returns  the index in an n-vector  x  of the first element
+having maximum absolute value.
+
+<H1>Arguments</H1>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+</PRE>
+<PRE>
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+</PRE>
+<PRE>
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_daxpy.html">HPL_daxpy</A>,
+<A HREF="HPL_dcopy.html">HPL_dcopy</A>,
+<A HREF="HPL_dscal.html">HPL_dscal</A>,
+<A HREF="HPL_dswap.html">HPL_dswap</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html
new file mode 100755
index 000000000..a3eb758da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html
@@ -0,0 +1,71 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2l</B> Map a global index into a local one.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2l(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2l</B>
+computes  the local index of a matrix entry pointed to by
+the  global index IG.  This  local  returned index is the same in all
+processes.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html
new file mode 100755
index 000000000..d9fa00436
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2lp</B> Map a global index into a local one and a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_indxg2lp(</CODE>
+<CODE>int *</CODE>
+<CODE>IL</CODE>,
+<CODE>int *</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2lp</B>
+computes the local index of a matrix entry pointed to by
+the global  index IG as well as the process coordinate which possesses
+this entry. The local returned index is the same in all processes.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+</PRE>
+<PRE>
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+</PRE>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html
new file mode 100755
index 000000000..0068dede3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html
@@ -0,0 +1,70 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxg2p HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxg2p</B> Map a global index into a process coordinate.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxg2p(</CODE>
+<CODE>const int</CODE>
+<CODE>IG</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxg2p</B>
+computes the process coordinate  which possesses the entry
+of a matrix specified by a global index IG.
+
+<H1>Arguments</H1>
+<PRE>
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html
new file mode 100755
index 000000000..216e98057
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_indxl2g HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_indxl2g</B> Map an index-process pair into a global index.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_indxl2g(</CODE>
+<CODE>const int</CODE>
+<CODE>IL</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_indxl2g</B>
+computes the global index of a matrix  entry  pointed to
+by the local index IL of the process indicated by PROC.
+
+<H1>Arguments</H1>
+<PRE>
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html
new file mode 100755
index 000000000..34feff72c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html
@@ -0,0 +1,155 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_infog2l HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_infog2l</B> global to local index translation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_infog2l(</CODE>
+<CODE>int</CODE>
+<CODE>I</CODE>,
+<CODE>int</CODE>
+<CODE>J</CODE>,
+<CODE>const int</CODE>
+<CODE>IMB</CODE>,
+<CODE>const int</CODE>
+<CODE>MB</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>RSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>CSRC</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROW</CODE>,
+<CODE>const int</CODE>
+<CODE>NPCOL</CODE>,
+<CODE>int *</CODE>
+<CODE>II</CODE>,
+<CODE>int *</CODE>
+<CODE>JJ</CODE>,
+<CODE>int *</CODE>
+<CODE>PROW</CODE>,
+<CODE>int *</CODE>
+<CODE>PCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_infog2l</B>
+computes the starting local index II, JJ corresponding to
+the submatrix starting globally at the entry pointed to by  I,  J.  This
+routine returns the coordinates in the grid of the process owning the
+matrix entry of global indexes I, J, namely PROW and PCOL.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+</PRE>
+<PRE>
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+</PRE>
+<PRE>
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+</PRE>
+<PRE>
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+</PRE>
+<PRE>
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+</PRE>
+<PRE>
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+</PRE>
+<PRE>
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+</PRE>
+<PRE>
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+</PRE>
+<PRE>
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+</PRE>
+<PRE>
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+</PRE>
+<PRE>
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+</PRE>
+<PRE>
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+</PRE>
+<PRE>
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html
new file mode 100755
index 000000000..be87a1f53
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_jumpit HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_jumpit</B> jump into the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_jumpit(</CODE>
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_jumpit</B>
+jumps in the random sequence from the number  X(n) encoded
+in IRANN to the number  X(m)  encoded in  IRANM using the constants A
+and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
+and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
+order to initialize them.
+
+<H1>Arguments</H1>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html
new file mode 100755
index 000000000..0c42d80d8
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ladd HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ladd</B> Adds two long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ladd(</CODE>
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ladd</B>
+adds  without carry two long positive integers  K and J  and
+puts the result into I. The long integers  I, J, K are encoded on 64
+bits using an array of 2 integers.  The 32-lower bits  are stored in
+the  first  entry  of each array,  the 32-higher bits  in the second
+entry.
+
+<H1>Arguments</H1>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html
new file mode 100755
index 000000000..8ef70cba5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html
@@ -0,0 +1,58 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_lmul HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_lmul</B> multiplies 2 long positive integers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_lmul(</CODE>
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>J</CODE>,
+<CODE>int *</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_lmul</B>
+multiplies  without carry two long positive integers K and J
+and puts the result into I. The long integers  I, J, K are encoded on
+64 bits using an array of 2 integers. The 32-lower bits are stored in
+the first entry of each array, the 32-higher bits in the second entry
+of each array. For efficiency purposes, the  intrinsic modulo function
+is inlined.
+
+<H1>Arguments</H1>
+<PRE>
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+</PRE>
+<PRE>
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+</PRE>
+<PRE>
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html
new file mode 100755
index 000000000..da271fc19
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html
@@ -0,0 +1,83 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_logsort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_logsort</B> Sort the processes in logarithmic order.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_logsort(</CODE>
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>,
+<CODE>const int</CODE>
+<CODE>ICURROC</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_logsort</B>
+computes an array  IPMAP  and  its inverse  IPMAPM1  that
+contain  the logarithmic sorted processes id with respect to the local
+number of rows of  U  that they own. This is necessary to ensure that
+the logarithmic spreading of U is optimal in terms of number of steps
+and communication volume as well.  In other words,  the largest pieces
+of U will be sent a minimal number of times.
+
+<H1>Arguments</H1>
+<PRE>
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+</PRE>
+<PRE>
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+</PRE>
+<PRE>
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        this array contains the logarithmic mapping of the processes. In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROCS)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html
new file mode 100755
index 000000000..7cf0b0670
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_max HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_max</B> Combine (max) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_max(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_max</B>
+combines (max) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html
new file mode 100755
index 000000000..9c109c338
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html
@@ -0,0 +1,60 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_min HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_min</B> Combine (min) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_min(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_min</B>
+combines (min) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html
new file mode 100755
index 000000000..fa617cac3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html
@@ -0,0 +1,79 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numroc HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numroc</B> Compute the local number of row/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numroc(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numroc</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index 0.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numrocI.html">HPL_numrocI</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html
new file mode 100755
index 000000000..c1037a193
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_numrocI HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_numrocI</B> Compute the local number of row/columns.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_numrocI(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>I</CODE>,
+<CODE>const int</CODE>
+<CODE>INB</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const int</CODE>
+<CODE>PROC</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCPROC</CODE>,
+<CODE>const int</CODE>
+<CODE>NPROCS</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_numrocI</B>
+returns  the  local number of matrix rows/columns process
+PROC  will  get  if  we give out  N rows/columns starting from global
+index I.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+</PRE>
+<PRE>
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix entry.
+        I must be at least zero.
+</PRE>
+<PRE>
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+</PRE>
+<PRE>
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+</PRE>
+<PRE>
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+</PRE>
+<PRE>
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_indxg2l.html">HPL_indxg2l</A>,
+<A HREF="HPL_indxg2lp.html">HPL_indxg2lp</A>,
+<A HREF="HPL_indxg2p.html">HPL_indxg2p</A>,
+<A HREF="HPL_indxl2g.html">HPL_indxl2g</A>,
+<A HREF="HPL_numroc.html">HPL_numroc</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html
new file mode 100755
index 000000000..89aacbd9f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html
@@ -0,0 +1,57 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pabort HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pabort</B> halts execution.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pabort(</CODE>
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pabort</B>
+displays an error message on stderr and halts execution.
+
+<H1>Arguments</H1>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>,
+<A HREF="HPL_pwarn.html">HPL_pwarn</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html
new file mode 100755
index 000000000..1e8f8106c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html
@@ -0,0 +1,59 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_packL HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_packL</B> Form the MPI structure for the row ring broadcasts.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_packL(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>INDEX</CODE>,
+<CODE>const int</CODE>
+<CODE>LEN</CODE>,
+<CODE>const int</CODE>
+<CODE>IBUF</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_packL</B>
+forms  the MPI data type for the panel to be broadcast.
+Successful  completion  is  indicated  by  the  returned  error  code
+MPI_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+</PRE>
+<PRE>
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+</PRE>
+<PRE>
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+</PRE>
+<PRE>
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html
new file mode 100755
index 000000000..adcc02e00
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html
@@ -0,0 +1,27 @@
+<HTML>
+<HEAD>
+<TITLE>main HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>main</B> HPL main timing program.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>main();</CODE>
+
+<H1>Description</H1>
+<B>main</B>
+is the main driver program for testing the HPL routines.
+This  program is  driven  by  a short data file named  "HPL.dat".
+
+<H1>See Also</H1>
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html
new file mode 100755
index 000000000..f51cee5d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html
@@ -0,0 +1,78 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdfact HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdfact</B> recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdfact(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdfact</B>
+recursively factorizes a  1-dimensional  panel of columns.
+The  RPFACT  function pointer specifies the recursive algorithm to be
+used, either Crout, Left- or Right looking.  NBMIN allows varying the
+recursive stopping criterion in terms of the number of columns in the
+panel, and NDIV allows specifying the number of subpanels each panel
+should be divided into. Usually  a value of 2 will be chosen. Finally
+PFACT is a function pointer specifying the non-recursive algorithm to
+be used  on at most NBMIN columns. One can also choose here between
+Crout, Left- or Right looking.  Empirical tests seem to indicate that
+values of 4 or 8 for NBMIN give the best results.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html
new file mode 100755
index 000000000..ebb9c18e4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html
@@ -0,0 +1,56 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv</B> Solve A x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with  or  without look-ahead.  The  lower  triangular  factor is left
+unpivoted and the pivots are not returned. The right hand side is the
+N+1 column of the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdtrsv.html">HPL_pdtrsv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html
new file mode 100755
index 000000000..c137975d4
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesv0</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesv0(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesv0</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+without look-ahead. The lower triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html
new file mode 100755
index 000000000..1a19edc05
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html
@@ -0,0 +1,62 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK1</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK1(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK1</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html
new file mode 100755
index 000000000..f2a9a25f0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdgesvK2</B> Factor an N x N+1 matrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdgesvK2(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdgesvK2</B>
+factors an N-by-N+1 matrix using LU factorization with row
+partial pivoting.  The main algorithm  is the "right looking" variant
+with look-ahead.  The  lower  triangular factor is left unpivoted and
+the pivots are not returned. The right hand side is the N+1 column of
+the coefficient matrix.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>,
+<A HREF="HPL_binit.html">HPL_binit</A>,
+<A HREF="HPL_bcast.html">HPL_bcast</A>,
+<A HREF="HPL_bwait.html">HPL_bwait</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html
new file mode 100755
index 000000000..94a7f78c0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html
@@ -0,0 +1,252 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdinfo HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdinfo</B> Read input parameter file.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdinfo(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>int *</CODE>
+<CODE>NS</CODE>,
+<CODE>int *</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>NBS</CODE>,
+<CODE>int *</CODE>
+<CODE>NB</CODE>,
+<CODE>HPL_T_ORDER *</CODE>
+<CODE>PMAPPIN</CODE>,
+<CODE>int *</CODE>
+<CODE>NPQS</CODE>,
+<CODE>int *</CODE>
+<CODE>P</CODE>,
+<CODE>int *</CODE>
+<CODE>Q</CODE>,
+<CODE>int *</CODE>
+<CODE>NPFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>PF</CODE>,
+<CODE>int *</CODE>
+<CODE>NBMS</CODE>,
+<CODE>int *</CODE>
+<CODE>NBM</CODE>,
+<CODE>int *</CODE>
+<CODE>NDVS</CODE>,
+<CODE>int *</CODE>
+<CODE>NDV</CODE>,
+<CODE>int *</CODE>
+<CODE>NRFS</CODE>,
+<CODE>HPL_T_FACT *</CODE>
+<CODE>RF</CODE>,
+<CODE>int *</CODE>
+<CODE>NTPS</CODE>,
+<CODE>HPL_T_TOP *</CODE>
+<CODE>TP</CODE>,
+<CODE>int *</CODE>
+<CODE>NDHS</CODE>,
+<CODE>int *</CODE>
+<CODE>DH</CODE>,
+<CODE>HPL_T_SWAP *</CODE>
+<CODE>FSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>TSWAP</CODE>,
+<CODE>int *</CODE>
+<CODE>L1NOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>UNOTRAN</CODE>,
+<CODE>int *</CODE>
+<CODE>EQUIL</CODE>,
+<CODE>int *</CODE>
+<CODE>ALIGN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdinfo</B>
+reads  the  startup  information for the various tests and
+transmits it to all processes.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies the  threshold value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+</PRE>
+<PRE>
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+</PRE>
+<PRE>
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+</PRE>
+<PRE>
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+</PRE>
+<PRE>
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+</PRE>
+<PRE>
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+</PRE>
+<PRE>
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+</PRE>
+<PRE>
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+</PRE>
+<PRE>
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+</PRE>
+<PRE>
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+</PRE>
+<PRE>
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+</PRE>
+<PRE>
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+</PRE>
+<PRE>
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Such a value is
+        either 0 (no lookahead) or greater than zero.
+</PRE>
+<PRE>
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+</PRE>
+<PRE>
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+</PRE>
+<PRE>
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+</PRE>
+<PRE>
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+</PRE>
+<PRE>
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+</PRE>
+<PRE>
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdtest.html">HPL_pdtest</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html
new file mode 100755
index 000000000..c1b51370a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlamch HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlamch</B> determines machine-specific arithmetic constants.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlamch(</CODE>
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>,
+<CODE>const HPL_T_MACH</CODE>
+<CODE>CMACH</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlamch</B>
+determines  machine-specific  arithmetic  constants  such  as
+the relative machine precision (eps),  the safe minimum (sfmin) such that
+1/sfmin does not overflow, the base of the machine (base), the precision
+(prec),  the  number  of  (base)  digits in the  mantissa  (t),  whether
+rounding occurs in addition (rnd = 1.0 and 0.0 otherwise),  the  minimum
+exponent before  (gradual)  underflow (emin),  the  underflow  threshold
+(rmin) - base**(emin-1), the largest exponent before overflow (emax), the
+overflow threshold (rmax)  - (base**emax)*(1-eps).
+
+<H1>Arguments</H1>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+<PRE>
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html
new file mode 100755
index 000000000..0d1affc3d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlange HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlange</B> Compute ||A||.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_pdlange(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const HPL_T_NORM</CODE>
+<CODE>NORM</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>const double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlange</B>
+returns  the value of the one norm,  or the infinity norm,
+or the element of largest absolute value of a distributed matrix A:  
+ 
+ 
+   max(abs(A(i,j))) when NORM = HPL_NORM_A,                          
+   norm1(A),        when NORM = HPL_NORM_1,                          
+   normI(A),        when NORM = HPL_NORM_I,                          
+ 
+where norm1 denotes the one norm of a matrix (maximum column sum) and
+normI denotes  the infinity norm of a matrix (maximum row sum).  Note
+that max(abs(A(i,j))) is not a matrix norm.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaprnt.html">HPL_pdlaprnt</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html
new file mode 100755
index 000000000..0ce810db0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html
@@ -0,0 +1,94 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaprnt</B> Print a distributed matrix A.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaprnt(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>IAROW</CODE>,
+<CODE>const int</CODE>
+<CODE>IACOL</CODE>,
+<CODE>const char *</CODE>
+<CODE>CMATNM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaprnt</B>
+prints  to  standard  error a distributed matrix A. The
+local pieces of  A  are sent to the process of coordinates  (0,0)  in
+the grid and then printed.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+</PRE>
+<PRE>
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+</PRE>
+<PRE>
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html
new file mode 100755
index 000000000..07279fdb0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03N.html">HPL_dlaswp03N</A>,
+<A HREF="HPL_dlaswp04N.html">HPL_dlaswp04N</A>,
+<A HREF="HPL_dlaswp05N.html">HPL_dlaswp05N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html
new file mode 100755
index 000000000..08b8ea770
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html
@@ -0,0 +1,82 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp00T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp00T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp00T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+Bi-directional  exchange  is used to perform the  swap :: broadcast of
+the row  panel U at once, resulting in a lower number of messages than
+usual as well as a lower communication volume. With P process rows and
+assuming  bi-directional links,  the running time of this function can
+be approximated by:
+ 
+   log_2(P) * (lat + NB*LocQ(N) / bdwth)
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  Mono
+directional links will double this communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx0.html">HPL_plindx0</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp02N.html">HPL_dlaswp02N</A>,
+<A HREF="HPL_dlaswp03T.html">HPL_dlaswp03T</A>,
+<A HREF="HPL_dlaswp04T.html">HPL_dlaswp04T</A>,
+<A HREF="HPL_dlaswp05T.html">HPL_dlaswp05T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html
new file mode 100755
index 000000000..2d4772fda
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01N</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01N(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01N</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
+<A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadN.html">HPL_spreadN</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollN.html">HPL_rollN</A>,
+<A HREF="HPL_dlaswp00N.html">HPL_dlaswp00N</A>,
+<A HREF="HPL_dlaswp01N.html">HPL_dlaswp01N</A>,
+<A HREF="HPL_dlaswp06N.html">HPL_dlaswp06N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html
new file mode 100755
index 000000000..f6a5d8c4b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html
@@ -0,0 +1,86 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdlaswp01T</B> Broadcast a column panel L and swap the row panel U.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdlaswp01T(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdlaswp01T</B>
+applies the  NB  row interchanges to  NN columns of the
+trailing submatrix and broadcasts a column panel.
+ 
+A "Spread then roll" algorithm performs  the swap :: broadcast  of the
+row panel U at once,  resulting in a minimal communication volume  and
+a "very good"  use of the connectivity if available.  With  P  process
+rows  and  assuming  bi-directional links,  the  running time  of this
+function can be approximated by:
+ 
+   (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
+ 
+where  NB  is the number of rows of the row panel U,  N is the global
+number of columns being updated,  lat and bdwth  are the latency  and
+bandwidth  of  the  network  for  double  precision real words.  K is
+a constant in (2,3] that depends on the achieved bandwidth  during  a
+simultaneous  message exchange  between two processes.  An  empirical
+optimistic value of K is typically 2.4.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
+<A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>,
+<A HREF="HPL_pipid.html">HPL_pipid</A>,
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_plindx10.html">HPL_plindx10</A>,
+<A HREF="HPL_spreadT.html">HPL_spreadT</A>,
+<A HREF="HPL_equil.html">HPL_equil</A>,
+<A HREF="HPL_rollT.html">HPL_rollT</A>,
+<A HREF="HPL_dlaswp10N.html">HPL_dlaswp10N</A>,
+<A HREF="HPL_dlaswp01T.html">HPL_dlaswp01T</A>,
+<A HREF="HPL_dlaswp06T.html">HPL_dlaswp06T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html
new file mode 100755
index 000000000..28fb95509
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmatgen</B> Parallel random matrix generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmatgen(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>,
+<CODE>double *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>LDA</CODE>,
+<CODE>const int</CODE>
+<CODE>ISEED</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmatgen</B>
+generates (or regenerates) a parallel random matrix A.
+ 
+The  pseudo-random  generator uses the linear congruential algorithm:
+X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
+Programming, Knuth 1973, Vol. 2.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+<PRE>
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+</PRE>
+<PRE>
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+</PRE>
+<PRE>
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html
new file mode 100755
index 000000000..c11d2b2da
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html
@@ -0,0 +1,96 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdmxswp</B> swaps and broadcasts the pivot row.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdmxswp(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>II</CODE>,
+<CODE>const int</CODE>
+<CODE>JJ</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdmxswp</B>
+swaps  and  broadcasts  the  absolute value max row using
+bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by
+ 
+   log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
+ 
+where  lat and bdwth are the latency and bandwidth of the network for
+double precision real elements.  Communication  only  occurs  in  one
+process  column. Mono-directional links  will cause the communication
+cost to double.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+</PRE>
+<PRE>
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+</PRE>
+<PRE>
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html
new file mode 100755
index 000000000..663d2e266
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrN</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in no-transpose form (i.e. just like the input
+matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and gam2-3 is  an  estimate  of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html
new file mode 100755
index 000000000..0e1490430
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpancrT</B> Crout panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpancrT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel  A using the Crout variant of the  usual
+one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
+of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is an  estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html
new file mode 100755
index 000000000..cb78fa4be
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_disp</B> Deallocate a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_disp(</CODE>
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_disp</B>
+deallocates  the  panel  structure  and  resources  and
+stores the error code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html
new file mode 100755
index 000000000..d33e5e400
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html
@@ -0,0 +1,38 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_free</B> Deallocate the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pdpanel_free(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_free</B>
+deallocates  the panel resources  and  stores the error
+code returned by the panel factorization.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html
new file mode 100755
index 000000000..2d105354f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_init</B> Initialize the panel resources.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_init(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_init</B>
+initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies  the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html
new file mode 100755
index 000000000..1b3029ecb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanel_new</B> Create a panel data structure.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanel_new(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>JB</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>A</CODE>,
+<CODE>const int</CODE>
+<CODE>IA</CODE>,
+<CODE>const int</CODE>
+<CODE>JA</CODE>,
+<CODE>const int</CODE>
+<CODE>TAG</CODE>,
+<CODE>HPL_T_panel * *</CODE>
+<CODE>PANEL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanel_new</B>
+creates and initializes a panel data structure.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+</PRE>
+<PRE>
+JB      (global input)                const int
+        On entry,  JB  specifies  the number of columns of the panel.
+        JB must be at least zero.
+</PRE>
+<PRE>
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+</PRE>
+<PRE>
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+</PRE>
+<PRE>
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+</PRE>
+<PRE>
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
+<A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>,
+<A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html
new file mode 100755
index 000000000..386815fd2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllN</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllN</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html
new file mode 100755
index 000000000..04307e823
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanllT</B> Left-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanllT</B>
+factorizes  a panel of columns that is a sub-array of a
+larger one-dimensional panel A  using the Left-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html
new file mode 100755
index 000000000..8d705c63c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html
@@ -0,0 +1,100 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlN</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlN</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in no-transpose form (i.e. just like the
+input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html
new file mode 100755
index 000000000..af458e7a1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdpanrlT</B> Right-looking panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdpanrlT</B>
+factorizes  a panel of columns  that is a sub-array of a
+larger one-dimensional panel A using the Right-looking variant of the
+usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
+block of the panel is stored in transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+ 
+Note that  one  iteration  of  the  main  loop is unrolled. The local
+computation of the absolute value max of the next column is performed
+just after its update by the current column. This allows to bring the
+current column only  once through  cache at each  step.  The  current
+implementation  does not perform  any blocking  for  this sequence of
+BLAS operations, however the design allows for plugging in an optimal
+(machine-specific) specialized  BLAS-like kernel.  This idea has been
+suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html
new file mode 100755
index 000000000..9169c48cc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrN</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrN</B>
+recursively  factorizes  a  panel  of  columns  using  the
+recursive  Crout  variant of the usual one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The  recursive  algorithm  allows indeed to almost
+achieve  Level 3 BLAS  performance  in the panel factorization.  On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html
new file mode 100755
index 000000000..cc9047c3c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpancrT</B> Crout recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpancrT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpancrT</B>
+recursively  factorizes  a panel  of columns using  the
+recursive  Crout  variant  of  the  usual one-dimensional  algorithm.
+The lower triangular N0-by-N0  upper block of the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html
new file mode 100755
index 000000000..bf16e6009
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllN</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllN</B>
+recursively  factorizes  a panel  of columns using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html
new file mode 100755
index 000000000..9904fb326
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanllT</B> Left-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanllT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanllT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Left-looking variant of the one-dimensional algorithm.  The
+lower  triangular  N0-by-N0  upper block  of  the panel  is stored in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words,  and  gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html
new file mode 100755
index 000000000..9758c0722
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlN</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlN</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower triangular  N0-by-N0  upper block  of  the  panel  is stored in
+no-transpose form (i.e. just like the input matrix itself).
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and   gam2-3  is an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlT.html">HPL_pdrpanrlT</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html
new file mode 100755
index 000000000..ed48a815d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdrpanrlT</B> Right-looking recursive panel factorization.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdrpanrlT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>M</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>ICOFF</CODE>,
+<CODE>double *</CODE>
+<CODE>WORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdrpanrlT</B>
+recursively  factorizes  a panel of columns  using  the
+recursive Right-looking variant of the one-dimensional algorithm. The
+lower  triangular  N0-by-N0  upper  block of the panel  is stored  in
+transpose form.
+ 
+Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
+operations  at once  for one column in the panel.  This  results in a
+lower number of slightly larger  messages than usual.  On P processes
+and assuming bi-directional links,  the running time of this function
+can be approximated by (when N is equal to N0):                      
+ 
+   N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
+   N0^2 * ( M - N0/3 ) * gam2-3
+ 
+where M is the local number of rows of  the panel, lat and bdwth  are
+the latency and bandwidth of the network for  double  precision  real
+words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
+rate of execution. The recursive algorithm indeed makes it possible to
+almost achieve Level 3 BLAS performance in the panel factorization. On a
+large  number of modern machines,  this  operation is however latency
+bound,  meaning  that its cost can  be estimated  by only the latency
+portion N0 * log_2(P) * lat.  Mono-directional links will double this
+communication cost.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+</PRE>
+<PRE>
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+</PRE>
+<PRE>
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_dlocmax.html">HPL_dlocmax</A>,
+<A HREF="HPL_dlocswpN.html">HPL_dlocswpN</A>,
+<A HREF="HPL_dlocswpT.html">HPL_dlocswpT</A>,
+<A HREF="HPL_pdmxswp.html">HPL_pdmxswp</A>,
+<A HREF="HPL_pdpancrN.html">HPL_pdpancrN</A>,
+<A HREF="HPL_pdpancrT.html">HPL_pdpancrT</A>,
+<A HREF="HPL_pdpanllN.html">HPL_pdpanllN</A>,
+<A HREF="HPL_pdpanllT.html">HPL_pdpanllT</A>,
+<A HREF="HPL_pdpanrlN.html">HPL_pdpanrlN</A>,
+<A HREF="HPL_pdpanrlT.html">HPL_pdpanrlT</A>,
+<A HREF="HPL_pdrpancrN.html">HPL_pdrpancrN</A>,
+<A HREF="HPL_pdrpancrT.html">HPL_pdrpancrT</A>,
+<A HREF="HPL_pdrpanllN.html">HPL_pdrpanllN</A>,
+<A HREF="HPL_pdrpanllT.html">HPL_pdrpanllT</A>,
+<A HREF="HPL_pdrpanrlN.html">HPL_pdrpanrlN</A>,
+<A HREF="HPL_pdfact.html">HPL_pdfact</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html
new file mode 100755
index 000000000..1c11c34d7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html
@@ -0,0 +1,81 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtest HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtest</B> Perform one test.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtest(</CODE>
+<CODE>HPL_T_test *</CODE>
+<CODE>TEST</CODE>,
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_palg *</CODE>
+<CODE>ALGO</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const int</CODE>
+<CODE>NB</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtest</B>
+performs  one  test  given a set of parameters such as the
+process grid, the  problem size, the distribution blocking factor ...
+This function generates  the data, calls  and times the linear system
+solver,  checks  the  accuracy  of the  obtained vector solution  and
+writes this information to the file pointed to by TEST->outfp.
+
+<H1>Arguments</H1>
+<PRE>
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  &lt; thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+</PRE>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+</PRE>
+<PRE>
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pddriver.html">HPL_pddriver</A>,
+<A HREF="HPL_pdinfo.html">HPL_pdinfo</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html
new file mode 100755
index 000000000..0bb182dc9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html
@@ -0,0 +1,64 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdtrsv</B> Solve triu( A ) x = b.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdtrsv(</CODE>
+<CODE>HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>HPL_T_pmat *</CODE>
+<CODE>AMAT</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdtrsv</B>
+solves an upper triangular system of linear equations.
+ 
+The rhs is the last column of the N by N+1 matrix A. The solve starts
+in the process  column owning the  Nth  column of A, so the rhs b may
+need to be moved one process column to the left at the beginning. The
+routine therefore needs  a column  vector in every process column but
+the one owning  b. The result is  replicated in all process rows, and
+returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
+ 
+The algorithm uses decreasing one-ring broadcast in process rows  and
+columns  implemented  in terms of  synchronous communication point to
+point primitives.  The  lookahead of depth 1 is used to minimize  the
+critical path. This entire operation is essentially "latency" bound
+and an estimate of its running time is given by:
+ 
+   (move rhs) lat + N / ( P bdwth ) +            
+   (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
+              gam2 N^2 / ( P Q ),                
+ 
+where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
+There are  N / NB  diagonal blocks. One must exchange  2  messages of
+length NB to compute the next  NB  entries of the vector solution, as
+well as performing a total of N^2 floating point operations.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html
new file mode 100755
index 000000000..b77cddbce
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNN</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html
new file mode 100755
index 000000000..4ecb1f687
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateNT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateNT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateNT</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html
new file mode 100755
index 000000000..ae735bf84
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTN</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTN</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing
+submatrix (using the panel PANEL).
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. Otherwise (if
+        PBCST is NULL on entry), IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html
new file mode 100755
index 000000000..7c69f8828
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html
@@ -0,0 +1,65 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pdupdateTT</B> Broadcast a panel and update the trailing submatrix.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pdupdateTT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>NN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pdupdateTT</B>
+broadcasts - forwards the panel PBCST and simultaneously
+applies the row interchanges and updates part of the trailing  (using
+the panel PANEL) submatrix.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+</PRE>
+<PRE>
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
+<A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
+<A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
+<A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html
new file mode 100755
index 000000000..9312eb4eb
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_perm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_perm</B> Combine 2 index arrays - Generate the permutation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_perm(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_perm</B>
+combines  two  index  arrays  and generates the corresponding
+permutation. First, this function computes the inverse of LINDXA, and
+then combines it with LINDXAU. Second, in order to be able to perform
+the permutation in place,  LINDXAU  is overwritten by the sequence of
+permutations producing  the  same result.  What we ultimately want to
+achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+call to this function,  this in place permutation can be performed by
+for i in [0..N) swap U[i] with U[LINDXAU[i]].
+
+<H1>Arguments</H1>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+</PRE>
+<PRE>
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+</PRE>
+<PRE>
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutation,  that  should be applied  in increasing order to
+        permute the underlying array U in place.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_plindx1.html">HPL_plindx1</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html
new file mode 100755
index 000000000..e6deb3d93
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html
@@ -0,0 +1,95 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pipid HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pipid</B> Simplify the pivot vector.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pipid(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>int *</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pipid</B>
+computes an array  IPID  that contains the source and final
+destination  of  matrix rows  resulting  from  the  application  of N
+interchanges  as computed by the  LU  factorization  with row partial
+pivoting. The array IPID is such that the row of global index IPID(i)
+should be mapped onto the row of global index IPID(i+1). Note that we
+cannot really know the length of IPID a priori. However, we know that
+this array is at least 2*N long,  since  there are N rows to swap and
+broadcast. The length of this array  must be smaller than or equal to
+4*N, since every row is swapped with at most a single distinct remote
+row. The algorithm constructing  IPID  goes as follows: Let IA be the
+global index of the first row to be swapped.
+ 
+For every row src IA + i with i in [0..N) to be swapped with row  dst
+such that dst is given by DPIV[i]:
+ 
+Is row  src  the destination  of a previous row of the current block,
+that is, is there k odd such that IPID(k) is equal to src ?
+    Yes:  update  this destination  with dst.  For  example,  if  the
+pivot array is  (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+we swap in fact row 0 and 5,  i.e.,  row 0 goes to 5 and not 2  as it
+was thought so far ...
+    No :  add  the pair (src,dst) at the end of IPID; row src has not
+been moved yet.
+ 
+Is row  dst  different  from src the destination of a previous row of
+the current block, i.e., is there k odd such that IPID(k) is equal to
+dst ?
+    Yes:  update  IPID(k) with src.  For example,  if the pivot array
+is (0,5)(1,1)(2,5) ... , then when  we swap rows  2 and 5, we swap in
+fact row 2 and 0,  i.e.,  row 0 goes to 2 and not 5 as it was thought
+so far ...
+    No : add  the  pair (dst,src) at the end of IPID; row dst has not
+been moved yet.
+ 
+Note that when src is equal to dst, the pair (dst,src)  should not be
+added to  IPID  in  order  to avoid duplicated entries in this array.
+During  the construction of the array  IPID,  we  make  sure that the
+first N entries are such that IPID(k) with k odd is equal to  IA+k/2.
+For k in  [0..K/2),  the  row  of global index  IPID(2*k)  should  be
+mapped onto the row of global index IPID(2*k+1).
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html
new file mode 100755
index 000000000..f3dbbcdea
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html
@@ -0,0 +1,187 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx0 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx0</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx0(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>LLEN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx0</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.
+ 
+On entry, the array  IPID  of length K is such that the row of global
+index  IPID(i)  should be mapped onto row of global index  IPID(i+1).
+Let  IA  be the global index of the first row to be swapped. For k in
+[0..K/2), the row of global index IPID(2*k) should be mapped onto the
+row of global index  IPID(2*k+1).  The question then, is to determine
+which rows should ultimately be part of U.
+ 
+First, some rows of the process ICURROW  may be swapped locally.  One
+of these rows belongs to U, the other one belongs to my local piece of
+A.  The other  rows of the current block are swapped with remote rows
+and are thus not part of U. These rows however should be sent  along,
+and  grabbed by the other processes  as we  progress in the  exchange
+phase.
+ 
+So, assume that I am  ICURROW  and consider a row of index  IPID(2*i)
+that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+than N,  this row is locally swapped and should be copied into  U  at
+the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
+locally copied into my local piece of A at the position corresponding
+to the row of global index IPID(2*i+1).
+ 
+If the process  ICURROW does not own  IPID(2*i+1), then row IPID(2*i)
+is to be swapped away and strictly speaking does not belong to U, but
+to  A  remotely.  Since this  process will however send this array U,
+this row is  copied into  U, exactly where the row IPID(2*i+1) should
+go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+IPID(2*i+1); and row  IPID(2*i) is to be copied in U  at the position
+IPID(2*k1+1)-IA.
+ 
+It is thus  important to put the rows that go into U, i.e., such that
+IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+doing so,  U  is formed, and the local copy  is performed in just one
+sweep.
+ 
+Two lists  LINDXA  and  LINDXAU are built.  LINDXA contains the local
+index of the rows I have that should be copied. LINDXAU  contains the
+local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+of A should be locally copied into A(-LINDXAU(k),:).  In the  process
+ICURROW, the initial packing algorithm proceeds as follows.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is in ICURROW,
+        if IPID(2*i+1) is in ICURROW,
+           if( IPID(2*i+1) - IA &lt; N )
+            save corresponding local position
+            of this row (LINDXA);
+            save local position (LINDXAU) in U
+            where this row goes;
+            [copy row IPID(2*i) in U at position
+            IPID(2*i+1)-IA; ];
+           else
+            save corresponding local position of
+            this row (LINDXA);
+            save local position (-LINDXAU) in A
+            where this row goes;
+            [copy row IPID(2*i) in my piece of A
+            at IPID(2*i+1);]
+           end if
+        else
+           find k1 such that IPID(2*k1) = IPID(2*i+1);
+           copy row IPID(2*i) in U at position
+           IPID(2*k1+1)-IA;
+           save corresponding local position of this
+           row (LINDXA);
+           save local position (LINDXAU) in U where
+           this row goes;
+        end if
+     end if
+  end for
+ 
+Second, if I am not the current row process  ICURROW, all source rows
+in IPID that I own are part of U. Indeed,  they  are swapped with one
+row  of  the  current  block  of rows,  and  the  main  factorization
+algorithm proceeds one row after each other.  The processes different
+from ICURROW,  should  exchange and accumulate  those rows until they
+receive some data previously owned by the process ICURROW.
+ 
+In processes different from  ICURROW,  the  initial packing algorithm
+proceeds as follows.  Consider a row of global index IPID(2*i) that I
+own. When I will be receiving data previously owned by ICURROW, i.e.,
+U, row IPID(2*i) should  replace the row in U at pos. IPID(2*i+1)-IA,
+and  this particular row of U should be first copied into my piece of
+A, at A(il,:),  where  il is the  local row  index  corresponding  to
+IPID(2*i). Now, initially, this row will be packed into workspace, say
+as the kth row of  that  work array.  The  following  algorithm  sets
+LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+should be copied. LINDXA(k) stores the local index in  A  where  this
+row of U should be copied, i.e., il.
+ 
+  for all entries in IPID,
+     if IPID(2*i) is not in ICURROW,
+        copy row IPID(2*i) in work array;
+        save corresponding local position
+        of this row (LINDXA);
+        save position (LINDXAU) in U where
+        this row should be copied;
+     end if
+  end for
+ 
+Since we are at it, we also globally figure  out  how many rows every
+process has. That is necessary, because it would rather be cumbersome
+to  figure it on  the fly  during the  bi-directional exchange phase.
+This information is kept in the array  LLEN  of size NPROW. Also note
+that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html
new file mode 100755
index 000000000..0a49ede0b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html
@@ -0,0 +1,130 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx1 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx1</B> Compute local swapping index arrays.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx1(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXA</CODE>,
+<CODE>int *</CODE>
+<CODE>LINDXAU</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>,
+<CODE>int *</CODE>
+<CODE>PERMU</CODE>,
+<CODE>int *</CODE>
+<CODE>IWORK</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx1</B>
+computes two local arrays  LINDXA and  LINDXAU  containing
+the  local  source and final destination position  resulting from the
+application of row interchanges.  In addition, this function computes
+three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
+mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+</PRE>
+<PRE>
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+</PRE>
+<PRE>
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+<PRE>
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+</PRE>
+<PRE>
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html
new file mode 100755
index 000000000..fbfd6be2f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html
@@ -0,0 +1,87 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_plindx10 HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_plindx10</B> Compute the logarithmic maps for the spreading.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_plindx10(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>K</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPID</CODE>,
+<CODE>int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_plindx10</B>
+computes  three arrays  IPLEN,  IPMAP  and  IPMAPM1  that
+contain the logarithmic mapping information for the spreading phase.
+
+<H1>Arguments</H1>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+</PRE>
+<PRE>
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+</PRE>
+<PRE>
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+</PRE>
+<PRE>
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+</PRE>
+<PRE>
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+</PRE>
+<PRE>
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp00N.html">HPL_pdlaswp00N</A>,
+<A HREF="HPL_pdlaswp00T.html">HPL_pdlaswp00T</A>,
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>,
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html
new file mode 100755
index 000000000..8bedc3016
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html
@@ -0,0 +1,54 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pnum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pnum</B> Rank determination.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_pnum(</CODE>
+<CODE>const HPL_T_grid *</CODE>
+<CODE>GRID</CODE>,
+<CODE>const int</CODE>
+<CODE>MYROW</CODE>,
+<CODE>const int</CODE>
+<CODE>MYCOL</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pnum</B>
+determines  the  rank  of a  process  as a function  of  its
+coordinates in the grid.
+
+<H1>Arguments</H1>
+<PRE>
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+</PRE>
+<PRE>
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+</PRE>
+<PRE>
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_grid_init.html">HPL_grid_init</A>,
+<A HREF="HPL_grid_info.html">HPL_grid_info</A>,
+<A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html
new file mode 100755
index 000000000..abef45946
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_ptimer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can  be  disabled  by calling HPL_ptimer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the timer
+functionality, one must have called HPL_ptimer_boot() prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html
new file mode 100755
index 000000000..cffd863b3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_cputime</B>
+returns the cpu time. If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html
new file mode 100755
index 000000000..a509897f1
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_ptimer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_ptimer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_ptimer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
+<A HREF="HPL_ptimer.html">HPL_ptimer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html
new file mode 100755
index 000000000..221d23982
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html
@@ -0,0 +1,63 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_pwarn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_pwarn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_pwarn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_pwarn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pabort.html">HPL_pabort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html
new file mode 100755
index 000000000..5aef6669c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html
@@ -0,0 +1,40 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rand HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rand</B> random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_rand();</CODE>
+
+<H1>Description</H1>
+<B>HPL_rand</B>
+generates  the next number  in the  random  sequence.  This
+function  ensures  that this number lies in the interval (-0.5, 0.5].
+ 
+The static array irand contains the information (2 integers) required
+to generate the  next number  in the sequence  X(n).  This  number is
+computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
+constant d is the largest 64 bit positive integer. The array irand is
+then  updated  for the generation of the next number  X(n+1)  in  the
+random sequence as follows X(n+1) = a * X(n) + c. The constants a and
+c  should have been preliminarily stored in the arrays ias and ics as
+2 pairs of integers.  The initialization of  ias,  ics and  irand  is
+performed by the function HPL_setran.
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html
new file mode 100755
index 000000000..afcb570c5
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_recv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_recv</B> Receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_recv(</CODE>
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>SRC</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_recv</B>
+is a simple wrapper around  MPI_Recv.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html
new file mode 100755
index 000000000..026435ed6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html
@@ -0,0 +1,75 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_reduce HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_reduce</B> Reduce operation.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_reduce(</CODE>
+<CODE>void *</CODE>
+<CODE>BUFFER</CODE>,
+<CODE>const int</CODE>
+<CODE>COUNT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>,
+<CODE>const HPL_T_OP </CODE>
+<CODE>OP</CODE>,
+<CODE>const int</CODE>
+<CODE>ROOT</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_reduce</B>
+performs a global reduce operation across all processes of
+a group.  Note that the input buffer is used as a work array, which in
+all processes but the accumulating one corrupts the original data.
+
+<H1>Arguments</H1>
+<PRE>
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+</PRE>
+<PRE>
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+</PRE>
+<PRE>
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+</PRE>
+<PRE>
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+</PRE>
+<PRE>
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+</PRE>
+<PRE>
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html
new file mode 100755
index 000000000..1e1a49068
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollN</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollN</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html
new file mode 100755
index 000000000..a6ac29336
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html
@@ -0,0 +1,99 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_rollT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_rollT</B> Roll U and forward the column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_rollT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_rollT</B>
+rolls the local arrays containing the local pieces of U, so
+that on exit to this function  U  is replicated in every process row.
+In addition, this function probes for the presence of the column panel
+and forwards it when available.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+</PRE>
+<PRE>
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html
new file mode 100755
index 000000000..6f5b5880c
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html
@@ -0,0 +1,88 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sdrv HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sdrv</B> Send and receive a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_sdrv(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>double *</CODE>
+<CODE>RBUF</CODE>,
+<CODE>int</CODE>
+<CODE>RCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>RTAG</CODE>,
+<CODE>int</CODE>
+<CODE>PARTNER</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sdrv</B>
+is a simple wrapper around MPI_Sendrecv. Its main purpose is
+to allow for some experimentation and tuning of this simple function.
+Messages  of  length  less than  or  equal to zero  are not sent  nor
+received.  Successful completion  is  indicated by the returned error
+code HPL_SUCCESS.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+</PRE>
+<PRE>
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+</PRE>
+<PRE>
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+</PRE>
+<PRE>
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+</PRE>
+<PRE>
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_send.html">HPL_send</A>,
+<A HREF="HPL_recv.html">HPL_recv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html
new file mode 100755
index 000000000..05dcb7e6d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html
@@ -0,0 +1,67 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_send HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_send</B> Send a message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>int</CODE>
+<CODE>HPL_send(</CODE>
+<CODE>double *</CODE>
+<CODE>SBUF</CODE>,
+<CODE>int</CODE>
+<CODE>SCOUNT</CODE>,
+<CODE>int</CODE>
+<CODE>DEST</CODE>,
+<CODE>int</CODE>
+<CODE>STAG</CODE>,
+<CODE>MPI_Comm</CODE>
+<CODE>COMM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_send</B>
+is a simple wrapper around  MPI_Send.  Its  main  purpose is
+to  allow for some  experimentation / tuning  of this simple routine.
+Successful  completion  is  indicated  by  the  returned  error  code
+HPL_SUCCESS.  In the case of messages of length less than or equal to
+zero, this function returns immediately.
+
+<H1>Arguments</H1>
+<PRE>
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+</PRE>
+<PRE>
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+</PRE>
+<PRE>
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+</PRE>
+<PRE>
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+</PRE>
+<PRE>
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_recv.html">HPL_recv</A>,
+<A HREF="HPL_sdrv.html">HPL_sdrv</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html
new file mode 100755
index 000000000..44f37e35e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html
@@ -0,0 +1,52 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_setran HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_setran</B> Manage the random number generator.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_setran(</CODE>
+<CODE>const int</CODE>
+<CODE>OPTION</CODE>,
+<CODE>int *</CODE>
+<CODE>IRAN</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_setran</B>
+initializes  the random generator with the encoding of the
+first number X(0) in the sequence,  and the constants a and c used to
+compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
+a and c are stored in the static variables  irand, ias and ics.  When
+OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the
+values of the input array IRAN.  When OPTION is 3, IRAN is set to the
+current value of irand, and irand is then incremented.
+
+<H1>Arguments</H1>
+<PRE>
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+</PRE>
+<PRE>
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html
new file mode 100755
index 000000000..f0d8f8938
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadN HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadN</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadN(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadN</B>
+spreads the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row. The array IPLEN contains the number of rows of U,  that
+should be spread on any given process row. This function also probes
+for the presence of the column panel PBCST. In case of success, this
+panel will be forwarded.  If  PBCST  is NULL on input,  this probing
+mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01N.html">HPL_pdlaswp01N</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html
new file mode 100755
index 000000000..cec561646
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html
@@ -0,0 +1,120 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_spreadT HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_spreadT</B> Spread row panel U and forward current column panel.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_spreadT(</CODE>
+<CODE>HPL_T_panel *</CODE>
+<CODE>PBCST</CODE>,
+<CODE>int *</CODE>
+<CODE>IFLAG</CODE>,
+<CODE>HPL_T_panel *</CODE>
+<CODE>PANEL</CODE>,
+<CODE>const enum HPL_SIDE</CODE>
+<CODE>SIDE</CODE>,
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>double *</CODE>
+<CODE>U</CODE>,
+<CODE>const int</CODE>
+<CODE>LDU</CODE>,
+<CODE>const int</CODE>
+<CODE>SRCDIST</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPLEN</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAP</CODE>,
+<CODE>const int *</CODE>
+<CODE>IPMAPM1</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_spreadT</B>
+spreads  the local array containing local pieces of U, so
+that on exit to this function,  a piece of  U  is contained in every
+process row.  The array  IPLEN  contains the number of columns of U,
+that should be spread on any given process row.  This function  also
+probes for the presence of  the column panel  PBCST.  If  available,
+this  panel will be forwarded.  If  PBCST  is  NULL  on input,  this
+probing mechanism will be disabled.
+
+<H1>Arguments</H1>
+<PRE>
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+</PRE>
+<PRE>
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+</PRE>
+<PRE>
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+</PRE>
+<PRE>
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+</PRE>
+<PRE>
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+</PRE>
+<PRE>
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+</PRE>
+<PRE>
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+</PRE>
+<PRE>
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+</PRE>
+<PRE>
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+</PRE>
+<PRE>
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+</PRE>
+<PRE>
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_pdlaswp01T.html">HPL_pdlaswp01T</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html
new file mode 100755
index 000000000..be785b99e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html
@@ -0,0 +1,61 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_sum HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_sum</B> Combine (sum) two buffers.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_sum(</CODE>
+<CODE>const int</CODE>
+<CODE>N</CODE>,
+<CODE>const void *</CODE>
+<CODE>IN</CODE>,
+<CODE>void *</CODE>
+<CODE>INOUT</CODE>,
+<CODE>const HPL_T_TYPE</CODE>
+<CODE>DTYPE</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_sum</B>
+combines (sum) two buffers.
+
+<H1>Arguments</H1>
+<PRE>
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+</PRE>
+<PRE>
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+</PRE>
+<PRE>
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+</PRE>
+<PRE>
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_broadcast.html">HPL_broadcast</A>,
+<A HREF="HPL_reduce.html">HPL_reduce</A>,
+<A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
+<A HREF="HPL_barrier.html">HPL_barrier</A>,
+<A HREF="HPL_min.html">HPL_min</A>,
+<A HREF="HPL_max.html">HPL_max</A>,
+<A HREF="HPL_sum.html">HPL_sum</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html
new file mode 100755
index 000000000..8e6a79803
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html
@@ -0,0 +1,49 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer</B> Timer facility.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_timer(</CODE>
+<CODE>const int</CODE>
+<CODE>I</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer</B>
+provides a  "stopwatch"  functionality  cpu/wall  timer  in
+seconds.  Up to  64  separate timers can be functioning at once.  The
+first call starts the timer,  and the second stops it.  This  routine
+can be  disabled  by calling  HPL_timer_disable(),  so that calls to
+the timer are ignored.  This feature can be used to make sure certain
+sections of code do not affect timings,  even  if  they call routines
+which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+the  timer  functionality.  One  can retrieve  the current value of a
+timer by calling
+ 
+t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ 
+where  I  is the timer index in  [0..64).  To  initialize  the  timer
+functionality, one must have called HPL_timer_boot()  prior to any of
+the functions mentioned above.
+
+<H1>Arguments</H1>
+<PRE>
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html
new file mode 100755
index 000000000..0fa9b6575
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html
@@ -0,0 +1,35 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_cputime</B> Return the CPU time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_cputime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_cputime</B>
+returns the cpu time.  If HPL_USE_CLOCK is defined,
+the  clock() function is used to return an approximation of processor
+time used by the program.  The value returned is the CPU time used so
+far as a clock_t;  to get the number of seconds used,  the result  is
+divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
+standard library.  If  HPL_USE_TIMES is defined, the times() function
+is used instead.  This  function  returns  the current process times.
+times() returns the number of clock ticks that have elapsed since the
+system has been up.  Otherwise and by default,  the  standard library
+function getrusage() is used.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html
new file mode 100755
index 000000000..92588e49f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html
@@ -0,0 +1,26 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_timer_walltime</B> Return the elapsed (wall-clock) time.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>double</CODE>
+<CODE>HPL_timer_walltime();</CODE>
+
+<H1>Description</H1>
+<B>HPL_timer_walltime</B>
+returns the elapsed (wall-clock) time.
+
+<H1>See Also</H1>
+<A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
+<A HREF="HPL_timer.html">HPL_timer</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html
new file mode 100755
index 000000000..773df9ae0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html
@@ -0,0 +1,74 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_warn HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_warn</B> displays an error message.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_warn(</CODE>
+<CODE>FILE *</CODE>
+<CODE>STREAM</CODE>,
+<CODE>int</CODE>
+<CODE>LINE</CODE>,
+<CODE>const char *</CODE>
+<CODE>SRNAME</CODE>,
+<CODE>const char *</CODE>
+<CODE>FORM</CODE>,
+<CODE>...</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_warn</B>
+displays an error message.
+
+<H1>Arguments</H1>
+<PRE>
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+</PRE>
+<PRE>
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+</PRE>
+<PRE>
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+</PRE>
+<PRE>
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+</PRE>
+<PRE>
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+</PRE>
+
+<H1>Example</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<PRE>
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_abort.html">HPL_abort</A>,
+<A HREF="HPL_fprintf.html">HPL_fprintf</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html
new file mode 100755
index 000000000..794ae3a8b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html
@@ -0,0 +1,97 @@
+<HTML>
+<HEAD>
+<TITLE>HPL_xjumpm HPL 2.3 Library Functions December 2, 2018</TITLE> 
+</HEAD>
+
+<BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
+      ALINK = "#ffff00">
+
+<H1>Name</H1>
+<B>HPL_xjumpm</B> Compute constants to jump in the random sequence.
+
+<H1>Synopsis</H1>
+<CODE>#include "hpl.h"</CODE><BR><BR>
+<CODE>void</CODE>
+<CODE>HPL_xjumpm(</CODE>
+<CODE>const int</CODE>
+<CODE>JUMPM</CODE>,
+<CODE>int *</CODE>
+<CODE>MULT</CODE>,
+<CODE>int *</CODE>
+<CODE>IADD</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANN</CODE>,
+<CODE>int *</CODE>
+<CODE>IRANM</CODE>,
+<CODE>int *</CODE>
+<CODE>IAM</CODE>,
+<CODE>int *</CODE>
+<CODE>ICM</CODE>
+<CODE>);</CODE>
+
+<H1>Description</H1>
+<B>HPL_xjumpm</B>
+computes  the constants  A and C  to jump JUMPM numbers in
+the random sequence: X(n+JUMPM) = A*X(n)+C.  The constants encoded in
+MULT and IADD  specify  how to jump from one entry in the sequence to
+the next.
+
+<H1>Arguments</H1>
+<PRE>
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over. When JUMPM is less than or equal to zero,
+        A and C are not computed, IRANM is set to IRANN corresponding
+        to a jump of size zero.
+</PRE>
+<PRE>
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+</PRE>
+<PRE>
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+</PRE>
+<PRE>
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+</PRE>
+<PRE>
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+<PRE>
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+</PRE>
+
+<H1>See Also</H1>
+<A HREF="HPL_ladd.html">HPL_ladd</A>,
+<A HREF="HPL_lmul.html">HPL_lmul</A>,
+<A HREF="HPL_setran.html">HPL_setran</A>,
+<A HREF="HPL_jumpit.html">HPL_jumpit</A>,
+<A HREF="HPL_rand.html">HPL_rand</A>.
+
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html
new file mode 100755
index 000000000..9b1d7222e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html
@@ -0,0 +1,299 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Algorithm</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Algorithm</H2>
+
+<STRONG>
+This  page provides  a high-level description of the algorithm used in
+this package. As indicated below,  HPL  contains in fact many possible
+variants for various operations.  Defaults could have been chosen,  or
+even  variants  could  be selected  during  the execution.  Due to the
+performance requirements,  it was  decided  to leave the user with the
+opportunity of choosing,  so that an "optimal" set of parameters could
+easily be experimentally determined for a given machine configuration.
+From a numerical accuracy point of view, <STRONG>all</STRONG> possible
+combinations are rigorously equivalent  to each other  even though the
+result may slightly differ (bit-wise).
+</STRONG><BR><BR>
+
+<UL>
+<LI><A HREF="algorithm.html#main">Main Algorithm</A>
+<LI><A HREF="algorithm.html#pfact">Panel Factorization</A>
+<LI><A HREF="algorithm.html#bcast">Panel Broadcast</A>
+<LI><A HREF="algorithm.html#look_ahead">Look-ahead</A>
+<LI><A HREF="algorithm.html#update">Update</A>
+<LI><A HREF="algorithm.html#trsv">Backward Substitution</A>
+<LI><A HREF="algorithm.html#check">Checking the Solution</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="main">Main Algorithm</A></H3>
+
+This  software  package  solves  a linear system  of order n:  A x = b by
+first  computing  the  LU  factorization with row partial pivoting of the
+n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular
+factor L is applied to b as the factorization progresses, the solution  x
+is obtained  by  solving  the upper triangular system U x = y.  The lower
+triangular  matrix  L  is left unpivoted  and  the array of pivots is not
+returned.<BR><BR>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>
+The  data  is distributed onto a two-dimensional P-by-Q grid of processes
+according  to  the  block-cyclic  scheme  to ensure  "good"  load balance
+as well as  the scalability  of the algorithm.  The  n-by-n+1 coefficient
+matrix is  first  logically partitioned into  nb-by-nb  blocks,  that are
+cyclically "dealt" onto the  P-by-Q  process grid.  This is done  in both
+dimensions of the matrix.</TD>
+<TD ALIGN=CENTER><IMG SRC = "mat2.jpg" BORDER=0 HEIGHT=165 WIDTH=340></TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER><IMG SRC ="main.jpg" BORDER=0 HEIGHT=165 WIDTH=165></TD>
+<TD ALIGN=LEFT>
+The  right-looking  variant  has been chosen for the main loop of the  LU
+factorization.  This  means that at each iteration of the loop a panel of
+nb columns is factorized,  and  the  trailing submatrix is updated.  Note
+that this computation is  thus  logically partitioned with the same block
+size nb that was used for the data distribution.</TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization</A></H3>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=10>
+<TR>
+<TD ALIGN=LEFT>
+At  a given iteration  of the main loop,  and  because of  the  cartesian 
+property of the distribution scheme,  each panel factorization  occurs in
+one column of processes.   This  particular part of the computation  lies
+on the critical path of  the overall algorithm.  The user is  offered the
+choice of three  (Crout, left- and right-looking)  matrix-multiply  based 
+recursive variants. The software also allows the user  to choose  in  how
+many  sub-panels  the current panel  should be divided  into  during  the
+recursion.  Furthermore,  one  can also  select at run-time the recursion
+stopping criterion in terms of the number  of  columns left to factorize.
+When this  threshold is reached,  the sub-panel will  then be  factorized
+using one of the three Crout, left- or right-looking matrix-vector  based 
+variants. Finally, for each panel column the pivot search, the associated
+swap  and broadcast  operation  of  the pivot row  are combined  into one 
+single communication step.  A   binary-exchange  (leave-on-all) reduction
+performs these three operations at once.</TD>
+<TD ALIGN=CENTER><IMG SRC = "pfact.jpg" BORDER=0 HEIGHT=300 WIDTH=160></TD>
+</TR>
+</TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="bcast">Panel Broadcast</A></H3>
+
+Once  the panel factorization has been computed,  this  panel  of columns
+is  broadcast  to the other process columns.   There  are  many  possible 
+broadcast  algorithms  and  the  software currently offers  6 variants to 
+choose from.  These variants are described below assuming  that process 0
+is the source of the broadcast for convenience. "->" means "sends to".
+<UL>
+<LI><STRONG>Increasing-ring</STRONG>:  0 -> 1;  1 -> 2; 2 -> 3 and so on.
+This algorithm is the classic one;  it has  the caveat that process 1 has
+to send a message.
+<CENTER>
+<IMG SRC="1ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-ring (modified)</STRONG>:  0 -> 1;  0 -> 2; 2 -> 3
+and so on. Process 0 sends two messages and process 1  only  receives one
+message. This algorithm is almost always better, if not the best.
+<CENTER>
+<IMG SRC="1rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring</STRONG>:  The Q processes are divided into
+two parts: 0 -> 1 and 0 -> Q/2;  Then processes 1  and Q/2 act as sources
+of two rings: 1 -> 2, Q/2 -> Q/2+1;  2 -> 3, Q/2+1 -> to Q/2+2 and so on.
+This  algorithm has the advantage  of reducing the time by which the last
+process  will  receive  the  panel  at  the  cost  of process 0 sending 2
+messages.
+<CENTER>
+<IMG SRC="2ring.jpg">
+</CENTER>
+
+<LI><STRONG>Increasing-2-ring (modified)</STRONG>:  As  one  may  expect,
+first 0 -> 1,  then  the  Q-1  processes  left are divided into two equal
+parts: 0 -> 2 and 0 -> Q/2;  Processes  2 and Q/2  act then as sources of
+two rings:  2 -> 3,  Q/2 -> Q/2+1; 3 -> 4,  Q/2+1 -> to Q/2+2  and so on.
+This algorithm is probably  the most serious competitor to the increasing
+ring modified variant.
+<CENTER>
+<IMG SRC="2rinM.jpg">
+</CENTER>
+
+<LI><STRONG>Long  (bandwidth  reducing)</STRONG>:  as   opposed   to  the
+previous  variants,  this  algorithm  and  its follower  synchronize  all 
+processes involved in the operation. The message is chopped into  Q equal
+pieces that are scattered  across the Q processes. 
+<CENTER>
+<IMG SRC="spread.jpg">
+</CENTER>
+The pieces are then rolled in Q-1 steps.  The scatter phase uses a binary
+tree and the rolling phase exclusively uses mutual message exchanges.  In
+odd steps 0 <-> 1,  2 <-> 3, 4 <-> 5 and so on;  in even steps Q-1 <-> 0,
+1 <-> 2, 3 <-> 4, 5 <-> 6 and so on.
+<CENTER>
+<IMG SRC="roll.jpg">
+</CENTER>
+More messages are exchanged, however the total volume of communication is
+independent of Q, making this algorithm  particularly suitable for  large
+messages.  This algorithm  becomes  competitive  when the nodes are "very 
+fast" and the network (comparatively) "very slow".<BR><BR>
+
+<LI><STRONG>Long (bandwidth reducing modified)</STRONG>:  same  as above,
+except that 0 -> 1 first,  and then the Long variant is used on processes
+0,2,3,4 .. Q-1.<BR><BR>
+<CENTER>
+<IMG SRC="spreadM.jpg">
+<IMG SRC="rollM.jpg">
+</CENTER>
+
+</UL>
+
+The rings variants are distinguished by a probe mechanism  that activates
+them.  In other words,  a process involved in the broadcast and different
+from  the source asynchronously  probes for the message to receive.  When
+the  message  is  available  the broadcast proceeds,  and  otherwise  the
+function returns.  This allows to interleave the broadcast operation with
+the update phase. This contributes to reduce the idle time spent by those
+processes waiting for the factorized panel.  This  mechanism is necessary
+to accommodate various computation/communication performance ratios.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="look_ahead">Look-ahead</A></H3>
+
+Once the panel has been broadcast or say during this broadcast operation,
+the trailing submatrix is updated  using the last panel in the look-ahead
+pipe: as mentioned before,  the panel factorization  lies on the critical
+path,  which  means  that when the kth panel has been factorized and then 
+broadcast, the next most urgent task to complete is the factorization and
+broadcast of the k+1 th panel.  This technique  is  often  referred to as
+"look-ahead" or "send-ahead" in the literature.  This  package  allows to
+select various "depth" of look-ahead.  By  convention,  a  depth  of zero
+corresponds to no look-ahead, in which case  the  trailing  submatrix  is
+updated by the panel currently broadcast.  Look-ahead consumes some extra
+memory  to  essentially  keep  all the panels of columns currently in the
+look-ahead pipe.  A look-ahead  of depth 1 (maybe 2) is likely to achieve
+the best performance gain.<BR><BR> 
+<HR NOSHADE>
+
+<H3><A NAME="update">Update</A></H3>
+
+The update of the trailing submatrix by the last panel in the  look-ahead
+pipe is made of two phases. First, the pivots must be applied to form the
+current row panel U. U should then be solved by the upper triangle of the
+column panel. U finally needs to be broadcast to each process row so that
+the  local  rank-nb  update  can take place.  We choose  to  combine  the
+swapping and broadcast of  U  at the cost of  replicating the solve.  Two
+algorithms are available for this communication operation.
+<UL>
+<LI><STRONG>Binary-exchange</STRONG>:  this is a modified variant  of the
+binary-exchange (leave on all) reduction operation.  Every process column
+performs the same operation.  The algorithm essentially works as follows.
+It pretends to reduce the row panel U, but at the beginning the only valid
+copy is owned by the current process row.  The  other process  rows  will
+contribute rows of A they own that should be copied in U and replace them
+with rows that were originally in the current process row.  The  complete
+operation is performed in  log(P) steps.  For the sake of simplicity, let us
+assume that  P  is a power of two.  At step k,  process row p exchanges a 
+message with process row p+2^k.  There are  essentially two cases. First,
+one of those two process rows  has received  U  in  a previous step.  The
+exchange occurs.  One process  swaps  its  local rows of  A into U.  Both
+processes copy in  U remote rows of A. Second, none of those process rows
+has received U,  the exchange occurs, and both processes simply add those
+remote rows  to  the list  they have accumulated so far.  At each step, a 
+message  of  the size of  U  is exchanged by at least one pair of process
+rows.<BR><BR>
+
+<LI><STRONG>Long</STRONG>:   this  is   a   bandwidth   reducing  variant
+accomplishing the same task. The row panel is first spread (using a tree)
+among the process rows with respect to the pivot array. This is a scatter
+(V variant for MPI users).  Locally,  every process row  then swaps these
+rows  with  the rows of A it owns and that belong to U.  These  buffers
+are then rolled  (P-1 steps) to finish the broadcast of U.  Every process
+row permutes U and proceeds with the computational part of the update.  A
+couple  of  notes:   process  rows  are  logarithmically   sorted  before
+spreading,  so  that  processes  receiving the largest number of rows are
+first in the tree.  This makes  the communication volume optimal for this
+phase. Finally, before rolling and after the local swap, an equilibration
+phase occurs during  which the local pieces of  U  are  uniformly  spread
+across  the process rows.  A tree-based algorithm is used. This operation
+is necessary to keep the rolling phase optimal  even  when the pivot rows
+are  not  equally distributed  in  process rows.  This  algorithm  has  a 
+complexity  in  terms  of communication volume that solely depends on the 
+size of U.  In particular,  the number of process rows  only  impacts the
+number of messages exchanged.  It  will  thus  outperform   the  previous
+variant for large problems on large machine configurations.<BR><BR>
+
+</UL>
+
+The user can select any of the two variants above.  In addition, a mix is
+possible as well.  The  "binary-exchange"  algorithm will be used when  U
+contains at most a certain number of columns. Choosing at least the block
+size  nb as the threshold value is clearly recommended when look-ahead is
+on.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="trsv">Backward Substitution</A></H3>
+
+The factorization has just now ended, the back-substitution remains to be
+done.  For this,  we  choose  a look-ahead  of  depth  one  variant.  The
+right-hand-side  is  forwarded  in  process  rows  in  a  decreasing-ring 
+fashion,  so that  we solve Q * nb entries at a time.  At each step, this
+shrinking piece of the right-hand-side is updated. The process just above
+the one owning the current diagonal block of the matrix  A  updates first 
+its last nb piece of x,  forwards it to the previous process column, then
+broadcast  it in the process column in a decreasing-ring fashion as well.
+The solution is then updated and sent to the previous process column. The
+solution of the linear system is left replicated in every process row.<BR><BR>
+<HR NOSHADE>
+ 
+<H3><A NAME="check">Checking the Solution</A></H3>
+
+To verify the result obtained,  the input matrix  and right-hand side are
+regenerated.  The  normwise  backward  error  (see formula below) is then
+computed.  A solution  is  considered  as "numerically correct" when this
+quantity  is  less  than  a  threshold  value of the order of 1.0. In the
+expression   below,  eps  is  the  relative  (distributed-memory) machine
+precision.
+
+<UL>
+<LI>|| Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n )
+</UL>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif
new file mode 100755
index 000000000..6508c806f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html
new file mode 100755
index 000000000..934282c81
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html
@@ -0,0 +1,66 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Copyright and Licensing Terms</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Copyright Notice and Licensing Terms</H2>
+
+Redistribution  and  use in  source and binary forms, with or without
+modification, are  permitted provided  that the following  conditions
+are met:                                                             
+<OL>
+<LI>Redistributions  of  source code  must retain the above copyright
+notice, this list of conditions and the following disclaimer.        
+<LI>Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions,  and the following disclaimer in the
+documentation and/or other materials provided with the distribution. 
+<LI>All  advertising  materials mentioning  features  or  use of this
+software must display  the  following  acknowledgement:  This product
+includes   software   developed   at  the  University  of  Tennessee,
+Knoxville, Innovative Computing Laboratory.             
+<LI>The name of the  University, the name of the  Laboratory,  or the
+names  of  its  contributors  may  not  be used to endorse or promote
+products  derived   from   this  software  without  specific  written
+permission.                                                          
+</OL>
+                                                                      
+<H3>Disclaimer</H3>
+
+THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
+SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html
new file mode 100755
index 000000000..152188041
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html
@@ -0,0 +1,304 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Documentation</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Documentation</H2>
+
+The  HPL software distribution comes  with a set of text files explaining
+how to install,  run and tune the software. These files reside in the top
+level  directory  and their names are in upper case.  To  a large extent,
+this page reproduces them. In addition,  man- and HTML-pages are provided
+for every routine in the package. To access the man pages,  one  must add
+hpl/man  to the  MANPATH  environment variable.  The  HTML  pages can be 
+accessed on this site,  or by pointing your browser to your local hpl/www
+directory. Finally,  the source code has been heavily documented. Despite
+all the other documentation efforts, the  source  code remains  the  most
+trustworthy  and truthful piece of information about what goes on in HPL.
+<BR><BR>
+
+<H3>HPL Functions HTML Pages</H3>
+
+<STRONG>Computational Kernels Wrappers</STRONG> When calling the Fortran
+77 BLAS interface, these C functions make it possible to confine the C to
+Fortran 77 interface issues to a small subset of routines.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_idamax.html">         HPL_idamax</A>
+<LI><A HREF = "HPL_dscal.html">          HPL_dscal</A>
+<LI><A HREF = "HPL_dswap.html">          HPL_dswap</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dcopy.html">          HPL_dcopy</A>
+<LI><A HREF = "HPL_daxpy.html">          HPL_daxpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemv.html">          HPL_dgemv</A>
+<LI><A HREF = "HPL_dger.html">           HPL_dger</A>
+<LI><A HREF = "HPL_dtrsv.html">          HPL_dtrsv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dgemm.html">          HPL_dgemm</A>
+<LI><A HREF = "HPL_dtrsm.html">          HPL_dtrsm</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Local Auxiliaries</STRONG> Basic functionality, local swap functions.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_fprintf.html">        HPL_fprintf</A>
+<LI><A HREF = "HPL_warn.html">           HPL_warn</A>
+<LI><A HREF = "HPL_abort.html">          HPL_abort</A>
+<LI><A HREF = "HPL_dlaprnt.html">        HPL_dlaprnt</A>
+<LI><A HREF = "HPL_dlamch.html">         HPL_dlamch</A>
+<LI><A HREF = "HPL_dlacpy.html">         HPL_dlacpy</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlange.html">         HPL_dlange</A>
+<LI><A HREF = "HPL_dlatcpy.html">        HPL_dlatcpy</A>
+<LI><A HREF = "HPL_dlaswp00N.html">      HPL_dlaswp00N</A>
+<LI><A HREF = "HPL_dlaswp01N.html">      HPL_dlaswp01N</A>
+<LI><A HREF = "HPL_dlaswp02N.html">      HPL_dlaswp02N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp03N.html">      HPL_dlaswp03N</A>
+<LI><A HREF = "HPL_dlaswp04N.html">      HPL_dlaswp04N</A>
+<LI><A HREF = "HPL_dlaswp05N.html">      HPL_dlaswp05N</A>
+<LI><A HREF = "HPL_dlaswp06N.html">      HPL_dlaswp06N</A>
+<LI><A HREF = "HPL_dlaswp10N.html">      HPL_dlaswp10N</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dlaswp01T.html">      HPL_dlaswp01T</A>
+<LI><A HREF = "HPL_dlaswp03T.html">      HPL_dlaswp03T</A>
+<LI><A HREF = "HPL_dlaswp04T.html">      HPL_dlaswp04T</A>
+<LI><A HREF = "HPL_dlaswp05T.html">      HPL_dlaswp05T</A>
+<LI><A HREF = "HPL_dlaswp06T.html">      HPL_dlaswp06T</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Parallel Auxiliaries</STRONG> Index computations, parallel basic
+functionality.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_indxg2l.html">        HPL_indxg2l</A>
+<LI><A HREF = "HPL_indxg2lp.html">       HPL_indxg2lp</A>
+<LI><A HREF = "HPL_indxg2p.html">        HPL_indxg2p</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_indxl2g.html">        HPL_indxl2g</A>
+<LI><A HREF = "HPL_infog2l.html">        HPL_infog2l</A>
+<LI><A HREF = "HPL_numroc.html">         HPL_numroc</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_numrocI.html">        HPL_numrocI</A>
+<LI><A HREF = "HPL_pwarn.html">          HPL_pwarn</A>
+<LI><A HREF = "HPL_pabort.html">         HPL_pabort</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdlaprnt.html">       HPL_pdlaprnt</A>
+<LI><A HREF = "HPL_pdlamch.html">        HPL_pdlamch</A>
+<LI><A HREF = "HPL_pdlange.html">        HPL_pdlange</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Grid Management</STRONG>  Most of these routines have a direct
+MPI equivalent.  On new systems,  when the entire MPI functionality is
+not yet readily available, these functions are particularly convenient
+since they rely on a minimal  subset of the MPI standard.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_grid_exit.html">      HPL_grid_exit</A>
+<LI><A HREF = "HPL_grid_info.html">      HPL_grid_info</A>
+<LI><A HREF = "HPL_grid_init.html">      HPL_grid_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_all_reduce.html">     HPL_all_reduce</A>
+<LI><A HREF = "HPL_barrier.html">        HPL_barrier</A>
+<LI><A HREF = "HPL_broadcast.html">      HPL_broadcast</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_reduce.html">         HPL_reduce</A>
+<LI><A HREF = "HPL_max.html">            HPL_max</A>
+<LI><A HREF = "HPL_min.html">            HPL_min</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pnum.html">           HPL_pnum</A>
+<LI><A HREF = "HPL_sum.html">            HPL_sum</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Management</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdpanel_disp.html">   HPL_pdpanel_disp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_free.html">   HPL_pdpanel_free</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_init.html">   HPL_pdpanel_init</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanel_new.html">    HPL_pdpanel_new</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Factorization</STRONG> Recursive (matrix-multiply based) and
+(matrix-vector based) panel factorization.
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_dlocmax.html">        HPL_dlocmax</A>
+<LI><A HREF = "HPL_dlocswpN.html">       HPL_dlocswpN</A>
+<LI><A HREF = "HPL_dlocswpT.html">       HPL_dlocswpT</A>
+<LI><A HREF = "HPL_pdmxswp.html">        HPL_pdmxswp</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpancrN.html">       HPL_pdpancrN</A>
+<LI><A HREF = "HPL_pdpancrT.html">       HPL_pdpancrT</A>
+<LI><A HREF = "HPL_pdrpancrN.html">      HPL_pdrpancrN</A>
+<LI><A HREF = "HPL_pdrpancrT.html">      HPL_pdrpancrT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanllN.html">       HPL_pdpanllN</A>
+<LI><A HREF = "HPL_pdpanllT.html">       HPL_pdpanllT</A>
+<LI><A HREF = "HPL_pdrpanllN.html">      HPL_pdrpanllN</A>
+<LI><A HREF = "HPL_pdrpanllT.html">      HPL_pdrpanllT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdpanrlN.html">       HPL_pdpanrlN</A>
+<LI><A HREF = "HPL_pdpanrlT.html">       HPL_pdpanrlT</A>
+<LI><A HREF = "HPL_pdrpanrlN.html">      HPL_pdrpanrlN</A>
+<LI><A HREF = "HPL_pdrpanrlT.html">      HPL_pdrpanrlT</A>
+<LI><A HREF = "HPL_pdfact.html">         HPL_pdfact</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Panel Broadcast</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_bcast.html">          HPL_bcast</A>
+<LI><A HREF = "HPL_binit.html">          HPL_binit</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_bwait.html">          HPL_bwait</A>
+<LI><A HREF = "HPL_copyL.html">          HPL_copyL</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_packL.html">          HPL_packL</A>
+<LI><A HREF = "HPL_recv.html">           HPL_recv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_sdrv.html">           HPL_sdrv</A>
+<LI><A HREF = "HPL_send.html">           HPL_send</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Update</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_perm.html">           HPL_perm</A>
+<LI><A HREF = "HPL_pipid.html">          HPL_pipid</A>
+<LI><A HREF = "HPL_plindx0.html">        HPL_plindx0</A>
+<LI><A HREF = "HPL_plindx1.html">        HPL_plindx1</A>
+<LI><A HREF = "HPL_plindx10.html">       HPL_plindx10</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_equil.html">          HPL_equil</A>
+<LI><A HREF = "HPL_pdlaswp00N.html">     HPL_pdlaswp00N</A>
+<LI><A HREF = "HPL_pdlaswp01N.html">     HPL_pdlaswp01N</A>
+<LI><A HREF = "HPL_pdlaswp00T.html">     HPL_pdlaswp00T</A>
+<LI><A HREF = "HPL_pdlaswp01T.html">     HPL_pdlaswp01T</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_logsort.html">        HPL_logsort</A>
+<LI><A HREF = "HPL_rollN.html">          HPL_rollN</A>
+<LI><A HREF = "HPL_rollT.html">          HPL_rollT</A>
+<LI><A HREF = "HPL_spreadN.html">        HPL_spreadN</A>
+<LI><A HREF = "HPL_spreadT.html">        HPL_spreadT</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdupdateNN.html">     HPL_pdupdateNN</A>
+<LI><A HREF = "HPL_pdupdateNT.html">     HPL_pdupdateNT</A>
+<LI><A HREF = "HPL_pdupdateTN.html">     HPL_pdupdateTN</A>
+<LI><A HREF = "HPL_pdupdateTT.html">     HPL_pdupdateTT</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Factorization / Look-ahead</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdgesv.html">         HPL_pdgesv</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesv0.html">        HPL_pdgesv0</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK1.html">       HPL_pdgesvK1</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdgesvK2.html">       HPL_pdgesvK2</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Backward Substitution</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pdtrsv.html">         HPL_pdtrsv</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Matrix generation</STRONG> A C version of the ScaLAPACK random
+matrix generator with less functionality though.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_setran.html">         HPL_setran</A>
+<LI><A HREF = "HPL_rand.html">           HPL_rand</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_jumpit.html">         HPL_jumpit</A>
+<LI><A HREF = "HPL_xjumpm.html">         HPL_xjumpm</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ladd.html">           HPL_ladd</A>
+<LI><A HREF = "HPL_lmul.html">           HPL_lmul</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_dmatgen.html">        HPL_dmatgen</A>
+<LI><A HREF = "HPL_pdmatgen.html">       HPL_pdmatgen</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Timers</STRONG> Sequential and parallel timing utilities.
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_timer.html">          HPL_timer</A>
+<LI><A HREF = "HPL_ptimer.html">         HPL_ptimer</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_timer_cputime.html">  HPL_timer_cputime</A>
+<LI><A HREF = "HPL_timer_walltime.html"> HPL_timer_walltime</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_ptimer_cputime.html"> HPL_ptimer_cputime</A>
+<LI><A HREF = "HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>
+</UL></TD></TR></TABLE>
+<HR NOSHADE>
+
+<STRONG>Main Testing / Timing Driver</STRONG>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "HPL_pddriver.html">       HPL_pddriver</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdinfo.html">         HPL_pdinfo</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "HPL_pdtest.html">         HPL_pdtest</A>
+</UL></TD></TR></TABLE>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html
new file mode 100755
index 000000000..24275d2dd
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html
@@ -0,0 +1,116 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Errata-Bugs</TITLE>
+</HEAD>
+
+<BODY
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Errata - Bugs</H2>
+
+<H3>Issues fixed in Version 2.1, October 26th, 2012</H3>
+
+The output now reports exact time stamps before and after the
+execution of the solver function pdgesv().  This allows for
+accurate accounting of running time for data center management
+purposes, for example when reporting power consumption.  This
+is important for the Green500 project.<BR><BR>
+
+Fixed an out-of-bounds access to arrays  in the HPL_spreadN()
+and  HPL_spreadT()  functions.  This  may cause  segmentation
+fault signals. It was reported by Stephen Whalen from Cray.<BR><BR>
+
+<H3>Issues fixed in Version 2.0, September 10th, 2008</H3>
+
+Gregory Bauer  found  a  problem  size  corresponding  to the
+periodicity of the pseudo-random matrix generator used in the
+HPL  timing  program. This causes  the  LU  factorization  to
+detect the singularity of the input matrix as it should have.<BR><BR>
+
+A problem size of 2^17 = 131072 causes columns 14 modulo 2^14
+(i.e. 16384)  (starting from 0)  to be bitwise identical on a
+homogeneous platform.  Every problem size  being a power of 2
+and larger than  2^15  will  feature a similar problem if one
+searches far enough in the columns of the square input matrix.<BR><BR>
+
+The pseudo-random  generator  uses  the  linear  congruential
+algorithm:  X(n+1) = (a * X(n) + c) mod m as described in the
+Art of Computer  Programming, Knuth 1973,  Vol. 2. In the HPL
+case, m is set to 2^31.<BR><BR>
+
+It is very important  to realize that this issue is a problem
+of  the  testing  part  of the  HPL software.  The  numerical
+properties  of the  algorithms  used in the factorization and
+the solve should not be questioned because of this.  In fact,
+this is just the opposite: the factorization demonstrated the
+weakness of the testing part of the software by detecting the
+singularity of the input matrix.<BR><BR>
+
+This issue of  the testing program  is not easy to fix.  This
+pseudo-random  generator  has  very useful properties despite
+this.  It is  thus currently recommended to HPL users willing
+to test matrices of size larger than  2^15  to not use powers
+of two.<BR><BR>
+
+This  issue  has  been fixed by  changing  the  pseudo-random
+matrix  generator.   Now the  periodicity of the generator is
+2^64.<BR><BR>
+
+<H3>Issues fixed in Version 1.0b, December 15th, 2004</H3>
+
+When the matrix size is such that one needs  more  than 16 GB
+per  MPI  rank,  the  intermediate  calculation  (mat.ld+1) *
+mat.nq in  HPL_pdtest.c  ends up  overflowing  because  it is
+done using  32-bit arithmetic.   This issue has been fixed by
+typecasting to size_t; Thanks to John Baron.<BR><BR>
+
+<H3>Issues fixed in Version 1.0a, January 20th, 2004</H3>
+
+The  MPI  process  grid numbering scheme defaults now to row-
+major ordering. This option can now be selected at run time.<BR><BR>
+
+The  inlined  assembly  timer  routine  that  was causing the
+compilation to fail when using gcc version  3.3 and above has
+been removed from the package.<BR><BR>
+
+Various building problems on the T3E have been fixed;  Thanks
+to Edward Anderson.<BR><BR>
+
+<H3>Issues fixed in Version 1.0, September 27th, 2000</H3>
+
+Due to a couple of errors spotted in the  VSIPL  port  of the
+software,  the  distribution  contained  in  the  tar file of
+September 9th, 2000 had been updated on September 27th,  2000
+with a corrected  distribution.  <STRONG>These  problems  did
+not affect the BLAS version of the software in any way.
+</STRONG>  If you are using  the  VSIPL port of HPL,
+and  want  to  make  sure  you are  indeed  using  the latest
+corrected version, please  check  the  date  contained in the
+file HPL.build.log contained in the main directory.<BR><BR>
+
+
+
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html
new file mode 100755
index 000000000..ad853e760
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html
@@ -0,0 +1,126 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Frequently Asked Questions</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Frequently Asked Questions</H2>
+
+<UL>
+<LI><A HREF="faqs.html#pbsize">What problem size N should I run ?</A>
+<LI><A HREF="faqs.html#blsize">What block size NB should I use ?</A>
+<LI><A HREF="faqs.html#grid">What process grid ratio P x Q should I use ?</A>
+<LI><A HREF="faqs.html#1node">What about the one processor case ?</A>
+<LI><A HREF="faqs.html#options">Why so many options in HPL.dat ?</A>
+<LI><A HREF="faqs.html#outperf">Can HPL be outperformed ?</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="pbsize">What problem size N should I run ?</A></H3>
+
+In order  to find out  the  best performance   of  your  system,  the
+largest   problem size  fitting in memory is what you should aim for.
+The  amount  of  memory  used  by  HPL is essentially the size of the 
+coefficient matrix.  So for example, if you have 4 nodes  with 256 Mb
+of memory on each, this corresponds to 1 Gb total, i.e., 125 M double
+precision  (8  bytes)  elements. The  square  root  of that number is
+11585.  One  definitely needs to leave some memory for the OS as well
+as for other things, so a problem size of 10000 is likely to fit.  As
+a rule of thumb, 80 % of the  total amount of memory is a good guess.
+If the problem size you pick is too large,  swapping will occur,  and
+the performance will drop.  If multiple processes are spawned on each
+node  (say  you have 2 processors  per  node),  what  counts  is  the
+available amount of memory to each process.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="blsize">What block size NB should I use ?</A></H3>
+
+HPL  uses  the block size NB for the data distribution as well as for
+the  computational  granularity.  From  a data distribution  point of
+view,  the smaller NB,   the better the load balance.  You definitely
+want  to stay away  from very large values of NB.  From a computation
+point of view,  a too small value of NB  may  limit the computational
+performance by a large factor because almost no data reuse will occur
+in the highest level of the memory hierarchy. The  number of messages
+will  also  increase.  Efficient  matrix-multiply  routines are often 
+internally  blocked.  Small  multiples  of  this  blocking factor are 
+likely to be good block sizes for HPL. The bottom line is that "good"
+block sizes are almost always in the [32 .. 256] interval.  The  best
+values depend on the computation / communication performance ratio of
+your system. To a much lesser extent, the problem size matters as well.
+Say for example, you empirically found that 44 was a good block size
+with respect to performance.  88 or 132  are likely  to give slightly 
+better results  for large problem sizes because of a slightly  higher
+flop rate.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="grid">What process grid ratio P x Q should I use ?</A></H3>
+
+This  depends  on  the  physical  interconnection  network  you have.
+Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3].
+In  other  words,  P  and  Q  should  be approximately equal,  with Q 
+slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5,  3 x 4, 4 x 4,
+4 x 6, 5 x 6, 4 x 8 ...  If  you  are  running  on  a simple Ethernet 
+network,  there  is  only one wire through which all the messages are
+exchanged. On such a network, the performance and scalability of HPL
+are strongly limited and very flat process grids are likely to be the
+best choices: 1 x 4, 1 x 8, 2 x 4 ...<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="1node">What about the one processor case ?</A></H3>
+
+HPL  has  been  designed  to  perform well for large problem sizes on
+hundreds  of  nodes and more.  The software works on one node and for
+large problem sizes, one  can usually achieve pretty good performance
+on a single processor as well.  For small problem sizes  however, the
+overhead  due  to  message-passing,  local  indexing and so on can be 
+significant.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="options">Why so many options in HPL.dat ?</A></H3>
+
+There are quite a few reasons. First off, these options are useful to
+determine what matters and what does not on your system. Second,  HPL
+is often used in the context  of early evaluation of new systems.  In
+such a case, everything is usually not quite working right, and it is
+convenient  to be able  to vary these parameters without recompiling.
+Finally,  every system has its own peculiarities and one is likely to
+be  willing  to  empirically determine the best set of parameters. In
+any   case,  one  can  always  follow  the  advice  provided  in  the
+<A HREF = "tuning.html">tuning  section</A> of this  document and not
+worry about the complexity of the input file.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="outperf">Can HPL be outperformed ?</A></H3>
+
+Certainly.   There  is  always  room  for  performance  improvements.
+Specific knowledge about  a  particular system  is always a source of
+performance   gains.  Even  from  a generic  point  of  view,  better
+algorithms  or  more  efficient  formulation  of the classic ones are
+potential winners.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html
new file mode 100755
index 000000000..a3a53abfe
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html
@@ -0,0 +1,178 @@
+<HTML>
+<HEAD>
+<TITLE>
+HPL - A Portable Implementation of the High-Performance
+Linpack Benchmark for Distributed-Memory Computers
+</TITLE>
+</HEAD>
+ 
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<HR NOSHADE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=CENTER>
+<H3>HPL - A Portable Implementation of the High-Performance Linpack
+Benchmark for Distributed-Memory Computers</H3>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<A HREF     = "http://icl.cs.utk.edu">
+<IMG SRC    = 2-273x48.jpg
+ALT         = "ICL - UTK Computer Science Department"
+BORDER      = 0
+HEIGHT      = 48
+WIDTH       = 273></A>
+</TD>
+</TR>
+</TABLE>
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR>
+<TD ALIGN=LEFT>Version 2.3</TD>
+<TD ALIGN=CENTER>
+<A HREF     = "http://www.cs.utk.edu/~petitet">A. Petitet</A>,
+<A HREF     = "http://www.cs.utk.edu/~rwhaley">R. C. Whaley</A>,
+<A HREF     = "http://www.netlib.org/utk/people/JackDongarra">J. Dongarra</A>,
+<A HREF     = "mailto:cleary1@llnl.gov">A. Cleary</A>
+</TD>
+<TD ALIGN=CENTER>December 2, 2018</TD>
+<TD ALIGN=RIGHT>
+<A HREF="http://www.netlib.org/master_counts2.html#benchmark/hpl"># Accesses</A>
+</TD>
+</TR>
+</TABLE>
+<HR NOSHADE><BR> 
+
+<STRONG>HPL</STRONG> is  a software  package  that solves  a (random)
+dense  linear  system  in  double  precision   (64  bits)  arithmetic
+on  distributed-memory  computers.    It  can  thus  be  regarded  as
+a portable as well as  freely available  implementation  of the  High
+Performance Computing Linpack Benchmark.<BR><BR>
+
+The <STRONG>algorithm</STRONG> used  by HPL  can be summarized by the
+following keywords:  Two-dimensional  block-cyclic data  distribution
+- Right-looking variant  of  the  LU  factorization  with row partial
+pivoting  featuring  multiple  look-ahead depths  -  Recursive  panel
+factorization  with  pivot  search  and  column  broadcast combined -
+Various  virtual  panel  broadcast topologies  -  bandwidth  reducing
+swap-broadcast  algorithm -  backward  substitution  with  look-ahead
+of depth 1.<BR><BR>
+
+The  HPL package  provides  a testing and timing program  to quantify
+the  <STRONG>accuracy</STRONG> of  the obtained solution  as  well as
+the time it took to compute it. The best <STRONG>performance</STRONG>
+achievable by this software on your system depends on a large variety
+of factors.  Nonetheless,  with some restrictive assumptions  on  the
+interconnection  network,   the  algorithm  described  here  and  its
+attached implementation  are <STRONG>scalable</STRONG>  in  the sense
+that their parallel efficiency is maintained  constant  with  respect
+to the per processor memory usage.<BR><BR>
+
+The HPL software package <STRONG>requires</STRONG>  the  availability
+on your system of an implementation of the  Message Passing Interface
+<STRONG>MPI</STRONG> (1.1 compliant).
+An implementation of <STRONG>either</STRONG> the Basic Linear Algebra
+Subprograms   <STRONG>BLAS  or</STRONG>   the   Vector  Signal  Image
+Processing Library <STRONG>VSIPL</STRONG> is also needed.
+Machine-specific as well as generic implementations of
+<A HREF = "links.html#mpi_libs">MPI</A>, the
+<A HREF = "links.html#blas_libs">BLAS</A> and
+<A HREF = "links.html#vsip_libs">VSIPL</A> are available  for a large
+variety of systems.<BR><BR>
+
+<STRONG>Acknowledgements</STRONG>: This work was  supported  in  part
+by  a  grant  from  the  Department  of  Energy's   Lawrence
+Livermore National Laboratory  and  Los  Alamos  National  Laboratory
+as   part  of  the   ASCI  Projects   contract  numbers  B503962  and
+12187-001-00 4R.
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+
+<ADDRESS>
+Innovative Computing Laboratory<BR>
+last revised December 2, 2018<BR>
+</ADDRESS>
+
+<PRE>
+#########################################################################
+
+file    <a href="hpl-2.3.tar.gz">hpl-2.3.tar.gz</a>
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    <a href="hpl-2.2.tar.gz">hpl-2.2.tar.gz</a>
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    <a href="hpl-2.1.tar.gz">hpl-2.1.tar.gz</a>
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    <a href="hpl-2.0.tar.gz">hpl-2.0.tar.gz</a>
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    <a href="hpl.tgz">hpl.tgz</a>
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004
+
+#########################################################################
+
+file    <a href="hpl_qs22-2008-11-30.patch">hpl_qs22-2008-11-30.patch</a>
+for     Implementation of the High-Performance Linpack benchmark for IBM
+,       QS22 systems with PowerXCell 8i processors. The file is a patch
+,       for HPL 1.0a.
+by      IBM
+
+file    <a href="IBM_LICENSE.TXT">IBM_LICENSE.TXT</a>
+for     IBM Copyright notice for QS22 HPL
+by      IBM
+
+file    <a href="IBM_README.txt">IBM_README.txt</a>
+for     README for IBM QS22 HPL
+by      IBM
+Updated: November 30, 2008
+
+
+#########################################################################
+</PRE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html
new file mode 100755
index 000000000..da2639e99
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html
@@ -0,0 +1,89 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Related Links</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Related Links</H2>
+
+<STRONG>The  list  of links below contains some relevant material to this
+work.  This  list  is provided  for illustrative purposes, and  should be
+regarded  as  an initial starting point  for the interested reader.  This
+list is by no means meant to be exhaustive.</STRONG><BR><BR>
+
+<H3><A NAME="mpi_libs">Message Passing Interface (MPI)</A></H3>
+
+MPI  is  a  library  specification  for  message-passing,  proposed  as a
+standard  by  a  broadly  based committee  of  vendors, implementors, and
+users.  Machine-specific (optimized)  as  well  as  freely available  MPI
+libraries  are  available  for  a large  variety of systems.  Browse  the
+<A HREF = "http://www.mcs.anl.gov/mpi">Message  Passing  Interface  (MPI)
+standard web page</A> for more information.<BR><BR>
+
+<H3><A NAME="blas_libs">Basic Linear Algebra Subroutines (BLAS)</A></H3>
+
+The  <A HREF = "http://www.netlib.org/blas">BLAS</A>  are   high  quality
+"building  block"   routines  for  performing  basic  vector  and  matrix 
+operations.  A  lot  of  "BLAS-related"  information can be found at this 
+site.  In  particular,  a  reference implementation  is  available.  This
+reference   implementation  is  <STRONG>not  optimized</STRONG>  for  any
+system, and  it is therefore <STRONG>not  recommended</STRONG>  to use it
+for  benchmarking  purposes.
+However, <A HREF = "http://www.netlib.org/blas/faq.html">machine-specific
+optimized  BLAS  libraries</A> are  available  for  a variety of computer
+systems.   For  further  details,    please  contact  your  local  vendor
+representative.  Alternatively,  one  may  also consider  using automatic
+code  generators such as <A HREF="http://www.netlib.org/atlas">ATLAS</A>.
+This  tool  automatically   generates   a  complete   and  optimized BLAS
+library for a large variety of modern systems.<BR><BR>
+
+<H3><A NAME="vsip_libs">Vector Signal Image Processing Library (VSIPL)</A></H3>
+
+<A HREF = "http://www.vsipl.org">VSIPL</A>  is  an API defined by an open
+standard  comprised of  embedded signal and image processing hardware and
+software  vendors,  academia,  users,  and  government  labs.  A  lot  of
+"VSIPL-related"  information can be found at this site.  In particular, a
+reference implementation is available.  Machine-specific  optimized VSIPL
+libraries are available  for a variety of computer systems.  For  further
+details, please contact your local vendor representative.<BR><BR>
+
+<H3>TOP 500 List</H3>
+
+The  <A HREF  = "http://www.netlib.org/benchmark/top500.html">TOP 500</A>
+is  an  ordered list of the 500 most powerful computer systems worldwide.
+Computers   are   ranked  in  this  list  by  their  performance  on  the 
+<A HREF = "http://www.netlib.org/benchmark/top500/lists/linpack.html">
+LINPACK Benchmark</A>.<BR><BR>
+
+<H3>Parallel Dense Linear Algebra Software Libraries</H3>
+
+Browse the <A HREF="http://www.netlib.org">Netlib software repository</A>
+or  the <A HREF="http://www.nhse.org">National HPCC Software Exchange</A>
+to find a large collection of freely available linear algebra libraries.
+<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg
new file mode 100755
index 000000000..df62edd33
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg
new file mode 100755
index 000000000..25afdc44c
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg
new file mode 100755
index 000000000..33a7e55cb
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html
new file mode 100755
index 000000000..95c6db176
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html
@@ -0,0 +1,276 @@
+<HTML>
+<HEAD>
+<TITLE>HPL References</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL References</H2>
+
+<STRONG>
+The list of references below contains some relevant published material
+to this work.  This list  is  provided  for illustrative purposes, and
+should be regarded  as an initial  starting point  for the  interested
+reader. This list is by no means meant to be exhaustive.
+</STRONG><BR><BR>
+
+The references have been sorted in five categories and chronologically
+listed within each category. The five categories are
+<UL>
+<LI><A HREF="references.html#Linpack_Benchmark">Linpack Benchmark</A>
+<LI><A HREF="references.html#parallel_LUfact">Parallel  LU Factorization</A>
+<LI><A HREF="references.html#recursiv_LUfact">Recursive LU Factorization</A>
+<LI><A HREF="references.html#parallel_matmul">Parallel Matrix Multiply</A>
+<LI><A HREF="references.html#parallel_trsolv">Parallel Triangular Solve</A>
+</UL>
+<HR NOSHADE>
+
+<H3><A NAME="Linpack_Benchmark">Linpack Benchmark</A></H3>
+
+<UL>
+
+<! - 1979 ----------------------------------------------------------- !>
+<LI><I>LINPACK Users Guide</I>, J. Dongarra, J. Bunch, C. Moler and
+G. W. Stewart, SIAM, Philadelphia, PA, 1979.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Performance of Various Computers Using Standard Linear Equations
+Software</I>, J. Dongarra, Technical Report CS-89-85, University of 
+Tennessee, 1989. (An updated version of this report can be found at
+<A HREF="http://www.netlib.org/benchmark/performance.ps">
+http://www.netlib.org/benchmark/performance.ps</A>).
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Towards Peak Parallel LINPACK Performance on 400 Transputers</I>,
+R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991.
+
+<LI><I>Massively Parallel LINPACK Benchmark on the Intel Touchstone 
+DELTA and iPSC/860 Systems</I>, R. van de Geijn, 1991 Annual Users
+Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX,
+1991.
+
+<LI><I>The LINPACK Benchmark on the AP 1000</I>, R. Brent, Frontiers,
+1992, pp. 128-135, McLean, VA, 1992.
+
+<! - 1993 ----------------------------------------------------------- !>
+<LI><I>Implementation of BLAS Level 3 and LINPACK Benchmark on the
+AP1000</I>, R. Brent and P. Strazdins, Fujitsu Scientific and Technical
+Journal, Vol. 5, No. 1, pp. 61-70, 1993.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>LU Factorization and the LINPACK Benchmark on the Intel
+Paragon</I>, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia
+Technical Report, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Massively Parallel Distributed Computing: Worlds First 281
+Gigaflop Supercomputer</I>, J. Bolen, A. Davis, B. Dazey, S. Gupta,
+G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi,
+S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen,
+L. Shuler, from Sandia National Laboratories, Proceedings of the Intel
+Supercomputer Users Group 1995.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>High Performance Software on Intel Pentium Pro Processors or
+Micro-Ops to TeraFLOPS</I>, B. Greer and G. Henry, Proceedings of the
+SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society
+Press - ISBN: 0-89791-985-8, San Jose, CA, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_LUfact">Parallel LU Factorization</A></H3>
+
+<UL>
+
+<! - 1986 ----------------------------------------------------------- !>
+<LI><I>Communication Complexity of the Gaussian Elimination Algorithm
+on Multiprocessors</I>, Y. Saad, Linear Algebra and Its Applications,
+Vol. 77, pp. 315-340, 1986.
+
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>LU Factorization Algorithms on Distributed-Memory Multiprocessor
+Architectures</I>, G. Geist and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 639-649, 1988.
+ 
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>Parallel LU Decomposition on a Transputer Network</I>, 
+R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences,
+Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384,
+pp. 61-77, 1989.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>The Distributed Solution of Linear Systems Using the Torus-Wrap
+Data Mapping</I>, C. Ashcraft, ECA-TR-147, Boeing Computer Services,
+Seattle, WA, 1990.
+
+<LI><I>Experiments with Multicomputer LU-Decomposition</I>, E. van de
+Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>A Taxonomy of Distributed Dense LU Factorization Methods</I>,
+C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>The Torus-Wrap Mapping for Dense Matrix Calculations on Massively
+Parallel Computers</I>, B. Hendrickson and D. Womble, SIAM Journal on
+Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994.
+
+<LI><I>Scalability Issues in the Design of a Library for Dense Linear
+Algebra</I>, J. Dongarra, R. van de Geijn and D. Walker, Journal of
+Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994.
+
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>Matrix Factorization using Distributed Panels on the Fujitsu
+AP1000</I>, P. Strazdins, Proceedings of the IEEE First International
+Conference on Algorithms And Architectures for Parallel Processing
+ICA3PP-95, Brisbane, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>The Design and Implementation of the ScaLAPACK LU, QR, and
+Cholesky Factorization Routines</I>, J. Choi, J. Dongarra, S. Ostrouchov,
+A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5,
+pp. 173-184, 1996.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="recursiv_LUfact">Recursive LU Factorization</A></H3>
+
+<UL>
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Locality of Reference in LU Decomposition with partial
+pivoting</I>, S. Toledo, SIAM Journal on Matrix Anal. Appl., Vol. 18,
+No. 4, 1997.
+
+<LI><I>Recursion Leads to Automatic Variable Blocking for Dense 
+Linear-Algebra Algorithms</I>, F. Gustavson, IBM Journal of Research
+and Development, Vol. 41, No. 6, pp. 737-755, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_matmul">Parallel Matrix Multiply</A></H3>
+
+<UL>
+
+<! - 1987 ----------------------------------------------------------- !>
+<LI><I>Matrix Algorithms on a Hypercube I: Matrix Multiplication</I>,
+G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987.
+
+<! - 1990 ----------------------------------------------------------- !>
+<LI><I>Basic Matrix Subprograms for Distributed-Memory Systems</I>,
+A. Elster, Proceedings of the Fifth Distributed-Memory Computing
+Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990.
+ 
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>The Parallelization of Level 2 and 3 BLAS Operations on
+Distributed-Memory Machines</I>, M. Aboelaze, N. Chrisochoides
+and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette,
+IN, 1991.
+
+<! - 1992 ----------------------------------------------------------- !>
+<LI><I>The Multicomputer Toolbox Approach to Concurrent BLAS and LACS</I>,
+R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the
+Scalable High Performance Computing Conference SHPCC-92, IEEE Computer
+Society Press, 1992.
+
+<! - 1994 ----------------------------------------------------------- !>
+<LI><I>A High Performance Matrix Multiplication Algorithm on a
+Distributed-Memory Parallel Computer, Using Overlapped Communication</I>,
+R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and
+Development, Vol. 38, No. 6, pp. 673-681, 1994.
+
+<LI><I>PUMMA: Parallel Universal Matrix Multiplication Algorithms on
+Distributed-Memory Concurrent Computers</I>, J. Choi, J. Dongarra and
+D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7,
+pp. 543-570, 1994.
+
+<LI><I>Matrix Multiplication on the Intel Touchstone DELTA</I>,
+S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency:
+Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994.
+ 
+<! - 1995 ----------------------------------------------------------- !>
+<LI><I>A Three-Dimensional Approach to Parallel Matrix Multiplication</I>,
+R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal
+of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995.
+
+<! - 1996 ----------------------------------------------------------- !>
+<LI><I>A High Performance Parallel Strassen Implementation</I>,
+B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6,
+No. 1, pp. 3-12, 1996.
+
+<! - 1997 ----------------------------------------------------------- !>
+<LI><I>Parallel Implementation of BLAS: General Techniques for Level
+3 BLAS</I>, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and
+R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9,
+pp. 837-857, 1997.
+
+<LI><I>A Poly-Algorithm for Parallel Dense Matrix Multiplication on
+Two-Dimensional Process Grid Topologies</I>, J. Li, R. Falgout and
+A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5,
+pp. 345-389, 1997.
+
+<LI><I>SUMMA: Scalable Universal Matrix Multiplication Algorithm</I>,
+R. van de Geijn and J. Watts, Concurrency: Practice and Experience,
+Vol. 9, No. 4, pp. 255-274, 1997.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+<HR NOSHADE>
+
+<H3><A NAME="parallel_trsolv">Parallel Triangular Solve</A></H3>
+
+<UL>
+ 
+<! - 1988 ----------------------------------------------------------- !>
+<LI><I>Parallel Solution of Triangular Systems on Distributed-Memory
+Multiprocessors</I>, M. Heath and C. Romine, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, pp. 558-588, 1988.
+
+<LI><I>A Parallel Triangular Solver for a Distributed-Memory
+Multiprocessor</I>, G. Li and T. Coleman, SIAM Journal on Scientific
+and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988.
+
+<! - 1989 ----------------------------------------------------------- !>
+<LI><I>A New Method for Solving Triangular Systems on Distributed-Memory
+Message-Passing Multiprocessors</I>, G. Li and T. Coleman, SIAM Journal
+on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396,
+1989.
+
+<! - 1991 ----------------------------------------------------------- !>
+<LI><I>Parallel Triangular System Solving on a Mesh Network of
+Transputers</I>, R. Bisseling and J. van der Vorst, SIAM Journal
+on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991.
+
+</UL>
+<! ------------------------------------------------------------------ !>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html
new file mode 100755
index 000000000..9a7d8b8af
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html
@@ -0,0 +1,243 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Results</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0>
+<TR><TD ALIGN=LEFT VALIGN=LEFT>
+<IMG SRC    = "aprunner.gif" BORDER=0 HEIGHT=160 WIDTH=220>
+</TD>
+<TD ALIGN=LEFT VALIGN=LEFT>
+<H2>HPL Performance Results</H2>
+
+<STRONG>
+The performance achieved by this software package  on a few machine
+configurations is shown below.  These results are only provided for
+illustrative  purposes.  By the time you read this,  those  systems
+have changed,  they may not even exist anymore  and  one can surely
+not exactly reproduce  the state  in which these machines were when
+those measurements have been obtained.  To obtain  accurate figures
+on your system, it is absolutely necessary to
+<A HREF = "software.html">download the software</A> and run it there.
+</STRONG>
+</TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<TABLE HSPACE=0 VSPACE=0 WIDTH=100% BORDER=0 CELLSPACING=1 CELLPADDING=0><TR>
+<TD><UL>
+<LI><A HREF = "results.html#AMD_K7000">Athlon 4-nodes cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#I550p3000">Intel PIII 8-duals cluster</A>
+</UL></TD><TD><UL>
+<LI><A HREF = "results.html#compaq000">Compaq 64 nodes AlphaServer SC</A>
+</UL></TD>
+</TR></TABLE>
+<HR NOSHADE>
+
+<H3><A NAME="AMD_K7000">4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs
+Switched - 2 NICs per node (channel bonding)</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.2 RedHat (Kernel 2.2.14)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPIch 1.2.1                            </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD><TD>09 / 00                                </TD></TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>1 x 4</TH>
+<TD ALIGN=CENTER> 1.28</TD>
+<TD ALIGN=CENTER> 1.73</TD>
+<TD ALIGN=CENTER> 1.89</TD>
+<TD ALIGN=CENTER> 1.95</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 2</TH>
+<TD ALIGN=CENTER> 1.17</TD>
+<TD ALIGN=CENTER> 1.68</TD>
+<TD ALIGN=CENTER> 1.88</TD>
+<TD ALIGN=CENTER> 1.93</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 1</TH>
+<TD ALIGN=CENTER> 0.81</TD>
+<TD ALIGN=CENTER> 1.43</TD>
+<TD ALIGN=CENTER> 1.70</TD>
+<TD ALIGN=CENTER> 1.80</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 4 nodes.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="I550p3000">8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Linux 6.1 RedHat (Kernel 2.2.15)       </TD></TR>
+<TR><TD>C compiler </TD><TD>gcc (egcs-2.91.66 egcs-1.1.2 release)  </TD></TR>
+<TR><TD>C flags    </TD><TD>-fomit-frame-pointer -O3 -funroll-loops</TD></TR>
+<TR><TD>MPI        </TD><TD>MPI GM (Version 1.2.3)                 </TD></TR>
+<TR><TD>BLAS       </TD><TD>ATLAS (Version 3.0 beta)               </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF="http://icl.cs.utk.edu">UTK / ICL</A> - Torc cluster - 09 / 00</TD>
+</TR>
+</TABLE><P>
+
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER> GRID</TH>
+<TH ALIGN=CENTER> 2000</TH>
+<TH ALIGN=CENTER> 5000</TH>
+<TH ALIGN=CENTER> 8000</TH>
+<TH ALIGN=CENTER>10000</TH>
+<TH ALIGN=CENTER>15000</TH>
+<TH ALIGN=CENTER>20000</TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>2 x 4</TH>
+<TD ALIGN=CENTER> 1.76</TD>
+<TD ALIGN=CENTER> 2.32</TD>
+<TD ALIGN=CENTER> 2.51</TD>
+<TD ALIGN=CENTER> 2.58</TD>
+<TD ALIGN=CENTER> 2.72</TD>
+<TD ALIGN=CENTER> 2.73</TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>4 x 4</TH>
+<TD ALIGN=CENTER> 2.27</TD>
+<TD ALIGN=CENTER> 3.94</TD>
+<TD ALIGN=CENTER> 4.46</TD>
+<TD ALIGN=CENTER> 4.68</TD>
+<TD ALIGN=CENTER> 5.00</TD>
+<TD ALIGN=CENTER> 5.16</TD>
+</TR>
+Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids.
+</TABLE><P>
+</CENTER>
+
+<HR NOSHADE>
+<H3><A NAME="compaq000">Compaq 64 nodes (4 ev67 667 Mhz processors per node)
+AlphaServer SC</A></H3>
+
+<CENTER>
+<TABLE BORDER>
+<TR><TD>OS         </TD><TD>Tru64 Version 5               </TD></TR>
+<TR><TD>C compiler </TD><TD>cc Version 6.1                </TD></TR>
+<TR><TD>C flags    </TD><TD>-arch host -tune host -std -O5</TD></TR>
+<TR><TD>MPI        </TD><TD>-lmpi -lelan                  </TD></TR>
+<TR><TD>BLAS       </TD><TD>CXML                          </TD></TR>
+<TR><TD>Comments   </TD>
+<TD><A HREF = "http://www.nccs.gov/">ORNL / NCCS</A>
+ - falcon - 09 / 00</TD></TR>
+</TABLE><P>
+</CENTER>
+
+In the table below, each row corresponds to a given number of cpus (or
+processors) and nodes.  The first row for example is denoted by 1 / 1,
+i.e.,  1 cpu / 1 node.  Rmax is given in Gflops, and the value of Nmax
+in fact corresponds to  351 Mb per cpu for all machine configurations.<BR><BR>
+
+<CENTER>
+<TABLE BORDER>
+<TR>
+<TH ALIGN=CENTER>    CPUS / NODES     </TH>
+<TH ALIGN=CENTER>       GRID          </TH>
+<TH ALIGN=CENTER>      N 1/2          </TH>
+<TH ALIGN=CENTER>       Nmax          </TH>
+<TH ALIGN=CENTER>    Rmax (Gflops)    </TH>
+<TH ALIGN=CENTER> Parallel Efficiency </TH>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   1 / 1    </TH>
+<TH ALIGN=CENTER>   1 x 1    </TH>
+<TD ALIGN=CENTER>     150    </TD>
+<TD ALIGN=CENTER>    6625    </TD>
+<TD ALIGN=CENTER>   1.136    </TD>
+<TD ALIGN=CENTER>   1.000    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>   4 / 1    </TH>
+<TH ALIGN=CENTER>   2 x 2    </TH>
+<TD ALIGN=CENTER>     800    </TD>
+<TD ALIGN=CENTER>   13250    </TD>
+<TD ALIGN=CENTER>   4.360    </TD>
+<TD ALIGN=CENTER>   0.960    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  16 / 4    </TH>
+<TH ALIGN=CENTER>   4 x 4    </TH>
+<TD ALIGN=CENTER>    2300    </TD>
+<TD ALIGN=CENTER>   26500    </TD>
+<TD ALIGN=CENTER>   17.00    </TD>
+<TD ALIGN=CENTER>   0.935    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER>  64 / 16   </TH>
+<TH ALIGN=CENTER>   8 x 8    </TH>
+<TD ALIGN=CENTER>    5700    </TD>
+<TD ALIGN=CENTER>   53000    </TD>
+<TD ALIGN=CENTER>   67.50    </TD>
+<TD ALIGN=CENTER>   0.928    </TD>
+</TR>
+<TR>
+<TH ALIGN=CENTER> 256 / 64   </TH>
+<TH ALIGN=CENTER>  16 x 16   </TH>
+<TD ALIGN=CENTER>   14000    </TD>
+<TD ALIGN=CENTER>  106000    </TD>
+<TD ALIGN=CENTER>   263.6    </TD>
+<TD ALIGN=CENTER>   0.906    </TD>
+</TR>
+</TABLE><P>
+</CENTER> 
+For Rmax shown in the table, the  parallel efficiency  per  cpu has been
+computed using the performance achieved by  HPL on 1 cpu.  That is fair:
+since the CXML matrix multiply routine was achieving at best 1.24 Gflops
+for large matrix operands on one cpu, it would have been difficult for a
+sequential  Linpack  benchmark  implementation to achieve much more than
+1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb
+per cpu for Nmax),  HPL  scales almost linearly as it should.
+
+<BR><BR>
+The authors acknowledge the use  of the Oak Ridge National Laboratory
+Compaq computer, funded by the Department of Energy's Office
+of Science and Energy Efficiency programs.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg
new file mode 100755
index 000000000..88d2c56af
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg
new file mode 100755
index 000000000..0d7f076fd
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html
new file mode 100755
index 000000000..00bb1a27e
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html
@@ -0,0 +1,200 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Scalability Analysis</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Scalability Analysis</H2>
+
+The <A HREF = "scalability.html#model">machine model</A> used for the
+analysis is first described.  This crude model is then used  to first
+estimate  the  parallel running time  of  the various phases  of  the 
+algorithm namely
+<UL>
+<LI><A HREF="scalability.html#pfact">panel factorization and broadcast</A>,
+<LI><A HREF="scalability.html#updat">trailing submatrix update</A>, 
+<LI><A HREF="scalability.html#backs">backward substitution</A>. 
+</UL>
+Finally <A HREF="scalability.html#total">the  parallel efficiency</A>
+of the entire algorithm is estimated according to this machine model.
+We show that for a given set of parameters HPL is <STRONG>scalable</STRONG>
+not  only  with respect to the amount of computation,  but  also with
+respect to the communication volume.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME = "model">The Machine Model</A></H3>
+
+Distributed-memory computers consist of processors that are connected
+using  a message passing interconnection network.  Each processor has
+its own memory called the local memory,  which  is accessible only to
+that processor.  As the time to access a remote memory is longer than
+the time to access a local one,  such computers are often referred to
+as Non-Uniform Memory Access (NUMA) machines.<BR><BR>
+
+The interconnection network  of our machine model is static,  meaning
+that   it   consists  of  point-to-point  communication  links  among
+processors.  This  type  of  network  is also referred to as a direct
+network as opposed to dynamic networks.  The  latter  are constructed 
+from switches and communication links.  These links  are  dynamically
+connected  to one another by the switching elements to establish,  at
+run time, the paths between processors' memories.<BR><BR>
+ 
+The  interconnection  network  of the two-dimensional  machine  model
+considered here is a static,  fully  connected physical topology.  It
+is also assumed  that  processors  can be treated  equally  in  terms
+of  local performance  and  that  the  communication rate between two
+processors depends on the processors considered.<BR><BR>
+
+Our model assumes  that  a processor can send or receive data on only
+one of its communication ports at a time  (assuming  it has more than
+one). In the literature,  this  assumption is also referred to as the
+one-port communication model.<BR><BR>
+ 
+The time spent to communicate  a message between two given processors
+is called the communication time Tc.   In  our machine model,  Tc  is
+approximated  by  a  linear  function  of  the  number  L  of  double
+precision (64-bits) items communicated.  Tc is the sum of the time to
+prepare the message for transmission (alpha) and the time  (beta * L)
+taken  by the message of length  L  to traverse  the network  to  its 
+destination, i.e.,<BR><BR>
+<CENTER>
+Tc = alpha + beta L.<BR><BR>
+</CENTER>
+
+Finally,   the   model  assumes  that  the  communication  links  are
+bi-directional,  that is,  the time  for two processors  to send each 
+other a message of length L is also Tc.  A processor  can send and/or
+receive  a message on only one of  its communication links at a time.
+In particular, a processor can send a message while receiving another
+message from the processor it is sending to at the same time.<BR><BR>
+ 
+Since this document is only concerned with regular local dense linear
+algebra  operations,  the time taken to perform  one  floating  point 
+operation  is  assumed  to  be  summarized by  three constants  gam1, 
+gam2 and gam3.  These quantities are flop rate approximations of  the
+vector-vector,  matrix-vector  and matrix-matrix operations for  each
+processor.  This  very  crude approximation summarizes all  the steps
+performed  by a processor  to achieve such a computation.  Obviously,
+such a model neglects all the phenomena  occurring  in  the processor
+components,  such as cache misses, pipeline startups, memory load  or
+store, floating point arithmetic and so on,  that  may  influence the
+value  of  these  constants  as  a function  of the  problem size for
+example.<BR><BR>
+ 
+Similarly,  the model  does  not make any assumption on the amount of
+physical memory per node.  It  is  assumed that if a process has been
+spawned on a processor,  one  has  ensured  that  enough  memory  was
+available  on that processor. In other words, swapping will not occur
+during the modeled computation.<BR><BR>
+ 
+<STRONG>
+This  machine  model  is  a very crude approximation that is designed
+specifically  to  illustrate  the cost of the dominant factors of our
+particular case.<BR><BR>
+</STRONG>
+<HR NOSHADE>
+
+<H3><A NAME="pfact">Panel Factorization and Broadcast</A></H3>
+
+Let us consider an  M-by-N  panel distributed over a P-process column.
+Because  of the recursive formulation of the panel factorization,  it
+is  reasonable to consider  that  the floating point operations  will
+be performed at matrix-matrix multiply "speed".  For  every column in
+the panel a binary-exchange is performed on 2*N data items. When this
+panel is broadcast,  what  matters  is the time that the next process
+column  will  spend  in this  communication operation.  Assuming  one
+chooses the <A HREF="algorithm.html#bcast">increasing-ring (modified)
+variant</A>,  only  one  message needs to be taken into account.  The
+execution  time  of the panel factorization and broadcast can thus be
+approximated by:<BR><BR>
+<CENTER>
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) +
+alpha + beta M N / P.<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="updat">Trailing Submatrix Update</A></H3>
+
+Let  us consider the  update  phase  of an  N-by-N  trailing  submatrix
+distributed on a P-by-Q process grid.  From  a computational point of
+view one has to (triangular) solve N right-hand-sides  and  perform a 
+local rank-NB update of this trailing submatrix. Assuming one chooses
+the <A HREF="algorithm.html#update">long variant</A>,  the  execution
+time of the update operation can be approximated by:<BR><BR>
+<CENTER>
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) +
+alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.<BR><BR>
+</CENTER>
+The constant "3" in front of the "beta" term is obtained  by counting
+one for the (logarithmic) spread phase and two for the rolling phase;
+In the case of bi-directional links  this constant 3 should therefore
+be only a 2.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="backs">Backward Substitution</A></H3>
+
+The number of floating point operations performed during the backward
+substitution is given by  N^2 / (P*Q).  Because of the lookahead, the
+communication cost  can be approximated at each step  by two messages
+of length NB, i.e.,  the time  to  communicate  the NB-piece  of  the 
+solution vector from one diagonal block of the matrix to another.  It
+follows that the execution time of the backward substitution  can  be
+approximated by:<BR><BR>
+<CENTER>
+Tbacks( N, NB ) = gam2 N^2  / (P Q) + N ( alpha / NB + 2 beta ).<BR><BR>
+</CENTER>
+<HR NOSHADE>
+
+<H3><A NAME="total">Putting it All Together</A></H3>
+
+The total execution time of the algorithm described above is given by<BR><BR>
+<CENTER>
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] +
+Tbacks( N, NB ).<BR><BR>
+</CENTER>
+That is, by considering only the dominant terms in alpha, beta and
+gam3:<BR><BR>
+<CENTER>
+Thpl = 2 gam3 N^3  / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) +
+alpha N ((NB + 1) log(P) + P) / NB.<BR><BR>
+</CENTER>
+The serial execution time is given by Tser = 2 gam3 N^3  / 3. If we
+define the parallel efficiency  E  as the ratio  Tser / ( P Q Thpl ), we
+obtain:<BR><BR>
+<CENTER>
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) +
+3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).<BR><BR>
+</CENTER>
+This  last equality  shows  that when the memory usage per  processor
+N^2 / (P Q)  is maintained  constant, the parallel efficiency  slowly
+decreases  only  because of the alpha term.  The communication volume
+(the beta term) however remains constant.  Due to these results,  HPL
+is said to be <STRONG>scalable</STRONG> not only with respect  to the
+amount of computation,  but also  with  respect  to the communication
+volume.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html
new file mode 100755
index 000000000..34d82b2b7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html
@@ -0,0 +1,109 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Software</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Software</H2>
+
+<H3>Download and Installation</H3>
+
+<OL>
+<LI>Download    the  <A HREF="hpl-2.3.tar.gz">tar-gzipped  file</A>,
+issue  then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar"  and  this
+should create an  hpl-2.3  directory  containing  the  distribution.
+We call this directory the top level directory.
+
+<LI>Create a  file  Make.&#60arch&#62  in  the  top-level directory.
+For  this purpose,  you  may  want  to  re-use  one contained in the 
+setup  directory.  This Make.&#60arch&#62 file  essentially contains
+the compilers, libraries, and their paths to be used on your system. 
+
+<LI>Type  "make arch=&#60arch&#62". This should create an executable
+in the bin/&#60arch&#62 directory called xhpl.  For example,  on our
+Linux  PII  cluster,  I create  a file called Make.Linux_PII in  the
+top-level  directory.  Then,  I  type  "make  arch=Linux_PII".  This
+creates  the executable file bin/Linux_PII/xhpl. 
+
+<LI>Quick check:  run  a few  tests  (assuming  you have 4 nodes for
+interactive use)  by  issuing  the  following  commands from the top
+level  directory:  "cd bin/&#60arch&#62 ;  mpirun -np 4 xhpl".  This
+should produce quite a bit of meaningful output on the screen.
+ 
+<LI>Most  of  the  performance parameters can be tuned, by modifying
+the input file bin/&#60arch&#62/HPL.dat. See the
+<A HREF = "tuning.html">tuning page</A>  or  the  TUNING file in the
+top-level directory.
+</OL>
+<HR NOSHADE>
+
+<H3>Compile Time Options</H3>
+
+At  the  end  of  the "model" Make.&#60arch&#62,  the  user is given
+the  opportunity  to override  some default  compile options of this
+software. The list of these options and their meaning is:<BR><BR>
+
+<CENTER>
+<TABLE WIDTH=80% BORDER>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_COPY_L</STRONG></TD>
+<TD ALIGN=LEFT>force the copy of the panel L before bcast</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_CBLAS</STRONG></TD>
+<TD ALIGN=LEFT>call the BLAS C interface</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_CALL_VSIPL</STRONG></TD>
+<TD ALIGN=LEFT>call the vsip library</TD></TR>
+<TR><TD ALIGN=LEFT><STRONG>-DHPL_DETAILED_TIMING</STRONG></TD>
+<TD ALIGN=LEFT>enable detailed timers</TD></TR>
+</TABLE><P>
+</CENTER>
+
+The user must choose between either the  BLAS  Fortran 77 interface,
+or the BLAS C interface, or the  VSIPL  library depending  on  which
+computational kernels are available on his system. Only one of these
+options should be selected.  If  you  choose  the  BLAS  Fortran  77
+interface,  it is necessary  to fill out  the machine-specific  C to
+Fortran 77 interface section of the  Make.&#60arch&#62  file.  To do
+this,  please  refer  to the Make.&#60arch&#62 examples contained in
+the setup directory.<BR><BR>
+
+By default HPL will:
+<UL>
+<LI>not copy L before broadcast,
+<LI>call the BLAS Fortran 77 interface,
+<LI>not display detailed timing information.
+</UL>
+
+As an example,  suppose one wants this software to copy the panel of
+columns  into  a contiguous buffer  before broadcasting.  It  should
+be  more efficient  to let  the software create the appropriate  MPI
+user-defined data type  since this may avoid the data copy.  So,  it
+is a strange idea,  but one insists.  To achieve this  one would add
+-DHPL_COPY_L  to  the definition of  HPL_OPTS at the end of the file
+Make.&#60arch&#62.  Issue   then  a  "make clean arch=&#60arch&#62 ; 
+make build arch=&#60arch&#62"  and  the executable  will be re-built
+with that feature in.<BR><BR>
+
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg
new file mode 100755
index 000000000..56c255a3f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg
new file mode 100755
index 000000000..433e4c077
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html
new file mode 100755
index 000000000..fbbf17fb7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html
@@ -0,0 +1,476 @@
+<HTML>
+<HEAD>
+<TITLE>HPL Tuning</TITLE>
+</HEAD>
+
+<BODY 
+BGCOLOR     = "WHITE"
+BACKGROUND  = "WHITE"
+TEXT        = "#000000"
+VLINK       = "#000099"
+ALINK       = "#947153"
+LINK        = "#0000ff">
+
+<H2>HPL Tuning</H2>
+
+After  having built the executable hpl/bin/&#60arch&#62/xhpl,
+one may want to modify the input data file HPL.dat. This file
+should  reside  in  the  same  directory  as  the  executable
+hpl/bin/&#60arch&#62/xhpl.   An example   HPL.dat   file   is 
+provided by default. This file contains information about the
+problem sizes, machine configuration,  and algorithm features
+to be used by the executable.  It is  31  lines long. All the
+selected  parameters  will be printed in the output generated
+by the executable.<BR><BR>
+
+We first describe the meaning of each line of this input file
+below.  Finally,  <A HREF="tuning.html#tips">a   few   useful 
+experimental guide lines</A>  to set up the file are given at
+the end of this page.<BR><BR>
+<HR NOSHADE>
+
+<H3><A NAME="desc">Description of the HPL.dat File</A></H3>
+
+<STRONG>Line 1</STRONG>:  (unused) Typically  one  would  use
+this line for its own good.  For example,  it  could  be used
+to summarize the content of the input file.  By  default this 
+line reads:
+<TT><PRE>
+HPL Linpack benchmark input file
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 2</STRONG>:  (unused) same as line 1. By default
+this line reads:
+<TT><PRE>
+Innovative Computing Laboratory, University of Tennessee
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 3</STRONG>:  the  user  can   choose  where  the
+output  should  be  redirected to.  In the case of a file,  a
+name  is necessary, and this is  the line  where one wants to 
+specify it.  Only the first name on this line is significant.
+By default, the line reads:
+<TT><PRE>
+HPL.out  output file name (if any)
+</PRE></TT>
+ 
+This  means  that if  one chooses to redirect the output to a
+file, the file will be called "HPL.out". The rest of the line
+is unused, and this space can be used for an informative comment on
+the meaning of this line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 4</STRONG>: This line specifies where the output
+should go.  The  line  is  formatted,  it  must  begin with a 
+positive integer,  the rest is insignificant. 3  choices  are
+possible  for  the  positive integer: 6 means that the output
+will go to the standard output, 7  means that the output will
+go to the standard error.  Any  other integer means that  the
+output should be redirected to a file,  whose  name has  been
+specified  in the line above. This line by default reads:
+<TT><PRE>
+6        device out (6=stdout,7=stderr,file)
+</PRE></TT>
+which  means  that  the  output generated  by  the executable
+should be redirected to the standard output.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 5</STRONG>: This  line  specifies  the number of
+problem sizes to be executed. This number should be less than
+or equal to 20.  The first  integer is significant,  the rest
+is ignored. If the line reads:
+<TT><PRE>
+3        # of problems sizes (N)
+</PRE></TT>
+this  means  that  the user is willing to run 3 problem sizes
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 6</STRONG>: This line specifies the problem sizes
+one wants to run.  Assuming  the  line  above  started with 3,
+the  3  first positive  integers  are significant, the rest is
+ignored. For example:
+<TT><PRE>
+3000 6000 10000    Ns
+</PRE></TT>
+means that one wants xhpl to run 3 (specified in line 5)
+problem sizes, namely 3000, 6000 and 10000.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 7</STRONG>: This line  specifies  the number  of
+block sizes to be run.  This number should be less than   or
+equal to 20.  The first integer  is significant,  the rest is
+ignored. If the line reads:
+<TT><PRE>
+5        # of NBs
+</PRE></TT>
+this means that the user is willing to use 5 block sizes that
+will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 8</STRONG>:  This line specifies the block sizes
+one  wants  to run.  Assuming  the  line above started with 5,
+the  5  first positive integers  are  significant, the rest is 
+ignored. For example:
+<TT><PRE>
+80 100 120 140 160 NBs
+</PRE></TT>
+means  that  one  wants  xhpl  to use 5 (specified in line 7)
+block sizes, namely 80, 100, 120, 140 and 160.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 9</STRONG>:  This  line specifies  how  the  MPI
+processes  should be mapped  onto the nodes of your platform.
+There are currently two possible mappings,  namely  row-  and
+column-major. This feature is mainly useful  when these nodes
+are themselves multi-processor computers. A row-major mapping
+is recommended.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 10</STRONG>: This line specifies  the  number of
+process grids to be run.  This  number  should be less than
+or equal to 20. The first integer is significant, the rest is
+ignored. If the line reads:
+<TT><PRE>
+2        # of process grids (P x Q)
+</PRE></TT>
+this  means  that you are willing to try 2 process grid sizes 
+that will be specified in the next line.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Line 11-12</STRONG>:  These  two  lines  specify  the  
+number of process rows  and  columns of each grid you want to
+run on.  Assuming the line above (10)  started with 2,  the 2
+first  positive integers of those two lines  are significant,
+the rest  is ignored. For example:
+<TT><PRE>
+1 2          Ps
+6 8          Qs
+</PRE></TT>
+means that one wants to run  xhpl  on  2  process grids (line
+10), namely 1-by-6 and 2-by-8. Note: In  this example,  it is
+required then  to  start  xhpl  on  at  least  16  nodes (max
+of Pi-by-Qi).  The runs on the two grids will be consecutive.
+If one was starting xhpl on more than 16 nodes, say 52,  only
+6 would be used for the first grid (1x6)  and  then 16  (2x8)
+would  be used for the second grid. The fact that you started
+the MPI job on 52 nodes, will not make  HPL  use all of them.
+In this example,  only 16 would be used.  If one wants to run 
+xhpl  with  52  processes  one needs  to specify a grid of 52
+processes, for example the following lines would do the job:
+<TT><PRE>
+4  2         Ps
+13 8         Qs
+</PRE></TT>
+ 
+<HR NOSHADE>
+<STRONG>Line 13</STRONG>: This line specifies  the  threshold
+to which the residuals should be compared with. The residuals
+should be of order 1, but are  in practice slightly less than
+this, typically 0.001.  This  line  is made of a real number,
+the rest is not significant. For example:
+<TT><PRE>
+16.0         threshold
+</PRE></TT>
+In practice,  a value of  16.0  will  cover  most cases.  For
+various reasons,  it  is possible  that some of the residuals
+become slightly larger, say for example 35.6.  xhpl will flag
+those runs  as  failed,  however  they  can be  considered as
+correct. A run should be considered as failed if the residual
+is a few orders of magnitude bigger than 1, for example 10^6 or
+more. Note:  if one was  to specify  a threshold of  0.0, all
+tests  would be flagged  as failed, even though the answer is
+likely  to  be  correct.  It is allowed to specify a negative 
+value for this threshold,  in which case  the checks  will be 
+by-passed,  no matter what the threshold value is, as soon as
+it  is  negative.  This  feature  allows  to  save  time when 
+performing a lot of experiments,  say for instance during the
+tuning phase. Example:
+<TT><PRE>
+-16.0        threshold
+</PRE></TT>
+ 
+<HR NOSHADE>
+The remaining lines allow one to specify algorithmic features.
+xhpl  will  run  all  possible combinations of those for each
+problem  size,  block size, process grid combination. This is
+handy  when one looks for an "optimal" set of parameters.  To
+understand  a little bit better,  let us first say a few words
+about  the algorithm implemented in HPL. Basically this is  a
+right-looking  version  with  row-partial pivoting. The panel
+factorization is matrix-matrix operation based and recursive,
+dividing the panel into  NDIV  subpanels  at each step.  This
+part  of  the   panel   factorization  is  denoted  below  by
+"recursive  panel  fact.  (RFACT)".  The recursion stops when
+the  current panel  is made of less  than or equal  to  NBMIN
+columns. At that point, xhpl uses a  matrix-vector  operation
+based  factorization  denoted   below  by  "PFACTs".  Classic
+recursion  would  then  use  NDIV=2,   NBMIN=1.   There   are
+essentially   3   numerically  equivalent  LU   factorization 
+algorithm  variants  (left-looking, Crout and right-looking).
+In HPL, one can choose  every one of those for the  RFACT, as
+well as the PFACT.  The following lines of HPL.dat allow  you
+to set those parameters.<BR><BR>
+<STRONG>Lines 14-21: (Example 1)</STRONG>
+<TT><PRE>
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+ 
+This  example  would  try all variants of PFACT, 4 values for
+NBMIN,  namely 1, 2, 4 and 8,  3 values for NDIV namely 2,  3 
+and 4, and all variants for RFACT.<BR><BR>
+<STRONG>Lines 14-21: (Example 2)</STRONG>
+<TT><PRE>
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+This example  would  try  2  variants  of  PFACT namely right
+looking and left looking, 2 values for NBMIN, namely 4 and 8,
+1 value for NDIV namely 2, and one variant for RFACT.<BR><BR>
+ 
+<HR NOSHADE>
+In the  main loop  of the algorithm,  the  current  panel  of
+columns is broadcast  in process rows  using  a virtual  ring
+topology. HPL offers various choices and one most likely wants
+to use the increasing ring modified encoded as 1. 3 and 4 are
+also good choices.<BR><BR>
+<STRONG>Lines 22-23: (Example 1)</STRONG>
+<TT><PRE>
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause HPL  to broadcast the current panel using the
+increasing ring modified topology.<BR><BR>
+<STRONG>Lines 22-23: (Example 2)</STRONG>
+<TT><PRE>
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+This will cause  HPL to broadcast the current panel using the
+increasing   ring  virtual  topology  and  the  long  message
+algorithm.<BR><BR>
+ 
+<HR NOSHADE>
+<STRONG>Lines 24-25</STRONG> allow to specify  the look-ahead
+depth used by HPL.  A depth of 0  means  that  the next panel
+is  factorized  after  the  update  by  the  current panel is
+completely finished.   A  depth of  1  means  that  the  next
+panel  is  immediately  factorized  after being updated.  The 
+update  by  the  current panel is then finished. A depth of k
+means that the k next panels are factorized immediately after
+being updated.  The  update  by  the  current  panel  is then 
+finished.  It  turns out that a depth of 1  seems to give the
+best results,  but  may need a large problem size  before one
+can  see  the performance  gain. So use 1, if you do not know
+better,  otherwise  you  may want  to  try 0.  Look-ahead  of
+depths 3  and  larger  will  probably  not  give  you  better
+results.<BR><BR>
+<STRONG>Lines 24-25: (Example 1):</STRONG>
+<TT><PRE>
+1       # of lookahead depth
+1       DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depth 1.<BR><BR>
+<STRONG>Lines 24-25: (Example 2):</STRONG>
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+This will cause HPL to use a look-ahead of depths 0 and 1.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Lines 26-27</STRONG>  allow  to  specify  the  swapping
+algorithm  used  by  HPL for  all tests.  There  are  currently
+two  swapping  algorithms   available,  one  based  on  "binary
+exchange"  and  the   other  one   based  on   a  "spread-roll"
+procedure  (also  called   "long"  below).  For  large  problem
+sizes, this last one is likely to be more efficient.  The  user
+can also choose to mix both variants, that is "binary-exchange"
+for a number of columns less  than a threshold value,  and then
+the  "spread-roll" algorithm.  This  threshold  value  is  then 
+specified on Line 27.<BR><BR>
+<STRONG>Lines 26-27: (Example 1):</STRONG>
+<TT><PRE>
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm.  Note  that a threshold  is specified  in
+that example but not used by HPL.<BR><BR>
+<STRONG>Lines 26-27: (Example 2):</STRONG>
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+</PRE></TT>
+This  will  cause  HPL  to  use  the "long" or  "spread-roll" 
+swapping algorithm  as soon as there are more than 60 columns
+in the row panel. Otherwise, the "binary-exchange"  algorithm
+will be used instead.<BR><BR>
+
+<HR NOSHADE>
+<STRONG>Line 28</STRONG>  allows  to specify whether the upper
+triangle  of  the  panel  of  columns  should   be  stored  in
+no-transposed  or transposed form. Example:
+<TT><PRE>
+0            L1 in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 29</STRONG> allows  to specify whether the panel 
+of rows  U  should be stored in  no-transposed  or transposed 
+form. Example:
+<TT><PRE>
+0            U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 30</STRONG> enables / disables the equilibration 
+phase. This option  will not be used unless you selected 1 or
+2 in Line 26. Example:
+<TT><PRE>
+1            Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<HR NOSHADE>
+<STRONG>Line 31</STRONG> allows  to  specify the alignment in
+memory for the memory  space  allocated  by  HPL.  On  modern
+machines, one probably wants to use  4,  8  or 16.  This  may 
+result in a tiny amount of memory wasted. Example:
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+
+<HR NOSHADE>
+<H3><A NAME="tips">Guide Lines</A></H3>
+
+<OL>
+<LI>Figure  out  a  good block size  for  the matrix multiply
+routine.  The best method  is to try a few out. If you happen
+to know  the block size  used  by the matrix-matrix  multiply
+routine,  a  small  multiple of that block size will do fine.
+This particular topic is discussed in the
+<A HREF="faqs.html#blsize">FAQs</A> section.<BR><BR>
+
+<LI>The process mapping  should  not matter  if  the nodes of
+your platform are single processor computers.  If these nodes
+are multi-processors, a row-major mapping is recommended.<BR><BR>
+
+<LI>HPL likes "square" or slightly flat process grids. Unless
+you  are using  a very small process grid, stay away from the 
+1-by-Q and P-by-1 process grids. This particular topic is also
+discussed in the <A HREF="faqs.html#grid">FAQs</A> section.<BR><BR>
+
+<LI>Panel factorization  parameters:  a  good  start  are the
+following for the lines 14-21:
+<TT><PRE>
+1       # of panel fact
+1       PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+</PRE></TT>
+
+<LI>Broadcast parameters: at this time it is far from obvious
+to me what the best setting is,  so I would probably try them
+all.  If  I  had  to guess  I would probably  start  with the 
+following for the lines 22-23:
+<TT><PRE>
+2       # of broadcast
+1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+</PRE></TT>
+The best broadcast depends  on your problem size and hardware
+performance. My take is that 4 or 5  may be  competitive  for
+machines  featuring  very  fast nodes  comparatively  to  the 
+network.<BR><BR>
+
+<LI>Look-ahead depth: as mentioned above 0 or 1 are likely to 
+be the best choices.  This also  depends  on the problem size
+and machine configuration, so I would try "no look-ahead (0)"
+and "look-ahead of depth 1 (1)". That is for lines 24-25:
+<TT><PRE>
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+</PRE></TT>
+
+<LI>Swapping: one can select only one of the three algorithms
+in the input file. Theoretically, mix (2) should win, however
+long (1) might just be good enough. The  difference should be
+small between those two assuming  a swapping threshold of the 
+order of the block size (NB) selected. If  this  threshold is
+very large, HPL will use bin_exch (0) most of the time and if
+it  is  very  small  (< NB) long (1)  will always be used. In 
+short  and  assuming  the  block size (NB)  used is say 60, I 
+would choose for the lines 26-27:
+<TT><PRE>
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold 
+</PRE></TT>
+I would also try the long variant.  For  a very  small number 
+of processes  in every column of the process grid  (say < 4),
+very little performance difference should be observable.<BR><BR>
+
+<LI>Local storage: I do not think Line 28 matters.  Pick 0 in
+doubt. Line 29 is more important.  It controls  how the panel
+of rows should be stored. No doubt 0 is better. The caveat is
+that in that case the matrix-multiply function is called with
+( Notrans, Trans, ... ), that is C := C - A B^T.   Unless the 
+computational  kernel  you are using  has  a very poor  (with
+respect to performance) implementation of that case,  and  is
+much more efficient with  ( Notrans, Notrans, ... ) just pick
+0 as well.  So, my choice:
+<TT><PRE>
+0       L1 in (0=transposed,1=no-transposed) form
+0       U  in (0=transposed,1=no-transposed) form
+</PRE></TT>
+
+<LI>Equilibration: It  is hard to tell  whether equilibration
+should always be performed or not. Not knowing much about the
+random matrix generated  and because the overhead is so small
+compared to the possible gain, I turn it on all the time.
+<TT><PRE>
+1       Equilibration (0=no,1=yes)
+</PRE></TT>
+
+<LI>For alignment, 4 should be plenty,  but just to be safe,
+one may want to pick 8 instead.
+<TT><PRE>
+8       memory alignment in double (> 0)
+</PRE></TT>
+</OL>
+ 
+<HR NOSHADE>
+<CENTER>
+<A HREF = "index.html">            [Home]</A>
+<A HREF = "copyright.html">        [Copyright and Licensing Terms]</A>
+<A HREF = "algorithm.html">        [Algorithm]</A>
+<A HREF = "scalability.html">      [Scalability]</A>
+<A HREF = "results.html">          [Performance Results]</A>
+<A HREF = "documentation.html">    [Documentation]</A>
+<A HREF = "software.html">         [Software]</A>
+<A HREF = "faqs.html">             [FAQs]</A>
+<A HREF = "tuning.html">           [Tuning]</A>
+<A HREF = "errata.html">           [Errata-Bugs]</A>
+<A HREF = "references.html">       [References]</A>
+<A HREF = "links.html">            [Related Links]</A><BR>
+</CENTER>
+<HR NOSHADE>
+</BODY>
+</HTML>
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md b/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md
new file mode 100644
index 000000000..dbf4629dc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md
@@ -0,0 +1,146 @@
+# SYCLomatic Tool: Migrate hplinpack APP
+## Use the command line to migrate a large code base.
+The SYCLomatic project (the open source version of Intel® DPC++ Compatibility Tool) can migrate projects that contain multiple source and header files.
+| Optimized for         | Description
+|:---                   |:---
+| OS                    | Linux* Ubuntu* 22.04
+| Software              | Intel® DPC++ Compatibility Tool
+| What you will learn   | Simple invocation of dpct to migrate CUDA code
+| Time to complete      | 15 minutes
+
+
+# Purpose
+The SYCLomatic tool can migrate projects composed with multiple source and header files.
+Use the dpct **--in-root** option to set the root location of the application you have prepared for migration. Only the files under this specified root will be considered for migration. Files located outside the **--in-root** directory will be considered system files or library files and will not be migrated.
+
+The dpct **--out-root** option specifies the directory into which the SYCL*-compliant code produced by the dpct tool is written. The relative paths and file names are kept, except that the file extensions are changed to **.dp.cpp**.
+
+
+# Key Implementation Details
+Besides the --in-root and --out-root options, there are additional options that can help migrate the code more smoothly: [Command Line Options Reference](https://software.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/command-line-options-reference.html).
+
+
+
+## Migrating the CUDA Sample to Data Parallel C++ with the Intel® DPC++ Compatibility Tool
+
+Building and running the CUDA sample is not required to migrate this project
+to a SYCL*-compliant project.
+
+> **Note**: Certain CUDA header files, referenced by the CUDA application
+> source files to be migrated, need to be accessible for the migration step.
+> See *Before you Begin* in [Get Started with the Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/develop/documentation/get-started-with-intel-dpcpp-compatibility-tool/top.html#top_BEFORE_YOU_BEGIN).
+
+> **Note**: If you have not already done so, set up your CLI
+> environment by sourcing  the `setvars` script located in
+> the root of your oneAPI installation.
+>
+> Linux*:
+> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `$ bash -c 'source <install-dir>/setvars.sh ; exec csh'`
+>
+> Windows*:
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
+>
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or MacOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
+
+
+### Command-Line on a Linux* System
+
+1. This sample project contains a simple CUDA program, located in the ```cuda``` directory and its ```src``` sub-directory.
+
+2. Use the **intercept-build** tool to intercept the build step and generate the compilation database file `compile_commands.json` in the same folder.
+``` sh
+$ cd cuda/hp-2.3
+$ intercept-build make
+```
+2. Use the tool's `--in-root` option and provide input files to specify where
+   to locate the CUDA files that need migration; use the tool’s `--out-root`
+   option to designate where to generate the resulting files (default is `dpct_output`); use the tool's `-p` option to specify the compilation database used to migrate the whole project:
+
+```sh
+# From the cuda directory as root directory:
+$ dpct --in-root=. --out-root=out --cuda-include-path=/usr/local/cuda/include -p . --gen-build-script
+```
+
+> If an `--in-root` option is not specified, the directory of the first input
+> source file is implied. If `--out-root` is not specified, `./dpct_output`
+> is implied.
+
+You should see the migrated files in the `out` folder that was specified
+by the `--out-root` option.
+
+3. To build the migrated app, the generated Makefile.dpct needs to be updated. The relevant lines of the original file are shown below:
+
+
+```make
+5 #DPCT2001:4: You can link with more library by add them here.
+6 LIB :=
+7
+8 FLAGS :=
+9
+......
+582 TARGET :=  ${TARGET_0} ${TARGET_1} ${TARGET_2}
+......
+589 $(TARGET_0): $(OBJS_0)
+590         $(CC) -fsycl -o $@ $^ $(LIB) -qmkl
+......
+628 $(TARGET_1): $(OBJS_1)
+629         ar -r $@ $^ $(LIB) -qmkl
+......
+1009 $(TARGET_2): $(OBJS_2)
+1010         $(CC) -fsycl -o $@ $^ $(LIB) -qmkl
+1011
+1012 $(TARGET_2_OBJ_0):$(TARGET_2_SRC_0)
+1013         cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0)
+1014
+1015 $(TARGET_2_OBJ_1):$(TARGET_2_SRC_1)
+1016         cc -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1)
+```
+Change them to:
+``` make
+5 #DPCT2001:4: You can link with more library by add them here.
+6 LIB := -lmpi
+7
+8 FLAGS := -fPIC
+9
+......
+582 TARGET :=   ${TARGET_1} ${TARGET_2} ${TARGET_0}
+......
+589 $(TARGET_0): $(OBJS_0)
+590         $(CC) -fsycl -o $@ $^ $(LIB) -qmkl libdgemm.so.1.0.1 ../lib/intel64/libhpl.a
+627
+628 $(TARGET_1): $(OBJS_1)
+629         ar -r $@ $^ $(LIB)
+630
+1008
+1009 $(TARGET_2): $(OBJS_2)
+1010         $(CC) -fPIC -shared -fsycl -o $@ $^ $(LIB) -qmkl
+1011
+1012 $(TARGET_2_OBJ_0):$(TARGET_2_SRC_0)
+1013         cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0)
+1014
+1015 $(TARGET_2_OBJ_1):$(TARGET_2_SRC_1)
+1016         icpx -c  ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1)
+1017
+```
+Run ```vimdiff Makefile.dpct Makefile.dpct.patched``` in the `out` folder to see the detailed changes.
+
+
+4. Build the migrated code with the generated Makefile.dpct
+```
+$ make -f Makefile.dpct
+# Make sure the oneAPI package is installed before building the application, so that the oneAPI DPC++ compiler is available.
+```
+
+# Example Output
+
+To run the migrated application, follow the instructions in the [README](https://github.com/oneapi-src/Velocity-Bench/blob/main/hplinpack/README.md).
+
+If an error occurs, troubleshoot the problem using the Diagnostics Utility for Intel® oneAPI Toolkits.
+[Learn more](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
+
+## License
+Code samples are licensed under the GNU General Public License version 2. See
+[License.txt](https://github.com/oneapi-src/Velocity-Bench/blob/main/hplinpack/LICENSE.md) for details.